churn_analysis.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package main
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/src-d/go-git.v4"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  14. "gopkg.in/src-d/hercules.v9"
  15. )
  16. // ChurnAnalysis contains the intermediate state which is mutated by Consume(). It should implement
  17. // hercules.LeafPipelineItem.
  18. type ChurnAnalysis struct {
  19. // No special merge logic is required
  20. hercules.NoopMerger
  21. // Process each merge only once
  22. hercules.OneShotMergeProcessor
  23. TrackPeople bool
  24. global []editInfo
  25. people map[int][]editInfo
  26. // references IdentityDetector.ReversedPeopleDict
  27. reversedPeopleDict []string
  28. }
  29. type editInfo struct {
  30. Tick int
  31. Added int
  32. Removed int
  33. }
  34. // ChurnAnalysisResult is returned by Finalize() and represents the analysis result.
  35. type ChurnAnalysisResult struct {
  36. Global Edits
  37. People map[string]Edits
  38. }
  39. type Edits struct {
  40. Ticks []int
  41. Additions []int
  42. Removals []int
  43. }
  44. const (
  45. ConfigChurnTrackPeople = "Churn.TrackPeople"
  46. )
  47. // Analysis' name in the graph is usually the same as the type's name, however, does not have to.
  48. func (churn *ChurnAnalysis) Name() string {
  49. return "ChurnAnalysis"
  50. }
  51. // LeafPipelineItem-s normally do not act as intermediate nodes and thus we return an empty slice.
  52. func (churn *ChurnAnalysis) Provides() []string {
  53. return []string{}
  54. }
  55. // Requires returns the list of dependencies which must be supplied in Consume().
  56. // file_diff - line diff for each commit change
  57. // changes - list of changed files for each commit
  58. // blob_cache - set of blobs affected by each commit
  59. // dick - number of ticks since start for each commit
  60. // author - author of the commit
  61. func (churn *ChurnAnalysis) Requires() []string {
  62. arr := [...]string{
  63. hercules.DependencyFileDiff,
  64. hercules.DependencyTreeChanges,
  65. hercules.DependencyBlobCache,
  66. hercules.DependencyTick,
  67. hercules.DependencyAuthor}
  68. return arr[:]
  69. }
  70. // ListConfigurationOptions tells the engine which parameters can be changed through the command
  71. // line.
  72. func (churn *ChurnAnalysis) ListConfigurationOptions() []hercules.ConfigurationOption {
  73. opts := [...]hercules.ConfigurationOption{{
  74. Name: ConfigChurnTrackPeople,
  75. Description: "Record detailed statistics per each developer.",
  76. Flag: "churn-people",
  77. Type: hercules.BoolConfigurationOption,
  78. Default: false},
  79. }
  80. return opts[:]
  81. }
  82. // Flag returns the command line switch which activates the analysis.
  83. func (churn *ChurnAnalysis) Flag() string {
  84. return "churn"
  85. }
  86. // Description returns the text which explains what the analysis is doing.
  87. func (churn *ChurnAnalysis) Description() string {
  88. return "Collects the daily numbers of inserted and removed lines."
  89. }
  90. // Configure applies the parameters specified in the command line. Map keys correspond to "Name".
  91. func (churn *ChurnAnalysis) Configure(facts map[string]interface{}) error {
  92. if val, exists := facts[ConfigChurnTrackPeople].(bool); exists {
  93. churn.TrackPeople = val
  94. }
  95. if churn.TrackPeople {
  96. churn.reversedPeopleDict = facts[hercules.FactIdentityDetectorReversedPeopleDict].([]string)
  97. }
  98. return nil
  99. }
  100. // Initialize resets the internal temporary data structures and prepares the object for Consume().
  101. func (churn *ChurnAnalysis) Initialize(repository *git.Repository) error {
  102. churn.global = []editInfo{}
  103. churn.people = map[int][]editInfo{}
  104. churn.OneShotMergeProcessor.Initialize()
  105. return nil
  106. }
  107. func (churn *ChurnAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  108. if !churn.ShouldConsumeCommit(deps) {
  109. return nil, nil
  110. }
  111. fileDiffs := deps[hercules.DependencyFileDiff].(map[string]hercules.FileDiffData)
  112. treeDiffs := deps[hercules.DependencyTreeChanges].(object.Changes)
  113. cache := deps[hercules.DependencyBlobCache].(map[plumbing.Hash]*hercules.CachedBlob)
  114. tick := deps[hercules.DependencyTick].(int)
  115. author := deps[hercules.DependencyAuthor].(int)
  116. for _, change := range treeDiffs {
  117. action, err := change.Action()
  118. if err != nil {
  119. return nil, err
  120. }
  121. added := 0
  122. removed := 0
  123. switch action {
  124. case merkletrie.Insert:
  125. added, _ = cache[change.To.TreeEntry.Hash].CountLines()
  126. case merkletrie.Delete:
  127. removed, _ = cache[change.From.TreeEntry.Hash].CountLines()
  128. case merkletrie.Modify:
  129. diffs := fileDiffs[change.To.Name]
  130. for _, edit := range diffs.Diffs {
  131. length := utf8.RuneCountInString(edit.Text)
  132. switch edit.Type {
  133. case diffmatchpatch.DiffEqual:
  134. continue
  135. case diffmatchpatch.DiffInsert:
  136. added += length
  137. case diffmatchpatch.DiffDelete:
  138. removed += length
  139. }
  140. }
  141. }
  142. if err != nil {
  143. return nil, err
  144. }
  145. ei := editInfo{Tick: tick, Added: added, Removed: removed}
  146. churn.global = append(churn.global, ei)
  147. if churn.TrackPeople {
  148. seq, exists := churn.people[author]
  149. if !exists {
  150. seq = []editInfo{}
  151. }
  152. seq = append(seq, ei)
  153. churn.people[author] = seq
  154. }
  155. }
  156. return nil, nil
  157. }
  158. // Fork clones the same item several times on branches.
  159. func (churn *ChurnAnalysis) Fork(n int) []hercules.PipelineItem {
  160. return hercules.ForkSamePipelineItem(churn, n)
  161. }
  162. func (churn *ChurnAnalysis) Finalize() interface{} {
  163. result := ChurnAnalysisResult{
  164. Global: editInfosToEdits(churn.global),
  165. People: map[string]Edits{},
  166. }
  167. if churn.TrackPeople {
  168. for key, val := range churn.people {
  169. result.People[churn.reversedPeopleDict[key]] = editInfosToEdits(val)
  170. }
  171. }
  172. return result
  173. }
  174. func (churn *ChurnAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  175. burndownResult := result.(ChurnAnalysisResult)
  176. if binary {
  177. return churn.serializeBinary(&burndownResult, writer)
  178. }
  179. churn.serializeText(&burndownResult, writer)
  180. return nil
  181. }
  182. func (churn *ChurnAnalysis) serializeText(result *ChurnAnalysisResult, writer io.Writer) {
  183. fmt.Fprintln(writer, " global:")
  184. printEdits(result.Global, writer, 4)
  185. for key, val := range result.People {
  186. fmt.Fprintf(writer, " %s:\n", hercules.SafeYamlString(key))
  187. printEdits(val, writer, 4)
  188. }
  189. }
  190. func (churn *ChurnAnalysis) serializeBinary(result *ChurnAnalysisResult, writer io.Writer) error {
  191. message := ChurnAnalysisResultMessage{
  192. Global: editsToEditsMessage(result.Global),
  193. People: map[string]*EditsMessage{},
  194. }
  195. for key, val := range result.People {
  196. message.People[key] = editsToEditsMessage(val)
  197. }
  198. serialized, err := proto.Marshal(&message)
  199. if err != nil {
  200. return err
  201. }
  202. writer.Write(serialized)
  203. return nil
  204. }
  205. func editInfosToEdits(eis []editInfo) Edits {
  206. aux := map[int]*editInfo{}
  207. for _, ei := range eis {
  208. ptr := aux[ei.Tick]
  209. if ptr == nil {
  210. ptr = &editInfo{Tick: ei.Tick}
  211. }
  212. ptr.Added += ei.Added
  213. ptr.Removed += ei.Removed
  214. aux[ei.Tick] = ptr
  215. }
  216. seq := []int{}
  217. for key := range aux {
  218. seq = append(seq, key)
  219. }
  220. sort.Ints(seq)
  221. edits := Edits{
  222. Ticks: make([]int, len(seq)),
  223. Additions: make([]int, len(seq)),
  224. Removals: make([]int, len(seq)),
  225. }
  226. for i, tick := range seq {
  227. edits.Ticks[i] = tick
  228. edits.Additions[i] = aux[tick].Added
  229. edits.Removals[i] = aux[tick].Removed
  230. }
  231. return edits
  232. }
  233. func printEdits(edits Edits, writer io.Writer, indent int) {
  234. strIndent := strings.Repeat(" ", indent)
  235. printArray := func(arr []int, name string) {
  236. fmt.Fprintf(writer, "%s%s: [", strIndent, name)
  237. for i, v := range arr {
  238. if i < len(arr)-1 {
  239. fmt.Fprintf(writer, "%d, ", v)
  240. } else {
  241. fmt.Fprintf(writer, "%d]\n", v)
  242. }
  243. }
  244. }
  245. printArray(edits.Ticks, "ticks")
  246. printArray(edits.Additions, "additions")
  247. printArray(edits.Removals, "removals")
  248. }
  249. func editsToEditsMessage(edits Edits) *EditsMessage {
  250. message := &EditsMessage{
  251. Ticks: make([]uint32, len(edits.Ticks)),
  252. Additions: make([]uint32, len(edits.Additions)),
  253. Removals: make([]uint32, len(edits.Removals)),
  254. }
  255. copyInts := func(arr []int, where []uint32) {
  256. for i, v := range arr {
  257. where[i] = uint32(v)
  258. }
  259. }
  260. copyInts(edits.Ticks, message.Ticks)
  261. copyInts(edits.Additions, message.Additions)
  262. copyInts(edits.Removals, message.Removals)
  263. return message
  264. }
  265. func init() {
  266. hercules.Registry.Register(&ChurnAnalysis{})
  267. }