churn_analysis.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package main
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/src-d/go-git.v4"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  14. "gopkg.in/src-d/hercules.v4"
  15. "gopkg.in/src-d/hercules.v4/yaml"
  16. )
// ChurnAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// hercules.LeafPipelineItem.
type ChurnAnalysis struct {
	// No special merge logic is required
	hercules.NoopMerger
	// Process each merge only once
	hercules.OneShotMergeProcessor

	// TrackPeople enables the per-author statistics. Configure() sets it from the
	// ConfigChurnTrackPeople fact.
	TrackPeople bool

	// global collects one editInfo per file change seen by Consume(), for all authors.
	global []editInfo
	// people collects the same records keyed by the author index received in Consume().
	// It is filled only when TrackPeople is true.
	people map[int][]editInfo

	// references IdentityDetector.ReversedPeopleDict
	reversedPeopleDict []string
}
  30. type editInfo struct {
  31. Day int
  32. Added int
  33. Removed int
  34. }
// ChurnAnalysisResult is returned by Finalize() and represents the analysis result.
type ChurnAnalysisResult struct {
	// Global holds the edits aggregated over every recorded change.
	Global Edits
	// People maps a developer name, looked up in the reversed people dictionary, to that
	// developer's edits. Finalize() leaves it empty unless TrackPeople is enabled.
	People map[string]Edits
}
  40. type Edits struct {
  41. Days []int
  42. Additions []int
  43. Removals []int
  44. }
  45. const (
  46. ConfigChurnTrackPeople = "Churn.TrackPeople"
  47. )
  48. // Analysis' name in the graph is usually the same as the type's name, however, does not have to.
  49. func (churn *ChurnAnalysis) Name() string {
  50. return "ChurnAnalysis"
  51. }
  52. // LeafPipelineItem-s normally do not act as intermediate nodes and thus we return an empty slice.
  53. func (churn *ChurnAnalysis) Provides() []string {
  54. return []string{}
  55. }
  56. // Requires returns the list of dependencies which must be supplied in Consume().
  57. // file_diff - line diff for each commit change
  58. // changes - list of changed files for each commit
  59. // blob_cache - set of blobs affected by each commit
  60. // day - number of days since start for each commit
  61. // author - author of the commit
  62. func (churn *ChurnAnalysis) Requires() []string {
  63. arr := [...]string{
  64. hercules.DependencyFileDiff,
  65. hercules.DependencyTreeChanges,
  66. hercules.DependencyBlobCache,
  67. hercules.DependencyDay,
  68. hercules.DependencyAuthor}
  69. return arr[:]
  70. }
  71. // ListConfigurationOptions tells the engine which parameters can be changed through the command
  72. // line.
  73. func (churn *ChurnAnalysis) ListConfigurationOptions() []hercules.ConfigurationOption {
  74. opts := [...]hercules.ConfigurationOption{{
  75. Name: ConfigChurnTrackPeople,
  76. Description: "Record detailed statistics per each developer.",
  77. Flag: "churn-people",
  78. Type: hercules.BoolConfigurationOption,
  79. Default: false},
  80. }
  81. return opts[:]
  82. }
  83. // Flag returns the command line switch which activates the analysis.
  84. func (churn *ChurnAnalysis) Flag() string {
  85. return "churn"
  86. }
  87. // Configure applies the parameters specified in the command line. Map keys correspond to "Name".
  88. func (churn *ChurnAnalysis) Configure(facts map[string]interface{}) {
  89. if val, exists := facts[ConfigChurnTrackPeople].(bool); exists {
  90. churn.TrackPeople = val
  91. }
  92. if churn.TrackPeople {
  93. churn.reversedPeopleDict = facts[hercules.FactIdentityDetectorReversedPeopleDict].([]string)
  94. }
  95. }
  96. // Initialize resets the internal temporary data structures and prepares the object for Consume().
  97. func (churn *ChurnAnalysis) Initialize(repository *git.Repository) {
  98. churn.global = []editInfo{}
  99. churn.people = map[int][]editInfo{}
  100. churn.OneShotMergeProcessor.Initialize()
  101. }
  102. func (churn *ChurnAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  103. if !churn.ShouldConsumeCommit(deps) {
  104. return nil, nil
  105. }
  106. fileDiffs := deps[hercules.DependencyFileDiff].(map[string]hercules.FileDiffData)
  107. treeDiffs := deps[hercules.DependencyTreeChanges].(object.Changes)
  108. cache := deps[hercules.DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
  109. day := deps[hercules.DependencyDay].(int)
  110. author := deps[hercules.DependencyAuthor].(int)
  111. for _, change := range treeDiffs {
  112. action, err := change.Action()
  113. if err != nil {
  114. return nil, err
  115. }
  116. added := 0
  117. removed := 0
  118. switch action {
  119. case merkletrie.Insert:
  120. added, err = hercules.CountLines(cache[change.To.TreeEntry.Hash])
  121. if err != nil && err.Error() == "binary" {
  122. err = nil
  123. }
  124. case merkletrie.Delete:
  125. removed, err = hercules.CountLines(cache[change.From.TreeEntry.Hash])
  126. if err != nil && err.Error() == "binary" {
  127. err = nil
  128. }
  129. case merkletrie.Modify:
  130. diffs := fileDiffs[change.To.Name]
  131. for _, edit := range diffs.Diffs {
  132. length := utf8.RuneCountInString(edit.Text)
  133. switch edit.Type {
  134. case diffmatchpatch.DiffEqual:
  135. continue
  136. case diffmatchpatch.DiffInsert:
  137. added += length
  138. case diffmatchpatch.DiffDelete:
  139. removed += length
  140. }
  141. }
  142. }
  143. if err != nil {
  144. return nil, err
  145. }
  146. ei := editInfo{Day: day, Added: added, Removed: removed}
  147. churn.global = append(churn.global, ei)
  148. if churn.TrackPeople {
  149. seq, exists := churn.people[author]
  150. if !exists {
  151. seq = []editInfo{}
  152. }
  153. seq = append(seq, ei)
  154. churn.people[author] = seq
  155. }
  156. }
  157. return nil, nil
  158. }
  159. // Fork clones the same item several times on branches.
  160. func (churn *ChurnAnalysis) Fork(n int) []hercules.PipelineItem {
  161. return hercules.ForkSamePipelineItem(churn, n)
  162. }
  163. func (churn *ChurnAnalysis) Finalize() interface{} {
  164. result := ChurnAnalysisResult{
  165. Global: editInfosToEdits(churn.global),
  166. People: map[string]Edits{},
  167. }
  168. if churn.TrackPeople {
  169. for key, val := range churn.people {
  170. result.People[churn.reversedPeopleDict[key]] = editInfosToEdits(val)
  171. }
  172. }
  173. return result
  174. }
  175. func (churn *ChurnAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  176. burndownResult := result.(ChurnAnalysisResult)
  177. if binary {
  178. return churn.serializeBinary(&burndownResult, writer)
  179. }
  180. churn.serializeText(&burndownResult, writer)
  181. return nil
  182. }
  183. func (churn *ChurnAnalysis) serializeText(result *ChurnAnalysisResult, writer io.Writer) {
  184. fmt.Fprintln(writer, " global:")
  185. printEdits(result.Global, writer, 4)
  186. for key, val := range result.People {
  187. fmt.Fprintf(writer, " %s:\n", yaml.SafeString(key))
  188. printEdits(val, writer, 4)
  189. }
  190. }
  191. func (churn *ChurnAnalysis) serializeBinary(result *ChurnAnalysisResult, writer io.Writer) error {
  192. message := ChurnAnalysisResultMessage{
  193. Global: editsToEditsMessage(result.Global),
  194. People: map[string]*EditsMessage{},
  195. }
  196. for key, val := range result.People {
  197. message.People[key] = editsToEditsMessage(val)
  198. }
  199. serialized, err := proto.Marshal(&message)
  200. if err != nil {
  201. return err
  202. }
  203. writer.Write(serialized)
  204. return nil
  205. }
  206. func editInfosToEdits(eis []editInfo) Edits {
  207. aux := map[int]*editInfo{}
  208. for _, ei := range eis {
  209. ptr := aux[ei.Day]
  210. if ptr == nil {
  211. ptr = &editInfo{Day: ei.Day}
  212. }
  213. ptr.Added += ei.Added
  214. ptr.Removed += ei.Removed
  215. aux[ei.Day] = ptr
  216. }
  217. seq := []int{}
  218. for key := range aux {
  219. seq = append(seq, key)
  220. }
  221. sort.Ints(seq)
  222. edits := Edits{
  223. Days: make([]int, len(seq)),
  224. Additions: make([]int, len(seq)),
  225. Removals: make([]int, len(seq)),
  226. }
  227. for i, day := range seq {
  228. edits.Days[i] = day
  229. edits.Additions[i] = aux[day].Added
  230. edits.Removals[i] = aux[day].Removed
  231. }
  232. return edits
  233. }
  234. func printEdits(edits Edits, writer io.Writer, indent int) {
  235. strIndent := strings.Repeat(" ", indent)
  236. printArray := func(arr []int, name string) {
  237. fmt.Fprintf(writer, "%s%s: [", strIndent, name)
  238. for i, v := range arr {
  239. if i < len(arr)-1 {
  240. fmt.Fprintf(writer, "%d, ", v)
  241. } else {
  242. fmt.Fprintf(writer, "%d]\n", v)
  243. }
  244. }
  245. }
  246. printArray(edits.Days, "days")
  247. printArray(edits.Additions, "additions")
  248. printArray(edits.Removals, "removals")
  249. }
  250. func editsToEditsMessage(edits Edits) *EditsMessage {
  251. message := &EditsMessage{
  252. Days: make([]uint32, len(edits.Days)),
  253. Additions: make([]uint32, len(edits.Additions)),
  254. Removals: make([]uint32, len(edits.Removals)),
  255. }
  256. copyInts := func(arr []int, where []uint32) {
  257. for i, v := range arr {
  258. where[i] = uint32(v)
  259. }
  260. }
  261. copyInts(edits.Days, message.Days)
  262. copyInts(edits.Additions, message.Additions)
  263. copyInts(edits.Removals, message.Removals)
  264. return message
  265. }
  266. func init() {
  267. hercules.Registry.Register(&ChurnAnalysis{})
  268. }