churn_analysis.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. package main
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "unicode/utf8"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing/object"
  10. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/hercules.v3"
  13. "gopkg.in/src-d/hercules.v3/yaml"
  14. "github.com/gogo/protobuf/proto"
  15. "github.com/sergi/go-diff/diffmatchpatch"
  16. )
  17. // ChurnAnalysis contains the intermediate state which is mutated by Consume(). It should implement
  18. // hercules.LeafPipelineItem.
  19. type ChurnAnalysis struct {
  20. TrackPeople bool
  21. global []editInfo
  22. people map[int][]editInfo
  23. // references IdentityDetector.ReversedPeopleDict
  24. reversedPeopleDict []string
  25. }
  26. type editInfo struct {
  27. Day int
  28. Added int
  29. Removed int
  30. }
  31. // ChurnAnalysisResult is returned by Finalize() and represents the analysis result.
  32. type ChurnAnalysisResult struct {
  33. Global Edits
  34. People map[string]Edits
  35. }
  36. type Edits struct {
  37. Days []int
  38. Additions []int
  39. Removals []int
  40. }
  41. const (
  42. ConfigChurnTrackPeople = "Churn.TrackPeople"
  43. )
  44. // Analysis' name in the graph is usually the same as the type's name, however, does not have to.
  45. func (churn *ChurnAnalysis) Name() string {
  46. return "ChurnAnalysis"
  47. }
  48. // LeafPipelineItem-s normally do not act as intermediate nodes and thus we return an empty slice.
  49. func (churn *ChurnAnalysis) Provides() []string {
  50. return []string{}
  51. }
  52. // Requires returns the list of dependencies which must be supplied in Consume().
  53. // file_diff - line diff for each commit change
  54. // changes - list of changed files for each commit
  55. // blob_cache - set of blobs affected by each commit
  56. // day - number of days since start for each commit
  57. // author - author of the commit
  58. func (churn *ChurnAnalysis) Requires() []string {
  59. arr := [...]string{
  60. hercules.DependencyFileDiff,
  61. hercules.DependencyTreeChanges,
  62. hercules.DependencyBlobCache,
  63. hercules.DependencyDay,
  64. hercules.DependencyAuthor}
  65. return arr[:]
  66. }
  67. // ListConfigurationOptions tells the engine which parameters can be changed through the command
  68. // line.
  69. func (churn *ChurnAnalysis) ListConfigurationOptions() []hercules.ConfigurationOption {
  70. opts := [...]hercules.ConfigurationOption {{
  71. Name: ConfigChurnTrackPeople,
  72. Description: "Record detailed statistics per each developer.",
  73. Flag: "churn-people",
  74. Type: hercules.BoolConfigurationOption,
  75. Default: false},
  76. }
  77. return opts[:]
  78. }
  79. // Flag returns the command line switch which activates the analysis.
  80. func (churn *ChurnAnalysis) Flag() string {
  81. return "churn"
  82. }
  83. // Configure applies the parameters specified in the command line. Map keys correspond to "Name".
  84. func (churn *ChurnAnalysis) Configure(facts map[string]interface{}) {
  85. if val, exists := facts[ConfigChurnTrackPeople].(bool); exists {
  86. churn.TrackPeople = val
  87. }
  88. if churn.TrackPeople {
  89. churn.reversedPeopleDict = facts[hercules.FactIdentityDetectorReversedPeopleDict].([]string)
  90. }
  91. }
  92. // Initialize resets the internal temporary data structures and prepares the object for Consume().
  93. func (churn *ChurnAnalysis) Initialize(repository *git.Repository) {
  94. churn.global = []editInfo{}
  95. churn.people = map[int][]editInfo{}
  96. }
  97. func (churn *ChurnAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  98. fileDiffs := deps[hercules.DependencyFileDiff].(map[string]hercules.FileDiffData)
  99. treeDiffs := deps[hercules.DependencyTreeChanges].(object.Changes)
  100. cache := deps[hercules.DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
  101. day := deps[hercules.DependencyDay].(int)
  102. author := deps[hercules.DependencyAuthor].(int)
  103. for _, change := range treeDiffs {
  104. action, err := change.Action()
  105. if err != nil {
  106. return nil, err
  107. }
  108. added := 0; removed := 0
  109. switch action {
  110. case merkletrie.Insert:
  111. added, err = hercules.CountLines(cache[change.To.TreeEntry.Hash])
  112. if err != nil && err.Error() == "binary" {
  113. err = nil
  114. }
  115. case merkletrie.Delete:
  116. removed, err = hercules.CountLines(cache[change.From.TreeEntry.Hash])
  117. if err != nil && err.Error() == "binary" {
  118. err = nil
  119. }
  120. case merkletrie.Modify:
  121. diffs := fileDiffs[change.To.Name]
  122. for _, edit := range diffs.Diffs {
  123. length := utf8.RuneCountInString(edit.Text)
  124. switch edit.Type {
  125. case diffmatchpatch.DiffEqual:
  126. continue
  127. case diffmatchpatch.DiffInsert:
  128. added += length
  129. case diffmatchpatch.DiffDelete:
  130. removed += length
  131. }
  132. }
  133. }
  134. if err != nil {
  135. return nil, err
  136. }
  137. ei := editInfo{Day: day, Added: added, Removed: removed}
  138. churn.global = append(churn.global, ei)
  139. if churn.TrackPeople {
  140. seq, exists := churn.people[author]
  141. if !exists {
  142. seq = []editInfo{}
  143. }
  144. seq = append(seq, ei)
  145. churn.people[author] = seq
  146. }
  147. }
  148. return nil, nil
  149. }
  150. func (churn *ChurnAnalysis) Finalize() interface{} {
  151. result := ChurnAnalysisResult{
  152. Global: editInfosToEdits(churn.global),
  153. People: map[string]Edits{},
  154. }
  155. if churn.TrackPeople {
  156. for key, val := range churn.people {
  157. result.People[churn.reversedPeopleDict[key]] = editInfosToEdits(val)
  158. }
  159. }
  160. return result
  161. }
  162. func (churn *ChurnAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  163. burndownResult := result.(ChurnAnalysisResult)
  164. if binary {
  165. return churn.serializeBinary(&burndownResult, writer)
  166. }
  167. churn.serializeText(&burndownResult, writer)
  168. return nil
  169. }
  170. func (churn *ChurnAnalysis) serializeText(result *ChurnAnalysisResult, writer io.Writer) {
  171. fmt.Fprintln(writer, " global:")
  172. printEdits(result.Global, writer, 4)
  173. for key, val := range result.People {
  174. fmt.Fprintf(writer, " %s:\n", yaml.SafeString(key))
  175. printEdits(val, writer, 4)
  176. }
  177. }
  178. func (churn *ChurnAnalysis) serializeBinary(result *ChurnAnalysisResult, writer io.Writer) error {
  179. message := ChurnAnalysisResultMessage{
  180. Global: editsToEditsMessage(result.Global),
  181. People: map[string]*EditsMessage{},
  182. }
  183. for key, val := range result.People {
  184. message.People[key] = editsToEditsMessage(val)
  185. }
  186. serialized, err := proto.Marshal(&message)
  187. if err != nil {
  188. return err
  189. }
  190. writer.Write(serialized)
  191. return nil
  192. }
  193. func editInfosToEdits(eis []editInfo) Edits {
  194. aux := map[int]*editInfo{}
  195. for _, ei := range eis {
  196. ptr := aux[ei.Day]
  197. if ptr == nil {
  198. ptr = &editInfo{Day: ei.Day}
  199. }
  200. ptr.Added += ei.Added
  201. ptr.Removed += ei.Removed
  202. aux[ei.Day] = ptr
  203. }
  204. seq := []int{}
  205. for key := range aux {
  206. seq = append(seq, key)
  207. }
  208. sort.Ints(seq)
  209. edits := Edits{
  210. Days: make([]int, len(seq)),
  211. Additions: make([]int, len(seq)),
  212. Removals: make([]int, len(seq)),
  213. }
  214. for i, day := range seq {
  215. edits.Days[i] = day
  216. edits.Additions[i] = aux[day].Added
  217. edits.Removals[i] = aux[day].Removed
  218. }
  219. return edits
  220. }
  221. func printEdits(edits Edits, writer io.Writer, indent int) {
  222. strIndent := strings.Repeat(" ", indent)
  223. printArray := func(arr []int, name string) {
  224. fmt.Fprintf(writer, "%s%s: [", strIndent, name)
  225. for i, v := range arr {
  226. if i < len(arr) - 1 {
  227. fmt.Fprintf(writer, "%d, ", v)
  228. } else {
  229. fmt.Fprintf(writer, "%d]\n", v)
  230. }
  231. }
  232. }
  233. printArray(edits.Days, "days")
  234. printArray(edits.Additions, "additions")
  235. printArray(edits.Removals, "removals")
  236. }
  237. func editsToEditsMessage(edits Edits) *EditsMessage {
  238. message := &EditsMessage{
  239. Days: make([]uint32, len(edits.Days)),
  240. Additions: make([]uint32, len(edits.Additions)),
  241. Removals: make([]uint32, len(edits.Removals)),
  242. }
  243. copyInts := func(arr []int, where []uint32) {
  244. for i, v := range arr {
  245. where[i] = uint32(v)
  246. }
  247. }
  248. copyInts(edits.Days, message.Days)
  249. copyInts(edits.Additions, message.Additions)
  250. copyInts(edits.Removals, message.Removals)
  251. return message
  252. }
  253. func init() {
  254. hercules.Registry.Register(&ChurnAnalysis{})
  255. }