devs.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "unicode/utf8"
  7. "github.com/gogo/protobuf/proto"
  8. "github.com/sergi/go-diff/diffmatchpatch"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/plumbing"
  11. "gopkg.in/src-d/go-git.v4/plumbing/object"
  12. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  13. "gopkg.in/src-d/hercules.v5/internal/core"
  14. "gopkg.in/src-d/hercules.v5/internal/pb"
  15. items "gopkg.in/src-d/hercules.v5/internal/plumbing"
  16. "gopkg.in/src-d/hercules.v5/internal/plumbing/identity"
  17. "gopkg.in/src-d/hercules.v5/internal/yaml"
  18. )
  19. // DevsAnalysis calculates the number of commits through time per developer.
  20. // It also records the numbers of added, deleted and changed lines through time per developer.
  21. type DevsAnalysis struct {
  22. core.NoopMerger
  23. core.OneShotMergeProcessor
  24. // ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
  25. // into account.
  26. ConsiderEmptyCommits bool
  27. // days maps days to developers to stats
  28. days map[int]map[int]*DevDay
  29. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  30. reversedPeopleDict []string
  31. }
  32. // DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
  33. // per developer.
  34. type DevsResult struct {
  35. // Days is <day index> -> <developer index> -> daily stats
  36. Days map[int]map[int]*DevDay
  37. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  38. reversedPeopleDict []string
  39. }
  40. // DevDay is the statistics for a development day and a particular developer.
  41. type DevDay struct {
  42. // Commits is the number of commits made by a particular developer in a particular day.
  43. Commits int
  44. // Added is the number of added lines by a particular developer in a particular day.
  45. Added int
  46. // Removed is the number of removed lines by a particular developer in a particular day.
  47. Removed int
  48. // Changed is the number of changed lines by a particular developer in a particular day.
  49. Changed int
  50. }
  51. const (
  52. // ConfigDevsConsiderEmptyCommits is the name of the option to set DevsAnalysis.ConsiderEmptyCommits.
  53. ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
  54. )
  55. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  56. func (devs *DevsAnalysis) Name() string {
  57. return "Devs"
  58. }
  59. // Provides returns the list of names of entities which are produced by this PipelineItem.
  60. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  61. // to this list. Also used by core.Registry to build the global map of providers.
  62. func (devs *DevsAnalysis) Provides() []string {
  63. return []string{}
  64. }
  65. // Requires returns the list of names of entities which are needed by this PipelineItem.
  66. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  67. // entities are Provides() upstream.
  68. func (devs *DevsAnalysis) Requires() []string {
  69. arr := [...]string{
  70. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyFileDiff,
  71. items.DependencyBlobCache, items.DependencyDay}
  72. return arr[:]
  73. }
  74. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  75. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  76. options := [...]core.ConfigurationOption{{
  77. Name: ConfigDevsConsiderEmptyCommits,
  78. Description: "Take into account empty commits such as trivial merges.",
  79. Flag: "--empty-commits",
  80. Type: core.BoolConfigurationOption,
  81. Default: false}}
  82. return options[:]
  83. }
  84. // Configure sets the properties previously published by ListConfigurationOptions().
  85. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) {
  86. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  87. devs.ConsiderEmptyCommits = val
  88. }
  89. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  90. devs.reversedPeopleDict = val
  91. }
  92. }
  93. // Flag for the command line switch which enables this analysis.
  94. func (devs *DevsAnalysis) Flag() string {
  95. return "devs"
  96. }
  97. // Description returns the text which explains what the analysis is doing.
  98. func (devs *DevsAnalysis) Description() string {
  99. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  100. }
  101. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  102. // calls. The repository which is going to be analysed is supplied as an argument.
  103. func (devs *DevsAnalysis) Initialize(repository *git.Repository) {
  104. devs.days = map[int]map[int]*DevDay{}
  105. devs.OneShotMergeProcessor.Initialize()
  106. }
  107. // Consume runs this PipelineItem on the next commit data.
  108. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  109. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  110. // This function returns the mapping with analysis results. The keys must be the same as
  111. // in Provides(). If there was an error, nil is returned.
  112. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  113. if !devs.ShouldConsumeCommit(deps) {
  114. return nil, nil
  115. }
  116. author := deps[identity.DependencyAuthor].(int)
  117. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  118. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  119. return nil, nil
  120. }
  121. day := deps[items.DependencyDay].(int)
  122. devsDay, exists := devs.days[day]
  123. if !exists {
  124. devsDay = map[int]*DevDay{}
  125. devs.days[day] = devsDay
  126. }
  127. dd, exists := devsDay[author]
  128. if !exists {
  129. dd = &DevDay{}
  130. devsDay[author] = dd
  131. }
  132. dd.Commits++
  133. cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
  134. fileDiffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
  135. for _, change := range treeDiff {
  136. action, err := change.Action()
  137. if err != nil {
  138. return nil, err
  139. }
  140. switch action {
  141. case merkletrie.Insert:
  142. blob := cache[change.To.TreeEntry.Hash]
  143. lines, err := blob.CountLines()
  144. if err != nil {
  145. // binary
  146. continue
  147. }
  148. dd.Added += lines
  149. case merkletrie.Delete:
  150. blob := cache[change.From.TreeEntry.Hash]
  151. lines, err := blob.CountLines()
  152. if err != nil {
  153. // binary
  154. continue
  155. }
  156. dd.Removed += lines
  157. case merkletrie.Modify:
  158. thisDiffs := fileDiffs[change.To.Name]
  159. var removedPending int
  160. for _, edit := range thisDiffs.Diffs {
  161. switch edit.Type {
  162. case diffmatchpatch.DiffEqual:
  163. if removedPending > 0 {
  164. dd.Removed += removedPending
  165. }
  166. removedPending = 0
  167. case diffmatchpatch.DiffInsert:
  168. added := utf8.RuneCountInString(edit.Text)
  169. if removedPending > added {
  170. dd.Changed += added
  171. dd.Removed += removedPending - added
  172. } else {
  173. dd.Changed += removedPending
  174. dd.Added += added - removedPending
  175. }
  176. removedPending = 0
  177. case diffmatchpatch.DiffDelete:
  178. removedPending = utf8.RuneCountInString(edit.Text)
  179. }
  180. }
  181. if removedPending > 0 {
  182. dd.Removed += removedPending
  183. }
  184. }
  185. }
  186. return nil, nil
  187. }
  188. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  189. func (devs *DevsAnalysis) Finalize() interface{} {
  190. return DevsResult{
  191. Days: devs.days,
  192. reversedPeopleDict: devs.reversedPeopleDict,
  193. }
  194. }
  195. // Fork clones this pipeline item.
  196. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  197. return core.ForkSamePipelineItem(devs, n)
  198. }
  199. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  200. // The text format is YAML and the bytes format is Protocol Buffers.
  201. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  202. devsResult := result.(DevsResult)
  203. if binary {
  204. return devs.serializeBinary(&devsResult, writer)
  205. }
  206. devs.serializeText(&devsResult, writer)
  207. return nil
  208. }
  209. // Deserialize converts the specified protobuf bytes to DevsResult.
  210. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  211. message := pb.DevsAnalysisResults{}
  212. err := proto.Unmarshal(pbmessage, &message)
  213. if err != nil {
  214. return nil, err
  215. }
  216. days := map[int]map[int]*DevDay{}
  217. for day, dd := range message.Days {
  218. rdd := map[int]*DevDay{}
  219. days[int(day)] = rdd
  220. for dev, stats := range dd.Devs {
  221. if dev == -1 {
  222. dev = identity.AuthorMissing
  223. }
  224. rdd[int(dev)] = &DevDay{
  225. Commits: int(stats.Commits),
  226. Added: int(stats.Added),
  227. Removed: int(stats.Removed),
  228. Changed: int(stats.Changed),
  229. }
  230. }
  231. }
  232. result := DevsResult{
  233. Days: days,
  234. reversedPeopleDict: message.DevIndex,
  235. }
  236. return result, nil
  237. }
  238. // MergeResults combines two DevsAnalysis-es together.
  239. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  240. cr1 := r1.(DevsResult)
  241. cr2 := r2.(DevsResult)
  242. merged := DevsResult{}
  243. type devIndexPair struct {
  244. Index1 int
  245. Index2 int
  246. }
  247. devIndex := map[string]devIndexPair{}
  248. for dev, devName := range cr1.reversedPeopleDict {
  249. devIndex[devName] = devIndexPair{Index1: dev+1, Index2: devIndex[devName].Index2}
  250. }
  251. for dev, devName := range cr2.reversedPeopleDict {
  252. devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev+1}
  253. }
  254. jointDevSeq := make([]string, len(devIndex))
  255. {
  256. i := 0
  257. for dev := range devIndex {
  258. jointDevSeq[i] = dev
  259. i++
  260. }
  261. }
  262. sort.Strings(jointDevSeq)
  263. merged.reversedPeopleDict = jointDevSeq
  264. invDevIndex1 := map[int]int{}
  265. invDevIndex2 := map[int]int{}
  266. for i, dev := range jointDevSeq {
  267. pair := devIndex[dev]
  268. if pair.Index1 > 0 {
  269. invDevIndex1[pair.Index1-1] = i
  270. }
  271. if pair.Index2 > 0 {
  272. invDevIndex2[pair.Index2-1] = i
  273. }
  274. }
  275. newDays := map[int]map[int]*DevDay{}
  276. merged.Days = newDays
  277. for day, dd := range cr1.Days {
  278. newdd, exists := newDays[day]
  279. if !exists {
  280. newdd = map[int]*DevDay{}
  281. newDays[day] = newdd
  282. }
  283. for dev, stats := range dd {
  284. newdev := dev
  285. if newdev != identity.AuthorMissing {
  286. newdev = invDevIndex1[dev]
  287. }
  288. newstats, exists := newdd[newdev]
  289. if !exists {
  290. newstats = &DevDay{}
  291. newdd[newdev] = newstats
  292. }
  293. newstats.Commits += stats.Commits
  294. newstats.Added += stats.Added
  295. newstats.Removed += stats.Removed
  296. newstats.Changed += stats.Changed
  297. }
  298. }
  299. for day, dd := range cr2.Days {
  300. newdd, exists := newDays[day]
  301. if !exists {
  302. newdd = map[int]*DevDay{}
  303. newDays[day] = newdd
  304. }
  305. for dev, stats := range dd {
  306. newdev := dev
  307. if newdev != identity.AuthorMissing {
  308. newdev = invDevIndex2[dev]
  309. }
  310. newstats, exists := newdd[newdev]
  311. if !exists {
  312. newstats = &DevDay{}
  313. newdd[newdev] = newstats
  314. }
  315. newstats.Commits += stats.Commits
  316. newstats.Added += stats.Added
  317. newstats.Removed += stats.Removed
  318. newstats.Changed += stats.Changed
  319. }
  320. }
  321. return merged
  322. }
  323. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  324. fmt.Fprintln(writer, " days:")
  325. days := make([]int, len(result.Days))
  326. {
  327. i := 0
  328. for day := range result.Days {
  329. days[i] = day
  330. i++
  331. }
  332. }
  333. sort.Ints(days)
  334. for _, day := range days {
  335. fmt.Fprintf(writer, " %d:\n", day)
  336. rday := result.Days[day]
  337. devseq := make([]int, len(rday))
  338. {
  339. i := 0
  340. for dev := range rday {
  341. devseq[i] = dev
  342. i++
  343. }
  344. }
  345. sort.Ints(devseq)
  346. for _, dev := range devseq {
  347. stats := rday[dev]
  348. if dev == identity.AuthorMissing {
  349. dev = -1
  350. }
  351. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d]\n",
  352. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed)
  353. }
  354. }
  355. fmt.Fprintln(writer, " people:")
  356. for _, person := range result.reversedPeopleDict {
  357. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  358. }
  359. }
  360. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  361. message := pb.DevsAnalysisResults{}
  362. message.DevIndex = result.reversedPeopleDict
  363. message.Days = map[int32]*pb.DayDevs{}
  364. for day, devs := range result.Days {
  365. dd := &pb.DayDevs{}
  366. message.Days[int32(day)] = dd
  367. dd.Devs = map[int32]*pb.DevDay{}
  368. for dev, stats := range devs {
  369. if dev == identity.AuthorMissing {
  370. dev = -1
  371. }
  372. dd.Devs[int32(dev)] = &pb.DevDay{
  373. Commits: int32(stats.Commits),
  374. Added: int32(stats.Added),
  375. Changed: int32(stats.Changed),
  376. Removed: int32(stats.Removed),
  377. }
  378. }
  379. }
  380. serialized, err := proto.Marshal(&message)
  381. if err != nil {
  382. return err
  383. }
  384. _, err = writer.Write(serialized)
  385. return err
  386. }
  387. func init() {
  388. core.Registry.Register(&DevsAnalysis{})
  389. }