devs.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "unicode/utf8"
  7. "github.com/gogo/protobuf/proto"
  8. "github.com/sergi/go-diff/diffmatchpatch"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/plumbing"
  11. "gopkg.in/src-d/go-git.v4/plumbing/object"
  12. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  13. "gopkg.in/src-d/hercules.v6/internal/core"
  14. "gopkg.in/src-d/hercules.v6/internal/pb"
  15. items "gopkg.in/src-d/hercules.v6/internal/plumbing"
  16. "gopkg.in/src-d/hercules.v6/internal/plumbing/identity"
  17. "gopkg.in/src-d/hercules.v6/internal/yaml"
  18. )
  19. // DevsAnalysis calculates the number of commits through time per developer.
  20. // It also records the numbers of added, deleted and changed lines through time per developer.
  21. type DevsAnalysis struct {
  22. core.NoopMerger
  23. core.OneShotMergeProcessor
  24. // ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
  25. // into account.
  26. ConsiderEmptyCommits bool
  27. // days maps days to developers to stats
  28. days map[int]map[int]*DevDay
  29. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  30. reversedPeopleDict []string
  31. }
  32. // DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
  33. // per developer.
  34. type DevsResult struct {
  35. // Days is <day index> -> <developer index> -> daily stats
  36. Days map[int]map[int]*DevDay
  37. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  38. reversedPeopleDict []string
  39. }
  // DevDay aggregates what a single developer did during a single day.
  type DevDay struct {
  	// Commits counts the commits authored by the developer on that day.
  	Commits int
  	// Added counts the lines which the developer added on that day.
  	Added int
  	// Removed counts the lines which the developer removed on that day.
  	Removed int
  	// Changed counts the lines which the developer changed on that day.
  	Changed int
  }
  // ConfigDevsConsiderEmptyCommits is the name of the configuration option which sets
  // DevsAnalysis.ConsiderEmptyCommits.
  const ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
  55. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  56. func (devs *DevsAnalysis) Name() string {
  57. return "Devs"
  58. }
  59. // Provides returns the list of names of entities which are produced by this PipelineItem.
  60. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  61. // to this list. Also used by core.Registry to build the global map of providers.
  62. func (devs *DevsAnalysis) Provides() []string {
  63. return []string{}
  64. }
  65. // Requires returns the list of names of entities which are needed by this PipelineItem.
  66. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  67. // entities are Provides() upstream.
  68. func (devs *DevsAnalysis) Requires() []string {
  69. arr := [...]string{
  70. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyFileDiff,
  71. items.DependencyBlobCache, items.DependencyDay}
  72. return arr[:]
  73. }
  74. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  75. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  76. options := [...]core.ConfigurationOption{{
  77. Name: ConfigDevsConsiderEmptyCommits,
  78. Description: "Take into account empty commits such as trivial merges.",
  79. Flag: "--empty-commits",
  80. Type: core.BoolConfigurationOption,
  81. Default: false}}
  82. return options[:]
  83. }
  84. // Configure sets the properties previously published by ListConfigurationOptions().
  85. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  86. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  87. devs.ConsiderEmptyCommits = val
  88. }
  89. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  90. devs.reversedPeopleDict = val
  91. }
  92. return nil
  93. }
  94. // Flag for the command line switch which enables this analysis.
  95. func (devs *DevsAnalysis) Flag() string {
  96. return "devs"
  97. }
  98. // Description returns the text which explains what the analysis is doing.
  99. func (devs *DevsAnalysis) Description() string {
  100. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  101. }
  102. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  103. // calls. The repository which is going to be analysed is supplied as an argument.
  104. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  105. devs.days = map[int]map[int]*DevDay{}
  106. devs.OneShotMergeProcessor.Initialize()
  107. return nil
  108. }
  109. // Consume runs this PipelineItem on the next commit data.
  110. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  111. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  112. // This function returns the mapping with analysis results. The keys must be the same as
  113. // in Provides(). If there was an error, nil is returned.
  114. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  115. if !devs.ShouldConsumeCommit(deps) {
  116. return nil, nil
  117. }
  118. author := deps[identity.DependencyAuthor].(int)
  119. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  120. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  121. return nil, nil
  122. }
  123. day := deps[items.DependencyDay].(int)
  124. devsDay, exists := devs.days[day]
  125. if !exists {
  126. devsDay = map[int]*DevDay{}
  127. devs.days[day] = devsDay
  128. }
  129. dd, exists := devsDay[author]
  130. if !exists {
  131. dd = &DevDay{}
  132. devsDay[author] = dd
  133. }
  134. dd.Commits++
  135. cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
  136. fileDiffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
  137. for _, change := range treeDiff {
  138. action, err := change.Action()
  139. if err != nil {
  140. return nil, err
  141. }
  142. switch action {
  143. case merkletrie.Insert:
  144. blob := cache[change.To.TreeEntry.Hash]
  145. lines, err := blob.CountLines()
  146. if err != nil {
  147. // binary
  148. continue
  149. }
  150. dd.Added += lines
  151. case merkletrie.Delete:
  152. blob := cache[change.From.TreeEntry.Hash]
  153. lines, err := blob.CountLines()
  154. if err != nil {
  155. // binary
  156. continue
  157. }
  158. dd.Removed += lines
  159. case merkletrie.Modify:
  160. thisDiffs := fileDiffs[change.To.Name]
  161. var removedPending int
  162. for _, edit := range thisDiffs.Diffs {
  163. switch edit.Type {
  164. case diffmatchpatch.DiffEqual:
  165. if removedPending > 0 {
  166. dd.Removed += removedPending
  167. }
  168. removedPending = 0
  169. case diffmatchpatch.DiffInsert:
  170. added := utf8.RuneCountInString(edit.Text)
  171. if removedPending > added {
  172. dd.Changed += added
  173. dd.Removed += removedPending - added
  174. } else {
  175. dd.Changed += removedPending
  176. dd.Added += added - removedPending
  177. }
  178. removedPending = 0
  179. case diffmatchpatch.DiffDelete:
  180. removedPending = utf8.RuneCountInString(edit.Text)
  181. }
  182. }
  183. if removedPending > 0 {
  184. dd.Removed += removedPending
  185. }
  186. }
  187. }
  188. return nil, nil
  189. }
  190. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  191. func (devs *DevsAnalysis) Finalize() interface{} {
  192. return DevsResult{
  193. Days: devs.days,
  194. reversedPeopleDict: devs.reversedPeopleDict,
  195. }
  196. }
  197. // Fork clones this pipeline item.
  198. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  199. return core.ForkSamePipelineItem(devs, n)
  200. }
  201. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  202. // The text format is YAML and the bytes format is Protocol Buffers.
  203. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  204. devsResult := result.(DevsResult)
  205. if binary {
  206. return devs.serializeBinary(&devsResult, writer)
  207. }
  208. devs.serializeText(&devsResult, writer)
  209. return nil
  210. }
  211. // Deserialize converts the specified protobuf bytes to DevsResult.
  212. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  213. message := pb.DevsAnalysisResults{}
  214. err := proto.Unmarshal(pbmessage, &message)
  215. if err != nil {
  216. return nil, err
  217. }
  218. days := map[int]map[int]*DevDay{}
  219. for day, dd := range message.Days {
  220. rdd := map[int]*DevDay{}
  221. days[int(day)] = rdd
  222. for dev, stats := range dd.Devs {
  223. if dev == -1 {
  224. dev = identity.AuthorMissing
  225. }
  226. rdd[int(dev)] = &DevDay{
  227. Commits: int(stats.Commits),
  228. Added: int(stats.Added),
  229. Removed: int(stats.Removed),
  230. Changed: int(stats.Changed),
  231. }
  232. }
  233. }
  234. result := DevsResult{
  235. Days: days,
  236. reversedPeopleDict: message.DevIndex,
  237. }
  238. return result, nil
  239. }
  240. // MergeResults combines two DevsAnalysis-es together.
  241. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  242. cr1 := r1.(DevsResult)
  243. cr2 := r2.(DevsResult)
  244. merged := DevsResult{}
  245. type devIndexPair struct {
  246. Index1 int
  247. Index2 int
  248. }
  249. devIndex := map[string]devIndexPair{}
  250. for dev, devName := range cr1.reversedPeopleDict {
  251. devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
  252. }
  253. for dev, devName := range cr2.reversedPeopleDict {
  254. devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
  255. }
  256. jointDevSeq := make([]string, len(devIndex))
  257. {
  258. i := 0
  259. for dev := range devIndex {
  260. jointDevSeq[i] = dev
  261. i++
  262. }
  263. }
  264. sort.Strings(jointDevSeq)
  265. merged.reversedPeopleDict = jointDevSeq
  266. invDevIndex1 := map[int]int{}
  267. invDevIndex2 := map[int]int{}
  268. for i, dev := range jointDevSeq {
  269. pair := devIndex[dev]
  270. if pair.Index1 > 0 {
  271. invDevIndex1[pair.Index1-1] = i
  272. }
  273. if pair.Index2 > 0 {
  274. invDevIndex2[pair.Index2-1] = i
  275. }
  276. }
  277. newDays := map[int]map[int]*DevDay{}
  278. merged.Days = newDays
  279. for day, dd := range cr1.Days {
  280. newdd, exists := newDays[day]
  281. if !exists {
  282. newdd = map[int]*DevDay{}
  283. newDays[day] = newdd
  284. }
  285. for dev, stats := range dd {
  286. newdev := dev
  287. if newdev != identity.AuthorMissing {
  288. newdev = invDevIndex1[dev]
  289. }
  290. newstats, exists := newdd[newdev]
  291. if !exists {
  292. newstats = &DevDay{}
  293. newdd[newdev] = newstats
  294. }
  295. newstats.Commits += stats.Commits
  296. newstats.Added += stats.Added
  297. newstats.Removed += stats.Removed
  298. newstats.Changed += stats.Changed
  299. }
  300. }
  301. for day, dd := range cr2.Days {
  302. newdd, exists := newDays[day]
  303. if !exists {
  304. newdd = map[int]*DevDay{}
  305. newDays[day] = newdd
  306. }
  307. for dev, stats := range dd {
  308. newdev := dev
  309. if newdev != identity.AuthorMissing {
  310. newdev = invDevIndex2[dev]
  311. }
  312. newstats, exists := newdd[newdev]
  313. if !exists {
  314. newstats = &DevDay{}
  315. newdd[newdev] = newstats
  316. }
  317. newstats.Commits += stats.Commits
  318. newstats.Added += stats.Added
  319. newstats.Removed += stats.Removed
  320. newstats.Changed += stats.Changed
  321. }
  322. }
  323. return merged
  324. }
  325. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  326. fmt.Fprintln(writer, " days:")
  327. days := make([]int, len(result.Days))
  328. {
  329. i := 0
  330. for day := range result.Days {
  331. days[i] = day
  332. i++
  333. }
  334. }
  335. sort.Ints(days)
  336. for _, day := range days {
  337. fmt.Fprintf(writer, " %d:\n", day)
  338. rday := result.Days[day]
  339. devseq := make([]int, len(rday))
  340. {
  341. i := 0
  342. for dev := range rday {
  343. devseq[i] = dev
  344. i++
  345. }
  346. }
  347. sort.Ints(devseq)
  348. for _, dev := range devseq {
  349. stats := rday[dev]
  350. if dev == identity.AuthorMissing {
  351. dev = -1
  352. }
  353. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d]\n",
  354. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed)
  355. }
  356. }
  357. fmt.Fprintln(writer, " people:")
  358. for _, person := range result.reversedPeopleDict {
  359. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  360. }
  361. }
  362. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  363. message := pb.DevsAnalysisResults{}
  364. message.DevIndex = result.reversedPeopleDict
  365. message.Days = map[int32]*pb.DayDevs{}
  366. for day, devs := range result.Days {
  367. dd := &pb.DayDevs{}
  368. message.Days[int32(day)] = dd
  369. dd.Devs = map[int32]*pb.DevDay{}
  370. for dev, stats := range devs {
  371. if dev == identity.AuthorMissing {
  372. dev = -1
  373. }
  374. dd.Devs[int32(dev)] = &pb.DevDay{
  375. Commits: int32(stats.Commits),
  376. Added: int32(stats.Added),
  377. Changed: int32(stats.Changed),
  378. Removed: int32(stats.Removed),
  379. }
  380. }
  381. }
  382. serialized, err := proto.Marshal(&message)
  383. if err != nil {
  384. return err
  385. }
  386. _, err = writer.Write(serialized)
  387. return err
  388. }
  389. func init() {
  390. core.Registry.Register(&DevsAnalysis{})
  391. }