devs.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/src-d/go-git.v4"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  14. "gopkg.in/src-d/hercules.v8/internal/core"
  15. "gopkg.in/src-d/hercules.v8/internal/pb"
  16. items "gopkg.in/src-d/hercules.v8/internal/plumbing"
  17. "gopkg.in/src-d/hercules.v8/internal/plumbing/identity"
  18. "gopkg.in/src-d/hercules.v8/internal/yaml"
  19. )
// DevsAnalysis calculates the number of commits through time per developer.
// It also records the numbers of added, deleted and changed lines through time per developer.
// Those numbers are additionally measured per language.
//
// The per-day state lives in `days`; it is reset by Initialize(), filled by Consume()
// and handed out as a DevsResult by Finalize().
type DevsAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
	// into account. When false, Consume() returns early for commits with no tree changes.
	ConsiderEmptyCommits bool
	// days maps days to developers to stats:
	// <day index> -> <developer index> -> accumulated daily stats.
	days map[int]map[int]*DevDay
	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// It is set in Configure() from the identity.FactIdentityDetectorReversedPeopleDict fact.
	reversedPeopleDict []string
}
// DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
// per developer. The same type is produced by Deserialize() and MergeResults().
type DevsResult struct {
	// Days is <day index> -> <developer index> -> daily stats
	Days map[int]map[int]*DevDay
	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// Developer indexes used as keys in Days refer to positions in this slice, except for
	// identity.AuthorMissing which is kept as is by MergeResults() and written as -1
	// by the serializers.
	reversedPeopleDict []string
}
// LineStats holds the numbers of inserted, deleted and changed lines.
// It is used both for the per-developer daily totals (embedded in DevDay) and for the
// per-language breakdown (DevDay.Languages).
type LineStats struct {
	// Added is the number of added lines by a particular developer in a particular day.
	Added int
	// Removed is the number of removed lines by a particular developer in a particular day.
	Removed int
	// Changed is the number of changed lines by a particular developer in a particular day.
	Changed int
}
// DevDay is the statistics for a development day and a particular developer.
type DevDay struct {
	// Commits is the number of commits made by a particular developer in a particular day.
	Commits int
	// LineStats is embedded and holds the totals over all languages.
	LineStats
	// Languages carries fine-grained line stats per programming language.
	// The keys are the language names taken from the items.DependencyLanguages dependency;
	// the empty key is printed as "none" by the text serializer.
	Languages map[string]LineStats
}
const (
	// ConfigDevsConsiderEmptyCommits is the name of the option to set DevsAnalysis.ConsiderEmptyCommits.
	// Configure() looks this key up in the facts map and expects a bool value.
	ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
)
  63. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  64. func (devs *DevsAnalysis) Name() string {
  65. return "Devs"
  66. }
  67. // Provides returns the list of names of entities which are produced by this PipelineItem.
  68. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  69. // to this list. Also used by core.Registry to build the global map of providers.
  70. func (devs *DevsAnalysis) Provides() []string {
  71. return []string{}
  72. }
  73. // Requires returns the list of names of entities which are needed by this PipelineItem.
  74. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  75. // entities are Provides() upstream.
  76. func (devs *DevsAnalysis) Requires() []string {
  77. arr := [...]string{
  78. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyFileDiff,
  79. items.DependencyBlobCache, items.DependencyDay, items.DependencyLanguages}
  80. return arr[:]
  81. }
  82. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  83. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  84. options := [...]core.ConfigurationOption{{
  85. Name: ConfigDevsConsiderEmptyCommits,
  86. Description: "Take into account empty commits such as trivial merges.",
  87. Flag: "empty-commits",
  88. Type: core.BoolConfigurationOption,
  89. Default: false}}
  90. return options[:]
  91. }
  92. // Configure sets the properties previously published by ListConfigurationOptions().
  93. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  94. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  95. devs.ConsiderEmptyCommits = val
  96. }
  97. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  98. devs.reversedPeopleDict = val
  99. }
  100. return nil
  101. }
  102. // Flag for the command line switch which enables this analysis.
  103. func (devs *DevsAnalysis) Flag() string {
  104. return "devs"
  105. }
  106. // Description returns the text which explains what the analysis is doing.
  107. func (devs *DevsAnalysis) Description() string {
  108. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  109. }
  110. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  111. // calls. The repository which is going to be analysed is supplied as an argument.
  112. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  113. devs.days = map[int]map[int]*DevDay{}
  114. devs.OneShotMergeProcessor.Initialize()
  115. return nil
  116. }
  117. // Consume runs this PipelineItem on the next commit data.
  118. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  119. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  120. // This function returns the mapping with analysis results. The keys must be the same as
  121. // in Provides(). If there was an error, nil is returned.
  122. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  123. if !devs.ShouldConsumeCommit(deps) {
  124. return nil, nil
  125. }
  126. author := deps[identity.DependencyAuthor].(int)
  127. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  128. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  129. return nil, nil
  130. }
  131. day := deps[items.DependencyDay].(int)
  132. devsDay, exists := devs.days[day]
  133. if !exists {
  134. devsDay = map[int]*DevDay{}
  135. devs.days[day] = devsDay
  136. }
  137. dd, exists := devsDay[author]
  138. if !exists {
  139. dd = &DevDay{Languages: map[string]LineStats{}}
  140. devsDay[author] = dd
  141. }
  142. dd.Commits++
  143. if deps[core.DependencyIsMerge].(bool) {
  144. // we ignore merge commit diffs
  145. // TODO(vmarkovtsev): handle them
  146. return nil, nil
  147. }
  148. cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
  149. fileDiffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
  150. langs := deps[items.DependencyLanguages].(map[plumbing.Hash]string)
  151. for _, change := range treeDiff {
  152. action, err := change.Action()
  153. if err != nil {
  154. return nil, err
  155. }
  156. switch action {
  157. case merkletrie.Insert:
  158. blob := cache[change.To.TreeEntry.Hash]
  159. lines, err := blob.CountLines()
  160. if err != nil {
  161. // binary
  162. continue
  163. }
  164. dd.Added += lines
  165. lang := langs[change.To.TreeEntry.Hash]
  166. langStats := dd.Languages[lang]
  167. dd.Languages[lang] = LineStats{
  168. Added: langStats.Added + lines,
  169. Removed: langStats.Removed,
  170. Changed: langStats.Changed,
  171. }
  172. case merkletrie.Delete:
  173. blob := cache[change.From.TreeEntry.Hash]
  174. lines, err := blob.CountLines()
  175. if err != nil {
  176. // binary
  177. continue
  178. }
  179. dd.Removed += lines
  180. lang := langs[change.From.TreeEntry.Hash]
  181. langStats := dd.Languages[lang]
  182. dd.Languages[lang] = LineStats{
  183. Added: langStats.Added,
  184. Removed: langStats.Removed + lines,
  185. Changed: langStats.Changed,
  186. }
  187. case merkletrie.Modify:
  188. lang := langs[change.To.TreeEntry.Hash]
  189. thisDiffs := fileDiffs[change.To.Name]
  190. var removedPending int
  191. for _, edit := range thisDiffs.Diffs {
  192. switch edit.Type {
  193. case diffmatchpatch.DiffEqual:
  194. if removedPending > 0 {
  195. dd.Removed += removedPending
  196. langStats := dd.Languages[lang]
  197. dd.Languages[lang] = LineStats{
  198. Added: langStats.Added,
  199. Removed: langStats.Removed + removedPending,
  200. Changed: langStats.Changed,
  201. }
  202. }
  203. removedPending = 0
  204. case diffmatchpatch.DiffInsert:
  205. added := utf8.RuneCountInString(edit.Text)
  206. if removedPending > added {
  207. removed := removedPending - added
  208. dd.Changed += added
  209. dd.Removed += removed
  210. langStats := dd.Languages[lang]
  211. dd.Languages[lang] = LineStats{
  212. Added: langStats.Added,
  213. Removed: langStats.Removed + removed,
  214. Changed: langStats.Changed + added,
  215. }
  216. } else {
  217. added := added - removedPending
  218. dd.Changed += removedPending
  219. dd.Added += added
  220. langStats := dd.Languages[lang]
  221. dd.Languages[lang] = LineStats{
  222. Added: langStats.Added + added,
  223. Removed: langStats.Removed,
  224. Changed: langStats.Changed + removedPending,
  225. }
  226. }
  227. removedPending = 0
  228. case diffmatchpatch.DiffDelete:
  229. removedPending = utf8.RuneCountInString(edit.Text)
  230. }
  231. }
  232. if removedPending > 0 {
  233. dd.Removed += removedPending
  234. langStats := dd.Languages[lang]
  235. dd.Languages[lang] = LineStats{
  236. Added: langStats.Added,
  237. Removed: langStats.Removed + removedPending,
  238. Changed: langStats.Changed,
  239. }
  240. }
  241. }
  242. }
  243. return nil, nil
  244. }
  245. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  246. func (devs *DevsAnalysis) Finalize() interface{} {
  247. return DevsResult{
  248. Days: devs.days,
  249. reversedPeopleDict: devs.reversedPeopleDict,
  250. }
  251. }
  252. // Fork clones this pipeline item.
  253. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  254. return core.ForkSamePipelineItem(devs, n)
  255. }
  256. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  257. // The text format is YAML and the bytes format is Protocol Buffers.
  258. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  259. devsResult := result.(DevsResult)
  260. if binary {
  261. return devs.serializeBinary(&devsResult, writer)
  262. }
  263. devs.serializeText(&devsResult, writer)
  264. return nil
  265. }
  266. // Deserialize converts the specified protobuf bytes to DevsResult.
  267. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  268. message := pb.DevsAnalysisResults{}
  269. err := proto.Unmarshal(pbmessage, &message)
  270. if err != nil {
  271. return nil, err
  272. }
  273. days := map[int]map[int]*DevDay{}
  274. for day, dd := range message.Days {
  275. rdd := map[int]*DevDay{}
  276. days[int(day)] = rdd
  277. for dev, stats := range dd.Devs {
  278. if dev == -1 {
  279. dev = identity.AuthorMissing
  280. }
  281. languages := map[string]LineStats{}
  282. rdd[int(dev)] = &DevDay{
  283. Commits: int(stats.Commits),
  284. LineStats: LineStats{
  285. Added: int(stats.Stats.Added),
  286. Removed: int(stats.Stats.Removed),
  287. Changed: int(stats.Stats.Changed),
  288. },
  289. Languages: languages,
  290. }
  291. for lang, ls := range stats.Languages {
  292. languages[lang] = LineStats{
  293. Added: int(ls.Added),
  294. Removed: int(ls.Removed),
  295. Changed: int(ls.Changed),
  296. }
  297. }
  298. }
  299. }
  300. result := DevsResult{
  301. Days: days,
  302. reversedPeopleDict: message.DevIndex,
  303. }
  304. return result, nil
  305. }
  306. // MergeResults combines two DevsAnalysis-es together.
  307. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  308. cr1 := r1.(DevsResult)
  309. cr2 := r2.(DevsResult)
  310. merged := DevsResult{}
  311. type devIndexPair struct {
  312. Index1 int
  313. Index2 int
  314. }
  315. devIndex := map[string]devIndexPair{}
  316. for dev, devName := range cr1.reversedPeopleDict {
  317. devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
  318. }
  319. for dev, devName := range cr2.reversedPeopleDict {
  320. devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
  321. }
  322. jointDevSeq := make([]string, len(devIndex))
  323. {
  324. i := 0
  325. for dev := range devIndex {
  326. jointDevSeq[i] = dev
  327. i++
  328. }
  329. }
  330. sort.Strings(jointDevSeq)
  331. merged.reversedPeopleDict = jointDevSeq
  332. invDevIndex1 := map[int]int{}
  333. invDevIndex2 := map[int]int{}
  334. for i, dev := range jointDevSeq {
  335. pair := devIndex[dev]
  336. if pair.Index1 > 0 {
  337. invDevIndex1[pair.Index1-1] = i
  338. }
  339. if pair.Index2 > 0 {
  340. invDevIndex2[pair.Index2-1] = i
  341. }
  342. }
  343. newDays := map[int]map[int]*DevDay{}
  344. merged.Days = newDays
  345. for day, dd := range cr1.Days {
  346. newdd, exists := newDays[day]
  347. if !exists {
  348. newdd = map[int]*DevDay{}
  349. newDays[day] = newdd
  350. }
  351. for dev, stats := range dd {
  352. newdev := dev
  353. if newdev != identity.AuthorMissing {
  354. newdev = invDevIndex1[dev]
  355. }
  356. newstats, exists := newdd[newdev]
  357. if !exists {
  358. newstats = &DevDay{Languages: map[string]LineStats{}}
  359. newdd[newdev] = newstats
  360. }
  361. newstats.Commits += stats.Commits
  362. newstats.Added += stats.Added
  363. newstats.Removed += stats.Removed
  364. newstats.Changed += stats.Changed
  365. for lang, ls := range stats.Languages {
  366. prev := newstats.Languages[lang]
  367. newstats.Languages[lang] = LineStats{
  368. Added: prev.Added + ls.Added,
  369. Removed: prev.Removed + ls.Removed,
  370. Changed: prev.Changed + ls.Changed,
  371. }
  372. }
  373. }
  374. }
  375. for day, dd := range cr2.Days {
  376. newdd, exists := newDays[day]
  377. if !exists {
  378. newdd = map[int]*DevDay{}
  379. newDays[day] = newdd
  380. }
  381. for dev, stats := range dd {
  382. newdev := dev
  383. if newdev != identity.AuthorMissing {
  384. newdev = invDevIndex2[dev]
  385. }
  386. newstats, exists := newdd[newdev]
  387. if !exists {
  388. newstats = &DevDay{Languages: map[string]LineStats{}}
  389. newdd[newdev] = newstats
  390. }
  391. newstats.Commits += stats.Commits
  392. newstats.Added += stats.Added
  393. newstats.Removed += stats.Removed
  394. newstats.Changed += stats.Changed
  395. for lang, ls := range stats.Languages {
  396. prev := newstats.Languages[lang]
  397. newstats.Languages[lang] = LineStats{
  398. Added: prev.Added + ls.Added,
  399. Removed: prev.Removed + ls.Removed,
  400. Changed: prev.Changed + ls.Changed,
  401. }
  402. }
  403. }
  404. }
  405. return merged
  406. }
  407. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  408. fmt.Fprintln(writer, " days:")
  409. days := make([]int, len(result.Days))
  410. {
  411. i := 0
  412. for day := range result.Days {
  413. days[i] = day
  414. i++
  415. }
  416. }
  417. sort.Ints(days)
  418. for _, day := range days {
  419. fmt.Fprintf(writer, " %d:\n", day)
  420. rday := result.Days[day]
  421. devseq := make([]int, len(rday))
  422. {
  423. i := 0
  424. for dev := range rday {
  425. devseq[i] = dev
  426. i++
  427. }
  428. }
  429. sort.Ints(devseq)
  430. for _, dev := range devseq {
  431. stats := rday[dev]
  432. if dev == identity.AuthorMissing {
  433. dev = -1
  434. }
  435. var langs []string
  436. for lang, ls := range stats.Languages {
  437. if lang == "" {
  438. lang = "none"
  439. }
  440. langs = append(langs,
  441. fmt.Sprintf("%s: [%d, %d, %d]", lang, ls.Added, ls.Removed, ls.Changed))
  442. }
  443. sort.Strings(langs)
  444. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d, {%s}]\n",
  445. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed,
  446. strings.Join(langs, ", "))
  447. }
  448. }
  449. fmt.Fprintln(writer, " people:")
  450. for _, person := range result.reversedPeopleDict {
  451. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  452. }
  453. }
  454. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  455. message := pb.DevsAnalysisResults{}
  456. message.DevIndex = result.reversedPeopleDict
  457. message.Days = map[int32]*pb.DayDevs{}
  458. for day, devs := range result.Days {
  459. dd := &pb.DayDevs{}
  460. message.Days[int32(day)] = dd
  461. dd.Devs = map[int32]*pb.DevDay{}
  462. for dev, stats := range devs {
  463. if dev == identity.AuthorMissing {
  464. dev = -1
  465. }
  466. languages := map[string]*pb.LineStats{}
  467. dd.Devs[int32(dev)] = &pb.DevDay{
  468. Commits: int32(stats.Commits),
  469. Stats: &pb.LineStats{
  470. Added: int32(stats.Added),
  471. Changed: int32(stats.Changed),
  472. Removed: int32(stats.Removed),
  473. },
  474. Languages: languages,
  475. }
  476. for lang, ls := range stats.Languages {
  477. languages[lang] = &pb.LineStats{
  478. Added: int32(ls.Added),
  479. Changed: int32(ls.Changed),
  480. Removed: int32(ls.Removed),
  481. }
  482. }
  483. }
  484. }
  485. serialized, err := proto.Marshal(&message)
  486. if err != nil {
  487. return err
  488. }
  489. _, err = writer.Write(serialized)
  490. return err
  491. }
  492. func init() {
  493. core.Registry.Register(&DevsAnalysis{})
  494. }