devs.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "github.com/gogo/protobuf/proto"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "gopkg.in/src-d/hercules.v9/internal/core"
  12. "gopkg.in/src-d/hercules.v9/internal/pb"
  13. items "gopkg.in/src-d/hercules.v9/internal/plumbing"
  14. "gopkg.in/src-d/hercules.v9/internal/plumbing/identity"
  15. "gopkg.in/src-d/hercules.v9/internal/yaml"
  16. )
  17. // DevsAnalysis calculates the number of commits through time per developer.
  18. // It also records the numbers of added, deleted and changed lines through time per developer.
  19. // Those numbers are additionally measured per language.
  20. type DevsAnalysis struct {
  21. core.NoopMerger
  22. core.OneShotMergeProcessor
  23. // ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
  24. // into account.
  25. ConsiderEmptyCommits bool
  26. // days maps days to developers to stats
  27. days map[int]map[int]*DevDay
  28. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  29. reversedPeopleDict []string
  30. }
  31. // DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
  32. // per developer.
  33. type DevsResult struct {
  34. // Days is <day index> -> <developer index> -> daily stats
  35. Days map[int]map[int]*DevDay
  36. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  37. reversedPeopleDict []string
  38. }
  39. // DevDay is the statistics for a development day and a particular developer.
  40. type DevDay struct {
  41. // Commits is the number of commits made by a particular developer in a particular day.
  42. Commits int
  43. items.LineStats
  44. // LanguagesDetection carries fine-grained line stats per programming language.
  45. Languages map[string]items.LineStats
  46. }
  // ConfigDevsConsiderEmptyCommits is the name of the configuration option which sets
  // DevsAnalysis.ConsiderEmptyCommits.
  const ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
  51. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  52. func (devs *DevsAnalysis) Name() string {
  53. return "Devs"
  54. }
  55. // Provides returns the list of names of entities which are produced by this PipelineItem.
  56. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  57. // to this list. Also used by core.Registry to build the global map of providers.
  58. func (devs *DevsAnalysis) Provides() []string {
  59. return []string{}
  60. }
  61. // Requires returns the list of names of entities which are needed by this PipelineItem.
  62. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  63. // entities are Provides() upstream.
  64. func (devs *DevsAnalysis) Requires() []string {
  65. arr := [...]string{
  66. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyDay,
  67. items.DependencyLanguages, items.DependencyLineStats}
  68. return arr[:]
  69. }
  70. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  71. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  72. options := [...]core.ConfigurationOption{{
  73. Name: ConfigDevsConsiderEmptyCommits,
  74. Description: "Take into account empty commits such as trivial merges.",
  75. Flag: "empty-commits",
  76. Type: core.BoolConfigurationOption,
  77. Default: false}}
  78. return options[:]
  79. }
  80. // Configure sets the properties previously published by ListConfigurationOptions().
  81. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  82. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  83. devs.ConsiderEmptyCommits = val
  84. }
  85. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  86. devs.reversedPeopleDict = val
  87. }
  88. return nil
  89. }
  90. // Flag for the command line switch which enables this analysis.
  91. func (devs *DevsAnalysis) Flag() string {
  92. return "devs"
  93. }
  94. // Description returns the text which explains what the analysis is doing.
  95. func (devs *DevsAnalysis) Description() string {
  96. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  97. }
  98. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  99. // calls. The repository which is going to be analysed is supplied as an argument.
  100. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  101. devs.days = map[int]map[int]*DevDay{}
  102. devs.OneShotMergeProcessor.Initialize()
  103. return nil
  104. }
  105. // Consume runs this PipelineItem on the next commit data.
  106. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  107. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  108. // This function returns the mapping with analysis results. The keys must be the same as
  109. // in Provides(). If there was an error, nil is returned.
  110. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  111. if !devs.ShouldConsumeCommit(deps) {
  112. return nil, nil
  113. }
  114. author := deps[identity.DependencyAuthor].(int)
  115. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  116. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  117. return nil, nil
  118. }
  119. day := deps[items.DependencyDay].(int)
  120. devsDay, exists := devs.days[day]
  121. if !exists {
  122. devsDay = map[int]*DevDay{}
  123. devs.days[day] = devsDay
  124. }
  125. dd, exists := devsDay[author]
  126. if !exists {
  127. dd = &DevDay{Languages: map[string]items.LineStats{}}
  128. devsDay[author] = dd
  129. }
  130. dd.Commits++
  131. if deps[core.DependencyIsMerge].(bool) {
  132. // we ignore merge commit diffs
  133. // TODO(vmarkovtsev): handle them
  134. return nil, nil
  135. }
  136. langs := deps[items.DependencyLanguages].(map[plumbing.Hash]string)
  137. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  138. for changeEntry, stats := range lineStats {
  139. dd.Added += stats.Added
  140. dd.Removed += stats.Removed
  141. dd.Changed += stats.Changed
  142. lang := langs[changeEntry.TreeEntry.Hash]
  143. langStats := dd.Languages[lang]
  144. dd.Languages[lang] = items.LineStats{
  145. Added: langStats.Added + stats.Added,
  146. Removed: langStats.Removed + stats.Removed,
  147. Changed: langStats.Changed + stats.Changed,
  148. }
  149. }
  150. return nil, nil
  151. }
  152. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  153. func (devs *DevsAnalysis) Finalize() interface{} {
  154. return DevsResult{
  155. Days: devs.days,
  156. reversedPeopleDict: devs.reversedPeopleDict,
  157. }
  158. }
  159. // Fork clones this pipeline item.
  160. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  161. return core.ForkSamePipelineItem(devs, n)
  162. }
  163. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  164. // The text format is YAML and the bytes format is Protocol Buffers.
  165. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  166. devsResult := result.(DevsResult)
  167. if binary {
  168. return devs.serializeBinary(&devsResult, writer)
  169. }
  170. devs.serializeText(&devsResult, writer)
  171. return nil
  172. }
  173. // Deserialize converts the specified protobuf bytes to DevsResult.
  174. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  175. message := pb.DevsAnalysisResults{}
  176. err := proto.Unmarshal(pbmessage, &message)
  177. if err != nil {
  178. return nil, err
  179. }
  180. days := map[int]map[int]*DevDay{}
  181. for day, dd := range message.Days {
  182. rdd := map[int]*DevDay{}
  183. days[int(day)] = rdd
  184. for dev, stats := range dd.Devs {
  185. if dev == -1 {
  186. dev = identity.AuthorMissing
  187. }
  188. languages := map[string]items.LineStats{}
  189. rdd[int(dev)] = &DevDay{
  190. Commits: int(stats.Commits),
  191. LineStats: items.LineStats{
  192. Added: int(stats.Stats.Added),
  193. Removed: int(stats.Stats.Removed),
  194. Changed: int(stats.Stats.Changed),
  195. },
  196. Languages: languages,
  197. }
  198. for lang, ls := range stats.Languages {
  199. languages[lang] = items.LineStats{
  200. Added: int(ls.Added),
  201. Removed: int(ls.Removed),
  202. Changed: int(ls.Changed),
  203. }
  204. }
  205. }
  206. }
  207. result := DevsResult{
  208. Days: days,
  209. reversedPeopleDict: message.DevIndex,
  210. }
  211. return result, nil
  212. }
  213. // MergeResults combines two DevsAnalysis-es together.
  214. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  215. cr1 := r1.(DevsResult)
  216. cr2 := r2.(DevsResult)
  217. merged := DevsResult{}
  218. type devIndexPair struct {
  219. Index1 int
  220. Index2 int
  221. }
  222. devIndex := map[string]devIndexPair{}
  223. for dev, devName := range cr1.reversedPeopleDict {
  224. devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
  225. }
  226. for dev, devName := range cr2.reversedPeopleDict {
  227. devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
  228. }
  229. jointDevSeq := make([]string, len(devIndex))
  230. {
  231. i := 0
  232. for dev := range devIndex {
  233. jointDevSeq[i] = dev
  234. i++
  235. }
  236. }
  237. sort.Strings(jointDevSeq)
  238. merged.reversedPeopleDict = jointDevSeq
  239. invDevIndex1 := map[int]int{}
  240. invDevIndex2 := map[int]int{}
  241. for i, dev := range jointDevSeq {
  242. pair := devIndex[dev]
  243. if pair.Index1 > 0 {
  244. invDevIndex1[pair.Index1-1] = i
  245. }
  246. if pair.Index2 > 0 {
  247. invDevIndex2[pair.Index2-1] = i
  248. }
  249. }
  250. newDays := map[int]map[int]*DevDay{}
  251. merged.Days = newDays
  252. for day, dd := range cr1.Days {
  253. newdd, exists := newDays[day]
  254. if !exists {
  255. newdd = map[int]*DevDay{}
  256. newDays[day] = newdd
  257. }
  258. for dev, stats := range dd {
  259. newdev := dev
  260. if newdev != identity.AuthorMissing {
  261. newdev = invDevIndex1[dev]
  262. }
  263. newstats, exists := newdd[newdev]
  264. if !exists {
  265. newstats = &DevDay{Languages: map[string]items.LineStats{}}
  266. newdd[newdev] = newstats
  267. }
  268. newstats.Commits += stats.Commits
  269. newstats.Added += stats.Added
  270. newstats.Removed += stats.Removed
  271. newstats.Changed += stats.Changed
  272. for lang, ls := range stats.Languages {
  273. prev := newstats.Languages[lang]
  274. newstats.Languages[lang] = items.LineStats{
  275. Added: prev.Added + ls.Added,
  276. Removed: prev.Removed + ls.Removed,
  277. Changed: prev.Changed + ls.Changed,
  278. }
  279. }
  280. }
  281. }
  282. for day, dd := range cr2.Days {
  283. newdd, exists := newDays[day]
  284. if !exists {
  285. newdd = map[int]*DevDay{}
  286. newDays[day] = newdd
  287. }
  288. for dev, stats := range dd {
  289. newdev := dev
  290. if newdev != identity.AuthorMissing {
  291. newdev = invDevIndex2[dev]
  292. }
  293. newstats, exists := newdd[newdev]
  294. if !exists {
  295. newstats = &DevDay{Languages: map[string]items.LineStats{}}
  296. newdd[newdev] = newstats
  297. }
  298. newstats.Commits += stats.Commits
  299. newstats.Added += stats.Added
  300. newstats.Removed += stats.Removed
  301. newstats.Changed += stats.Changed
  302. for lang, ls := range stats.Languages {
  303. prev := newstats.Languages[lang]
  304. newstats.Languages[lang] = items.LineStats{
  305. Added: prev.Added + ls.Added,
  306. Removed: prev.Removed + ls.Removed,
  307. Changed: prev.Changed + ls.Changed,
  308. }
  309. }
  310. }
  311. }
  312. return merged
  313. }
  314. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  315. fmt.Fprintln(writer, " days:")
  316. days := make([]int, len(result.Days))
  317. {
  318. i := 0
  319. for day := range result.Days {
  320. days[i] = day
  321. i++
  322. }
  323. }
  324. sort.Ints(days)
  325. for _, day := range days {
  326. fmt.Fprintf(writer, " %d:\n", day)
  327. rday := result.Days[day]
  328. devseq := make([]int, len(rday))
  329. {
  330. i := 0
  331. for dev := range rday {
  332. devseq[i] = dev
  333. i++
  334. }
  335. }
  336. sort.Ints(devseq)
  337. for _, dev := range devseq {
  338. stats := rday[dev]
  339. if dev == identity.AuthorMissing {
  340. dev = -1
  341. }
  342. var langs []string
  343. for lang, ls := range stats.Languages {
  344. if lang == "" {
  345. lang = "none"
  346. }
  347. langs = append(langs,
  348. fmt.Sprintf("%s: [%d, %d, %d]", lang, ls.Added, ls.Removed, ls.Changed))
  349. }
  350. sort.Strings(langs)
  351. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d, {%s}]\n",
  352. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed,
  353. strings.Join(langs, ", "))
  354. }
  355. }
  356. fmt.Fprintln(writer, " people:")
  357. for _, person := range result.reversedPeopleDict {
  358. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  359. }
  360. }
  361. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  362. message := pb.DevsAnalysisResults{}
  363. message.DevIndex = result.reversedPeopleDict
  364. message.Days = map[int32]*pb.DayDevs{}
  365. for day, devs := range result.Days {
  366. dd := &pb.DayDevs{}
  367. message.Days[int32(day)] = dd
  368. dd.Devs = map[int32]*pb.DevDay{}
  369. for dev, stats := range devs {
  370. if dev == identity.AuthorMissing {
  371. dev = -1
  372. }
  373. languages := map[string]*pb.LineStats{}
  374. dd.Devs[int32(dev)] = &pb.DevDay{
  375. Commits: int32(stats.Commits),
  376. Stats: &pb.LineStats{
  377. Added: int32(stats.Added),
  378. Changed: int32(stats.Changed),
  379. Removed: int32(stats.Removed),
  380. },
  381. Languages: languages,
  382. }
  383. for lang, ls := range stats.Languages {
  384. languages[lang] = &pb.LineStats{
  385. Added: int32(ls.Added),
  386. Changed: int32(ls.Changed),
  387. Removed: int32(ls.Removed),
  388. }
  389. }
  390. }
  391. }
  392. serialized, err := proto.Marshal(&message)
  393. if err != nil {
  394. return err
  395. }
  396. _, err = writer.Write(serialized)
  397. return err
  398. }
  399. func init() {
  400. core.Registry.Register(&DevsAnalysis{})
  401. }