devs.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "github.com/gogo/protobuf/proto"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "gopkg.in/src-d/hercules.v10/internal/core"
  12. "gopkg.in/src-d/hercules.v10/internal/pb"
  13. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  14. "gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
  15. "gopkg.in/src-d/hercules.v10/internal/yaml"
  16. )
  17. // DevsAnalysis calculates the number of commits through time per developer.
  18. // It also records the numbers of added, deleted and changed lines through time per developer.
  19. // Those numbers are additionally measured per language.
  20. type DevsAnalysis struct {
  21. core.NoopMerger
  22. core.OneShotMergeProcessor
  23. // ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
  24. // into account.
  25. ConsiderEmptyCommits bool
  26. // ticks maps ticks to developers to stats
  27. ticks map[int]map[int]*DevTick
  28. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  29. reversedPeopleDict []string
  30. l core.Logger
  31. }
  32. // DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
  33. // per developer.
  34. type DevsResult struct {
  35. // Ticks is <tick index> -> <developer index> -> daily stats
  36. Ticks map[int]map[int]*DevTick
  37. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  38. reversedPeopleDict []string
  39. }
  40. // DevTick is the statistics for a development tick and a particular developer.
  41. type DevTick struct {
  42. // Commits is the number of commits made by a particular developer in a particular tick.
  43. Commits int
  44. items.LineStats
  45. // LanguagesDetection carries fine-grained line stats per programming language.
  46. Languages map[string]items.LineStats
  47. }
  48. const (
  49. // ConfigDevsConsiderEmptyCommits is the name of the option to set DevsAnalysis.ConsiderEmptyCommits.
  50. ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
  51. )
  52. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  53. func (devs *DevsAnalysis) Name() string {
  54. return "Devs"
  55. }
  56. // Provides returns the list of names of entities which are produced by this PipelineItem.
  57. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  58. // to this list. Also used by core.Registry to build the global map of providers.
  59. func (devs *DevsAnalysis) Provides() []string {
  60. return []string{}
  61. }
  62. // Requires returns the list of names of entities which are needed by this PipelineItem.
  63. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  64. // entities are Provides() upstream.
  65. func (devs *DevsAnalysis) Requires() []string {
  66. arr := [...]string{
  67. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyTick,
  68. items.DependencyLanguages, items.DependencyLineStats}
  69. return arr[:]
  70. }
  71. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  72. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  73. options := [...]core.ConfigurationOption{{
  74. Name: ConfigDevsConsiderEmptyCommits,
  75. Description: "Take into account empty commits such as trivial merges.",
  76. Flag: "empty-commits",
  77. Type: core.BoolConfigurationOption,
  78. Default: false}}
  79. return options[:]
  80. }
  81. // Configure sets the properties previously published by ListConfigurationOptions().
  82. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  83. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  84. devs.l = l
  85. }
  86. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  87. devs.ConsiderEmptyCommits = val
  88. }
  89. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  90. devs.reversedPeopleDict = val
  91. }
  92. return nil
  93. }
  94. // Flag for the command line switch which enables this analysis.
  95. func (devs *DevsAnalysis) Flag() string {
  96. return "devs"
  97. }
  98. // Description returns the text which explains what the analysis is doing.
  99. func (devs *DevsAnalysis) Description() string {
  100. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  101. }
  102. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  103. // calls. The repository which is going to be analysed is supplied as an argument.
  104. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  105. devs.l = core.NewLogger()
  106. devs.ticks = map[int]map[int]*DevTick{}
  107. devs.OneShotMergeProcessor.Initialize()
  108. return nil
  109. }
  110. // Consume runs this PipelineItem on the next commit data.
  111. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  112. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  113. // This function returns the mapping with analysis results. The keys must be the same as
  114. // in Provides(). If there was an error, nil is returned.
  115. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  116. if !devs.ShouldConsumeCommit(deps) {
  117. return nil, nil
  118. }
  119. author := deps[identity.DependencyAuthor].(int)
  120. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  121. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  122. return nil, nil
  123. }
  124. tick := deps[items.DependencyTick].(int)
  125. devstick, exists := devs.ticks[tick]
  126. if !exists {
  127. devstick = map[int]*DevTick{}
  128. devs.ticks[tick] = devstick
  129. }
  130. dd, exists := devstick[author]
  131. if !exists {
  132. dd = &DevTick{Languages: map[string]items.LineStats{}}
  133. devstick[author] = dd
  134. }
  135. dd.Commits++
  136. if deps[core.DependencyIsMerge].(bool) {
  137. // we ignore merge commit diffs
  138. // TODO(vmarkovtsev): handle them
  139. return nil, nil
  140. }
  141. langs := deps[items.DependencyLanguages].(map[plumbing.Hash]string)
  142. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  143. for changeEntry, stats := range lineStats {
  144. dd.Added += stats.Added
  145. dd.Removed += stats.Removed
  146. dd.Changed += stats.Changed
  147. lang := langs[changeEntry.TreeEntry.Hash]
  148. langStats := dd.Languages[lang]
  149. dd.Languages[lang] = items.LineStats{
  150. Added: langStats.Added + stats.Added,
  151. Removed: langStats.Removed + stats.Removed,
  152. Changed: langStats.Changed + stats.Changed,
  153. }
  154. }
  155. return nil, nil
  156. }
  157. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  158. func (devs *DevsAnalysis) Finalize() interface{} {
  159. return DevsResult{
  160. Ticks: devs.ticks,
  161. reversedPeopleDict: devs.reversedPeopleDict,
  162. }
  163. }
  164. // Fork clones this pipeline item.
  165. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  166. return core.ForkSamePipelineItem(devs, n)
  167. }
  168. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  169. // The text format is YAML and the bytes format is Protocol Buffers.
  170. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  171. devsResult := result.(DevsResult)
  172. if binary {
  173. return devs.serializeBinary(&devsResult, writer)
  174. }
  175. devs.serializeText(&devsResult, writer)
  176. return nil
  177. }
  178. // Deserialize converts the specified protobuf bytes to DevsResult.
  179. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  180. message := pb.DevsAnalysisResults{}
  181. err := proto.Unmarshal(pbmessage, &message)
  182. if err != nil {
  183. return nil, err
  184. }
  185. ticks := map[int]map[int]*DevTick{}
  186. for tick, dd := range message.Ticks {
  187. rdd := map[int]*DevTick{}
  188. ticks[int(tick)] = rdd
  189. for dev, stats := range dd.Devs {
  190. if dev == -1 {
  191. dev = identity.AuthorMissing
  192. }
  193. languages := map[string]items.LineStats{}
  194. rdd[int(dev)] = &DevTick{
  195. Commits: int(stats.Commits),
  196. LineStats: items.LineStats{
  197. Added: int(stats.Stats.Added),
  198. Removed: int(stats.Stats.Removed),
  199. Changed: int(stats.Stats.Changed),
  200. },
  201. Languages: languages,
  202. }
  203. for lang, ls := range stats.Languages {
  204. languages[lang] = items.LineStats{
  205. Added: int(ls.Added),
  206. Removed: int(ls.Removed),
  207. Changed: int(ls.Changed),
  208. }
  209. }
  210. }
  211. }
  212. result := DevsResult{
  213. Ticks: ticks,
  214. reversedPeopleDict: message.DevIndex,
  215. }
  216. return result, nil
  217. }
  218. // MergeResults combines two DevsAnalysis-es together.
  219. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  220. cr1 := r1.(DevsResult)
  221. cr2 := r2.(DevsResult)
  222. merged := DevsResult{}
  223. type devIndexPair struct {
  224. Index1 int
  225. Index2 int
  226. }
  227. devIndex := map[string]devIndexPair{}
  228. for dev, devName := range cr1.reversedPeopleDict {
  229. devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
  230. }
  231. for dev, devName := range cr2.reversedPeopleDict {
  232. devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
  233. }
  234. jointDevSeq := make([]string, len(devIndex))
  235. {
  236. i := 0
  237. for dev := range devIndex {
  238. jointDevSeq[i] = dev
  239. i++
  240. }
  241. }
  242. sort.Strings(jointDevSeq)
  243. merged.reversedPeopleDict = jointDevSeq
  244. invDevIndex1 := map[int]int{}
  245. invDevIndex2 := map[int]int{}
  246. for i, dev := range jointDevSeq {
  247. pair := devIndex[dev]
  248. if pair.Index1 > 0 {
  249. invDevIndex1[pair.Index1-1] = i
  250. }
  251. if pair.Index2 > 0 {
  252. invDevIndex2[pair.Index2-1] = i
  253. }
  254. }
  255. newticks := map[int]map[int]*DevTick{}
  256. merged.Ticks = newticks
  257. for tick, dd := range cr1.Ticks {
  258. newdd, exists := newticks[tick]
  259. if !exists {
  260. newdd = map[int]*DevTick{}
  261. newticks[tick] = newdd
  262. }
  263. for dev, stats := range dd {
  264. newdev := dev
  265. if newdev != identity.AuthorMissing {
  266. newdev = invDevIndex1[dev]
  267. }
  268. newstats, exists := newdd[newdev]
  269. if !exists {
  270. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  271. newdd[newdev] = newstats
  272. }
  273. newstats.Commits += stats.Commits
  274. newstats.Added += stats.Added
  275. newstats.Removed += stats.Removed
  276. newstats.Changed += stats.Changed
  277. for lang, ls := range stats.Languages {
  278. prev := newstats.Languages[lang]
  279. newstats.Languages[lang] = items.LineStats{
  280. Added: prev.Added + ls.Added,
  281. Removed: prev.Removed + ls.Removed,
  282. Changed: prev.Changed + ls.Changed,
  283. }
  284. }
  285. }
  286. }
  287. for tick, dd := range cr2.Ticks {
  288. newdd, exists := newticks[tick]
  289. if !exists {
  290. newdd = map[int]*DevTick{}
  291. newticks[tick] = newdd
  292. }
  293. for dev, stats := range dd {
  294. newdev := dev
  295. if newdev != identity.AuthorMissing {
  296. newdev = invDevIndex2[dev]
  297. }
  298. newstats, exists := newdd[newdev]
  299. if !exists {
  300. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  301. newdd[newdev] = newstats
  302. }
  303. newstats.Commits += stats.Commits
  304. newstats.Added += stats.Added
  305. newstats.Removed += stats.Removed
  306. newstats.Changed += stats.Changed
  307. for lang, ls := range stats.Languages {
  308. prev := newstats.Languages[lang]
  309. newstats.Languages[lang] = items.LineStats{
  310. Added: prev.Added + ls.Added,
  311. Removed: prev.Removed + ls.Removed,
  312. Changed: prev.Changed + ls.Changed,
  313. }
  314. }
  315. }
  316. }
  317. return merged
  318. }
  319. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  320. fmt.Fprintln(writer, " ticks:")
  321. ticks := make([]int, len(result.Ticks))
  322. {
  323. i := 0
  324. for tick := range result.Ticks {
  325. ticks[i] = tick
  326. i++
  327. }
  328. }
  329. sort.Ints(ticks)
  330. for _, tick := range ticks {
  331. fmt.Fprintf(writer, " %d:\n", tick)
  332. rtick := result.Ticks[tick]
  333. devseq := make([]int, len(rtick))
  334. {
  335. i := 0
  336. for dev := range rtick {
  337. devseq[i] = dev
  338. i++
  339. }
  340. }
  341. sort.Ints(devseq)
  342. for _, dev := range devseq {
  343. stats := rtick[dev]
  344. if dev == identity.AuthorMissing {
  345. dev = -1
  346. }
  347. var langs []string
  348. for lang, ls := range stats.Languages {
  349. if lang == "" {
  350. lang = "none"
  351. }
  352. langs = append(langs,
  353. fmt.Sprintf("%s: [%d, %d, %d]", lang, ls.Added, ls.Removed, ls.Changed))
  354. }
  355. sort.Strings(langs)
  356. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d, {%s}]\n",
  357. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed,
  358. strings.Join(langs, ", "))
  359. }
  360. }
  361. fmt.Fprintln(writer, " people:")
  362. for _, person := range result.reversedPeopleDict {
  363. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  364. }
  365. }
  366. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  367. message := pb.DevsAnalysisResults{}
  368. message.DevIndex = result.reversedPeopleDict
  369. message.Ticks = map[int32]*pb.TickDevs{}
  370. for tick, devs := range result.Ticks {
  371. dd := &pb.TickDevs{}
  372. message.Ticks[int32(tick)] = dd
  373. dd.Devs = map[int32]*pb.DevTick{}
  374. for dev, stats := range devs {
  375. if dev == identity.AuthorMissing {
  376. dev = -1
  377. }
  378. languages := map[string]*pb.LineStats{}
  379. dd.Devs[int32(dev)] = &pb.DevTick{
  380. Commits: int32(stats.Commits),
  381. Stats: &pb.LineStats{
  382. Added: int32(stats.Added),
  383. Changed: int32(stats.Changed),
  384. Removed: int32(stats.Removed),
  385. },
  386. Languages: languages,
  387. }
  388. for lang, ls := range stats.Languages {
  389. languages[lang] = &pb.LineStats{
  390. Added: int32(ls.Added),
  391. Changed: int32(ls.Changed),
  392. Removed: int32(ls.Removed),
  393. }
  394. }
  395. }
  396. }
  397. serialized, err := proto.Marshal(&message)
  398. if err != nil {
  399. return err
  400. }
  401. _, err = writer.Write(serialized)
  402. return err
  403. }
  404. func init() {
  405. core.Registry.Register(&DevsAnalysis{})
  406. }