devs.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. package leaves
import (
	"errors"
	"fmt"
	"io"
	"sort"
	"strings"
	"time"

	"github.com/gogo/protobuf/proto"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/hercules.v10/internal/core"
	"gopkg.in/src-d/hercules.v10/internal/pb"
	items "gopkg.in/src-d/hercules.v10/internal/plumbing"
	"gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
	"gopkg.in/src-d/hercules.v10/internal/yaml"
)
// DevsAnalysis calculates the number of commits through time per developer.
// It also records the numbers of added, deleted and changed lines through time per developer.
// Those numbers are additionally measured per language.
type DevsAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
	// into account.
	ConsiderEmptyCommits bool
	// ticks maps <tick index> -> <developer index> -> accumulated stats.
	// It is (re)created by Initialize() and filled by Consume().
	ticks map[int]map[int]*DevTick
	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// Configure() reads it from facts[identity.FactIdentityDetectorReversedPeopleDict].
	reversedPeopleDict []string
	// tickSize references TicksSinceStart.TickSize.
	// Configure() reads it from facts[items.FactTickSize]; Initialize() fails if it is zero.
	tickSize time.Duration
	// l is the logger; Configure() takes it from facts[core.ConfigLogger] when present.
	l core.Logger
}
// DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
// per developer.
type DevsResult struct {
	// Ticks is <tick index> -> <developer index> -> daily stats
	Ticks map[int]map[int]*DevTick
	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// It is exposed read-only through GetIdentities().
	reversedPeopleDict []string
	// tickSize references TicksSinceStart.TickSize.
	// It is exposed read-only through GetTickSize().
	tickSize time.Duration
}
// DevTick is the statistics for a development tick and a particular developer.
type DevTick struct {
	// Commits is the number of commits made by a particular developer in a particular tick.
	Commits int
	// LineStats holds the Added, Removed and Changed line counters summed over all
	// the files touched by the developer in the tick.
	items.LineStats
	// Languages carries fine-grained line stats per programming language.
	// The key is the language name; the text serializer prints the empty key as "none".
	Languages map[string]items.LineStats
}
  54. const (
  55. // ConfigDevsConsiderEmptyCommits is the name of the option to set DevsAnalysis.ConsiderEmptyCommits.
  56. ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
  57. )
  58. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  59. func (devs *DevsAnalysis) Name() string {
  60. return "Devs"
  61. }
  62. // Provides returns the list of names of entities which are produced by this PipelineItem.
  63. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  64. // to this list. Also used by core.Registry to build the global map of providers.
  65. func (devs *DevsAnalysis) Provides() []string {
  66. return []string{}
  67. }
  68. // Requires returns the list of names of entities which are needed by this PipelineItem.
  69. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  70. // entities are Provides() upstream.
  71. func (devs *DevsAnalysis) Requires() []string {
  72. return []string{
  73. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyTick,
  74. items.DependencyLanguages, items.DependencyLineStats}
  75. }
  76. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  77. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  78. options := [...]core.ConfigurationOption{{
  79. Name: ConfigDevsConsiderEmptyCommits,
  80. Description: "Take into account empty commits such as trivial merges.",
  81. Flag: "empty-commits",
  82. Type: core.BoolConfigurationOption,
  83. Default: false}}
  84. return options[:]
  85. }
  86. // Configure sets the properties previously published by ListConfigurationOptions().
  87. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  88. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  89. devs.l = l
  90. }
  91. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  92. devs.ConsiderEmptyCommits = val
  93. }
  94. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  95. devs.reversedPeopleDict = val
  96. }
  97. if val, exists := facts[items.FactTickSize].(time.Duration); exists {
  98. devs.tickSize = val
  99. }
  100. return nil
  101. }
  102. // Flag for the command line switch which enables this analysis.
  103. func (devs *DevsAnalysis) Flag() string {
  104. return "devs"
  105. }
  106. // Description returns the text which explains what the analysis is doing.
  107. func (devs *DevsAnalysis) Description() string {
  108. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  109. }
  110. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  111. // calls. The repository which is going to be analysed is supplied as an argument.
  112. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  113. if devs.tickSize == 0 {
  114. return errors.New("tick size must be specified")
  115. }
  116. devs.l = core.NewLogger()
  117. devs.ticks = map[int]map[int]*DevTick{}
  118. devs.OneShotMergeProcessor.Initialize()
  119. return nil
  120. }
  121. // Consume runs this PipelineItem on the next commit data.
  122. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  123. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  124. // This function returns the mapping with analysis results. The keys must be the same as
  125. // in Provides(). If there was an error, nil is returned.
  126. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  127. if !devs.ShouldConsumeCommit(deps) {
  128. return nil, nil
  129. }
  130. author := deps[identity.DependencyAuthor].(int)
  131. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  132. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  133. return nil, nil
  134. }
  135. tick := deps[items.DependencyTick].(int)
  136. devstick, exists := devs.ticks[tick]
  137. if !exists {
  138. devstick = map[int]*DevTick{}
  139. devs.ticks[tick] = devstick
  140. }
  141. dd, exists := devstick[author]
  142. if !exists {
  143. dd = &DevTick{Languages: map[string]items.LineStats{}}
  144. devstick[author] = dd
  145. }
  146. dd.Commits++
  147. if deps[core.DependencyIsMerge].(bool) {
  148. // we ignore merge commit diffs
  149. // TODO(vmarkovtsev): handle them
  150. return nil, nil
  151. }
  152. langs := deps[items.DependencyLanguages].(map[plumbing.Hash]string)
  153. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  154. for changeEntry, stats := range lineStats {
  155. dd.Added += stats.Added
  156. dd.Removed += stats.Removed
  157. dd.Changed += stats.Changed
  158. lang := langs[changeEntry.TreeEntry.Hash]
  159. langStats := dd.Languages[lang]
  160. dd.Languages[lang] = items.LineStats{
  161. Added: langStats.Added + stats.Added,
  162. Removed: langStats.Removed + stats.Removed,
  163. Changed: langStats.Changed + stats.Changed,
  164. }
  165. }
  166. return nil, nil
  167. }
  168. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  169. func (devs *DevsAnalysis) Finalize() interface{} {
  170. return DevsResult{
  171. Ticks: devs.ticks,
  172. reversedPeopleDict: devs.reversedPeopleDict,
  173. tickSize: devs.tickSize,
  174. }
  175. }
  176. // Fork clones this pipeline item.
  177. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  178. return core.ForkSamePipelineItem(devs, n)
  179. }
  180. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  181. // The text format is YAML and the bytes format is Protocol Buffers.
  182. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  183. devsResult := result.(DevsResult)
  184. if binary {
  185. return devs.serializeBinary(&devsResult, writer)
  186. }
  187. devs.serializeText(&devsResult, writer)
  188. return nil
  189. }
  190. // Deserialize converts the specified protobuf bytes to DevsResult.
  191. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  192. message := pb.DevsAnalysisResults{}
  193. err := proto.Unmarshal(pbmessage, &message)
  194. if err != nil {
  195. return nil, err
  196. }
  197. ticks := map[int]map[int]*DevTick{}
  198. for tick, dd := range message.Ticks {
  199. rdd := map[int]*DevTick{}
  200. ticks[int(tick)] = rdd
  201. for dev, stats := range dd.Devs {
  202. if dev == -1 {
  203. dev = identity.AuthorMissing
  204. }
  205. languages := map[string]items.LineStats{}
  206. rdd[int(dev)] = &DevTick{
  207. Commits: int(stats.Commits),
  208. LineStats: items.LineStats{
  209. Added: int(stats.Stats.Added),
  210. Removed: int(stats.Stats.Removed),
  211. Changed: int(stats.Stats.Changed),
  212. },
  213. Languages: languages,
  214. }
  215. for lang, ls := range stats.Languages {
  216. languages[lang] = items.LineStats{
  217. Added: int(ls.Added),
  218. Removed: int(ls.Removed),
  219. Changed: int(ls.Changed),
  220. }
  221. }
  222. }
  223. }
  224. result := DevsResult{
  225. Ticks: ticks,
  226. reversedPeopleDict: message.DevIndex,
  227. tickSize: time.Duration(message.TickSize),
  228. }
  229. return result, nil
  230. }
  231. // MergeResults combines two DevsAnalysis-es together.
  232. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  233. cr1 := r1.(DevsResult)
  234. cr2 := r2.(DevsResult)
  235. if cr1.tickSize != cr2.tickSize {
  236. return fmt.Errorf("mismatching tick sizes (r1: %d, r2: %d) received",
  237. cr1.tickSize, cr2.tickSize)
  238. }
  239. t01 := items.FloorTime(c1.BeginTimeAsTime(), cr1.tickSize)
  240. t02 := items.FloorTime(c2.BeginTimeAsTime(), cr2.tickSize)
  241. t0 := t01
  242. if t02.Before(t0) {
  243. t0 = t02
  244. }
  245. offset1 := int(t01.Sub(t0) / cr1.tickSize)
  246. offset2 := int(t02.Sub(t0) / cr2.tickSize)
  247. merged := DevsResult{tickSize: cr1.tickSize}
  248. var mergedIndex map[string]identity.MergedIndex
  249. mergedIndex, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
  250. cr1.reversedPeopleDict, cr2.reversedPeopleDict)
  251. newticks := map[int]map[int]*DevTick{}
  252. merged.Ticks = newticks
  253. for tick, dd := range cr1.Ticks {
  254. tick += offset1
  255. newdd, exists := newticks[tick]
  256. if !exists {
  257. newdd = map[int]*DevTick{}
  258. newticks[tick] = newdd
  259. }
  260. for dev, stats := range dd {
  261. newdev := dev
  262. if newdev != identity.AuthorMissing {
  263. newdev = mergedIndex[cr1.reversedPeopleDict[dev]].Final
  264. }
  265. newstats, exists := newdd[newdev]
  266. if !exists {
  267. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  268. newdd[newdev] = newstats
  269. }
  270. newstats.Commits += stats.Commits
  271. newstats.Added += stats.Added
  272. newstats.Removed += stats.Removed
  273. newstats.Changed += stats.Changed
  274. for lang, ls := range stats.Languages {
  275. prev := newstats.Languages[lang]
  276. newstats.Languages[lang] = items.LineStats{
  277. Added: prev.Added + ls.Added,
  278. Removed: prev.Removed + ls.Removed,
  279. Changed: prev.Changed + ls.Changed,
  280. }
  281. }
  282. }
  283. }
  284. for tick, dd := range cr2.Ticks {
  285. tick += offset2
  286. newdd, exists := newticks[tick]
  287. if !exists {
  288. newdd = map[int]*DevTick{}
  289. newticks[tick] = newdd
  290. }
  291. for dev, stats := range dd {
  292. newdev := dev
  293. if newdev != identity.AuthorMissing {
  294. newdev = mergedIndex[cr2.reversedPeopleDict[dev]].Final
  295. }
  296. newstats, exists := newdd[newdev]
  297. if !exists {
  298. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  299. newdd[newdev] = newstats
  300. }
  301. newstats.Commits += stats.Commits
  302. newstats.Added += stats.Added
  303. newstats.Removed += stats.Removed
  304. newstats.Changed += stats.Changed
  305. for lang, ls := range stats.Languages {
  306. prev := newstats.Languages[lang]
  307. newstats.Languages[lang] = items.LineStats{
  308. Added: prev.Added + ls.Added,
  309. Removed: prev.Removed + ls.Removed,
  310. Changed: prev.Changed + ls.Changed,
  311. }
  312. }
  313. }
  314. }
  315. return merged
  316. }
  317. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  318. fmt.Fprintln(writer, " ticks:")
  319. ticks := make([]int, len(result.Ticks))
  320. {
  321. i := 0
  322. for tick := range result.Ticks {
  323. ticks[i] = tick
  324. i++
  325. }
  326. }
  327. sort.Ints(ticks)
  328. for _, tick := range ticks {
  329. fmt.Fprintf(writer, " %d:\n", tick)
  330. rtick := result.Ticks[tick]
  331. devseq := make([]int, len(rtick))
  332. {
  333. i := 0
  334. for dev := range rtick {
  335. devseq[i] = dev
  336. i++
  337. }
  338. }
  339. sort.Ints(devseq)
  340. for _, dev := range devseq {
  341. stats := rtick[dev]
  342. if dev == identity.AuthorMissing {
  343. dev = -1
  344. }
  345. var langs []string
  346. for lang, ls := range stats.Languages {
  347. if lang == "" {
  348. lang = "none"
  349. }
  350. langs = append(langs,
  351. fmt.Sprintf("%s: [%d, %d, %d]", lang, ls.Added, ls.Removed, ls.Changed))
  352. }
  353. sort.Strings(langs)
  354. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d, {%s}]\n",
  355. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed,
  356. strings.Join(langs, ", "))
  357. }
  358. }
  359. fmt.Fprintln(writer, " people:")
  360. for _, person := range result.reversedPeopleDict {
  361. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  362. }
  363. fmt.Fprintln(writer, " tick_size:", int(result.tickSize.Seconds()))
  364. }
  365. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  366. message := pb.DevsAnalysisResults{}
  367. message.DevIndex = result.reversedPeopleDict
  368. message.TickSize = int64(result.tickSize)
  369. message.Ticks = map[int32]*pb.TickDevs{}
  370. for tick, devs := range result.Ticks {
  371. dd := &pb.TickDevs{}
  372. message.Ticks[int32(tick)] = dd
  373. dd.Devs = map[int32]*pb.DevTick{}
  374. for dev, stats := range devs {
  375. if dev == identity.AuthorMissing {
  376. dev = -1
  377. }
  378. languages := map[string]*pb.LineStats{}
  379. dd.Devs[int32(dev)] = &pb.DevTick{
  380. Commits: int32(stats.Commits),
  381. Stats: &pb.LineStats{
  382. Added: int32(stats.Added),
  383. Changed: int32(stats.Changed),
  384. Removed: int32(stats.Removed),
  385. },
  386. Languages: languages,
  387. }
  388. for lang, ls := range stats.Languages {
  389. languages[lang] = &pb.LineStats{
  390. Added: int32(ls.Added),
  391. Changed: int32(ls.Changed),
  392. Removed: int32(ls.Removed),
  393. }
  394. }
  395. }
  396. }
  397. serialized, err := proto.Marshal(&message)
  398. if err != nil {
  399. return err
  400. }
  401. _, err = writer.Write(serialized)
  402. return err
  403. }
  404. // GetTickSize returns the tick size used to generate this devs analysis result.
  405. func (dr DevsResult) GetTickSize() time.Duration {
  406. return dr.tickSize
  407. }
  408. // GetIdentities returns the list of developer identities used to generate this devs analysis result.
  409. // The format is |-joined keys, see internals/plumbing/identity for details.
  410. func (dr DevsResult) GetIdentities() []string {
  411. return dr.reversedPeopleDict
  412. }
  413. func init() {
  414. core.Registry.Register(&DevsAnalysis{})
  415. }