devs.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "github.com/gogo/protobuf/proto"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "gopkg.in/src-d/hercules.v10/internal/core"
  12. "gopkg.in/src-d/hercules.v10/internal/pb"
  13. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  14. "gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
  15. "gopkg.in/src-d/hercules.v10/internal/yaml"
  16. )
// DevsAnalysis calculates the number of commits through time per developer.
// It also records the numbers of added, deleted and changed lines through time per developer.
// Those numbers are additionally measured per language.
type DevsAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// ConsiderEmptyCommits indicates whether empty commits (e.g., merges) should be taken
	// into account. Consume() skips commits with an empty tree diff when this is false.
	ConsiderEmptyCommits bool

	// ticks maps ticks to developers to stats: <tick index> -> <developer index> -> *DevTick.
	// It is (re)created by Initialize() and filled in by Consume().
	ticks map[int]map[int]*DevTick
	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// It is taken from the facts in Configure() and handed over to DevsResult in Finalize().
	reversedPeopleDict []string

	// l is the logger of this item; it is assigned in Configure() and Initialize().
	l core.Logger
}
// DevsResult is returned by DevsAnalysis.Finalize() and carries the daily statistics
// per developer. It is also what Deserialize() produces and what Serialize() and
// MergeResults() expect as their input.
type DevsResult struct {
	// Ticks is <tick index> -> <developer index> -> daily stats
	Ticks map[int]map[int]*DevTick

	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
	// The serializers write it out as the list of people (text) / DevIndex (binary).
	reversedPeopleDict []string
}
// DevTick is the statistics for a development tick and a particular developer.
type DevTick struct {
	// Commits is the number of commits made by a particular developer in a particular tick.
	Commits int
	// LineStats is the developer's total of added, removed and changed lines in the tick,
	// accumulated over every change regardless of the language.
	items.LineStats
	// Languages carries fine-grained line stats per programming language.
	// The key is the language name; Consume() files changes with no detected language
	// under the empty string.
	Languages map[string]items.LineStats
}
const (
	// ConfigDevsConsiderEmptyCommits is the name of the option to set DevsAnalysis.ConsiderEmptyCommits.
	// Configure() looks this key up in the facts and applies the value if it is a bool.
	ConfigDevsConsiderEmptyCommits = "Devs.ConsiderEmptyCommits"
)
  52. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  53. func (devs *DevsAnalysis) Name() string {
  54. return "Devs"
  55. }
  56. // Provides returns the list of names of entities which are produced by this PipelineItem.
  57. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  58. // to this list. Also used by core.Registry to build the global map of providers.
  59. func (devs *DevsAnalysis) Provides() []string {
  60. return []string{}
  61. }
  62. // Requires returns the list of names of entities which are needed by this PipelineItem.
  63. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  64. // entities are Provides() upstream.
  65. func (devs *DevsAnalysis) Requires() []string {
  66. arr := [...]string{
  67. identity.DependencyAuthor, items.DependencyTreeChanges, items.DependencyTick,
  68. items.DependencyLanguages, items.DependencyLineStats}
  69. return arr[:]
  70. }
  71. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  72. func (devs *DevsAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  73. options := [...]core.ConfigurationOption{{
  74. Name: ConfigDevsConsiderEmptyCommits,
  75. Description: "Take into account empty commits such as trivial merges.",
  76. Flag: "empty-commits",
  77. Type: core.BoolConfigurationOption,
  78. Default: false}}
  79. return options[:]
  80. }
  81. // Configure sets the properties previously published by ListConfigurationOptions().
  82. func (devs *DevsAnalysis) Configure(facts map[string]interface{}) error {
  83. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  84. devs.l = l
  85. }
  86. if val, exists := facts[ConfigDevsConsiderEmptyCommits].(bool); exists {
  87. devs.ConsiderEmptyCommits = val
  88. }
  89. if val, exists := facts[identity.FactIdentityDetectorReversedPeopleDict].([]string); exists {
  90. devs.reversedPeopleDict = val
  91. }
  92. return nil
  93. }
  94. // Flag for the command line switch which enables this analysis.
  95. func (devs *DevsAnalysis) Flag() string {
  96. return "devs"
  97. }
  98. // Description returns the text which explains what the analysis is doing.
  99. func (devs *DevsAnalysis) Description() string {
  100. return "Calculates the number of commits, added, removed and changed lines per developer through time."
  101. }
  102. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  103. // calls. The repository which is going to be analysed is supplied as an argument.
  104. func (devs *DevsAnalysis) Initialize(repository *git.Repository) error {
  105. devs.l = core.NewLogger()
  106. devs.ticks = map[int]map[int]*DevTick{}
  107. devs.OneShotMergeProcessor.Initialize()
  108. return nil
  109. }
  110. // Consume runs this PipelineItem on the next commit data.
  111. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  112. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  113. // This function returns the mapping with analysis results. The keys must be the same as
  114. // in Provides(). If there was an error, nil is returned.
  115. func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  116. if !devs.ShouldConsumeCommit(deps) {
  117. return nil, nil
  118. }
  119. author := deps[identity.DependencyAuthor].(int)
  120. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  121. if len(treeDiff) == 0 && !devs.ConsiderEmptyCommits {
  122. return nil, nil
  123. }
  124. tick := deps[items.DependencyTick].(int)
  125. devstick, exists := devs.ticks[tick]
  126. if !exists {
  127. devstick = map[int]*DevTick{}
  128. devs.ticks[tick] = devstick
  129. }
  130. dd, exists := devstick[author]
  131. if !exists {
  132. dd = &DevTick{Languages: map[string]items.LineStats{}}
  133. devstick[author] = dd
  134. }
  135. dd.Commits++
  136. if deps[core.DependencyIsMerge].(bool) {
  137. // we ignore merge commit diffs
  138. // TODO(vmarkovtsev): handle them
  139. return nil, nil
  140. }
  141. langs := deps[items.DependencyLanguages].(map[plumbing.Hash]string)
  142. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  143. for changeEntry, stats := range lineStats {
  144. dd.Added += stats.Added
  145. dd.Removed += stats.Removed
  146. dd.Changed += stats.Changed
  147. lang := langs[changeEntry.TreeEntry.Hash]
  148. langStats := dd.Languages[lang]
  149. dd.Languages[lang] = items.LineStats{
  150. Added: langStats.Added + stats.Added,
  151. Removed: langStats.Removed + stats.Removed,
  152. Changed: langStats.Changed + stats.Changed,
  153. }
  154. }
  155. return nil, nil
  156. }
  157. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  158. func (devs *DevsAnalysis) Finalize() interface{} {
  159. return DevsResult{
  160. Ticks: devs.ticks,
  161. reversedPeopleDict: devs.reversedPeopleDict,
  162. }
  163. }
  164. // Fork clones this pipeline item.
  165. func (devs *DevsAnalysis) Fork(n int) []core.PipelineItem {
  166. return core.ForkSamePipelineItem(devs, n)
  167. }
  168. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  169. // The text format is YAML and the bytes format is Protocol Buffers.
  170. func (devs *DevsAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  171. devsResult := result.(DevsResult)
  172. if binary {
  173. return devs.serializeBinary(&devsResult, writer)
  174. }
  175. devs.serializeText(&devsResult, writer)
  176. return nil
  177. }
  178. // Deserialize converts the specified protobuf bytes to DevsResult.
  179. func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  180. message := pb.DevsAnalysisResults{}
  181. err := proto.Unmarshal(pbmessage, &message)
  182. if err != nil {
  183. return nil, err
  184. }
  185. ticks := map[int]map[int]*DevTick{}
  186. for tick, dd := range message.Ticks {
  187. rdd := map[int]*DevTick{}
  188. ticks[int(tick)] = rdd
  189. for dev, stats := range dd.Devs {
  190. if dev == -1 {
  191. dev = identity.AuthorMissing
  192. }
  193. languages := map[string]items.LineStats{}
  194. rdd[int(dev)] = &DevTick{
  195. Commits: int(stats.Commits),
  196. LineStats: items.LineStats{
  197. Added: int(stats.Stats.Added),
  198. Removed: int(stats.Stats.Removed),
  199. Changed: int(stats.Stats.Changed),
  200. },
  201. Languages: languages,
  202. }
  203. for lang, ls := range stats.Languages {
  204. languages[lang] = items.LineStats{
  205. Added: int(ls.Added),
  206. Removed: int(ls.Removed),
  207. Changed: int(ls.Changed),
  208. }
  209. }
  210. }
  211. }
  212. result := DevsResult{
  213. Ticks: ticks,
  214. reversedPeopleDict: message.DevIndex,
  215. }
  216. return result, nil
  217. }
  218. // MergeResults combines two DevsAnalysis-es together.
  219. func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  220. cr1 := r1.(DevsResult)
  221. cr2 := r2.(DevsResult)
  222. merged := DevsResult{}
  223. var mergedIndex map[string]identity.MergedIndex
  224. mergedIndex, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
  225. cr1.reversedPeopleDict, cr2.reversedPeopleDict)
  226. newticks := map[int]map[int]*DevTick{}
  227. merged.Ticks = newticks
  228. for tick, dd := range cr1.Ticks {
  229. newdd, exists := newticks[tick]
  230. if !exists {
  231. newdd = map[int]*DevTick{}
  232. newticks[tick] = newdd
  233. }
  234. for dev, stats := range dd {
  235. newdev := dev
  236. if newdev != identity.AuthorMissing {
  237. newdev = mergedIndex[cr1.reversedPeopleDict[dev]].Final
  238. }
  239. newstats, exists := newdd[newdev]
  240. if !exists {
  241. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  242. newdd[newdev] = newstats
  243. }
  244. newstats.Commits += stats.Commits
  245. newstats.Added += stats.Added
  246. newstats.Removed += stats.Removed
  247. newstats.Changed += stats.Changed
  248. for lang, ls := range stats.Languages {
  249. prev := newstats.Languages[lang]
  250. newstats.Languages[lang] = items.LineStats{
  251. Added: prev.Added + ls.Added,
  252. Removed: prev.Removed + ls.Removed,
  253. Changed: prev.Changed + ls.Changed,
  254. }
  255. }
  256. }
  257. }
  258. for tick, dd := range cr2.Ticks {
  259. newdd, exists := newticks[tick]
  260. if !exists {
  261. newdd = map[int]*DevTick{}
  262. newticks[tick] = newdd
  263. }
  264. for dev, stats := range dd {
  265. newdev := dev
  266. if newdev != identity.AuthorMissing {
  267. newdev = mergedIndex[cr2.reversedPeopleDict[dev]].Final
  268. }
  269. newstats, exists := newdd[newdev]
  270. if !exists {
  271. newstats = &DevTick{Languages: map[string]items.LineStats{}}
  272. newdd[newdev] = newstats
  273. }
  274. newstats.Commits += stats.Commits
  275. newstats.Added += stats.Added
  276. newstats.Removed += stats.Removed
  277. newstats.Changed += stats.Changed
  278. for lang, ls := range stats.Languages {
  279. prev := newstats.Languages[lang]
  280. newstats.Languages[lang] = items.LineStats{
  281. Added: prev.Added + ls.Added,
  282. Removed: prev.Removed + ls.Removed,
  283. Changed: prev.Changed + ls.Changed,
  284. }
  285. }
  286. }
  287. }
  288. return merged
  289. }
  290. func (devs *DevsAnalysis) serializeText(result *DevsResult, writer io.Writer) {
  291. fmt.Fprintln(writer, " ticks:")
  292. ticks := make([]int, len(result.Ticks))
  293. {
  294. i := 0
  295. for tick := range result.Ticks {
  296. ticks[i] = tick
  297. i++
  298. }
  299. }
  300. sort.Ints(ticks)
  301. for _, tick := range ticks {
  302. fmt.Fprintf(writer, " %d:\n", tick)
  303. rtick := result.Ticks[tick]
  304. devseq := make([]int, len(rtick))
  305. {
  306. i := 0
  307. for dev := range rtick {
  308. devseq[i] = dev
  309. i++
  310. }
  311. }
  312. sort.Ints(devseq)
  313. for _, dev := range devseq {
  314. stats := rtick[dev]
  315. if dev == identity.AuthorMissing {
  316. dev = -1
  317. }
  318. var langs []string
  319. for lang, ls := range stats.Languages {
  320. if lang == "" {
  321. lang = "none"
  322. }
  323. langs = append(langs,
  324. fmt.Sprintf("%s: [%d, %d, %d]", lang, ls.Added, ls.Removed, ls.Changed))
  325. }
  326. sort.Strings(langs)
  327. fmt.Fprintf(writer, " %d: [%d, %d, %d, %d, {%s}]\n",
  328. dev, stats.Commits, stats.Added, stats.Removed, stats.Changed,
  329. strings.Join(langs, ", "))
  330. }
  331. }
  332. fmt.Fprintln(writer, " people:")
  333. for _, person := range result.reversedPeopleDict {
  334. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  335. }
  336. }
  337. func (devs *DevsAnalysis) serializeBinary(result *DevsResult, writer io.Writer) error {
  338. message := pb.DevsAnalysisResults{}
  339. message.DevIndex = result.reversedPeopleDict
  340. message.Ticks = map[int32]*pb.TickDevs{}
  341. for tick, devs := range result.Ticks {
  342. dd := &pb.TickDevs{}
  343. message.Ticks[int32(tick)] = dd
  344. dd.Devs = map[int32]*pb.DevTick{}
  345. for dev, stats := range devs {
  346. if dev == identity.AuthorMissing {
  347. dev = -1
  348. }
  349. languages := map[string]*pb.LineStats{}
  350. dd.Devs[int32(dev)] = &pb.DevTick{
  351. Commits: int32(stats.Commits),
  352. Stats: &pb.LineStats{
  353. Added: int32(stats.Added),
  354. Changed: int32(stats.Changed),
  355. Removed: int32(stats.Removed),
  356. },
  357. Languages: languages,
  358. }
  359. for lang, ls := range stats.Languages {
  360. languages[lang] = &pb.LineStats{
  361. Added: int32(ls.Added),
  362. Changed: int32(ls.Changed),
  363. Removed: int32(ls.Removed),
  364. }
  365. }
  366. }
  367. }
  368. serialized, err := proto.Marshal(&message)
  369. if err != nil {
  370. return err
  371. }
  372. _, err = writer.Write(serialized)
  373. return err
  374. }
  375. func init() {
  376. core.Registry.Register(&DevsAnalysis{})
  377. }