burndown.go
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "os"
  8. "unicode/utf8"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/src-d/go-git.v4"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  14. )
// BurndownAnalysis allows to gather the line burndown statistics for a Git repository.
type BurndownAnalysis struct {
	// Granularity sets the size of each band - the number of days it spans.
	// Smaller values provide better resolution but require more work and eat more
	// memory. 30 days is usually enough.
	Granularity int
	// Sampling sets how detailed is the statistic - the size of the interval in
	// days between consecutive measurements. It is usually a good idea to set it
	// <= Granularity. Try 15 or 30.
	Sampling int
	// PeopleNumber is the number of developers for which to collect the burndown
	// stats. 0 disables it.
	PeopleNumber int
	// Debug activates the debugging mode. Analyse() runs slower in this mode
	// but it accurately checks all the intermediate states for invariant
	// violations.
	Debug bool
	// repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// globalStatus is the current daily alive number of lines; key is the number
	// of days from the beginning of the history.
	globalStatus map[int]int64
	// globalHistory is the periodic (one row per sample) snapshots of globalStatus.
	globalHistory [][]int64
	// fileHistories is the periodic snapshots of each file's status.
	fileHistories map[string][][]int64
	// peopleHistories is the periodic snapshots of each person's status.
	peopleHistories [][][]int64
	// files is the mapping <file path> -> *File.
	files map[string]*File
	// matrix is the mutual deletions and self insertions.
	matrix []map[int]int64
	// people is the people's individual time stats.
	people []map[int]int64
	// day is the most recent day index processed.
	day int
	// previousDay is the day from the previous sample period -
	// different from DaysSinceStart.previousDay.
	previousDay int
}
// BurndownResult is the aggregate value returned by Finalize().
type BurndownResult struct {
	// GlobalHistory is the burndown of the whole repository: one row per sample.
	GlobalHistory [][]int64
	// FileHistories maps file paths to their individual burndown histories.
	FileHistories map[string][][]int64
	// PeopleHistories is the per-developer burndown histories.
	PeopleHistories [][][]int64
	// PeopleMatrix is the dense author interaction matrix assembled in Finalize().
	PeopleMatrix [][]int64
}
  60. func (analyser *BurndownAnalysis) Name() string {
  61. return "Burndown"
  62. }
  63. func (analyser *BurndownAnalysis) Provides() []string {
  64. return []string{}
  65. }
  66. func (analyser *BurndownAnalysis) Requires() []string {
  67. arr := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
  68. return arr[:]
  69. }
  70. func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
  71. analyser.repository = repository
  72. analyser.globalStatus = map[int]int64{}
  73. analyser.globalHistory = [][]int64{}
  74. analyser.fileHistories = map[string][][]int64{}
  75. analyser.peopleHistories = make([][][]int64, analyser.PeopleNumber)
  76. analyser.files = map[string]*File{}
  77. analyser.matrix = make([]map[int]int64, analyser.PeopleNumber)
  78. analyser.people = make([]map[int]int64, analyser.PeopleNumber)
  79. analyser.day = 0
  80. analyser.previousDay = 0
  81. }
  82. func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  83. sampling := analyser.Sampling
  84. if sampling == 0 {
  85. sampling = 1
  86. }
  87. author := deps["author"].(int)
  88. analyser.day = deps["day"].(int)
  89. delta := (analyser.day / sampling) - (analyser.previousDay / sampling)
  90. if delta > 0 {
  91. analyser.previousDay = analyser.day
  92. gs, fss, pss := analyser.groupStatus()
  93. analyser.updateHistories(gs, fss, pss, delta)
  94. }
  95. cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
  96. treeDiffs := deps["renamed_changes"].(object.Changes)
  97. fileDiffs := deps["file_diff"].(map[string]FileDiffData)
  98. for _, change := range treeDiffs {
  99. action, err := change.Action()
  100. if err != nil {
  101. return nil, err
  102. }
  103. switch action {
  104. case merkletrie.Insert:
  105. err = analyser.handleInsertion(change, author, cache)
  106. case merkletrie.Delete:
  107. err = analyser.handleDeletion(change, author, cache)
  108. case merkletrie.Modify:
  109. err = analyser.handleModification(change, author, cache, fileDiffs)
  110. }
  111. if err != nil {
  112. return nil, err
  113. }
  114. }
  115. return nil, nil
  116. }
// Finalize() returns the list of snapshots of the cumulative line edit times
// and the similar lists for every file which is alive in HEAD.
// The number of snapshots (the first dimension >[]<[]int64) depends on
// Analyser.Sampling (the more Sampling, the less the value); the length of
// each snapshot depends on Analyser.Granularity (the more Granularity,
// the less the value).
func (analyser *BurndownAnalysis) Finalize() interface{} {
	// Flush the state accumulated since the last (possibly partial) sample.
	gs, fss, pss := analyser.groupStatus()
	analyser.updateHistories(gs, fss, pss, 1)
	for key, statuses := range analyser.fileHistories {
		if len(statuses) == len(analyser.globalHistory) {
			continue
		}
		// Files which appeared after the first sample get left-padded with
		// zero rows so that every history has the same number of snapshots.
		padding := make([][]int64, len(analyser.globalHistory)-len(statuses))
		for i := range padding {
			padding[i] = make([]int64, len(analyser.globalStatus))
		}
		analyser.fileHistories[key] = append(padding, statuses...)
	}
	// Densify the interaction maps. The remapping below sends SELF_AUTHOR to
	// column 0 and MISSING_AUTHOR to column 1; regular authors land at key+2.
	peopleMatrix := make([][]int64, analyser.PeopleNumber)
	for i, row := range analyser.matrix {
		mrow := make([]int64, analyser.PeopleNumber+2)
		peopleMatrix[i] = mrow
		for key, val := range row {
			if key == MISSING_AUTHOR {
				key = -1
			} else if key == SELF_AUTHOR {
				key = -2
			}
			mrow[key+2] = val
		}
	}
	return BurndownResult{
		GlobalHistory:   analyser.globalHistory,
		FileHistories:   analyser.fileHistories,
		PeopleHistories: analyser.peopleHistories,
		PeopleMatrix:    peopleMatrix}
}
  155. func checkClose(c io.Closer) {
  156. if err := c.Close(); err != nil {
  157. panic(err)
  158. }
  159. }
// countLines returns the number of lines in the blob's content, or an error
// with the message "binary" when the content is not valid UTF-8.
func countLines(file *object.Blob) (int, error) {
	reader, err := file.Reader()
	if err != nil {
		return 0, err
	}
	defer checkClose(reader)
	var scanner *bufio.Scanner
	buffer := make([]byte, bufio.MaxScanTokenSize)
	counter := 0
	// A line longer than the buffer makes the scanner stop with ErrTooLong;
	// in that case a fresh scanner is created to continue from the reader's
	// current offset (the partial token is first checked for binary content).
	for scanner == nil || scanner.Err() == bufio.ErrTooLong {
		if scanner != nil && !utf8.Valid(scanner.Bytes()) {
			return -1, errors.New("binary")
		}
		scanner = bufio.NewScanner(reader)
		scanner.Buffer(buffer, 0)
		for scanner.Scan() {
			if !utf8.Valid(scanner.Bytes()) {
				// Invalid UTF-8 is taken as a sign of binary content.
				return -1, errors.New("binary")
			}
			counter++
		}
	}
	return counter, nil
}
  184. func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
  185. if analyser.PeopleNumber == 0 {
  186. return day
  187. }
  188. result := day
  189. result |= person << 14
  190. // This effectively means max 16384 days (>44 years) and (131072 - 2) devs
  191. return result
  192. }
  193. func (analyser *BurndownAnalysis) unpackPersonWithDay(value int) (int, int) {
  194. if analyser.PeopleNumber == 0 {
  195. return MISSING_AUTHOR, value
  196. }
  197. return value >> 14, value & 0x3FFF
  198. }
  199. func (analyser *BurndownAnalysis) updateStatus(
  200. status interface{}, _ int, previous_time_ int, delta int) {
  201. _, previous_time := analyser.unpackPersonWithDay(previous_time_)
  202. status.(map[int]int64)[previous_time] += int64(delta)
  203. }
  204. func (analyser *BurndownAnalysis) updatePeople(people interface{}, _ int, previous_time_ int, delta int) {
  205. old_author, previous_time := analyser.unpackPersonWithDay(previous_time_)
  206. if old_author == MISSING_AUTHOR {
  207. return
  208. }
  209. casted := people.([]map[int]int64)
  210. stats := casted[old_author]
  211. if stats == nil {
  212. stats = map[int]int64{}
  213. casted[old_author] = stats
  214. }
  215. stats[previous_time] += int64(delta)
  216. }
  217. func (analyser *BurndownAnalysis) updateMatrix(
  218. matrix_ interface{}, current_time int, previous_time int, delta int) {
  219. matrix := matrix_.([]map[int]int64)
  220. new_author, _ := analyser.unpackPersonWithDay(current_time)
  221. old_author, _ := analyser.unpackPersonWithDay(previous_time)
  222. if old_author == MISSING_AUTHOR {
  223. return
  224. }
  225. if new_author == old_author && delta > 0 {
  226. new_author = SELF_AUTHOR
  227. }
  228. row := matrix[old_author]
  229. if row == nil {
  230. row = map[int]int64{}
  231. matrix[old_author] = row
  232. }
  233. cell, exists := row[new_author]
  234. if !exists {
  235. row[new_author] = 0
  236. cell = 0
  237. }
  238. row[new_author] = cell + int64(delta)
  239. }
// newFile constructs a File whose updates are reported to the global per-day
// counters and, when people tracking is enabled, also to the per-person
// counters and the author interaction matrix.
func (analyser *BurndownAnalysis) newFile(
	author int, day int, size int, global map[int]int64, people []map[int]int64,
	matrix []map[int]int64) *File {
	if analyser.PeopleNumber == 0 {
		// People tracking disabled: only global and per-file statuses.
		return NewFile(day, size, NewStatus(global, analyser.updateStatus),
			NewStatus(map[int]int64{}, analyser.updateStatus))
	}
	return NewFile(analyser.packPersonWithDay(author, day), size,
		NewStatus(global, analyser.updateStatus),
		NewStatus(map[int]int64{}, analyser.updateStatus),
		NewStatus(people, analyser.updatePeople),
		NewStatus(matrix, analyser.updateMatrix))
}
  253. func (analyser *BurndownAnalysis) handleInsertion(
  254. change *object.Change, author int, cache map[plumbing.Hash]*object.Blob) error {
  255. blob := cache[change.To.TreeEntry.Hash]
  256. lines, err := countLines(blob)
  257. if err != nil {
  258. if err.Error() == "binary" {
  259. return nil
  260. }
  261. return err
  262. }
  263. name := change.To.Name
  264. file, exists := analyser.files[name]
  265. if exists {
  266. return errors.New(fmt.Sprintf("file %s already exists", name))
  267. }
  268. file = analyser.newFile(
  269. author, analyser.day, lines, analyser.globalStatus, analyser.people, analyser.matrix)
  270. analyser.files[name] = file
  271. return nil
  272. }
  273. func (analyser *BurndownAnalysis) handleDeletion(
  274. change *object.Change, author int, cache map[plumbing.Hash]*object.Blob) error {
  275. blob := cache[change.From.TreeEntry.Hash]
  276. lines, err := countLines(blob)
  277. if err != nil {
  278. if err.Error() == "binary" {
  279. return nil
  280. }
  281. return err
  282. }
  283. name := change.From.Name
  284. file := analyser.files[name]
  285. file.Update(analyser.packPersonWithDay(author, analyser.day), 0, 0, lines)
  286. delete(analyser.files, name)
  287. return nil
  288. }
  289. func (analyser *BurndownAnalysis) handleModification(
  290. change *object.Change, author int, cache map[plumbing.Hash]*object.Blob,
  291. diffs map[string]FileDiffData) error {
  292. file, exists := analyser.files[change.From.Name]
  293. if !exists {
  294. return analyser.handleInsertion(change, author, cache)
  295. }
  296. // possible rename
  297. if change.To.Name != change.From.Name {
  298. err := analyser.handleRename(change.From.Name, change.To.Name)
  299. if err != nil {
  300. return err
  301. }
  302. }
  303. thisDiffs := diffs[change.To.Name]
  304. if file.Len() != thisDiffs.OldLinesOfCode {
  305. fmt.Fprintf(os.Stderr, "====TREE====\n%s", file.Dump())
  306. return errors.New(fmt.Sprintf("%s: internal integrity error src %d != %d %s -> %s",
  307. change.To.Name, thisDiffs.OldLinesOfCode, file.Len(),
  308. change.From.TreeEntry.Hash.String(), change.To.TreeEntry.Hash.String()))
  309. }
  310. // we do not call RunesToDiffLines so the number of lines equals
  311. // to the rune count
  312. position := 0
  313. pending := diffmatchpatch.Diff{Text: ""}
  314. apply := func(edit diffmatchpatch.Diff) {
  315. length := utf8.RuneCountInString(edit.Text)
  316. if edit.Type == diffmatchpatch.DiffInsert {
  317. file.Update(analyser.packPersonWithDay(author, analyser.day), position, length, 0)
  318. position += length
  319. } else {
  320. file.Update(analyser.packPersonWithDay(author, analyser.day), position, 0, length)
  321. }
  322. if analyser.Debug {
  323. file.Validate()
  324. }
  325. }
  326. for _, edit := range thisDiffs.Diffs {
  327. dump_before := ""
  328. if analyser.Debug {
  329. dump_before = file.Dump()
  330. }
  331. length := utf8.RuneCountInString(edit.Text)
  332. debug_error := func() {
  333. fmt.Fprintf(os.Stderr, "%s: internal diff error\n", change.To.Name)
  334. fmt.Fprintf(os.Stderr, "Update(%d, %d, %d (0), %d (0))\n", analyser.day, position,
  335. length, utf8.RuneCountInString(pending.Text))
  336. if dump_before != "" {
  337. fmt.Fprintf(os.Stderr, "====TREE BEFORE====\n%s====END====\n", dump_before)
  338. }
  339. fmt.Fprintf(os.Stderr, "====TREE AFTER====\n%s====END====\n", file.Dump())
  340. }
  341. switch edit.Type {
  342. case diffmatchpatch.DiffEqual:
  343. if pending.Text != "" {
  344. apply(pending)
  345. pending.Text = ""
  346. }
  347. position += length
  348. case diffmatchpatch.DiffInsert:
  349. if pending.Text != "" {
  350. if pending.Type == diffmatchpatch.DiffInsert {
  351. debug_error()
  352. return errors.New("DiffInsert may not appear after DiffInsert")
  353. }
  354. file.Update(analyser.packPersonWithDay(author, analyser.day), position, length,
  355. utf8.RuneCountInString(pending.Text))
  356. if analyser.Debug {
  357. file.Validate()
  358. }
  359. position += length
  360. pending.Text = ""
  361. } else {
  362. pending = edit
  363. }
  364. case diffmatchpatch.DiffDelete:
  365. if pending.Text != "" {
  366. debug_error()
  367. return errors.New("DiffDelete may not appear after DiffInsert/DiffDelete")
  368. }
  369. pending = edit
  370. default:
  371. debug_error()
  372. return errors.New(fmt.Sprintf("diff operation is not supported: %d", edit.Type))
  373. }
  374. }
  375. if pending.Text != "" {
  376. apply(pending)
  377. pending.Text = ""
  378. }
  379. if file.Len() != thisDiffs.NewLinesOfCode {
  380. return errors.New(fmt.Sprintf("%s: internal integrity error dst %d != %d",
  381. change.To.Name, thisDiffs.NewLinesOfCode, file.Len()))
  382. }
  383. return nil
  384. }
  385. func (analyser *BurndownAnalysis) handleRename(from, to string) error {
  386. file, exists := analyser.files[from]
  387. if !exists {
  388. return errors.New(fmt.Sprintf("file %s does not exist", from))
  389. }
  390. analyser.files[to] = file
  391. delete(analyser.files, from)
  392. return nil
  393. }
  394. func (analyser *BurndownAnalysis) groupStatus() ([]int64, map[string][]int64, [][]int64) {
  395. granularity := analyser.Granularity
  396. if granularity == 0 {
  397. granularity = 1
  398. }
  399. day := analyser.day
  400. day++
  401. adjust := 0
  402. if day%granularity != 0 {
  403. adjust = 1
  404. }
  405. global := make([]int64, day/granularity+adjust)
  406. var group int64
  407. for i := 0; i < day; i++ {
  408. group += analyser.globalStatus[i]
  409. if (i % granularity) == (granularity - 1) {
  410. global[i/granularity] = group
  411. group = 0
  412. }
  413. }
  414. if day%granularity != 0 {
  415. global[len(global)-1] = group
  416. }
  417. locals := make(map[string][]int64)
  418. for key, file := range analyser.files {
  419. status := make([]int64, day/granularity+adjust)
  420. var group int64
  421. for i := 0; i < day; i++ {
  422. group += file.Status(1).(map[int]int64)[i]
  423. if (i % granularity) == (granularity - 1) {
  424. status[i/granularity] = group
  425. group = 0
  426. }
  427. }
  428. if day%granularity != 0 {
  429. status[len(status)-1] = group
  430. }
  431. locals[key] = status
  432. }
  433. peoples := make([][]int64, len(analyser.people))
  434. for key, person := range analyser.people {
  435. status := make([]int64, day/granularity+adjust)
  436. var group int64
  437. for i := 0; i < day; i++ {
  438. group += person[i]
  439. if (i % granularity) == (granularity - 1) {
  440. status[i/granularity] = group
  441. group = 0
  442. }
  443. }
  444. if day%granularity != 0 {
  445. status[len(status)-1] = group
  446. }
  447. peoples[key] = status
  448. }
  449. return global, locals, peoples
  450. }
  451. func (analyser *BurndownAnalysis) updateHistories(
  452. globalStatus []int64, file_statuses map[string][]int64, people_statuses [][]int64, delta int) {
  453. for i := 0; i < delta; i++ {
  454. analyser.globalHistory = append(analyser.globalHistory, globalStatus)
  455. }
  456. to_delete := make([]string, 0)
  457. for key, fh := range analyser.fileHistories {
  458. ls, exists := file_statuses[key]
  459. if !exists {
  460. to_delete = append(to_delete, key)
  461. } else {
  462. for i := 0; i < delta; i++ {
  463. fh = append(fh, ls)
  464. }
  465. analyser.fileHistories[key] = fh
  466. }
  467. }
  468. for _, key := range to_delete {
  469. delete(analyser.fileHistories, key)
  470. }
  471. for key, ls := range file_statuses {
  472. fh, exists := analyser.fileHistories[key]
  473. if exists {
  474. continue
  475. }
  476. for i := 0; i < delta; i++ {
  477. fh = append(fh, ls)
  478. }
  479. analyser.fileHistories[key] = fh
  480. }
  481. for key, ph := range analyser.peopleHistories {
  482. ls := people_statuses[key]
  483. for i := 0; i < delta; i++ {
  484. ph = append(ph, ls)
  485. }
  486. analyser.peopleHistories[key] = ph
  487. }
  488. }