burndown.go

package hercules

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"os"
	"unicode/utf8"

	"github.com/sergi/go-diff/diffmatchpatch"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
)

// BurndownAnalysis allows gathering the line burndown statistics for a Git repository.
type BurndownAnalysis struct {
	// Granularity sets the size of each band - the number of days it spans.
	// Smaller values provide better resolution but require more work and eat more
	// memory. 30 days is usually enough.
	Granularity int
	// Sampling sets how detailed the statistics are - the size of the interval in
	// days between consecutive measurements. It is usually a good idea to set it
	// <= Granularity. Try 15 or 30.
	Sampling int
	// TrackFiles enables or disables the fine-grained per-file burndown analysis.
	// It does not change the top level burndown results.
	TrackFiles bool
	// PeopleNumber is the number of developers for which to collect the burndown
	// stats. 0 disables it.
	PeopleNumber int
	// Debug activates the debugging mode. Analyse() runs slower in this mode
	// but it accurately checks all the intermediate states for invariant
	// violations.
	Debug bool
	// repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// globalStatus is the current daily number of alive lines; the key is the
	// number of days since the beginning of the history.
	globalStatus map[int]int64
	// globalHistory is the sampled snapshots of globalStatus (one per Sampling interval).
	globalHistory [][]int64
	// fileHistories is the sampled snapshots of each file's status.
	fileHistories map[string][][]int64
	// peopleHistories is the sampled snapshots of each person's status.
	peopleHistories [][][]int64
	// files is the mapping <file path> -> *File.
	files map[string]*File
	// matrix is the mutual deletions and self insertions.
	matrix []map[int]int64
	// people is the people's individual time stats.
	people []map[int]int64
	// day is the most recent day index processed.
	day int
	// previousDay is the day from the previous sample period -
	// different from DaysSinceStart.previousDay.
	previousDay int
}

// BurndownResult carries the final sampled histories and the people interaction matrix.
type BurndownResult struct {
	GlobalHistory   [][]int64
	FileHistories   map[string][][]int64
	PeopleHistories [][][]int64
	PeopleMatrix    [][]int64
}

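// exampleConfigure is an illustrative sketch, not part of the analysis: it
// shows a typical configuration before plugging the analysis into a pipeline.
// The repository argument is assumed to come from go-git, e.g. git.PlainOpen.
func exampleConfigure(repository *git.Repository) *BurndownAnalysis {
	analyser := &BurndownAnalysis{
		Granularity: 30, // one 30-day band per month of history
		Sampling:    30, // one snapshot per month
		TrackFiles:  true,
	}
	analyser.Initialize(repository)
	return analyser
}
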
// Name returns the name of the analysis.
func (analyser *BurndownAnalysis) Name() string {
	return "Burndown"
}

// Provides returns the names of the pipeline items this analysis produces;
// the burndown analysis is a sink and provides nothing.
func (analyser *BurndownAnalysis) Provides() []string {
	return []string{}
}

// Requires returns the names of the pipeline items this analysis consumes.
func (analyser *BurndownAnalysis) Requires() []string {
	return []string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
}

// Initialize resets the internal state so that the analysis can start from scratch.
func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
	analyser.repository = repository
	analyser.globalStatus = map[int]int64{}
	analyser.globalHistory = [][]int64{}
	analyser.fileHistories = map[string][][]int64{}
	analyser.peopleHistories = make([][][]int64, analyser.PeopleNumber)
	analyser.files = map[string]*File{}
	analyser.matrix = make([]map[int]int64, analyser.PeopleNumber)
	analyser.people = make([]map[int]int64, analyser.PeopleNumber)
	analyser.day = 0
	analyser.previousDay = 0
}

// Consume processes the next commit. It takes a snapshot of the current status
// whenever the sampling period is crossed, then applies the commit's tree
// changes one by one.
func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	sampling := analyser.Sampling
	if sampling == 0 {
		sampling = 1
	}
	author := deps["author"].(int)
	analyser.day = deps["day"].(int)
	delta := (analyser.day / sampling) - (analyser.previousDay / sampling)
	if delta > 0 {
		analyser.previousDay = analyser.day
		gs, fss, pss := analyser.groupStatus()
		analyser.updateHistories(gs, fss, pss, delta)
	}
	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
	treeDiffs := deps["renamed_changes"].(object.Changes)
	fileDiffs := deps["file_diff"].(map[string]FileDiffData)
	for _, change := range treeDiffs {
		action, err := change.Action()
		if err != nil {
			return nil, err
		}
		switch action {
		case merkletrie.Insert:
			err = analyser.handleInsertion(change, author, cache)
		case merkletrie.Delete:
			err = analyser.handleDeletion(change, author, cache)
		case merkletrie.Modify:
			err = analyser.handleModification(change, author, cache, fileDiffs)
		}
		if err != nil {
			return nil, err
		}
	}
	return nil, nil
}

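// exampleSamplingDelta is an illustrative sketch, not called by the analysis:
// it reproduces the snapshot arithmetic in Consume. With Sampling == 15, moving
// from day 25 to day 40 crosses exactly one sampling boundary, so one snapshot
// of the current status is appended to the histories.
func exampleSamplingDelta() {
	sampling, previousDay, day := 15, 25, 40
	delta := (day / sampling) - (previousDay / sampling) // 2 - 1
	fmt.Println(delta)                                   // 1
}
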
// Finalize returns the list of snapshots of the cumulative line edit times
// and the similar lists for every file which is alive in HEAD.
// The number of snapshots (the first dimension of [][]int64) depends on
// Sampling (the larger the Sampling, the fewer the snapshots); the length of
// each snapshot depends on Granularity (the larger the Granularity, the
// shorter the snapshot).
func (analyser *BurndownAnalysis) Finalize() interface{} {
	gs, fss, pss := analyser.groupStatus()
	analyser.updateHistories(gs, fss, pss, 1)
	// Files which appeared after the beginning of the history have shorter
	// histories; pad them with zero rows on the left.
	for key, statuses := range analyser.fileHistories {
		if len(statuses) == len(analyser.globalHistory) {
			continue
		}
		padding := make([][]int64, len(analyser.globalHistory)-len(statuses))
		for i := range padding {
			padding[i] = make([]int64, len(analyser.globalStatus))
		}
		analyser.fileHistories[key] = append(padding, statuses...)
	}
	// Convert the sparse interaction rows into a dense matrix, mapping the two
	// special author values into the first two columns.
	peopleMatrix := make([][]int64, analyser.PeopleNumber)
	for i, row := range analyser.matrix {
		mrow := make([]int64, analyser.PeopleNumber+2)
		peopleMatrix[i] = mrow
		for key, val := range row {
			if key == MISSING_AUTHOR {
				key = -1
			} else if key == SELF_AUTHOR {
				key = -2
			}
			mrow[key+2] = val
		}
	}
	return BurndownResult{
		GlobalHistory:   analyser.globalHistory,
		FileHistories:   analyser.fileHistories,
		PeopleHistories: analyser.peopleHistories,
		PeopleMatrix:    peopleMatrix}
}

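// examplePeopleMatrixLayout is an illustrative sketch, not called by the
// analysis: it shows how Finalize shifts the special author keys into the
// first two matrix columns (self-overwrites, then unidentified authors, then
// regular author indices shifted by two). The row values are hypothetical.
func examplePeopleMatrixLayout() {
	row := map[int]int64{SELF_AUTHOR: 7, MISSING_AUTHOR: 2, 0: 3}
	mrow := make([]int64, 3+2) // PeopleNumber == 3 in this sketch
	for key, val := range row {
		if key == MISSING_AUTHOR {
			key = -1
		} else if key == SELF_AUTHOR {
			key = -2
		}
		mrow[key+2] = val
	}
	fmt.Println(mrow) // [7 2 3 0 0]
}
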
// checkClose is a defer helper which escalates Close() errors into panics.
func checkClose(c io.Closer) {
	if err := c.Close(); err != nil {
		panic(err)
	}
}

// countLines returns the number of lines in the blob, or the sentinel "binary"
// error if the content is not valid UTF-8.
func countLines(file *object.Blob) (int, error) {
	reader, err := file.Reader()
	if err != nil {
		return 0, err
	}
	defer checkClose(reader)
	var scanner *bufio.Scanner
	buffer := make([]byte, bufio.MaxScanTokenSize)
	counter := 0
	// A line longer than the buffer makes the scanner stop with ErrTooLong;
	// restart a fresh scanner over the same reader and keep counting instead
	// of failing.
	for scanner == nil || scanner.Err() == bufio.ErrTooLong {
		if scanner != nil && !utf8.Valid(scanner.Bytes()) {
			return -1, errors.New("binary")
		}
		scanner = bufio.NewScanner(reader)
		scanner.Buffer(buffer, 0)
		for scanner.Scan() {
			if !utf8.Valid(scanner.Bytes()) {
				return -1, errors.New("binary")
			}
			counter++
		}
	}
	return counter, nil
}

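// exampleCountLines is an illustrative sketch, not called by the analysis: the
// "binary" error returned by countLines is a sentinel which the change
// handlers below interpret as "skip this file", not as a failure.
func exampleCountLines(blob *object.Blob) {
	lines, err := countLines(blob)
	if err != nil {
		if err.Error() == "binary" {
			return // binary blobs do not participate in the burndown
		}
		panic(err)
	}
	fmt.Println(lines)
}
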
// packPersonWithDay stores the person index in the bits above the 14-bit day.
func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
	if analyser.PeopleNumber == 0 {
		return day
	}
	result := day
	result |= person << 14
	// This effectively means max 16384 days (>44 years) and (131072 - 2) devs.
	return result
}

// unpackPersonWithDay is the inverse of packPersonWithDay.
func (analyser *BurndownAnalysis) unpackPersonWithDay(value int) (int, int) {
	if analyser.PeopleNumber == 0 {
		return MISSING_AUTHOR, value
	}
	return value >> 14, value & 0x3FFF
}

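// examplePackRoundTrip is an illustrative sketch, not called by the analysis:
// a round trip through the 14-bit packing with hypothetical values.
func examplePackRoundTrip() {
	analyser := &BurndownAnalysis{PeopleNumber: 3}
	packed := analyser.packPersonWithDay(2, 100) // 2<<14 | 100 == 32868
	person, day := analyser.unpackPersonWithDay(packed)
	fmt.Println(person, day) // 2 100
}
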
// updateStatus adds the delta to the global status under the unpacked edit day.
func (analyser *BurndownAnalysis) updateStatus(
	status interface{}, _ int, previousValue int, delta int) {
	_, previousTime := analyser.unpackPersonWithDay(previousValue)
	status.(map[int]int64)[previousTime] += int64(delta)
}

// updatePeople adds the delta to the author's individual stats under the
// unpacked edit day.
func (analyser *BurndownAnalysis) updatePeople(people interface{}, _ int, previousValue int, delta int) {
	previousAuthor, previousTime := analyser.unpackPersonWithDay(previousValue)
	if previousAuthor == MISSING_AUTHOR {
		return
	}
	casted := people.([]map[int]int64)
	stats := casted[previousAuthor]
	if stats == nil {
		stats = map[int]int64{}
		casted[previousAuthor] = stats
	}
	stats[previousTime] += int64(delta)
}

// updateMatrix records in matrix[oldAuthor][newAuthor] how many lines of
// oldAuthor were touched by newAuthor; insertions over one's own lines are
// folded into the SELF_AUTHOR column.
func (analyser *BurndownAnalysis) updateMatrix(
	matrixUncasted interface{}, currentTime int, previousTime int, delta int) {
	matrix := matrixUncasted.([]map[int]int64)
	newAuthor, _ := analyser.unpackPersonWithDay(currentTime)
	oldAuthor, _ := analyser.unpackPersonWithDay(previousTime)
	if oldAuthor == MISSING_AUTHOR {
		return
	}
	if newAuthor == oldAuthor && delta > 0 {
		newAuthor = SELF_AUTHOR
	}
	row := matrix[oldAuthor]
	if row == nil {
		row = map[int]int64{}
		matrix[oldAuthor] = row
	}
	row[newAuthor] += int64(delta)
}

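// exampleUpdateMatrix is an illustrative sketch, not called by the analysis:
// author 1 removes four lines which author 0 wrote on day 5, and the loss is
// recorded in matrix[0][1]. The delta sign follows whatever the File machinery
// passes in; a negative value is assumed here for a deletion.
func exampleUpdateMatrix() {
	analyser := &BurndownAnalysis{PeopleNumber: 2}
	matrix := make([]map[int]int64, 2)
	current := analyser.packPersonWithDay(1, 10) // author 1 edits on day 10
	previous := analyser.packPersonWithDay(0, 5) // author 0 wrote on day 5
	analyser.updateMatrix(matrix, current, previous, -4)
	fmt.Println(matrix[0][1]) // -4
}
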
// newFile compiles the Status list for a fresh file: slot 0 is the shared
// global status, slot 1 (optional) is the per-file status, and the remaining
// slots feed the people stats and the interaction matrix.
func (analyser *BurndownAnalysis) newFile(
	author int, day int, size int, global map[int]int64, people []map[int]int64,
	matrix []map[int]int64) *File {
	statuses := make([]Status, 1)
	statuses[0] = NewStatus(global, analyser.updateStatus)
	if analyser.TrackFiles {
		statuses = append(statuses, NewStatus(map[int]int64{}, analyser.updateStatus))
	}
	if analyser.PeopleNumber > 0 {
		statuses = append(statuses, NewStatus(people, analyser.updatePeople))
		statuses = append(statuses, NewStatus(matrix, analyser.updateMatrix))
		day = analyser.packPersonWithDay(author, day)
	}
	return NewFile(day, size, statuses...)
}

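// exampleStatusOrder is an illustrative sketch, not called by the analysis: it
// spells out the Status slot layout which groupStatus relies on when it reads
// file.Status(1).
func exampleStatusOrder(analyser *BurndownAnalysis) {
	file := analyser.newFile(0, 0, 10, analyser.globalStatus, analyser.people, analyser.matrix)
	_ = file.Status(0) // the global status map shared by every file
	if analyser.TrackFiles {
		_ = file.Status(1).(map[int]int64) // the per-file status read by groupStatus
	}
}
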
// handleInsertion registers a new file under the current day and author.
func (analyser *BurndownAnalysis) handleInsertion(
	change *object.Change, author int, cache map[plumbing.Hash]*object.Blob) error {
	blob := cache[change.To.TreeEntry.Hash]
	lines, err := countLines(blob)
	if err != nil {
		if err.Error() == "binary" {
			return nil
		}
		return err
	}
	name := change.To.Name
	file, exists := analyser.files[name]
	if exists {
		return fmt.Errorf("file %s already exists", name)
	}
	file = analyser.newFile(
		author, analyser.day, lines, analyser.globalStatus, analyser.people, analyser.matrix)
	analyser.files[name] = file
	return nil
}

// handleDeletion erases all the lines of the file and forgets it.
func (analyser *BurndownAnalysis) handleDeletion(
	change *object.Change, author int, cache map[plumbing.Hash]*object.Blob) error {
	blob := cache[change.From.TreeEntry.Hash]
	lines, err := countLines(blob)
	if err != nil {
		if err.Error() == "binary" {
			return nil
		}
		return err
	}
	name := change.From.Name
	file := analyser.files[name]
	file.Update(analyser.packPersonWithDay(author, analyser.day), 0, 0, lines)
	delete(analyser.files, name)
	return nil
}

// handleModification applies the file diff to the tracked File, handling a
// possible rename first. A DiffDelete immediately followed by a DiffInsert is
// coalesced into a single Update call so that the replaced lines keep a
// consistent position.
func (analyser *BurndownAnalysis) handleModification(
	change *object.Change, author int, cache map[plumbing.Hash]*object.Blob,
	diffs map[string]FileDiffData) error {
	file, exists := analyser.files[change.From.Name]
	if !exists {
		return analyser.handleInsertion(change, author, cache)
	}
	// possible rename
	if change.To.Name != change.From.Name {
		err := analyser.handleRename(change.From.Name, change.To.Name)
		if err != nil {
			return err
		}
	}
	thisDiffs := diffs[change.To.Name]
	if file.Len() != thisDiffs.OldLinesOfCode {
		fmt.Fprintf(os.Stderr, "====TREE====\n%s", file.Dump())
		return fmt.Errorf("%s: internal integrity error src %d != %d %s -> %s",
			change.To.Name, thisDiffs.OldLinesOfCode, file.Len(),
			change.From.TreeEntry.Hash.String(), change.To.TreeEntry.Hash.String())
	}
	// we do not call RunesToDiffLines so the number of lines equals
	// the rune count
	position := 0
	pending := diffmatchpatch.Diff{Text: ""}
	apply := func(edit diffmatchpatch.Diff) {
		length := utf8.RuneCountInString(edit.Text)
		if edit.Type == diffmatchpatch.DiffInsert {
			file.Update(analyser.packPersonWithDay(author, analyser.day), position, length, 0)
			position += length
		} else {
			file.Update(analyser.packPersonWithDay(author, analyser.day), position, 0, length)
		}
		if analyser.Debug {
			file.Validate()
		}
	}
	for _, edit := range thisDiffs.Diffs {
		dumpBefore := ""
		if analyser.Debug {
			dumpBefore = file.Dump()
		}
		length := utf8.RuneCountInString(edit.Text)
		debugError := func() {
			fmt.Fprintf(os.Stderr, "%s: internal diff error\n", change.To.Name)
			fmt.Fprintf(os.Stderr, "Update(%d, %d, %d (0), %d (0))\n", analyser.day, position,
				length, utf8.RuneCountInString(pending.Text))
			if dumpBefore != "" {
				fmt.Fprintf(os.Stderr, "====TREE BEFORE====\n%s====END====\n", dumpBefore)
			}
			fmt.Fprintf(os.Stderr, "====TREE AFTER====\n%s====END====\n", file.Dump())
		}
		switch edit.Type {
		case diffmatchpatch.DiffEqual:
			if pending.Text != "" {
				apply(pending)
				pending.Text = ""
			}
			position += length
		case diffmatchpatch.DiffInsert:
			if pending.Text != "" {
				if pending.Type == diffmatchpatch.DiffInsert {
					debugError()
					return errors.New("DiffInsert may not appear after DiffInsert")
				}
				// coalesce the pending DiffDelete with this DiffInsert into one Update
				file.Update(analyser.packPersonWithDay(author, analyser.day), position, length,
					utf8.RuneCountInString(pending.Text))
				if analyser.Debug {
					file.Validate()
				}
				position += length
				pending.Text = ""
			} else {
				pending = edit
			}
		case diffmatchpatch.DiffDelete:
			if pending.Text != "" {
				debugError()
				return errors.New("DiffDelete may not appear after DiffInsert/DiffDelete")
			}
			pending = edit
		default:
			debugError()
			return fmt.Errorf("diff operation is not supported: %d", edit.Type)
		}
	}
	if pending.Text != "" {
		apply(pending)
		pending.Text = ""
	}
	if file.Len() != thisDiffs.NewLinesOfCode {
		return fmt.Errorf("%s: internal integrity error dst %d != %d",
			change.To.Name, thisDiffs.NewLinesOfCode, file.Len())
	}
	return nil
}

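// exampleLineDiff is an illustrative sketch, not called by the analysis: the
// upstream "file_diff" item is assumed to be produced through diffmatchpatch's
// line-to-rune mapping, which is why utf8.RuneCountInString above yields line
// counts rather than character counts.
func exampleLineDiff() {
	dmp := diffmatchpatch.New()
	src, dst, _ := dmp.DiffLinesToRunes("a\nb\nc\n", "a\nc\nd\n")
	for _, edit := range dmp.DiffMainRunes(src, dst, false) {
		// every rune in edit.Text stands for one whole source line
		fmt.Println(edit.Type, utf8.RuneCountInString(edit.Text))
	}
}
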
// handleRename moves the tracked File from the old path to the new one.
func (analyser *BurndownAnalysis) handleRename(from, to string) error {
	file, exists := analyser.files[from]
	if !exists {
		return fmt.Errorf("file %s does not exist", from)
	}
	analyser.files[to] = file
	delete(analyser.files, from)
	return nil
}

// groupStatus aggregates the daily statuses into Granularity-sized bands and
// returns the global, per-file and per-person snapshots.
func (analyser *BurndownAnalysis) groupStatus() ([]int64, map[string][]int64, [][]int64) {
	granularity := analyser.Granularity
	if granularity == 0 {
		granularity = 1
	}
	day := analyser.day
	day++
	adjust := 0
	if day%granularity != 0 {
		adjust = 1
	}
	global := make([]int64, day/granularity+adjust)
	var group int64
	for i := 0; i < day; i++ {
		group += analyser.globalStatus[i]
		if (i % granularity) == (granularity - 1) {
			global[i/granularity] = group
			group = 0
		}
	}
	if day%granularity != 0 {
		global[len(global)-1] = group
	}
	locals := make(map[string][]int64)
	if analyser.TrackFiles {
		for key, file := range analyser.files {
			status := make([]int64, day/granularity+adjust)
			var group int64
			for i := 0; i < day; i++ {
				group += file.Status(1).(map[int]int64)[i]
				if (i % granularity) == (granularity - 1) {
					status[i/granularity] = group
					group = 0
				}
			}
			if day%granularity != 0 {
				status[len(status)-1] = group
			}
			locals[key] = status
		}
	}
	peoples := make([][]int64, len(analyser.people))
	for key, person := range analyser.people {
		status := make([]int64, day/granularity+adjust)
		var group int64
		for i := 0; i < day; i++ {
			group += person[i]
			if (i % granularity) == (granularity - 1) {
				status[i/granularity] = group
				group = 0
			}
		}
		if day%granularity != 0 {
			status[len(status)-1] = group
		}
		peoples[key] = status
	}
	return global, locals, peoples
}

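// exampleGroupStatus is an illustrative sketch, not called by the analysis: it
// groups three days of hypothetical line counts into bands of two days. Days 0
// and 1 are summed into the first band; day 2 falls into the trailing partial
// band.
func exampleGroupStatus() {
	analyser := &BurndownAnalysis{
		Granularity:  2,
		day:          2,
		globalStatus: map[int]int64{0: 10, 1: 5, 2: 3},
	}
	global, _, _ := analyser.groupStatus()
	fmt.Println(global) // [15 3]
}
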
// updateHistories appends the freshly grouped statuses to the histories,
// repeating each snapshot delta times (once per elapsed sampling period).
func (analyser *BurndownAnalysis) updateHistories(
	globalStatus []int64, fileStatuses map[string][]int64, peopleStatuses [][]int64, delta int) {
	for i := 0; i < delta; i++ {
		analyser.globalHistory = append(analyser.globalHistory, globalStatus)
	}
	// Drop the histories of files which no longer exist.
	toDelete := make([]string, 0)
	for key, fh := range analyser.fileHistories {
		ls, exists := fileStatuses[key]
		if !exists {
			toDelete = append(toDelete, key)
		} else {
			for i := 0; i < delta; i++ {
				fh = append(fh, ls)
			}
			analyser.fileHistories[key] = fh
		}
	}
	for _, key := range toDelete {
		delete(analyser.fileHistories, key)
	}
	// Start the histories of files which appeared in this sampling period.
	for key, ls := range fileStatuses {
		fh, exists := analyser.fileHistories[key]
		if exists {
			continue
		}
		for i := 0; i < delta; i++ {
			fh = append(fh, ls)
		}
		analyser.fileHistories[key] = fh
	}
	for key, ph := range analyser.peopleHistories {
		ls := peopleStatuses[key]
		for i := 0; i < delta; i++ {
			ph = append(ph, ls)
		}
		analyser.peopleHistories[key] = ph
	}
}

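// exampleUpdateHistories is an illustrative sketch, not called by the
// analysis: two sampling periods elapsed without an intervening snapshot, so
// the same grouped status is appended twice, both globally and for the
// hypothetical file "main.go".
func exampleUpdateHistories() {
	analyser := &BurndownAnalysis{fileHistories: map[string][][]int64{}}
	analyser.updateHistories([]int64{42}, map[string][]int64{"main.go": {42}}, nil, 2)
	fmt.Println(len(analyser.globalHistory), len(analyser.fileHistories["main.go"])) // 2 2
}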