analyser.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. package hercules
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "time"
  9. "unicode/utf8"
  10. "github.com/sergi/go-diff/diffmatchpatch"
  11. "gopkg.in/src-d/go-git.v4"
  12. )
  13. type Analyser struct {
  14. Repository *git.Repository
  15. OnProgress func(int)
  16. }
  // checkClose closes c and panics with the returned error if closing fails.
  // It is meant to be used in defer statements where the error of Close would
  // otherwise be dropped.
  func checkClose(c io.Closer) {
  	err := c.Close()
  	if err == nil {
  		return
  	}
  	panic(err)
  }
  22. func loc(file *git.Blob) (int, error) {
  23. reader, err := file.Reader()
  24. if err != nil {
  25. panic(err)
  26. }
  27. defer checkClose(reader)
  28. scanner := bufio.NewScanner(reader)
  29. counter := 0
  30. for scanner.Scan() {
  31. if !utf8.Valid(scanner.Bytes()) {
  32. return -1, errors.New("binary")
  33. }
  34. counter++
  35. }
  36. return counter, nil
  37. }
  38. func str(file *git.Blob) (string, error) {
  39. reader, err := file.Reader()
  40. if err != nil {
  41. panic(err)
  42. }
  43. defer checkClose(reader)
  44. buf := new(bytes.Buffer)
  45. buf.ReadFrom(reader)
  46. if !utf8.Valid(buf.Bytes()) {
  47. return "", errors.New("binary")
  48. }
  49. return buf.String(), nil
  50. }
  51. func (analyser *Analyser) handleInsertion(
  52. change *git.Change, day int, status map[int]int64, files map[string]*File) {
  53. blob, err := analyser.Repository.Blob(change.To.TreeEntry.Hash)
  54. if err != nil {
  55. panic(err)
  56. }
  57. lines, err := loc(blob)
  58. if err != nil {
  59. return
  60. }
  61. name := change.To.Name
  62. file, exists := files[name]
  63. if exists {
  64. panic(fmt.Sprintf("file %s already exists", name))
  65. }
  66. file = NewFile(day, lines, status)
  67. files[name] = file
  68. }
  69. func (analyser *Analyser) handleDeletion(
  70. change *git.Change, day int, status map[int]int64, files map[string]*File) {
  71. blob, err := analyser.Repository.Blob(change.From.TreeEntry.Hash)
  72. if err != nil {
  73. panic(err)
  74. }
  75. lines, err := loc(blob)
  76. if err != nil {
  77. return
  78. }
  79. name := change.From.Name
  80. file := files[name]
  81. file.Update(day, 0, 0, lines)
  82. delete(files, name)
  83. }
  84. func (analyser *Analyser) handleModification(
  85. change *git.Change, day int, status map[int]int64, files map[string]*File) {
  86. blob_from, err := analyser.Repository.Blob(change.From.TreeEntry.Hash)
  87. if err != nil {
  88. panic(err)
  89. }
  90. blob_to, err := analyser.Repository.Blob(change.To.TreeEntry.Hash)
  91. if err != nil {
  92. panic(err)
  93. }
  94. str_from, err := str(blob_from)
  95. if err != nil {
  96. return
  97. }
  98. str_to, _ := str(blob_to)
  99. file, exists := files[change.From.Name]
  100. if !exists {
  101. panic(fmt.Sprintf("file %s does not exist", change.From.Name))
  102. }
  103. // possible rename
  104. if change.To.Name != change.From.Name {
  105. analyser.handleRename(change.From.Name, change.To.Name, files)
  106. }
  107. dmp := diffmatchpatch.New()
  108. src, dst, _ := dmp.DiffLinesToRunes(str_from, str_to)
  109. diffs := dmp.DiffMainRunes(src, dst, false)
  110. // we do not call RunesToDiffLines so the number of lines equals
  111. // to the rune count
  112. position := 0
  113. for _, edit := range diffs {
  114. length := utf8.RuneCountInString(edit.Text)
  115. switch edit.Type {
  116. case diffmatchpatch.DiffEqual:
  117. position += length
  118. case diffmatchpatch.DiffInsert:
  119. file.Update(day, position, length, 0)
  120. position += length
  121. case diffmatchpatch.DiffDelete:
  122. file.Update(day, position, 0, length)
  123. break
  124. default:
  125. panic(fmt.Sprintf("diff operation is not supported: %d", edit.Type))
  126. }
  127. }
  128. }
  129. func (analyser *Analyser) handleRename(from, to string, files map[string]*File) {
  130. file, exists := files[from]
  131. if !exists {
  132. panic(fmt.Sprintf("file %s does not exist", from))
  133. }
  134. files[to] = file
  135. delete(files, from)
  136. }
  137. func (analyser *Analyser) commits() []*git.Commit {
  138. result := []*git.Commit{}
  139. repository := analyser.Repository
  140. head, err := repository.Head()
  141. if err != nil {
  142. panic(err)
  143. }
  144. commit, err := repository.Commit(head.Hash())
  145. if err != nil {
  146. panic(err)
  147. }
  148. result = append(result, commit)
  149. for ; err != io.EOF; commit, err = commit.Parents().Next() {
  150. if err != nil {
  151. panic(err)
  152. }
  153. result = append(result, commit)
  154. }
  155. // reverse the order
  156. for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
  157. result[i], result[j] = result[j], result[i]
  158. }
  159. return result
  160. }
  161. func (analyser *Analyser) Analyse() ([]map[int]int64, int) {
  162. onProgress := analyser.OnProgress
  163. if onProgress == nil {
  164. onProgress = func(int) {}
  165. }
  166. // current daily alive number of lines; key is the number of days from the
  167. // beginning of the history
  168. status := map[int]int64{}
  169. // weekly snapshots of status
  170. statuses := []map[int]int64{}
  171. // mapping <file path> -> hercules.File
  172. files := map[string]*File{}
  173. // list of commits belonging to the default branch, from oldest to newest
  174. commits := analyser.commits()
  175. var day0 time.Time // will be initialized in the first iteration
  176. var prev_tree *git.Tree = nil
  177. prev_day := 0
  178. for index, commit := range commits {
  179. onProgress(index)
  180. tree, err := commit.Tree()
  181. if err != nil {
  182. panic(err)
  183. }
  184. if index == 0 {
  185. // first iteration - initialize the file objects from the tree
  186. day0 = commit.Author.When
  187. func() {
  188. file_iter := tree.Files()
  189. defer file_iter.Close()
  190. for {
  191. file, err := file_iter.Next()
  192. if err != nil {
  193. if err == io.EOF {
  194. break
  195. }
  196. panic(err)
  197. }
  198. lines, err := loc(&file.Blob)
  199. if err == nil {
  200. files[file.Name] = NewFile(0, lines, status)
  201. }
  202. }
  203. }()
  204. } else {
  205. day := int(commit.Author.When.Sub(day0).Hours() / 24)
  206. delta := (day / 7) - (prev_day / 7)
  207. if delta > 0 {
  208. prev_day = day
  209. status_copy := map[int]int64{}
  210. for k, v := range status {
  211. status_copy[k] = v
  212. }
  213. for i := 0; i < delta; i++ {
  214. statuses = append(statuses, status_copy)
  215. }
  216. }
  217. tree_diff, err := git.DiffTree(prev_tree, tree)
  218. if err != nil {
  219. panic(err)
  220. }
  221. for _, change := range tree_diff {
  222. switch change.Action {
  223. case git.Insert:
  224. analyser.handleInsertion(change, day, status, files)
  225. case git.Delete:
  226. analyser.handleDeletion(change, day, status, files)
  227. case git.Modify:
  228. analyser.handleModification(change, day, status, files)
  229. default:
  230. panic(fmt.Sprintf("unsupported action: %d", change.Action))
  231. }
  232. }
  233. }
  234. prev_tree = tree
  235. }
  236. return statuses, prev_day
  237. }