main.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. /*
  2. Package main provides the command line tool to gather the line burndown
  3. statistics from Git repositories. Usage:
  4. hercules <URL or FS path>
  5. */
  6. package main
  7. import (
  8. "flag"
  9. "fmt"
  10. "net/http"
  11. _ "net/http/pprof"
  12. "os"
  13. "runtime/pprof"
  14. "sort"
  15. "strconv"
  16. "strings"
  17. "gopkg.in/src-d/go-billy.v3/osfs"
  18. "gopkg.in/src-d/go-git.v4"
  19. "gopkg.in/src-d/go-git.v4/plumbing/object"
  20. "gopkg.in/src-d/go-git.v4/storage"
  21. "gopkg.in/src-d/go-git.v4/storage/filesystem"
  22. "gopkg.in/src-d/go-git.v4/storage/memory"
  23. "gopkg.in/src-d/hercules.v1"
  24. )
  25. func safeString(str string) string {
  26. str = strings.Replace(str, "\"", "'", -1)
  27. str = strings.Replace(str, "\\", " ", -1)
  28. str = strings.Replace(str, ":", " ", -1)
  29. return "\"" + str + "\""
  30. }
  31. func printMatrix(matrix [][]int64, name string, fixNegative bool) {
  32. // determine the maximum length of each value
  33. var maxnum int64 = -(1 << 32)
  34. var minnum int64 = 1 << 32
  35. for _, status := range matrix {
  36. for _, val := range status {
  37. if val > maxnum {
  38. maxnum = val
  39. }
  40. if val < minnum {
  41. minnum = val
  42. }
  43. }
  44. }
  45. width := len(strconv.FormatInt(maxnum, 10))
  46. if !fixNegative && minnum < 0 {
  47. width = len(strconv.FormatInt(minnum, 10))
  48. }
  49. last := len(matrix[len(matrix)-1])
  50. indent := 2
  51. if name != "" {
  52. fmt.Printf(" %s: |-\n", safeString(name))
  53. indent += 2
  54. }
  55. // print the resulting triangular matrix
  56. for _, status := range matrix {
  57. fmt.Print(strings.Repeat(" ", indent-1))
  58. for i := 0; i < last; i++ {
  59. var val int64
  60. if i < len(status) {
  61. val = status[i]
  62. // not sure why this sometimes happens...
  63. // TODO(vmarkovtsev): find the root cause of tiny negative balances
  64. if fixNegative && val < 0 {
  65. val = 0
  66. }
  67. }
  68. fmt.Printf(" %[1]*[2]d", width, val)
  69. }
  70. fmt.Println()
  71. }
  72. }
  73. func printCouples(result *hercules.CouplesResult, peopleDict []string) {
  74. fmt.Println("files_coocc:")
  75. fmt.Println(" index:")
  76. for _, file := range result.Files {
  77. fmt.Printf(" - %s\n", safeString(file))
  78. }
  79. fmt.Println(" matrix:")
  80. for _, files := range result.FilesMatrix {
  81. fmt.Print(" - {")
  82. indices := []int{}
  83. for file := range files {
  84. indices = append(indices, file)
  85. }
  86. sort.Ints(indices)
  87. for i, file := range indices {
  88. fmt.Printf("%d: %d", file, files[file])
  89. if i < len(indices)-1 {
  90. fmt.Print(", ")
  91. }
  92. }
  93. fmt.Println("}")
  94. }
  95. fmt.Println("people_coocc:")
  96. fmt.Println(" index:")
  97. for _, person := range peopleDict {
  98. fmt.Printf(" - %s\n", safeString(person))
  99. }
  100. fmt.Println(" matrix:")
  101. for _, people := range result.PeopleMatrix {
  102. fmt.Print(" - {")
  103. indices := []int{}
  104. for file := range people {
  105. indices = append(indices, file)
  106. }
  107. sort.Ints(indices)
  108. for i, person := range indices {
  109. fmt.Printf("%d: %d", person, people[person])
  110. if i < len(indices)-1 {
  111. fmt.Print(", ")
  112. }
  113. }
  114. fmt.Println("}")
  115. }
  116. fmt.Println(" author_files:") // sorted by number of files each author changed
  117. peopleFiles := sortByNumberOfFiles(result.PeopleFiles, peopleDict)
  118. for _, authorFiles := range peopleFiles {
  119. fmt.Printf(" - %s:\n", safeString(authorFiles.Author))
  120. sort.Strings(authorFiles.Files)
  121. for _, file := range authorFiles.Files {
  122. fmt.Printf(" - %s\n", safeString(file)) // sorted by path
  123. }
  124. }
  125. }
  126. func sortByNumberOfFiles(peopleFiles [][]string, peopleDict []string) AuthorFilesList {
  127. var pfl AuthorFilesList
  128. for peopleIdx, files := range peopleFiles {
  129. pfl = append(pfl, AuthorFiles{peopleDict[peopleIdx], files})
  130. }
  131. sort.Sort(pfl)
  132. return pfl
  133. }
  134. type AuthorFiles struct {
  135. Author string
  136. Files []string
  137. }
  138. type AuthorFilesList []AuthorFiles
  139. func (s AuthorFilesList) Len() int {
  140. return len(s)
  141. }
  142. func (s AuthorFilesList) Swap(i, j int) {
  143. s[i], s[j] = s[j], s[i]
  144. }
  145. func (s AuthorFilesList) Less(i, j int) bool {
  146. return len(s[i].Files) < len(s[j].Files)
  147. }
  148. func sortedKeys(m map[string][][]int64) []string {
  149. keys := make([]string, 0, len(m))
  150. for k := range m {
  151. keys = append(keys, k)
  152. }
  153. sort.Strings(keys)
  154. return keys
  155. }
  156. func main() {
  157. var with_files bool
  158. var with_people bool
  159. var with_couples bool
  160. var people_dict_path string
  161. var profile bool
  162. var granularity, sampling, similarity_threshold int
  163. var commitsFile string
  164. var debug bool
  165. flag.BoolVar(&with_files, "files", false, "Output detailed statistics per each file.")
  166. flag.BoolVar(&with_people, "people", false, "Output detailed statistics per each developer.")
  167. flag.BoolVar(&with_couples, "couples", false, "Gather the co-occurrence matrix "+
  168. "for files and people.")
  169. flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
  170. flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
  171. flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
  172. flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
  173. flag.IntVar(&similarity_threshold, "M", 90,
  174. "A threshold on the similarity index used to detect renames.")
  175. flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
  176. flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
  177. "commit history to follow instead of the default rev-list "+
  178. "--first-parent. The format is the list of hashes, each hash on a "+
  179. "separate line. The first hash is the root.")
  180. flag.Parse()
  181. if granularity <= 0 {
  182. fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
  183. granularity = 1
  184. }
  185. if profile {
  186. go http.ListenAndServe("localhost:6060", nil)
  187. prof, _ := os.Create("hercules.pprof")
  188. pprof.StartCPUProfile(prof)
  189. defer pprof.StopCPUProfile()
  190. }
  191. if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
  192. fmt.Fprint(os.Stderr,
  193. "Usage: hercules <path to repo or URL> [<disk cache path>]\n")
  194. os.Exit(1)
  195. }
  196. uri := flag.Arg(0)
  197. var repository *git.Repository
  198. var storage storage.Storer
  199. var err error
  200. if strings.Contains(uri, "://") {
  201. if len(flag.Args()) == 2 {
  202. storage, err = filesystem.NewStorage(osfs.New(flag.Arg(1)))
  203. if err != nil {
  204. panic(err)
  205. }
  206. } else {
  207. storage = memory.NewStorage()
  208. }
  209. fmt.Fprint(os.Stderr, "cloning...\r")
  210. repository, err = git.Clone(storage, nil, &git.CloneOptions{
  211. URL: uri,
  212. })
  213. fmt.Fprint(os.Stderr, " \r")
  214. } else {
  215. if uri[len(uri)-1] == os.PathSeparator {
  216. uri = uri[:len(uri)-1]
  217. }
  218. repository, err = git.PlainOpen(uri)
  219. }
  220. if err != nil {
  221. panic(err)
  222. }
  223. // core logic
  224. pipeline := hercules.NewPipeline(repository)
  225. pipeline.OnProgress = func(commit, length int) {
  226. fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
  227. }
  228. // list of commits belonging to the default branch, from oldest to newest
  229. // rev-list --first-parent
  230. var commits []*object.Commit
  231. if commitsFile == "" {
  232. commits = pipeline.Commits()
  233. } else {
  234. commits = hercules.LoadCommitsFromFile(commitsFile, repository)
  235. }
  236. pipeline.AddItem(&hercules.BlobCache{})
  237. pipeline.AddItem(&hercules.DaysSinceStart{})
  238. pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
  239. pipeline.AddItem(&hercules.TreeDiff{})
  240. id_matcher := &hercules.IdentityDetector{}
  241. if with_people || with_couples {
  242. if people_dict_path != "" {
  243. id_matcher.LoadPeopleDict(people_dict_path)
  244. } else {
  245. id_matcher.GeneratePeopleDict(commits)
  246. }
  247. }
  248. pipeline.AddItem(id_matcher)
  249. burndowner := &hercules.BurndownAnalysis{
  250. Granularity: granularity,
  251. Sampling: sampling,
  252. Debug: debug,
  253. PeopleNumber: len(id_matcher.ReversePeopleDict),
  254. }
  255. pipeline.AddItem(burndowner)
  256. var coupler *hercules.Couples
  257. if with_couples {
  258. coupler = &hercules.Couples{PeopleNumber: len(id_matcher.ReversePeopleDict)}
  259. pipeline.AddItem(coupler)
  260. }
  261. pipeline.Initialize()
  262. result, err := pipeline.Run(commits)
  263. if err != nil {
  264. panic(err)
  265. }
  266. fmt.Fprint(os.Stderr, "writing... \r")
  267. burndown_results := result[burndowner].(hercules.BurndownResult)
  268. var couples_result hercules.CouplesResult
  269. if with_couples {
  270. couples_result = result[coupler].(hercules.CouplesResult)
  271. }
  272. fmt.Fprint(os.Stderr, " \r")
  273. if len(burndown_results.GlobalHistory) == 0 {
  274. return
  275. }
  276. // print the start date, granularity, sampling
  277. fmt.Println("burndown:")
  278. fmt.Println(" version: 1")
  279. fmt.Println(" begin:", commits[0].Author.When.Unix())
  280. fmt.Println(" end:", commits[len(commits)-1].Author.When.Unix())
  281. fmt.Println(" granularity:", granularity)
  282. fmt.Println(" sampling:", sampling)
  283. fmt.Println("project:")
  284. printMatrix(burndown_results.GlobalHistory, uri, true)
  285. if with_files {
  286. fmt.Println("files:")
  287. keys := sortedKeys(burndown_results.FileHistories)
  288. for _, key := range keys {
  289. printMatrix(burndown_results.FileHistories[key], key, true)
  290. }
  291. }
  292. if with_people {
  293. fmt.Println("people_sequence:")
  294. for key := range burndown_results.PeopleHistories {
  295. fmt.Println(" - " + id_matcher.ReversePeopleDict[key])
  296. }
  297. fmt.Println("people:")
  298. for key, val := range burndown_results.PeopleHistories {
  299. printMatrix(val, id_matcher.ReversePeopleDict[key], true)
  300. }
  301. fmt.Println("people_interaction: |-")
  302. printMatrix(burndown_results.PeopleMatrix, "", false)
  303. }
  304. if with_couples {
  305. printCouples(&couples_result, id_matcher.ReversePeopleDict)
  306. }
  307. }