main.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /*
  2. Package main provides the command line tool to gather the line burndown
  3. statistics from Git repositories. Usage:
  4. hercules <URL or FS path>
  5. */
  6. package main
  7. import (
  8. "flag"
  9. "fmt"
  10. "net/http"
  11. _ "net/http/pprof"
  12. "os"
  13. "runtime/pprof"
  14. "sort"
  15. "strconv"
  16. "strings"
  17. "gopkg.in/src-d/go-billy.v3/osfs"
  18. "gopkg.in/src-d/go-git.v4"
  19. "gopkg.in/src-d/go-git.v4/plumbing/object"
  20. "gopkg.in/src-d/go-git.v4/storage"
  21. "gopkg.in/src-d/go-git.v4/storage/filesystem"
  22. "gopkg.in/src-d/go-git.v4/storage/memory"
  23. "gopkg.in/src-d/hercules.v2"
  24. )
  25. func safeString(str string) string {
  26. str = strings.Replace(str, "\"", "'", -1)
  27. str = strings.Replace(str, "\\", " ", -1)
  28. str = strings.Replace(str, ":", " ", -1)
  29. return "\"" + str + "\""
  30. }
  31. func printMatrix(matrix [][]int64, name string, fixNegative bool) {
  32. // determine the maximum length of each value
  33. var maxnum int64 = -(1 << 32)
  34. var minnum int64 = 1 << 32
  35. for _, status := range matrix {
  36. for _, val := range status {
  37. if val > maxnum {
  38. maxnum = val
  39. }
  40. if val < minnum {
  41. minnum = val
  42. }
  43. }
  44. }
  45. width := len(strconv.FormatInt(maxnum, 10))
  46. if !fixNegative && minnum < 0 {
  47. width = len(strconv.FormatInt(minnum, 10))
  48. }
  49. last := len(matrix[len(matrix)-1])
  50. indent := 2
  51. if name != "" {
  52. fmt.Printf(" %s: |-\n", safeString(name))
  53. indent += 2
  54. }
  55. // print the resulting triangular matrix
  56. for _, status := range matrix {
  57. fmt.Print(strings.Repeat(" ", indent-1))
  58. for i := 0; i < last; i++ {
  59. var val int64
  60. if i < len(status) {
  61. val = status[i]
  62. // not sure why this sometimes happens...
  63. // TODO(vmarkovtsev): find the root cause of tiny negative balances
  64. if fixNegative && val < 0 {
  65. val = 0
  66. }
  67. }
  68. fmt.Printf(" %[1]*[2]d", width, val)
  69. }
  70. fmt.Println()
  71. }
  72. }
  73. func printCouples(result *hercules.CouplesResult, peopleDict []string) {
  74. fmt.Println("files_coocc:")
  75. fmt.Println(" index:")
  76. for _, file := range result.Files {
  77. fmt.Printf(" - %s\n", safeString(file))
  78. }
  79. fmt.Println(" matrix:")
  80. for _, files := range result.FilesMatrix {
  81. fmt.Print(" - {")
  82. indices := []int{}
  83. for file := range files {
  84. indices = append(indices, file)
  85. }
  86. sort.Ints(indices)
  87. for i, file := range indices {
  88. fmt.Printf("%d: %d", file, files[file])
  89. if i < len(indices)-1 {
  90. fmt.Print(", ")
  91. }
  92. }
  93. fmt.Println("}")
  94. }
  95. fmt.Println("people_coocc:")
  96. fmt.Println(" index:")
  97. for _, person := range peopleDict {
  98. fmt.Printf(" - %s\n", safeString(person))
  99. }
  100. fmt.Println(" matrix:")
  101. for _, people := range result.PeopleMatrix {
  102. fmt.Print(" - {")
  103. indices := []int{}
  104. for file := range people {
  105. indices = append(indices, file)
  106. }
  107. sort.Ints(indices)
  108. for i, person := range indices {
  109. fmt.Printf("%d: %d", person, people[person])
  110. if i < len(indices)-1 {
  111. fmt.Print(", ")
  112. }
  113. }
  114. fmt.Println("}")
  115. }
  116. fmt.Println(" author_files:") // sorted by number of files each author changed
  117. peopleFiles := sortByNumberOfFiles(result.PeopleFiles, peopleDict)
  118. for _, authorFiles := range peopleFiles {
  119. fmt.Printf(" - %s:\n", safeString(authorFiles.Author))
  120. sort.Strings(authorFiles.Files)
  121. for _, file := range authorFiles.Files {
  122. fmt.Printf(" - %s\n", safeString(file)) // sorted by path
  123. }
  124. }
  125. }
  126. func sortByNumberOfFiles(peopleFiles [][]string, peopleDict []string) AuthorFilesList {
  127. var pfl AuthorFilesList
  128. for peopleIdx, files := range peopleFiles {
  129. if peopleIdx < len(peopleDict) {
  130. pfl = append(pfl, AuthorFiles{peopleDict[peopleIdx], files})
  131. }
  132. }
  133. sort.Sort(pfl)
  134. return pfl
  135. }
  136. type AuthorFiles struct {
  137. Author string
  138. Files []string
  139. }
  140. type AuthorFilesList []AuthorFiles
  141. func (s AuthorFilesList) Len() int {
  142. return len(s)
  143. }
  144. func (s AuthorFilesList) Swap(i, j int) {
  145. s[i], s[j] = s[j], s[i]
  146. }
  147. func (s AuthorFilesList) Less(i, j int) bool {
  148. return len(s[i].Files) < len(s[j].Files)
  149. }
  150. func sortedKeys(m map[string][][]int64) []string {
  151. keys := make([]string, 0, len(m))
  152. for k := range m {
  153. keys = append(keys, k)
  154. }
  155. sort.Strings(keys)
  156. return keys
  157. }
  158. func main() {
  159. var with_files bool
  160. var with_people bool
  161. var with_couples bool
  162. var people_dict_path string
  163. var profile bool
  164. var granularity, sampling, similarity_threshold int
  165. var commitsFile string
  166. var debug bool
  167. flag.BoolVar(&with_files, "files", false, "Output detailed statistics per each file.")
  168. flag.BoolVar(&with_people, "people", false, "Output detailed statistics per each developer.")
  169. flag.BoolVar(&with_couples, "couples", false, "Gather the co-occurrence matrix "+
  170. "for files and people.")
  171. flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
  172. flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
  173. flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
  174. flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
  175. flag.IntVar(&similarity_threshold, "M", 90,
  176. "A threshold on the similarity index used to detect renames.")
  177. flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
  178. flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
  179. "commit history to follow instead of the default rev-list "+
  180. "--first-parent. The format is the list of hashes, each hash on a "+
  181. "separate line. The first hash is the root.")
  182. flag.Parse()
  183. if granularity <= 0 {
  184. fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
  185. granularity = 1
  186. }
  187. if profile {
  188. go http.ListenAndServe("localhost:6060", nil)
  189. prof, _ := os.Create("hercules.pprof")
  190. pprof.StartCPUProfile(prof)
  191. defer pprof.StopCPUProfile()
  192. }
  193. if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
  194. fmt.Fprint(os.Stderr,
  195. "Usage: hercules <path to repo or URL> [<disk cache path>]\n")
  196. os.Exit(1)
  197. }
  198. uri := flag.Arg(0)
  199. var repository *git.Repository
  200. var storage storage.Storer
  201. var err error
  202. if strings.Contains(uri, "://") {
  203. if len(flag.Args()) == 2 {
  204. storage, err = filesystem.NewStorage(osfs.New(flag.Arg(1)))
  205. if err != nil {
  206. panic(err)
  207. }
  208. } else {
  209. storage = memory.NewStorage()
  210. }
  211. fmt.Fprint(os.Stderr, "cloning...\r")
  212. repository, err = git.Clone(storage, nil, &git.CloneOptions{
  213. URL: uri,
  214. })
  215. fmt.Fprint(os.Stderr, " \r")
  216. } else {
  217. if uri[len(uri)-1] == os.PathSeparator {
  218. uri = uri[:len(uri)-1]
  219. }
  220. repository, err = git.PlainOpen(uri)
  221. }
  222. if err != nil {
  223. panic(err)
  224. }
  225. // core logic
  226. pipeline := hercules.NewPipeline(repository)
  227. pipeline.OnProgress = func(commit, length int) {
  228. if commit < length {
  229. fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
  230. } else {
  231. fmt.Fprint(os.Stderr, "finalizing... \r")
  232. }
  233. }
  234. // list of commits belonging to the default branch, from oldest to newest
  235. // rev-list --first-parent
  236. var commits []*object.Commit
  237. if commitsFile == "" {
  238. commits = pipeline.Commits()
  239. } else {
  240. commits = hercules.LoadCommitsFromFile(commitsFile, repository)
  241. }
  242. pipeline.AddItem(&hercules.BlobCache{})
  243. pipeline.AddItem(&hercules.DaysSinceStart{})
  244. pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
  245. pipeline.AddItem(&hercules.TreeDiff{})
  246. id_matcher := &hercules.IdentityDetector{}
  247. var peopleCount int
  248. if with_people || with_couples {
  249. if people_dict_path != "" {
  250. id_matcher.LoadPeopleDict(people_dict_path)
  251. peopleCount = len(id_matcher.ReversePeopleDict) - 1
  252. } else {
  253. id_matcher.GeneratePeopleDict(commits)
  254. peopleCount = len(id_matcher.ReversePeopleDict)
  255. }
  256. }
  257. pipeline.AddItem(id_matcher)
  258. burndowner := &hercules.BurndownAnalysis{
  259. Granularity: granularity,
  260. Sampling: sampling,
  261. Debug: debug,
  262. PeopleNumber: peopleCount,
  263. }
  264. pipeline.AddItem(burndowner)
  265. var coupler *hercules.Couples
  266. if with_couples {
  267. coupler = &hercules.Couples{PeopleNumber: peopleCount}
  268. pipeline.AddItem(coupler)
  269. }
  270. pipeline.Initialize()
  271. result, err := pipeline.Run(commits)
  272. if err != nil {
  273. panic(err)
  274. }
  275. fmt.Fprint(os.Stderr, "writing... \r")
  276. burndown_results := result[burndowner].(hercules.BurndownResult)
  277. var couples_result hercules.CouplesResult
  278. if with_couples {
  279. couples_result = result[coupler].(hercules.CouplesResult)
  280. }
  281. if len(burndown_results.GlobalHistory) == 0 {
  282. return
  283. }
  284. // print the start date, granularity, sampling
  285. fmt.Println("burndown:")
  286. fmt.Println(" version: 1")
  287. fmt.Println(" begin:", commits[0].Author.When.Unix())
  288. fmt.Println(" end:", commits[len(commits)-1].Author.When.Unix())
  289. fmt.Println(" granularity:", granularity)
  290. fmt.Println(" sampling:", sampling)
  291. fmt.Println("project:")
  292. printMatrix(burndown_results.GlobalHistory, uri, true)
  293. if with_files {
  294. fmt.Println("files:")
  295. keys := sortedKeys(burndown_results.FileHistories)
  296. for _, key := range keys {
  297. printMatrix(burndown_results.FileHistories[key], key, true)
  298. }
  299. }
  300. if with_people {
  301. fmt.Println("people_sequence:")
  302. for key := range burndown_results.PeopleHistories {
  303. fmt.Println(" - " + id_matcher.ReversePeopleDict[key])
  304. }
  305. fmt.Println("people:")
  306. for key, val := range burndown_results.PeopleHistories {
  307. printMatrix(val, id_matcher.ReversePeopleDict[key], true)
  308. }
  309. fmt.Println("people_interaction: |-")
  310. printMatrix(burndown_results.PeopleMatrix, "", false)
  311. }
  312. if with_couples {
  313. printCouples(&couples_result, id_matcher.ReversePeopleDict)
  314. }
  315. }