main.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. /*
  2. Package main provides the command line tool to gather the line burndown
  3. statistics from Git repositories. Usage:
  4. hercules <URL or FS path>
  5. */
  6. package main
  7. import (
  8. "bufio"
  9. "flag"
  10. "fmt"
  11. "io"
  12. "net/http"
  13. _ "net/http/pprof"
  14. "os"
  15. "runtime/pprof"
  16. "sort"
  17. "strconv"
  18. "strings"
  19. "gopkg.in/src-d/go-billy.v2/osfs"
  20. "gopkg.in/src-d/go-git.v4"
  21. "gopkg.in/src-d/go-git.v4/plumbing"
  22. "gopkg.in/src-d/go-git.v4/plumbing/object"
  23. "gopkg.in/src-d/go-git.v4/storage"
  24. "gopkg.in/src-d/go-git.v4/storage/filesystem"
  25. "gopkg.in/src-d/go-git.v4/storage/memory"
  26. "gopkg.in/src-d/hercules.v1"
  27. )
  // Signature holds the two pieces of identity recorded for a commit author.
  // Authors are matched on one field at a time: Email is tried first and
  // Name is the fallback.
  type Signature struct {
  	Name, Email string
  }
  // loadPeopleDict reads the developer identity file at path. Every line
  // describes one developer as a "|"-separated list of aliases; developers are
  // numbered by line, starting from zero.
  //
  // It returns the alias -> developer index map, the developer index -> raw
  // line map and the number of developers read.
  //
  // The function panics if the file cannot be opened or if reading it fails
  // (including a line that exceeds the bufio.Scanner token limit), so that a
  // partially read dictionary is never returned silently.
  func loadPeopleDict(path string) (map[string]int, map[int]string, int) {
  	file, err := os.Open(path)
  	if err != nil {
  		panic(err)
  	}
  	defer file.Close()

  	dict := make(map[string]int)
  	reverseDict := make(map[int]string)
  	size := 0
  	scanner := bufio.NewScanner(file)
  	for scanner.Scan() {
  		line := scanner.Text()
  		for _, id := range strings.Split(line, "|") {
  			dict[id] = size
  		}
  		reverseDict[size] = line
  		size++
  	}
  	// Scan() returns false both at EOF and on failure; only Err() tells them
  	// apart.
  	if err := scanner.Err(); err != nil {
  		panic(err)
  	}
  	return dict, reverseDict, size
  }
  53. func generatePeopleDict(commits []*object.Commit) (map[string]int, map[int]string, int) {
  54. dict := make(map[string]int)
  55. emails := make(map[int][]string)
  56. names := make(map[int][]string)
  57. size := 0
  58. for _, commit := range commits {
  59. id, exists := dict[commit.Author.Email]
  60. if exists {
  61. _, exists := dict[commit.Author.Name]
  62. if !exists {
  63. dict[commit.Author.Name] = id
  64. names[id] = append(names[id], commit.Author.Name)
  65. }
  66. continue
  67. }
  68. id, exists = dict[commit.Author.Name]
  69. if exists {
  70. dict[commit.Author.Email] = id
  71. emails[id] = append(emails[id], commit.Author.Email)
  72. continue
  73. }
  74. dict[commit.Author.Email] = size
  75. dict[commit.Author.Name] = size
  76. emails[size] = append(emails[size], commit.Author.Email)
  77. names[size] = append(names[size], commit.Author.Name)
  78. size += 1
  79. }
  80. reverse_dict := make(map[int]string)
  81. for _, val := range dict {
  82. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  83. }
  84. return dict, reverse_dict, size
  85. }
  86. func loadCommitsFromFile(path string, repository *git.Repository) []*object.Commit {
  87. var file io.Reader
  88. if path != "-" {
  89. file, err := os.Open(path)
  90. if err != nil {
  91. panic(err)
  92. }
  93. defer file.Close()
  94. } else {
  95. file = os.Stdin
  96. }
  97. scanner := bufio.NewScanner(file)
  98. commits := []*object.Commit{}
  99. for scanner.Scan() {
  100. hash := plumbing.NewHash(scanner.Text())
  101. if len(hash) != 20 {
  102. panic("invalid commit hash " + scanner.Text())
  103. }
  104. commit, err := repository.CommitObject(hash)
  105. if err != nil {
  106. panic(err)
  107. }
  108. commits = append(commits, commit)
  109. }
  110. return commits
  111. }
  // printStatuses writes the matrix statuses to standard output, one row per
  // line. If name is not empty it is printed on its own line first.
  //
  // Every row is printed with as many columns as the last row has: missing
  // cells are shown as 0 and cells beyond that count are omitted. All cells are
  // right-aligned to the width of the largest value. Negative values are
  // printed as 0. An empty matrix prints nothing besides the optional name.
  func printStatuses(statuses [][]int64, name string) {
  	// A single buffered writer avoids one write call per cell; it is flushed
  	// before returning so the output stays ordered with the caller's prints.
  	out := bufio.NewWriter(os.Stdout)
  	defer out.Flush()

  	if name != "" {
  		fmt.Fprintln(out, name)
  	}
  	if len(statuses) == 0 {
  		return
  	}

  	// determine the maximum length of each value
  	var maxnum int64
  	for _, status := range statuses {
  		for _, val := range status {
  			if val > maxnum {
  				maxnum = val
  			}
  		}
  	}
  	width := len(strconv.FormatInt(maxnum, 10))
  	last := len(statuses[len(statuses)-1])

  	// print the resulting triangle matrix
  	for _, status := range statuses {
  		for i := 0; i < last; i++ {
  			var val int64
  			if i < len(status) {
  				val = status[i]
  				// not sure why this sometimes happens...
  				// TODO(vmarkovtsev): find the root cause of tiny negative balances
  				if val < 0 {
  					val = 0
  				}
  			}
  			fmt.Fprintf(out, "%[1]*[2]d ", width, val)
  		}
  		fmt.Fprintln(out)
  	}
  }
  // sortedKeys returns the keys of m in ascending lexicographic order, which
  // gives callers a stable order to iterate the map in.
  func sortedKeys(m map[string][][]int64) []string {
  	names := make([]string, len(m))
  	idx := 0
  	for name := range m {
  		names[idx] = name
  		idx++
  	}
  	sort.Strings(names)
  	return names
  }
  152. func main() {
  153. var with_files bool
  154. var with_people bool
  155. var people_dict_path string
  156. var profile bool
  157. var granularity, sampling, similarity_threshold int
  158. var commitsFile string
  159. var debug bool
  160. flag.BoolVar(&with_files, "files", false, "Output detailed statistics per each file.")
  161. flag.BoolVar(&with_people, "people", false, "Output detailed statistics per each developer.")
  162. flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
  163. flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
  164. flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
  165. flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
  166. flag.IntVar(&similarity_threshold, "M", 90,
  167. "A threshold on the similarity index used to detect renames.")
  168. flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
  169. flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
  170. "commit history to follow instead of the default rev-list "+
  171. "--first-parent. The format is the list of hashes, each hash on a "+
  172. "separate line. The first hash is the root.")
  173. flag.Parse()
  174. if granularity <= 0 {
  175. fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
  176. granularity = 1
  177. }
  178. if profile {
  179. go http.ListenAndServe("localhost:6060", nil)
  180. prof, _ := os.Create("hercules.pprof")
  181. pprof.StartCPUProfile(prof)
  182. defer pprof.StopCPUProfile()
  183. }
  184. if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
  185. fmt.Fprint(os.Stderr,
  186. "Usage: hercules <path to repo or URL> [<disk cache path>]\n")
  187. os.Exit(1)
  188. }
  189. uri := flag.Arg(0)
  190. var repository *git.Repository
  191. var storage storage.Storer
  192. var err error
  193. if strings.Contains(uri, "://") {
  194. if len(flag.Args()) == 2 {
  195. storage, err = filesystem.NewStorage(osfs.New(flag.Arg(1)))
  196. if err != nil {
  197. panic(err)
  198. }
  199. } else {
  200. storage = memory.NewStorage()
  201. }
  202. fmt.Fprint(os.Stderr, "cloning...\r")
  203. repository, err = git.Clone(storage, nil, &git.CloneOptions{
  204. URL: uri,
  205. })
  206. fmt.Fprint(os.Stderr, " \r")
  207. } else {
  208. if uri[len(uri)-1] == os.PathSeparator {
  209. uri = uri[:len(uri)-1]
  210. }
  211. repository, err = git.PlainOpen(uri)
  212. }
  213. if err != nil {
  214. panic(err)
  215. }
  216. // core logic
  217. analyser := hercules.Analyser{
  218. Repository: repository,
  219. OnProgress: func(commit, length int) {
  220. fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
  221. },
  222. Granularity: granularity,
  223. Sampling: sampling,
  224. SimilarityThreshold: similarity_threshold,
  225. Debug: debug,
  226. }
  227. // list of commits belonging to the default branch, from oldest to newest
  228. // rev-list --first-parent
  229. var commits []*object.Commit
  230. if commitsFile == "" {
  231. commits = analyser.Commits()
  232. } else {
  233. commits = loadCommitsFromFile(commitsFile, repository)
  234. }
  235. var people_ids map[int]string
  236. if with_people {
  237. var people_dict map[string]int
  238. var people_number int
  239. if people_dict_path != "" {
  240. people_dict, people_ids, people_number = loadPeopleDict(people_dict_path)
  241. } else {
  242. people_dict, people_ids, people_number = generatePeopleDict(commits)
  243. }
  244. analyser.PeopleNumber = people_number
  245. analyser.PeopleDict = people_dict
  246. }
  247. global_statuses, file_statuses, people_statuses, people_matrix := analyser.Analyse(commits)
  248. fmt.Fprint(os.Stderr, " \r")
  249. if len(global_statuses) == 0 {
  250. return
  251. }
  252. // print the start date, granularity, sampling
  253. fmt.Println(commits[0].Author.When.Unix(),
  254. commits[len(commits)-1].Author.When.Unix(),
  255. granularity, sampling)
  256. printStatuses(global_statuses, "")
  257. if with_files {
  258. keys := sortedKeys(file_statuses)
  259. for _, key := range keys {
  260. fmt.Println()
  261. printStatuses(file_statuses[key], key)
  262. }
  263. }
  264. if with_people {
  265. fmt.Printf("%d\n", len(people_statuses))
  266. for key, val := range people_statuses {
  267. fmt.Printf("%d: ", key)
  268. printStatuses(val, people_ids[key])
  269. fmt.Println()
  270. }
  271. for _, row := range(people_matrix) {
  272. for _, cell := range(row) {
  273. fmt.Print(cell, " ")
  274. }
  275. fmt.Print("\n")
  276. }
  277. }
  278. }