file_history.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "log"
  6. "sort"
  7. "strings"
  8. "github.com/gogo/protobuf/proto"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/plumbing"
  11. "gopkg.in/src-d/go-git.v4/plumbing/object"
  12. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  13. "gopkg.in/src-d/hercules.v9/internal/core"
  14. "gopkg.in/src-d/hercules.v9/internal/pb"
  15. items "gopkg.in/src-d/hercules.v9/internal/plumbing"
  16. "gopkg.in/src-d/hercules.v9/internal/plumbing/identity"
  17. )
// FileHistoryAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// LeafPipelineItem.
type FileHistoryAnalysis struct {
	// NoopMerger provides a no-op merge implementation for forked pipeline branches.
	core.NoopMerger
	// OneShotMergeProcessor tracks merge commits so that each is processed only once.
	core.OneShotMergeProcessor
	// files maps a file path to its accumulated history; populated by Consume().
	files map[string]*FileHistory
	// lastCommit is the most recent non-merge commit seen; Finalize() walks its tree.
	lastCommit *object.Commit
}
// FileHistoryResult is returned by Finalize() and represents the analysis result.
type FileHistoryResult struct {
	// Files maps a file path (as of the last analysed commit) to its gathered history.
	Files map[string]FileHistory
}
// FileHistory is the gathered stats about a particular file.
type FileHistory struct {
	// Hashes is the list of commit hashes which changed this file.
	Hashes []plumbing.Hash
	// People is the mapping from developers to the number of lines they altered.
	People map[int]items.LineStats
}
  37. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  38. func (history *FileHistoryAnalysis) Name() string {
  39. return "FileHistoryAnalysis"
  40. }
  41. // Provides returns the list of names of entities which are produced by this PipelineItem.
  42. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  43. // to this list. Also used by core.Registry to build the global map of providers.
  44. func (history *FileHistoryAnalysis) Provides() []string {
  45. return []string{}
  46. }
  47. // Requires returns the list of names of entities which are needed by this PipelineItem.
  48. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  49. // entities are Provides() upstream.
  50. func (history *FileHistoryAnalysis) Requires() []string {
  51. arr := [...]string{items.DependencyTreeChanges, items.DependencyLineStats, identity.DependencyAuthor}
  52. return arr[:]
  53. }
  54. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  55. func (history *FileHistoryAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  56. return []core.ConfigurationOption{}
  57. }
  58. // Flag for the command line switch which enables this analysis.
  59. func (history *FileHistoryAnalysis) Flag() string {
  60. return "file-history"
  61. }
  62. // Description returns the text which explains what the analysis is doing.
  63. func (history *FileHistoryAnalysis) Description() string {
  64. return "Each file path is mapped to the list of commits which touch that file and the mapping " +
  65. "from involved developers to the corresponding line statistics: how many lines were added, " +
  66. "removed and changed throughout the whole history."
  67. }
  68. // Configure sets the properties previously published by ListConfigurationOptions().
  69. func (history *FileHistoryAnalysis) Configure(facts map[string]interface{}) error {
  70. return nil
  71. }
  72. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  73. // calls. The repository which is going to be analysed is supplied as an argument.
  74. func (history *FileHistoryAnalysis) Initialize(repository *git.Repository) error {
  75. history.files = map[string]*FileHistory{}
  76. history.OneShotMergeProcessor.Initialize()
  77. return nil
  78. }
  79. // Consume runs this PipelineItem on the next commit data.
  80. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  81. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  82. // This function returns the mapping with analysis results. The keys must be the same as
  83. // in Provides(). If there was an error, nil is returned.
  84. func (history *FileHistoryAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  85. if deps[core.DependencyIsMerge].(bool) {
  86. // we ignore merge commits
  87. // TODO(vmarkovtsev): handle them better
  88. return nil, nil
  89. }
  90. history.lastCommit = deps[core.DependencyCommit].(*object.Commit)
  91. commit := history.lastCommit.Hash
  92. changes := deps[items.DependencyTreeChanges].(object.Changes)
  93. for _, change := range changes {
  94. action, _ := change.Action()
  95. var fh *FileHistory
  96. if action != merkletrie.Delete {
  97. fh = history.files[change.To.Name]
  98. } else {
  99. fh = history.files[change.From.Name]
  100. }
  101. if fh == nil {
  102. fh = &FileHistory{}
  103. history.files[change.To.Name] = fh
  104. }
  105. switch action {
  106. case merkletrie.Insert:
  107. fh.Hashes = []plumbing.Hash{commit}
  108. case merkletrie.Delete:
  109. fh.Hashes = append(fh.Hashes, commit)
  110. case merkletrie.Modify:
  111. hashes := history.files[change.From.Name].Hashes
  112. if change.From.Name != change.To.Name {
  113. delete(history.files, change.From.Name)
  114. }
  115. hashes = append(hashes, commit)
  116. fh.Hashes = hashes
  117. }
  118. }
  119. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  120. author := deps[identity.DependencyAuthor].(int)
  121. for changeEntry, stats := range lineStats {
  122. file := history.files[changeEntry.Name]
  123. if file == nil {
  124. file = &FileHistory{}
  125. history.files[changeEntry.Name] = file
  126. }
  127. people := file.People
  128. if people == nil {
  129. people = map[int]items.LineStats{}
  130. file.People = people
  131. }
  132. oldStats := people[author]
  133. people[author] = items.LineStats{
  134. Added: oldStats.Added + stats.Added,
  135. Removed: oldStats.Removed + stats.Removed,
  136. Changed: oldStats.Changed + stats.Changed,
  137. }
  138. }
  139. return nil, nil
  140. }
  141. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  142. func (history *FileHistoryAnalysis) Finalize() interface{} {
  143. files := map[string]FileHistory{}
  144. fileIter, err := history.lastCommit.Files()
  145. if err != nil {
  146. log.Panicf("Failed to iterate files of %s", history.lastCommit.Hash.String())
  147. }
  148. err = fileIter.ForEach(func(file *object.File) error {
  149. if fh := history.files[file.Name]; fh != nil {
  150. files[file.Name] = *fh
  151. }
  152. return nil
  153. })
  154. if err != nil {
  155. log.Panicf("Failed to iterate files of %s", history.lastCommit.Hash.String())
  156. }
  157. return FileHistoryResult{Files: files}
  158. }
  159. // Fork clones this PipelineItem.
  160. func (history *FileHistoryAnalysis) Fork(n int) []core.PipelineItem {
  161. return core.ForkSamePipelineItem(history, n)
  162. }
  163. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  164. // The text format is YAML and the bytes format is Protocol Buffers.
  165. func (history *FileHistoryAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  166. historyResult := result.(FileHistoryResult)
  167. if binary {
  168. return history.serializeBinary(&historyResult, writer)
  169. }
  170. history.serializeText(&historyResult, writer)
  171. return nil
  172. }
  173. func (history *FileHistoryAnalysis) serializeText(result *FileHistoryResult, writer io.Writer) {
  174. keys := make([]string, len(result.Files))
  175. i := 0
  176. for key := range result.Files {
  177. keys[i] = key
  178. i++
  179. }
  180. sort.Strings(keys)
  181. for _, key := range keys {
  182. fmt.Fprintf(writer, " - %s:\n", key)
  183. file := result.Files[key]
  184. hashes := file.Hashes
  185. strhashes := make([]string, len(hashes))
  186. for i, hash := range hashes {
  187. strhashes[i] = "\"" + hash.String() + "\""
  188. }
  189. sort.Strings(strhashes)
  190. fmt.Fprintf(writer, " commits: [%s]\n", strings.Join(strhashes, ","))
  191. strpeople := make([]string, 0, len(file.People))
  192. for key, val := range file.People {
  193. strpeople = append(strpeople, fmt.Sprintf("%d:[%d,%d,%d]", key, val.Added, val.Removed, val.Changed))
  194. }
  195. sort.Strings(strpeople)
  196. fmt.Fprintf(writer, " people: {%s}\n", strings.Join(strpeople, ","))
  197. }
  198. }
  199. func (history *FileHistoryAnalysis) serializeBinary(result *FileHistoryResult, writer io.Writer) error {
  200. message := pb.FileHistoryResultMessage{
  201. Files: map[string]*pb.FileHistory{},
  202. }
  203. for key, vals := range result.Files {
  204. fh := &pb.FileHistory{
  205. Commits: make([]string, len(vals.Hashes)),
  206. ChangesByDeveloper: map[int32]*pb.LineStats{},
  207. }
  208. for i, hash := range vals.Hashes {
  209. fh.Commits[i] = hash.String()
  210. }
  211. for key, val := range vals.People {
  212. fh.ChangesByDeveloper[int32(key)] = &pb.LineStats{
  213. Added: int32(val.Added),
  214. Removed: int32(val.Removed),
  215. Changed: int32(val.Changed),
  216. }
  217. }
  218. message.Files[key] = fh
  219. }
  220. serialized, err := proto.Marshal(&message)
  221. if err != nil {
  222. return err
  223. }
  224. _, err = writer.Write(serialized)
  225. return err
  226. }
  227. func init() {
  228. core.Registry.Register(&FileHistoryAnalysis{})
  229. }