file_history.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "strings"
  7. "github.com/gogo/protobuf/proto"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  12. "gopkg.in/src-d/hercules.v10/internal/core"
  13. "gopkg.in/src-d/hercules.v10/internal/pb"
  14. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  15. "gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
  16. )
// FileHistoryAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// LeafPipelineItem.
type FileHistoryAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// files maps a file path to its accumulated history, mutated by Consume().
	files map[string]*FileHistory
	// lastCommit is the most recent non-merge commit seen by Consume();
	// Finalize() walks its tree to keep only the files which still exist.
	lastCommit *object.Commit
	// l is the logger, set in Initialize() and optionally overridden by Configure().
	l core.Logger
}
// FileHistoryResult is returned by Finalize() and represents the analysis result.
type FileHistoryResult struct {
	// Files maps a file path (as of the last analysed commit) to its gathered history.
	Files map[string]FileHistory
}
// FileHistory is the gathered stats about a particular file.
type FileHistory struct {
	// Hashes is the list of commit hashes which changed this file.
	Hashes []plumbing.Hash
	// People is the mapping from developers (author indexes) to the number of lines they altered.
	People map[int]items.LineStats
}
  37. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  38. func (history *FileHistoryAnalysis) Name() string {
  39. return "FileHistoryAnalysis"
  40. }
  41. // Provides returns the list of names of entities which are produced by this PipelineItem.
  42. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  43. // to this list. Also used by core.Registry to build the global map of providers.
  44. func (history *FileHistoryAnalysis) Provides() []string {
  45. return []string{}
  46. }
  47. // Requires returns the list of names of entities which are needed by this PipelineItem.
  48. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  49. // entities are Provides() upstream.
  50. func (history *FileHistoryAnalysis) Requires() []string {
  51. arr := [...]string{items.DependencyTreeChanges, items.DependencyLineStats, identity.DependencyAuthor}
  52. return arr[:]
  53. }
  54. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  55. func (history *FileHistoryAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  56. return []core.ConfigurationOption{}
  57. }
  58. // Flag for the command line switch which enables this analysis.
  59. func (history *FileHistoryAnalysis) Flag() string {
  60. return "file-history"
  61. }
  62. // Description returns the text which explains what the analysis is doing.
  63. func (history *FileHistoryAnalysis) Description() string {
  64. return "Each file path is mapped to the list of commits which touch that file and the mapping " +
  65. "from involved developers to the corresponding line statistics: how many lines were added, " +
  66. "removed and changed throughout the whole history."
  67. }
  68. // Configure sets the properties previously published by ListConfigurationOptions().
  69. func (history *FileHistoryAnalysis) Configure(facts map[string]interface{}) error {
  70. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  71. history.l = l
  72. }
  73. return nil
  74. }
  75. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  76. // calls. The repository which is going to be analysed is supplied as an argument.
  77. func (history *FileHistoryAnalysis) Initialize(repository *git.Repository) error {
  78. history.l = core.NewLogger()
  79. history.files = map[string]*FileHistory{}
  80. history.OneShotMergeProcessor.Initialize()
  81. return nil
  82. }
  83. // Consume runs this PipelineItem on the next commit data.
  84. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  85. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  86. // This function returns the mapping with analysis results. The keys must be the same as
  87. // in Provides(). If there was an error, nil is returned.
  88. func (history *FileHistoryAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  89. if deps[core.DependencyIsMerge].(bool) {
  90. // we ignore merge commits
  91. // TODO(vmarkovtsev): handle them better
  92. return nil, nil
  93. }
  94. history.lastCommit = deps[core.DependencyCommit].(*object.Commit)
  95. commit := history.lastCommit.Hash
  96. changes := deps[items.DependencyTreeChanges].(object.Changes)
  97. for _, change := range changes {
  98. action, _ := change.Action()
  99. var fh *FileHistory
  100. if action != merkletrie.Delete {
  101. fh = history.files[change.To.Name]
  102. } else {
  103. fh = history.files[change.From.Name]
  104. }
  105. if fh == nil {
  106. fh = &FileHistory{}
  107. history.files[change.To.Name] = fh
  108. }
  109. switch action {
  110. case merkletrie.Insert:
  111. fh.Hashes = []plumbing.Hash{commit}
  112. case merkletrie.Delete:
  113. fh.Hashes = append(fh.Hashes, commit)
  114. case merkletrie.Modify:
  115. hashes := history.files[change.From.Name].Hashes
  116. if change.From.Name != change.To.Name {
  117. delete(history.files, change.From.Name)
  118. }
  119. hashes = append(hashes, commit)
  120. fh.Hashes = hashes
  121. }
  122. }
  123. lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
  124. author := deps[identity.DependencyAuthor].(int)
  125. for changeEntry, stats := range lineStats {
  126. file := history.files[changeEntry.Name]
  127. if file == nil {
  128. file = &FileHistory{}
  129. history.files[changeEntry.Name] = file
  130. }
  131. people := file.People
  132. if people == nil {
  133. people = map[int]items.LineStats{}
  134. file.People = people
  135. }
  136. oldStats := people[author]
  137. people[author] = items.LineStats{
  138. Added: oldStats.Added + stats.Added,
  139. Removed: oldStats.Removed + stats.Removed,
  140. Changed: oldStats.Changed + stats.Changed,
  141. }
  142. }
  143. return nil, nil
  144. }
  145. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  146. func (history *FileHistoryAnalysis) Finalize() interface{} {
  147. files := map[string]FileHistory{}
  148. fileIter, err := history.lastCommit.Files()
  149. if err != nil {
  150. history.l.Errorf("Failed to iterate files of %s", history.lastCommit.Hash.String())
  151. return err
  152. }
  153. err = fileIter.ForEach(func(file *object.File) error {
  154. if fh := history.files[file.Name]; fh != nil {
  155. files[file.Name] = *fh
  156. }
  157. return nil
  158. })
  159. if err != nil {
  160. history.l.Errorf("Failed to iterate files of %s", history.lastCommit.Hash.String())
  161. return err
  162. }
  163. return FileHistoryResult{Files: files}
  164. }
  165. // Fork clones this PipelineItem.
  166. func (history *FileHistoryAnalysis) Fork(n int) []core.PipelineItem {
  167. return core.ForkSamePipelineItem(history, n)
  168. }
  169. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  170. // The text format is YAML and the bytes format is Protocol Buffers.
  171. func (history *FileHistoryAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  172. historyResult := result.(FileHistoryResult)
  173. if binary {
  174. return history.serializeBinary(&historyResult, writer)
  175. }
  176. history.serializeText(&historyResult, writer)
  177. return nil
  178. }
  179. func (history *FileHistoryAnalysis) serializeText(result *FileHistoryResult, writer io.Writer) {
  180. keys := make([]string, len(result.Files))
  181. i := 0
  182. for key := range result.Files {
  183. keys[i] = key
  184. i++
  185. }
  186. sort.Strings(keys)
  187. for _, key := range keys {
  188. fmt.Fprintf(writer, " - %s:\n", key)
  189. file := result.Files[key]
  190. hashes := file.Hashes
  191. strhashes := make([]string, len(hashes))
  192. for i, hash := range hashes {
  193. strhashes[i] = "\"" + hash.String() + "\""
  194. }
  195. sort.Strings(strhashes)
  196. fmt.Fprintf(writer, " commits: [%s]\n", strings.Join(strhashes, ","))
  197. strpeople := make([]string, 0, len(file.People))
  198. for key, val := range file.People {
  199. strpeople = append(strpeople, fmt.Sprintf("%d:[%d,%d,%d]", key, val.Added, val.Removed, val.Changed))
  200. }
  201. sort.Strings(strpeople)
  202. fmt.Fprintf(writer, " people: {%s}\n", strings.Join(strpeople, ","))
  203. }
  204. }
  205. func (history *FileHistoryAnalysis) serializeBinary(result *FileHistoryResult, writer io.Writer) error {
  206. message := pb.FileHistoryResultMessage{
  207. Files: map[string]*pb.FileHistory{},
  208. }
  209. for key, vals := range result.Files {
  210. fh := &pb.FileHistory{
  211. Commits: make([]string, len(vals.Hashes)),
  212. ChangesByDeveloper: map[int32]*pb.LineStats{},
  213. }
  214. for i, hash := range vals.Hashes {
  215. fh.Commits[i] = hash.String()
  216. }
  217. for key, val := range vals.People {
  218. fh.ChangesByDeveloper[int32(key)] = &pb.LineStats{
  219. Added: int32(val.Added),
  220. Removed: int32(val.Removed),
  221. Changed: int32(val.Changed),
  222. }
  223. }
  224. message.Files[key] = fh
  225. }
  226. serialized, err := proto.Marshal(&message)
  227. if err != nil {
  228. return err
  229. }
  230. _, err = writer.Write(serialized)
  231. return err
  232. }
// init registers FileHistoryAnalysis in the global pipeline item registry.
func init() {
	core.Registry.Register(&FileHistoryAnalysis{})
}