file_history.go

package leaves

import (
	"fmt"
	"io"
	"sort"
	"strings"

	"github.com/gogo/protobuf/proto"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
	"gopkg.in/src-d/hercules.v10/internal/core"
	"gopkg.in/src-d/hercules.v10/internal/pb"
	items "gopkg.in/src-d/hercules.v10/internal/plumbing"
	"gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
)

// FileHistoryAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// LeafPipelineItem.
type FileHistoryAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	files      map[string]*FileHistory
	lastCommit *object.Commit
	l          core.Logger
}

// FileHistoryResult is returned by Finalize() and represents the analysis result.
type FileHistoryResult struct {
	Files map[string]FileHistory
}

// FileHistory is the gathered stats about a particular file.
type FileHistory struct {
	// Hashes is the list of commit hashes which changed this file.
	Hashes []plumbing.Hash
	// People is the mapping from developers to the number of lines they altered.
	People map[int]items.LineStats
}
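
// For illustration only: after a run, a single entry of FileHistoryResult.Files might look
// roughly like the following (hash and values are hypothetical); People maps a developer
// index to the added/removed/changed line counts accumulated for that file.
//
//	FileHistory{
//		Hashes: []plumbing.Hash{plumbing.NewHash("1e7c2a9dc4f6b0e8a35d9d2b7c41f0aa5b6c7d8e")},
//		People: map[int]items.LineStats{
//			0: {Added: 10, Removed: 2, Changed: 5},
//		},
//	}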

// Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
func (history *FileHistoryAnalysis) Name() string {
	return "FileHistoryAnalysis"
}

// Provides returns the list of names of entities which are produced by this PipelineItem.
// Each produced entity will be inserted into `deps` of dependent Consume()-s according
// to this list. Also used by core.Registry to build the global map of providers.
func (history *FileHistoryAnalysis) Provides() []string {
	return []string{}
}

// Requires returns the list of names of entities which are needed by this PipelineItem.
// Each requested entity will be inserted into `deps` of Consume(). In turn, those
// entities are Provides() upstream.
func (history *FileHistoryAnalysis) Requires() []string {
	return []string{items.DependencyTreeChanges, items.DependencyLineStats, identity.DependencyAuthor}
}

// ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
func (history *FileHistoryAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
	return []core.ConfigurationOption{}
}

// Flag for the command line switch which enables this analysis.
func (history *FileHistoryAnalysis) Flag() string {
	return "file-history"
}

// Description returns the text which explains what the analysis is doing.
func (history *FileHistoryAnalysis) Description() string {
	return "Each file path is mapped to the list of commits which touch that file and the mapping " +
		"from involved developers to the corresponding line statistics: how many lines were added, " +
		"removed and changed throughout the whole history."
}

// Configure sets the properties previously published by ListConfigurationOptions().
func (history *FileHistoryAnalysis) Configure(facts map[string]interface{}) error {
	if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
		history.l = l
	}
	return nil
}

// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
// calls. The repository which is going to be analysed is supplied as an argument.
func (history *FileHistoryAnalysis) Initialize(repository *git.Repository) error {
	history.l = core.NewLogger()
	history.files = map[string]*FileHistory{}
	history.OneShotMergeProcessor.Initialize()
	return nil
}

// Consume runs this PipelineItem on the next commit data.
// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
// This function returns the mapping with analysis results. The keys must be the same as
// in Provides(). If there was an error, nil is returned.
func (history *FileHistoryAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	if deps[core.DependencyIsMerge].(bool) {
		// we ignore merge commits
		// TODO(vmarkovtsev): handle them better
		return nil, nil
	}
	history.lastCommit = deps[core.DependencyCommit].(*object.Commit)
	commit := history.lastCommit.Hash
	changes := deps[items.DependencyTreeChanges].(object.Changes)
	for _, change := range changes {
		action, _ := change.Action()
		var fh *FileHistory
		if action != merkletrie.Delete {
			fh = history.files[change.To.Name]
		} else {
			fh = history.files[change.From.Name]
		}
		if fh == nil {
			fh = &FileHistory{}
			history.files[change.To.Name] = fh
		}
		switch action {
		case merkletrie.Insert:
			fh.Hashes = []plumbing.Hash{commit}
		case merkletrie.Delete:
			fh.Hashes = append(fh.Hashes, commit)
		case merkletrie.Modify:
			// on a rename, carry the accumulated hashes over to the new path
			hashes := history.files[change.From.Name].Hashes
			if change.From.Name != change.To.Name {
				delete(history.files, change.From.Name)
			}
			hashes = append(hashes, commit)
			fh.Hashes = hashes
		}
	}
	// accumulate the per-developer line statistics for every touched file
	lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
	author := deps[identity.DependencyAuthor].(int)
	for changeEntry, stats := range lineStats {
		file := history.files[changeEntry.Name]
		if file == nil {
			file = &FileHistory{}
			history.files[changeEntry.Name] = file
		}
		people := file.People
		if people == nil {
			people = map[int]items.LineStats{}
			file.People = people
		}
		oldStats := people[author]
		people[author] = items.LineStats{
			Added:   oldStats.Added + stats.Added,
			Removed: oldStats.Removed + stats.Removed,
			Changed: oldStats.Changed + stats.Changed,
		}
	}
	return nil, nil
}

// Finalize returns the result of the analysis. Further Consume() calls are not expected.
func (history *FileHistoryAnalysis) Finalize() interface{} {
	files := map[string]FileHistory{}
	fileIter, err := history.lastCommit.Files()
	if err != nil {
		history.l.Errorf("Failed to iterate files of %s", history.lastCommit.Hash.String())
		return err
	}
	err = fileIter.ForEach(func(file *object.File) error {
		if fh := history.files[file.Name]; fh != nil {
			files[file.Name] = *fh
		}
		return nil
	})
	if err != nil {
		history.l.Errorf("Failed to iterate files of %s", history.lastCommit.Hash.String())
		return err
	}
	return FileHistoryResult{Files: files}
}

// Fork clones this PipelineItem.
func (history *FileHistoryAnalysis) Fork(n int) []core.PipelineItem {
	return core.ForkSamePipelineItem(history, n)
}

// Serialize converts the analysis result as returned by Finalize() to text or bytes.
// The text format is YAML and the bytes format is Protocol Buffers.
func (history *FileHistoryAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
	historyResult := result.(FileHistoryResult)
	if binary {
		return history.serializeBinary(&historyResult, writer)
	}
	history.serializeText(&historyResult, writer)
	return nil
}
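
// serializeText writes the result as a YAML fragment, one entry per file path. For
// illustration, a single entry would look roughly like this (path and hashes are hypothetical);
// each people value is a developer index mapped to [added, removed, changed] line counts:
//
//	  - cmd/root.go:
//	    commits: ["0e0c0b...","4f1a2d..."]
//	    people: {0:[10,2,5],1:[3,0,1]}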
func (history *FileHistoryAnalysis) serializeText(result *FileHistoryResult, writer io.Writer) {
	keys := make([]string, len(result.Files))
	i := 0
	for key := range result.Files {
		keys[i] = key
		i++
	}
	sort.Strings(keys)
	for _, key := range keys {
		fmt.Fprintf(writer, "  - %s:\n", key)
		file := result.Files[key]
		hashes := file.Hashes
		strhashes := make([]string, len(hashes))
		for i, hash := range hashes {
			strhashes[i] = "\"" + hash.String() + "\""
		}
		sort.Strings(strhashes)
		fmt.Fprintf(writer, "    commits: [%s]\n", strings.Join(strhashes, ","))
		strpeople := make([]string, 0, len(file.People))
		for key, val := range file.People {
			strpeople = append(strpeople, fmt.Sprintf("%d:[%d,%d,%d]", key, val.Added, val.Removed, val.Changed))
		}
		sort.Strings(strpeople)
		fmt.Fprintf(writer, "    people: {%s}\n", strings.Join(strpeople, ","))
	}
}

func (history *FileHistoryAnalysis) serializeBinary(result *FileHistoryResult, writer io.Writer) error {
	message := pb.FileHistoryResultMessage{
		Files: map[string]*pb.FileHistory{},
	}
	for key, vals := range result.Files {
		fh := &pb.FileHistory{
			Commits:            make([]string, len(vals.Hashes)),
			ChangesByDeveloper: map[int32]*pb.LineStats{},
		}
		for i, hash := range vals.Hashes {
			fh.Commits[i] = hash.String()
		}
		for key, val := range vals.People {
			fh.ChangesByDeveloper[int32(key)] = &pb.LineStats{
				Added:   int32(val.Added),
				Removed: int32(val.Removed),
				Changed: int32(val.Changed),
			}
		}
		message.Files[key] = fh
	}
	serialized, err := proto.Marshal(&message)
	if err != nil {
		return err
	}
	_, err = writer.Write(serialized)
	return err
}

func init() {
	core.Registry.Register(&FileHistoryAnalysis{})
}
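
// Usage sketch: FileHistoryAnalysis registers itself in core.Registry in init() above, so it is
// normally enabled through the command line switch returned by Flag(), e.g.
// `hercules --file-history <repository>`. A minimal programmatic driver, assuming the pipeline
// API exported by gopkg.in/src-d/hercules.v10 (NewPipeline, DeployItem, Initialize, Commits, Run),
// might look roughly like:
//
//	pipeline := hercules.NewPipeline(repository)
//	item := pipeline.DeployItem(&leaves.FileHistoryAnalysis{})
//	_ = pipeline.Initialize(map[string]interface{}{})
//	commits, _ := pipeline.Commits(false)
//	results, _ := pipeline.Run(commits)
//	_ = results // the entry keyed by the deployed leaf item holds the FileHistoryResult
//	_ = item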