file_history.go 8.4 KB

package leaves

import (
	"fmt"
	"io"
	"log"
	"sort"
	"strings"

	"github.com/gogo/protobuf/proto"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/merkletrie"

	"gopkg.in/src-d/hercules.v10/internal/core"
	"gopkg.in/src-d/hercules.v10/internal/pb"
	items "gopkg.in/src-d/hercules.v10/internal/plumbing"
	"gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
)
// FileHistoryAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// LeafPipelineItem.
type FileHistoryAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	files      map[string]*FileHistory
	lastCommit *object.Commit
	l          core.Logger
}

// FileHistoryResult is returned by Finalize() and represents the analysis result.
type FileHistoryResult struct {
	Files map[string]FileHistory
}

// FileHistory is the gathered stats about a particular file.
type FileHistory struct {
	// Hashes is the list of commit hashes which changed this file.
	Hashes []plumbing.Hash
	// People is the mapping from developers to the number of lines they altered.
	People map[int]items.LineStats
}
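
// For illustration, one entry of the final FileHistoryResult.Files map could look like
// the following (the path, hash and developer index are hypothetical):
//
//	"cmd/root.go": FileHistory{
//		Hashes: []plumbing.Hash{plumbing.NewHash("1e0c1e8a7cbe1a2b...")},
//		People: map[int]items.LineStats{0: {Added: 120, Removed: 5, Changed: 12}},
//	}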
// Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
func (history *FileHistoryAnalysis) Name() string {
	return "FileHistoryAnalysis"
}

// Provides returns the list of names of entities which are produced by this PipelineItem.
// Each produced entity will be inserted into `deps` of dependent Consume()-s according
// to this list. Also used by core.Registry to build the global map of providers.
func (history *FileHistoryAnalysis) Provides() []string {
	return []string{}
}

// Requires returns the list of names of entities which are needed by this PipelineItem.
// Each requested entity will be inserted into `deps` of Consume(). In turn, those
// entities are Provides() upstream.
func (history *FileHistoryAnalysis) Requires() []string {
	arr := [...]string{items.DependencyTreeChanges, items.DependencyLineStats, identity.DependencyAuthor}
	return arr[:]
}

// ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
func (history *FileHistoryAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
	return []core.ConfigurationOption{}
}

// Flag for the command line switch which enables this analysis.
func (history *FileHistoryAnalysis) Flag() string {
	return "file-history"
}

// Description returns the text which explains what the analysis is doing.
func (history *FileHistoryAnalysis) Description() string {
	return "Each file path is mapped to the list of commits which touch that file and the mapping " +
		"from involved developers to the corresponding line statistics: how many lines were added, " +
		"removed and changed throughout the whole history."
}
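
// Flag() above names the command-line switch for this analysis; assuming the stock
// hercules CLI wiring (flag names may differ between versions), a run would look
// roughly like:
//
//	hercules --file-history /path/to/repository > file_history.yaml
//	hercules --file-history --pb /path/to/repository > file_history.pb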
// Configure sets the properties previously published by ListConfigurationOptions().
func (history *FileHistoryAnalysis) Configure(facts map[string]interface{}) error {
	if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
		history.l = l
	}
	return nil
}

// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
// calls. The repository which is going to be analysed is supplied as an argument.
func (history *FileHistoryAnalysis) Initialize(repository *git.Repository) error {
	history.l = core.NewLogger()
	history.files = map[string]*FileHistory{}
	history.OneShotMergeProcessor.Initialize()
	return nil
}
// Consume runs this PipelineItem on the next commit data.
// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
// This function returns the mapping with analysis results. The keys must be the same as
// in Provides(). If there was an error, nil is returned.
func (history *FileHistoryAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	if deps[core.DependencyIsMerge].(bool) {
		// we ignore merge commits
		// TODO(vmarkovtsev): handle them better
		return nil, nil
	}
	history.lastCommit = deps[core.DependencyCommit].(*object.Commit)
	commit := history.lastCommit.Hash
	changes := deps[items.DependencyTreeChanges].(object.Changes)
	for _, change := range changes {
		action, _ := change.Action()
		var fh *FileHistory
		if action != merkletrie.Delete {
			fh = history.files[change.To.Name]
		} else {
			fh = history.files[change.From.Name]
		}
		if fh == nil {
			fh = &FileHistory{}
			history.files[change.To.Name] = fh
		}
		switch action {
		case merkletrie.Insert:
			fh.Hashes = []plumbing.Hash{commit}
		case merkletrie.Delete:
			fh.Hashes = append(fh.Hashes, commit)
		case merkletrie.Modify:
			// a modification may also be a rename: carry the accumulated hashes
			// over from the old path to the new one
			hashes := history.files[change.From.Name].Hashes
			if change.From.Name != change.To.Name {
				delete(history.files, change.From.Name)
			}
			hashes = append(hashes, commit)
			fh.Hashes = hashes
		}
	}
	// accumulate the per-developer line statistics for every changed file
	lineStats := deps[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
	author := deps[identity.DependencyAuthor].(int)
	for changeEntry, stats := range lineStats {
		file := history.files[changeEntry.Name]
		if file == nil {
			file = &FileHistory{}
			history.files[changeEntry.Name] = file
		}
		people := file.People
		if people == nil {
			people = map[int]items.LineStats{}
			file.People = people
		}
		oldStats := people[author]
		people[author] = items.LineStats{
			Added:   oldStats.Added + stats.Added,
			Removed: oldStats.Removed + stats.Removed,
			Changed: oldStats.Changed + stats.Changed,
		}
	}
	return nil, nil
}
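
// As a worked example of the accumulation above (all numbers hypothetical): if author 3
// touches "parser.go" in two commits with LineStats{Added: 5, Removed: 1} and then
// LineStats{Added: 2, Changed: 4}, the resulting entry becomes
// files["parser.go"].People[3] == items.LineStats{Added: 7, Removed: 1, Changed: 4}.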
// Finalize returns the result of the analysis. Further Consume() calls are not expected.
func (history *FileHistoryAnalysis) Finalize() interface{} {
	files := map[string]FileHistory{}
	fileIter, err := history.lastCommit.Files()
	if err != nil {
		log.Panicf("Failed to iterate files of %s", history.lastCommit.Hash.String())
	}
	err = fileIter.ForEach(func(file *object.File) error {
		// only export the files which still exist in the last analysed commit
		if fh := history.files[file.Name]; fh != nil {
			files[file.Name] = *fh
		}
		return nil
	})
	if err != nil {
		log.Panicf("Failed to iterate files of %s", history.lastCommit.Hash.String())
	}
	return FileHistoryResult{Files: files}
}
// Fork clones this PipelineItem.
func (history *FileHistoryAnalysis) Fork(n int) []core.PipelineItem {
	return core.ForkSamePipelineItem(history, n)
}

// Serialize converts the analysis result as returned by Finalize() to text or bytes.
// The text format is YAML and the bytes format is Protocol Buffers.
func (history *FileHistoryAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
	historyResult := result.(FileHistoryResult)
	if binary {
		return history.serializeBinary(&historyResult, writer)
	}
	history.serializeText(&historyResult, writer)
	return nil
}
// serializeText writes the result to writer as YAML, one nested entry per file path.
func (history *FileHistoryAnalysis) serializeText(result *FileHistoryResult, writer io.Writer) {
	keys := make([]string, len(result.Files))
	i := 0
	for key := range result.Files {
		keys[i] = key
		i++
	}
	sort.Strings(keys)
	for _, key := range keys {
		fmt.Fprintf(writer, "  - %s:\n", key)
		file := result.Files[key]
		hashes := file.Hashes
		strhashes := make([]string, len(hashes))
		for i, hash := range hashes {
			strhashes[i] = "\"" + hash.String() + "\""
		}
		sort.Strings(strhashes)
		fmt.Fprintf(writer, "    commits: [%s]\n", strings.Join(strhashes, ","))
		strpeople := make([]string, 0, len(file.People))
		for key, val := range file.People {
			strpeople = append(strpeople, fmt.Sprintf("%d:[%d,%d,%d]", key, val.Added, val.Removed, val.Changed))
		}
		sort.Strings(strpeople)
		fmt.Fprintf(writer, "    people: {%s}\n", strings.Join(strpeople, ","))
	}
}
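
// The text serializer above emits YAML shaped roughly as follows (the path, hashes and
// developer indices are hypothetical):
//
//	  - cmd/root.go:
//	    commits: ["1e0c1e8a...","9f3a52b0..."]
//	    people: {0:[120,5,12],3:[7,1,4]}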
// serializeBinary writes the result to writer as a marshalled Protocol Buffers message.
func (history *FileHistoryAnalysis) serializeBinary(result *FileHistoryResult, writer io.Writer) error {
	message := pb.FileHistoryResultMessage{
		Files: map[string]*pb.FileHistory{},
	}
	for key, vals := range result.Files {
		fh := &pb.FileHistory{
			Commits:            make([]string, len(vals.Hashes)),
			ChangesByDeveloper: map[int32]*pb.LineStats{},
		}
		for i, hash := range vals.Hashes {
			fh.Commits[i] = hash.String()
		}
		for key, val := range vals.People {
			fh.ChangesByDeveloper[int32(key)] = &pb.LineStats{
				Added:   int32(val.Added),
				Removed: int32(val.Removed),
				Changed: int32(val.Changed),
			}
		}
		message.Files[key] = fh
	}
	serialized, err := proto.Marshal(&message)
	if err != nil {
		return err
	}
	_, err = writer.Write(serialized)
	return err
}
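
// A consumer of the binary stream can decode it with the same message type, for example
// (sketch only, error handling omitted; `data` holds the serialized bytes):
//
//	var message pb.FileHistoryResultMessage
//	_ = proto.Unmarshal(data, &message)
//	for path, fh := range message.Files {
//		fmt.Println(path, fh.Commits, fh.ChangesByDeveloper)
//	}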
func init() {
	core.Registry.Register(&FileHistoryAnalysis{})
}