tree_diff.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. package plumbing
  2. import (
  3. "fmt"
  4. "gopkg.in/src-d/enry.v1"
  5. "io"
  6. "log"
  7. "strings"
  8. "gopkg.in/src-d/go-git.v4"
  9. "gopkg.in/src-d/go-git.v4/plumbing/object"
  10. "gopkg.in/src-d/hercules.v4/internal/core"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. )
  13. // TreeDiff generates the list of changes for a commit. A change can be either one or two blobs
  14. // under the same path: "before" and "after". If "before" is nil, the change is an addition.
  15. // If "after" is nil, the change is a removal. Otherwise, it is a modification.
  16. // TreeDiff is a PipelineItem.
  17. type TreeDiff struct {
  18. core.NoopMerger
  19. SkipDirs []string
  20. Languages map[string]bool
  21. previousTree *object.Tree
  22. previousCommit plumbing.Hash
  23. repository *git.Repository
  24. }
  const (
  	// DependencyTreeChanges is the name of the dependency provided by TreeDiff.
  	DependencyTreeChanges = "changes"

  	// ConfigTreeDiffEnableBlacklist is the name of the configuration option
  	// (TreeDiff.Configure()) which enables skipping of the blacklisted directories.
  	ConfigTreeDiffEnableBlacklist = "TreeDiff.EnableBlacklist"

  	// ConfigTreeDiffBlacklistedDirs is the name of the configuration option
  	// (TreeDiff.Configure()) which sets the list of blacklisted directories.
  	ConfigTreeDiffBlacklistedDirs = "TreeDiff.BlacklistedDirs"

  	// ConfigTreeDiffLanguages is the name of the configuration option
  	// (TreeDiff.Configure()) which sets the list of programming languages to analyze.
  	// Language names are listed at https://doc.bblf.sh/languages.html and are joined
  	// with a comma ",". "all" is the special name which disables this filter.
  	ConfigTreeDiffLanguages = "TreeDiff.Languages"

  	// allLanguages is the special language name which lets every file through.
  	allLanguages = "all"
  )
  // defaultBlacklistedDirs is the default value of the ConfigTreeDiffBlacklistedDirs option:
  // the path prefixes which are skipped when the blacklist is enabled.
  var defaultBlacklistedDirs = []string{
  	"vendor/",
  	"vendors/",
  	"node_modules/",
  }
  43. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  44. func (treediff *TreeDiff) Name() string {
  45. return "TreeDiff"
  46. }
  47. // Provides returns the list of names of entities which are produced by this PipelineItem.
  48. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  49. // to this list. Also used by core.Registry to build the global map of providers.
  50. func (treediff *TreeDiff) Provides() []string {
  51. arr := [...]string{DependencyTreeChanges}
  52. return arr[:]
  53. }
  54. // Requires returns the list of names of entities which are needed by this PipelineItem.
  55. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  56. // entities are Provides() upstream.
  57. func (treediff *TreeDiff) Requires() []string {
  58. return []string{}
  59. }
  60. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  61. func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption {
  62. options := [...]core.ConfigurationOption{{
  63. Name: ConfigTreeDiffEnableBlacklist,
  64. Description: "Skip blacklisted directories.",
  65. Flag: "skip-blacklist",
  66. Type: core.BoolConfigurationOption,
  67. Default: false}, {
  68. Name: ConfigTreeDiffBlacklistedDirs,
  69. Description: "List of blacklisted directories. Separated by comma \",\".",
  70. Flag: "blacklisted-dirs",
  71. Type: core.StringsConfigurationOption,
  72. Default: defaultBlacklistedDirs}, {
  73. Name: ConfigTreeDiffLanguages,
  74. Description: fmt.Sprintf(
  75. "List of programming languages to analyze. Separated by comma \",\". " +
  76. "Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name " +
  77. "which disables this filter and lets all the files through.", allLanguages),
  78. Flag: "languages",
  79. Type: core.StringsConfigurationOption,
  80. Default: []string{allLanguages}},
  81. }
  82. return options[:]
  83. }
  84. // Configure sets the properties previously published by ListConfigurationOptions().
  85. func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
  86. if val, exist := facts[ConfigTreeDiffEnableBlacklist]; exist && val.(bool) {
  87. treediff.SkipDirs = facts[ConfigTreeDiffBlacklistedDirs].([]string)
  88. }
  89. if val, exists := facts[ConfigTreeDiffLanguages].(string); exists {
  90. treediff.Languages = map[string]bool{}
  91. for _, lang := range strings.Split(val, ",") {
  92. treediff.Languages[strings.TrimSpace(lang)] = true
  93. }
  94. } else if treediff.Languages == nil {
  95. treediff.Languages = map[string]bool{}
  96. treediff.Languages[allLanguages] = true
  97. }
  98. }
  99. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  100. // calls. The repository which is going to be analysed is supplied as an argument.
  101. func (treediff *TreeDiff) Initialize(repository *git.Repository) {
  102. treediff.previousTree = nil
  103. treediff.repository = repository
  104. if treediff.Languages == nil {
  105. treediff.Languages = map[string]bool{}
  106. treediff.Languages[allLanguages] = true
  107. }
  108. }
  109. // Consume runs this PipelineItem on the next commit data.
  110. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  111. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  112. // This function returns the mapping with analysis results. The keys must be the same as
  113. // in Provides(). If there was an error, nil is returned.
  114. func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  115. commit := deps[core.DependencyCommit].(*object.Commit)
  116. pass := false
  117. for _, hash := range commit.ParentHashes {
  118. if hash == treediff.previousCommit {
  119. pass = true
  120. }
  121. }
  122. if !pass && treediff.previousCommit != plumbing.ZeroHash {
  123. log.Panicf("%s > %s", treediff.previousCommit.String(), commit.Hash.String())
  124. }
  125. tree, err := commit.Tree()
  126. if err != nil {
  127. return nil, err
  128. }
  129. var diff object.Changes
  130. if treediff.previousTree != nil {
  131. diff, err = object.DiffTree(treediff.previousTree, tree)
  132. if err != nil {
  133. return nil, err
  134. }
  135. } else {
  136. diff = []*object.Change{}
  137. err = func() error {
  138. fileIter := tree.Files()
  139. defer fileIter.Close()
  140. for {
  141. file, err := fileIter.Next()
  142. if err != nil {
  143. if err == io.EOF {
  144. break
  145. }
  146. return err
  147. }
  148. pass, err := treediff.checkLanguage(file.Name, file.Hash)
  149. if err != nil {
  150. return err
  151. }
  152. if !pass {
  153. continue
  154. }
  155. diff = append(diff, &object.Change{
  156. To: object.ChangeEntry{Name: file.Name, Tree: tree, TreeEntry: object.TreeEntry{
  157. Name: file.Name, Mode: file.Mode, Hash: file.Hash}}})
  158. }
  159. return nil
  160. }()
  161. if err != nil {
  162. return nil, err
  163. }
  164. }
  165. treediff.previousTree = tree
  166. treediff.previousCommit = commit.Hash
  167. // filter without allocation
  168. filteredDiff := make([]*object.Change, 0, len(diff))
  169. OUTER:
  170. for _, change := range diff {
  171. for _, dir := range treediff.SkipDirs {
  172. if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
  173. continue OUTER
  174. }
  175. }
  176. var changeEntry object.ChangeEntry
  177. if change.To.Tree == nil {
  178. changeEntry = change.From
  179. } else {
  180. changeEntry = change.To
  181. }
  182. pass, _ := treediff.checkLanguage(changeEntry.Name, changeEntry.TreeEntry.Hash)
  183. if !pass {
  184. continue
  185. }
  186. filteredDiff = append(filteredDiff, change)
  187. }
  188. diff = filteredDiff
  189. return map[string]interface{}{DependencyTreeChanges: diff}, nil
  190. }
  191. // Fork clones this PipelineItem.
  192. func (treediff *TreeDiff) Fork(n int) []core.PipelineItem {
  193. return core.ForkCopyPipelineItem(treediff, n)
  194. }
  195. // checkLanguage returns whether the blob corresponds to the list of required languages.
  196. func (treediff *TreeDiff) checkLanguage(name string, blobHash plumbing.Hash) (bool, error) {
  197. if treediff.Languages[allLanguages] {
  198. return true, nil
  199. }
  200. blob, err := treediff.repository.BlobObject(blobHash)
  201. if err != nil {
  202. return false, err
  203. }
  204. reader, err := blob.Reader()
  205. if err != nil {
  206. return false, err
  207. }
  208. buffer := make([]byte, 1024)
  209. _, err = reader.Read(buffer)
  210. if err != nil {
  211. return false, err
  212. }
  213. lang := enry.GetLanguage(name, buffer)
  214. return treediff.Languages[lang], nil
  215. }
  216. func init() {
  217. core.Registry.Register(&TreeDiff{})
  218. }