tree_diff.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. package plumbing
  2. import (
  3. "fmt"
  4. "gopkg.in/src-d/enry.v1"
  5. "io"
  6. "log"
  7. "regexp"
  8. "strings"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "gopkg.in/src-d/hercules.v5/internal/core"
  12. "gopkg.in/src-d/go-git.v4/plumbing"
  13. )
  14. // TreeDiff generates the list of changes for a commit. A change can be either one or two blobs
  15. // under the same path: "before" and "after". If "before" is nil, the change is an addition.
  16. // If "after" is nil, the change is a removal. Otherwise, it is a modification.
  17. // TreeDiff is a PipelineItem.
  18. type TreeDiff struct {
  19. core.NoopMerger
  20. SkipDirs []string
  21. NameFilter *regexp.Regexp
  22. Languages map[string]bool
  23. previousTree *object.Tree
  24. previousCommit plumbing.Hash
  25. repository *git.Repository
  26. }
  27. const (
  28. // DependencyTreeChanges is the name of the dependency provided by TreeDiff.
  29. DependencyTreeChanges = "changes"
  30. // ConfigTreeDiffEnableBlacklist is the name of the configuration option
  31. // (TreeDiff.Configure()) which allows to skip blacklisted directories.
  32. ConfigTreeDiffEnableBlacklist = "TreeDiff.EnableBlacklist"
  33. // ConfigTreeDiffBlacklistedPrefixes s the name of the configuration option
  34. // (TreeDiff.Configure()) which allows to set blacklisted path prefixes -
  35. // directories or complete file names.
  36. ConfigTreeDiffBlacklistedPrefixes = "TreeDiff.BlacklistedPrefixes"
  37. // ConfigTreeDiffLanguages is the name of the configuration option (TreeDiff.Configure())
  38. // which sets the list of programming languages to analyze. Language names are at
  39. // https://doc.bblf.sh/languages.html Names are joined with a comma ",".
  40. // "all" is the special name which disables this filter.
  41. ConfigTreeDiffLanguages = "TreeDiff.Languages"
  42. // allLanguages denotes passing all files in.
  43. allLanguages = "all"
  44. // ConfigTreeDiffFilterRegex is the name of the configuration option
  45. // (TreeDiff.Configure()) which makes FileDiff consider only those files which have names matching this regexp.
  46. ConfigTreeDiffFilterRegex = "TreeDiff.FilteredRegexes"
  47. )
  48. // defaultBlacklistedPrefixes is the list of file path prefixes which should be skipped by default.
  49. var defaultBlacklistedPrefixes = []string{
  50. "vendor/",
  51. "vendors/",
  52. "node_modules/",
  53. "package-lock.json",
  54. }
  55. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  56. func (treediff *TreeDiff) Name() string {
  57. return "TreeDiff"
  58. }
  59. // Provides returns the list of names of entities which are produced by this PipelineItem.
  60. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  61. // to this list. Also used by core.Registry to build the global map of providers.
  62. func (treediff *TreeDiff) Provides() []string {
  63. arr := [...]string{DependencyTreeChanges}
  64. return arr[:]
  65. }
  66. // Requires returns the list of names of entities which are needed by this PipelineItem.
  67. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  68. // entities are Provides() upstream.
  69. func (treediff *TreeDiff) Requires() []string {
  70. return []string{}
  71. }
  72. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  73. func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption {
  74. options := [...]core.ConfigurationOption{{
  75. Name: ConfigTreeDiffEnableBlacklist,
  76. Description: "Skip blacklisted directories.",
  77. Flag: "skip-blacklist",
  78. Type: core.BoolConfigurationOption,
  79. Default: false}, {
  80. Name: ConfigTreeDiffBlacklistedPrefixes,
  81. Description: "List of blacklisted path prefixes (e.g. directories or specific files). " +
  82. "Values are in the UNIX format (\"path/to/x\"). Values should *not* start with \"/\". " +
  83. "Separated with commas \",\".",
  84. Flag: "blacklisted-prefixes",
  85. Type: core.StringsConfigurationOption,
  86. Default: defaultBlacklistedPrefixes}, {
  87. Name: ConfigTreeDiffLanguages,
  88. Description: fmt.Sprintf(
  89. "List of programming languages to analyze. Separated by comma \",\". " +
  90. "Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name " +
  91. "which disables this filter and lets all the files through.", allLanguages),
  92. Flag: "languages",
  93. Type: core.StringsConfigurationOption,
  94. Default: []string{allLanguages} }, {
  95. Name: ConfigTreeDiffFilterRegex,
  96. Description: "Whitelist Regex to determine which files to analyze",
  97. Flag: "whitelist",
  98. Type: core.StringConfigurationOption,
  99. Default: ""},
  100. }
  101. return options[:]
  102. }
  103. // Configure sets the properties previously published by ListConfigurationOptions().
  104. func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
  105. if val, exist := facts[ConfigTreeDiffEnableBlacklist]; exist && val.(bool) {
  106. treediff.SkipDirs = facts[ConfigTreeDiffBlacklistedPrefixes].([]string)
  107. }
  108. if val, exists := facts[ConfigTreeDiffLanguages].(string); exists {
  109. treediff.Languages = map[string]bool{}
  110. for _, lang := range strings.Split(val, ",") {
  111. treediff.Languages[strings.TrimSpace(lang)] = true
  112. }
  113. } else if treediff.Languages == nil {
  114. treediff.Languages = map[string]bool{}
  115. treediff.Languages[allLanguages] = true
  116. }
  117. if val, exists := facts[ConfigTreeDiffFilterRegex].(string); exists {
  118. treediff.NameFilter = regexp.MustCompile(val)
  119. }
  120. }
  121. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  122. // calls. The repository which is going to be analysed is supplied as an argument.
  123. func (treediff *TreeDiff) Initialize(repository *git.Repository) {
  124. treediff.previousTree = nil
  125. treediff.repository = repository
  126. if treediff.Languages == nil {
  127. treediff.Languages = map[string]bool{}
  128. treediff.Languages[allLanguages] = true
  129. }
  130. }
  131. // Consume runs this PipelineItem on the next commit data.
  132. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  133. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  134. // This function returns the mapping with analysis results. The keys must be the same as
  135. // in Provides(). If there was an error, nil is returned.
  136. func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  137. commit := deps[core.DependencyCommit].(*object.Commit)
  138. pass := false
  139. for _, hash := range commit.ParentHashes {
  140. if hash == treediff.previousCommit {
  141. pass = true
  142. }
  143. }
  144. if !pass && treediff.previousCommit != plumbing.ZeroHash {
  145. log.Panicf("%s > %s", treediff.previousCommit.String(), commit.Hash.String())
  146. }
  147. tree, err := commit.Tree()
  148. if err != nil {
  149. return nil, err
  150. }
  151. var diff object.Changes
  152. if treediff.previousTree != nil {
  153. diff, err = object.DiffTree(treediff.previousTree, tree)
  154. if err != nil {
  155. return nil, err
  156. }
  157. } else {
  158. diff = []*object.Change{}
  159. err = func() error {
  160. fileIter := tree.Files()
  161. defer fileIter.Close()
  162. for {
  163. file, err := fileIter.Next()
  164. if err != nil {
  165. if err == io.EOF {
  166. break
  167. }
  168. return err
  169. }
  170. pass, err := treediff.checkLanguage(file.Name, file.Hash)
  171. if err != nil {
  172. return err
  173. }
  174. if !pass {
  175. continue
  176. }
  177. diff = append(diff, &object.Change{
  178. To: object.ChangeEntry{Name: file.Name, Tree: tree, TreeEntry: object.TreeEntry{
  179. Name: file.Name, Mode: file.Mode, Hash: file.Hash}}})
  180. }
  181. return nil
  182. }()
  183. if err != nil {
  184. return nil, err
  185. }
  186. }
  187. treediff.previousTree = tree
  188. treediff.previousCommit = commit.Hash
  189. // filter without allocation
  190. filteredDiff := make([]*object.Change, 0, len(diff))
  191. OUTER:
  192. for _, change := range diff {
  193. for _, dir := range treediff.SkipDirs {
  194. if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
  195. continue OUTER
  196. }
  197. }
  198. if treediff.NameFilter != nil {
  199. matchedTo := treediff.NameFilter.MatchString(change.To.Name)
  200. matchedFrom := treediff.NameFilter.MatchString(change.From.Name)
  201. if !matchedTo && !matchedFrom {
  202. continue OUTER
  203. }
  204. }
  205. var changeEntry object.ChangeEntry
  206. if change.To.Tree == nil {
  207. changeEntry = change.From
  208. } else {
  209. changeEntry = change.To
  210. }
  211. pass, _ := treediff.checkLanguage(changeEntry.Name, changeEntry.TreeEntry.Hash)
  212. if !pass {
  213. continue
  214. }
  215. filteredDiff = append(filteredDiff, change)
  216. }
  217. diff = filteredDiff
  218. return map[string]interface{}{DependencyTreeChanges: diff}, nil
  219. }
  220. // Fork clones this PipelineItem.
  221. func (treediff *TreeDiff) Fork(n int) []core.PipelineItem {
  222. return core.ForkCopyPipelineItem(treediff, n)
  223. }
  224. // checkLanguage returns whether the blob corresponds to the list of required languages.
  225. func (treediff *TreeDiff) checkLanguage(name string, blobHash plumbing.Hash) (bool, error) {
  226. if treediff.Languages[allLanguages] {
  227. return true, nil
  228. }
  229. blob, err := treediff.repository.BlobObject(blobHash)
  230. if err != nil {
  231. return false, err
  232. }
  233. reader, err := blob.Reader()
  234. if err != nil {
  235. return false, err
  236. }
  237. buffer := make([]byte, 1024)
  238. _, err = reader.Read(buffer)
  239. if err != nil {
  240. return false, err
  241. }
  242. lang := enry.GetLanguage(name, buffer)
  243. return treediff.Languages[lang], nil
  244. }
  245. func init() {
  246. core.Registry.Register(&TreeDiff{})
  247. }