tree_diff.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. package plumbing
  2. import (
  3. "fmt"
  4. "io"
  5. "path"
  6. "regexp"
  7. "strings"
  8. "gopkg.in/src-d/enry.v1"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/plumbing"
  11. "gopkg.in/src-d/go-git.v4/plumbing/object"
  12. "gopkg.in/src-d/hercules.v10/internal/core"
  13. )
  14. // TreeDiff generates the list of changes for a commit. A change can be either one or two blobs
  15. // under the same path: "before" and "after". If "before" is nil, the change is an addition.
  16. // If "after" is nil, the change is a removal. Otherwise, it is a modification.
  17. // TreeDiff is a PipelineItem.
  18. type TreeDiff struct {
  19. core.NoopMerger
  20. SkipFiles []string
  21. NameFilter *regexp.Regexp
  22. // Languages is the set of allowed languages. The values must be lower case. The default
  23. // (empty) set disables the language filter.
  24. Languages map[string]bool
  25. previousTree *object.Tree
  26. previousCommit plumbing.Hash
  27. repository *git.Repository
  28. l core.Logger
  29. }
  30. const (
  31. // DependencyTreeChanges is the name of the dependency provided by TreeDiff.
  32. DependencyTreeChanges = "changes"
  33. // ConfigTreeDiffEnableBlacklist is the name of the configuration option
  34. // (TreeDiff.Configure()) which allows to skip blacklisted directories.
  35. ConfigTreeDiffEnableBlacklist = "TreeDiff.EnableBlacklist"
  36. // ConfigTreeDiffBlacklistedPrefixes s the name of the configuration option
  37. // (TreeDiff.Configure()) which allows to set blacklisted path prefixes -
  38. // directories or complete file names.
  39. ConfigTreeDiffBlacklistedPrefixes = "TreeDiff.BlacklistedPrefixes"
  40. // ConfigTreeDiffLanguages is the name of the configuration option (TreeDiff.Configure())
  41. // which sets the list of programming languages to analyze. Language names are at
  42. // https://doc.bblf.sh/languages.html Names are joined with a comma ",".
  43. // "all" is the special name which disables this filter.
  44. ConfigTreeDiffLanguages = "TreeDiff.LanguagesDetection"
  45. // allLanguages denotes passing all files in.
  46. allLanguages = "all"
  47. // ConfigTreeDiffFilterRegexp is the name of the configuration option
  48. // (TreeDiff.Configure()) which makes FileDiff consider only those files which have names matching this regexp.
  49. ConfigTreeDiffFilterRegexp = "TreeDiff.FilteredRegexes"
  50. )
  51. // defaultBlacklistedPrefixes is the list of file path prefixes which should be skipped by default.
  52. var defaultBlacklistedPrefixes = []string{
  53. "vendor/",
  54. "vendors/",
  55. "package-lock.json",
  56. "Gopkg.lock",
  57. }
  58. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  59. func (treediff *TreeDiff) Name() string {
  60. return "TreeDiff"
  61. }
  62. // Provides returns the list of names of entities which are produced by this PipelineItem.
  63. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  64. // to this list. Also used by core.Registry to build the global map of providers.
  65. func (treediff *TreeDiff) Provides() []string {
  66. arr := [...]string{DependencyTreeChanges}
  67. return arr[:]
  68. }
  69. // Requires returns the list of names of entities which are needed by this PipelineItem.
  70. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  71. // entities are Provides() upstream.
  72. func (treediff *TreeDiff) Requires() []string {
  73. return []string{}
  74. }
  75. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  76. func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption {
  77. options := [...]core.ConfigurationOption{{
  78. Name: ConfigTreeDiffEnableBlacklist,
  79. Description: "Skip blacklisted directories and vendored files (according to " +
  80. "src-d/enry.IsVendor).",
  81. Flag: "skip-blacklist",
  82. Type: core.BoolConfigurationOption,
  83. Default: false}, {
  84. Name: ConfigTreeDiffBlacklistedPrefixes,
  85. Description: "List of blacklisted path prefixes (e.g. directories or specific files). " +
  86. "Values are in the UNIX format (\"path/to/x\"). Values should *not* start with \"/\". " +
  87. "Separated with commas \",\".",
  88. Flag: "blacklisted-prefixes",
  89. Type: core.StringsConfigurationOption,
  90. Default: defaultBlacklistedPrefixes}, {
  91. Name: ConfigTreeDiffLanguages,
  92. Description: fmt.Sprintf(
  93. "List of programming languages to analyze. Separated by comma \",\". "+
  94. "The names are the keys in https://github.com/github/linguist/blob/master/lib/linguist/languages.yml "+
  95. "\"%s\" is the special name which disables this filter and lets all the files through.",
  96. allLanguages),
  97. Flag: "languages",
  98. Type: core.StringsConfigurationOption,
  99. Default: []string{allLanguages}}, {
  100. Name: ConfigTreeDiffFilterRegexp,
  101. Description: "Whitelist regexp to determine which files to analyze.",
  102. Flag: "whitelist",
  103. Type: core.StringConfigurationOption,
  104. Default: ""},
  105. }
  106. return options[:]
  107. }
  108. // Configure sets the properties previously published by ListConfigurationOptions().
  109. func (treediff *TreeDiff) Configure(facts map[string]interface{}) error {
  110. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  111. treediff.l = l
  112. }
  113. if val, exists := facts[ConfigTreeDiffEnableBlacklist].(bool); exists && val {
  114. treediff.SkipFiles = facts[ConfigTreeDiffBlacklistedPrefixes].([]string)
  115. }
  116. if val, exists := facts[ConfigTreeDiffLanguages].([]string); exists {
  117. treediff.Languages = map[string]bool{}
  118. for _, lang := range val {
  119. treediff.Languages[strings.ToLower(strings.TrimSpace(lang))] = true
  120. }
  121. } else if treediff.Languages == nil {
  122. treediff.Languages = map[string]bool{}
  123. treediff.Languages[allLanguages] = true
  124. }
  125. if val, exists := facts[ConfigTreeDiffFilterRegexp].(string); exists {
  126. treediff.NameFilter = regexp.MustCompile(val)
  127. }
  128. return nil
  129. }
  130. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  131. // calls. The repository which is going to be analysed is supplied as an argument.
  132. func (treediff *TreeDiff) Initialize(repository *git.Repository) error {
  133. treediff.l = core.NewLogger()
  134. treediff.previousTree = nil
  135. treediff.repository = repository
  136. if treediff.Languages == nil {
  137. treediff.Languages = map[string]bool{}
  138. treediff.Languages[allLanguages] = true
  139. }
  140. return nil
  141. }
  142. // Consume runs this PipelineItem on the next commit data.
  143. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  144. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  145. // This function returns the mapping with analysis results. The keys must be the same as
  146. // in Provides(). If there was an error, nil is returned.
  147. func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  148. commit := deps[core.DependencyCommit].(*object.Commit)
  149. pass := false
  150. for _, hash := range commit.ParentHashes {
  151. if hash == treediff.previousCommit {
  152. pass = true
  153. }
  154. }
  155. if !pass && treediff.previousCommit != plumbing.ZeroHash {
  156. err := fmt.Errorf("%s > %s", treediff.previousCommit.String(), commit.Hash.String())
  157. treediff.l.Critical(err)
  158. return nil, err
  159. }
  160. tree, err := commit.Tree()
  161. if err != nil {
  162. return nil, err
  163. }
  164. var diffs object.Changes
  165. if treediff.previousTree != nil {
  166. diffs, err = object.DiffTree(treediff.previousTree, tree)
  167. if err != nil {
  168. return nil, err
  169. }
  170. } else {
  171. diffs = []*object.Change{}
  172. err = func() error {
  173. fileIter := tree.Files()
  174. defer fileIter.Close()
  175. for {
  176. file, err := fileIter.Next()
  177. if err != nil {
  178. if err == io.EOF {
  179. break
  180. }
  181. return err
  182. }
  183. pass, err := treediff.checkLanguage(file.Name, file.Hash)
  184. if err != nil {
  185. return err
  186. }
  187. if !pass {
  188. continue
  189. }
  190. diffs = append(diffs, &object.Change{
  191. To: object.ChangeEntry{Name: file.Name, Tree: tree, TreeEntry: object.TreeEntry{
  192. Name: file.Name, Mode: file.Mode, Hash: file.Hash}}})
  193. }
  194. return nil
  195. }()
  196. if err != nil {
  197. return nil, err
  198. }
  199. }
  200. treediff.previousTree = tree
  201. treediff.previousCommit = commit.Hash
  202. diffs = treediff.filterDiffs(diffs)
  203. return map[string]interface{}{DependencyTreeChanges: diffs}, nil
  204. }
  205. func (treediff *TreeDiff) filterDiffs(diffs object.Changes) object.Changes {
  206. // filter without allocation
  207. filteredDiffs := make(object.Changes, 0, len(diffs))
  208. OUTER:
  209. for _, change := range diffs {
  210. if len(treediff.SkipFiles) > 0 && (enry.IsVendor(change.To.Name) || enry.IsVendor(change.From.Name)) {
  211. continue
  212. }
  213. for _, dir := range treediff.SkipFiles {
  214. if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
  215. continue OUTER
  216. }
  217. }
  218. if treediff.NameFilter != nil {
  219. matchedTo := treediff.NameFilter.MatchString(change.To.Name)
  220. matchedFrom := treediff.NameFilter.MatchString(change.From.Name)
  221. if !matchedTo && !matchedFrom {
  222. continue
  223. }
  224. }
  225. var changeEntry object.ChangeEntry
  226. if change.To.Tree == nil {
  227. changeEntry = change.From
  228. } else {
  229. changeEntry = change.To
  230. }
  231. if pass, _ := treediff.checkLanguage(changeEntry.Name, changeEntry.TreeEntry.Hash); !pass {
  232. continue
  233. }
  234. filteredDiffs = append(filteredDiffs, change)
  235. }
  236. return filteredDiffs
  237. }
  238. // Fork clones this PipelineItem.
  239. func (treediff *TreeDiff) Fork(n int) []core.PipelineItem {
  240. return core.ForkCopyPipelineItem(treediff, n)
  241. }
  242. // checkLanguage returns whether the blob corresponds to the list of required languages.
  243. func (treediff *TreeDiff) checkLanguage(name string, blobHash plumbing.Hash) (bool, error) {
  244. if treediff.Languages[allLanguages] {
  245. return true, nil
  246. }
  247. blob, err := treediff.repository.BlobObject(blobHash)
  248. if err != nil {
  249. return false, err
  250. }
  251. reader, err := blob.Reader()
  252. if err != nil {
  253. return false, err
  254. }
  255. buffer := make([]byte, 1024)
  256. n, err := reader.Read(buffer)
  257. if err != nil && (blob.Size != 0 || err != io.EOF) {
  258. return false, err
  259. }
  260. if n < len(buffer) {
  261. buffer = buffer[:n]
  262. }
  263. lang := strings.ToLower(enry.GetLanguage(path.Base(name), buffer))
  264. return treediff.Languages[lang], nil
  265. }
  266. func init() {
  267. core.Registry.Register(&TreeDiff{})
  268. }