extractor.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. package imports
  2. import (
  3. "runtime"
  4. "sync"
  5. "github.com/src-d/imports"
  6. _ "github.com/src-d/imports/languages/all" // register the supported languages
  7. "gopkg.in/src-d/go-git.v4"
  8. gitplumbing "gopkg.in/src-d/go-git.v4/plumbing"
  9. "gopkg.in/src-d/go-git.v4/plumbing/object"
  10. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  11. "gopkg.in/src-d/hercules.v10/internal/core"
  12. "gopkg.in/src-d/hercules.v10/internal/plumbing"
  13. )
// Extractor reports the imports in the changed files.
type Extractor struct {
	core.NoopMerger
	// Goroutines is the number of goroutines to run for imports extraction.
	Goroutines int
	// MaxFileSize is the file size threshold. Files that exceed it are ignored.
	MaxFileSize int
	// l is the logger used for warnings and errors; installed by Configure()
	// from the pipeline facts, or created in Initialize() as a fallback.
	l core.Logger
}
const (
	// DependencyImports is the name of the dependency provided by Extractor.
	DependencyImports = "imports"
	// ConfigImportsGoroutines is the name of the configuration option for
	// Extractor.Configure() to set the number of parallel goroutines for imports extraction.
	ConfigImportsGoroutines = "Imports.Goroutines"
	// ConfigMaxFileSize is the name of the configuration option for
	// Extractor.Configure() to set the file size threshold after which they are ignored.
	ConfigMaxFileSize = "Imports.MaxFileSize"
	// DefaultMaxFileSize is the default value for Extractor.MaxFileSize (1 MiB).
	DefaultMaxFileSize = 1 << 20
)
  35. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  36. func (ex *Extractor) Name() string {
  37. return "Imports"
  38. }
  39. // Provides returns the list of names of entities which are produced by this PipelineItem.
  40. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  41. // to this list. Also used by core.Registry to build the global map of providers.
  42. func (ex *Extractor) Provides() []string {
  43. return []string{DependencyImports}
  44. }
  45. // Requires returns the list of names of entities which are needed by this PipelineItem.
  46. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  47. // entities are Provides() upstream.
  48. func (ex *Extractor) Requires() []string {
  49. return []string{plumbing.DependencyTreeChanges, plumbing.DependencyBlobCache}
  50. }
  51. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  52. func (ex *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
  53. return []core.ConfigurationOption{{
  54. Name: ConfigImportsGoroutines,
  55. Description: "Specifies the number of goroutines to run in parallel for the imports extraction.",
  56. Flag: "import-goroutines",
  57. Type: core.IntConfigurationOption,
  58. Default: runtime.NumCPU()}, {
  59. Name: ConfigMaxFileSize,
  60. Description: "Specifies the file size threshold. Files that exceed it are ignored.",
  61. Flag: "import-max-file-size",
  62. Type: core.IntConfigurationOption,
  63. Default: DefaultMaxFileSize},
  64. }
  65. }
  66. // Configure sets the properties previously published by ListConfigurationOptions().
  67. func (ex *Extractor) Configure(facts map[string]interface{}) error {
  68. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  69. ex.l = l
  70. }
  71. if gr, exists := facts[ConfigImportsGoroutines].(int); exists {
  72. if gr < 1 {
  73. if ex.l != nil {
  74. ex.l.Warnf("invalid number of goroutines for the imports extraction: %d. Set to %d.",
  75. gr, runtime.NumCPU())
  76. }
  77. gr = runtime.NumCPU()
  78. }
  79. ex.Goroutines = gr
  80. }
  81. if size, exists := facts[ConfigMaxFileSize].(int); exists {
  82. if size <= 0 {
  83. if ex.l != nil {
  84. ex.l.Warnf("invalid maximum file size: %d. Set to %d.", size, DefaultMaxFileSize)
  85. }
  86. size = DefaultMaxFileSize
  87. }
  88. ex.MaxFileSize = size
  89. }
  90. return nil
  91. }
  92. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  93. // calls. The repository which is going to be analysed is supplied as an argument.
  94. func (ex *Extractor) Initialize(repository *git.Repository) error {
  95. ex.l = core.NewLogger()
  96. if ex.Goroutines < 1 {
  97. ex.Goroutines = runtime.NumCPU()
  98. }
  99. if ex.MaxFileSize == 0 {
  100. ex.MaxFileSize = DefaultMaxFileSize
  101. }
  102. return nil
  103. }
  104. // Consume runs this PipelineItem on the next commit data.
  105. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  106. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  107. // This function returns the mapping with analysis results. The keys must be the same as
  108. // in Provides(). If there was an error, nil is returned.
  109. func (ex *Extractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  110. changes := deps[plumbing.DependencyTreeChanges].(object.Changes)
  111. cache := deps[plumbing.DependencyBlobCache].(map[gitplumbing.Hash]*plumbing.CachedBlob)
  112. result := map[gitplumbing.Hash]imports.File{}
  113. jobs := make(chan *object.Change, ex.Goroutines)
  114. resultSync := sync.Mutex{}
  115. wg := sync.WaitGroup{}
  116. wg.Add(ex.Goroutines)
  117. for i := 0; i < ex.Goroutines; i++ {
  118. go func() {
  119. for change := range jobs {
  120. blob := cache[change.To.TreeEntry.Hash]
  121. if blob.Size > int64(ex.MaxFileSize) {
  122. ex.l.Warnf("skipped %s %s: size is too big: %d > %d",
  123. change.To.TreeEntry.Name, change.To.TreeEntry.Hash.String(),
  124. blob.Size, ex.MaxFileSize)
  125. continue
  126. }
  127. file, err := imports.Extract(change.To.TreeEntry.Name, blob.Data)
  128. if err != nil {
  129. ex.l.Errorf("failed to extract imports from %s %s: %v",
  130. change.To.TreeEntry.Name, change.To.TreeEntry.Hash.String(), err)
  131. } else {
  132. resultSync.Lock()
  133. result[change.To.TreeEntry.Hash] = *file
  134. resultSync.Unlock()
  135. }
  136. }
  137. wg.Done()
  138. }()
  139. }
  140. for _, change := range changes {
  141. action, err := change.Action()
  142. if err != nil {
  143. return nil, err
  144. }
  145. switch action {
  146. case merkletrie.Modify, merkletrie.Insert:
  147. jobs <- change
  148. case merkletrie.Delete:
  149. continue
  150. }
  151. }
  152. close(jobs)
  153. wg.Wait()
  154. return map[string]interface{}{DependencyImports: result}, nil
  155. }
  156. // Fork clones this PipelineItem.
  157. func (ex *Extractor) Fork(n int) []core.PipelineItem {
  158. return core.ForkSamePipelineItem(ex, n)
  159. }
  160. func init() {
  161. core.Registry.Register(&Extractor{})
  162. }