blob_cache.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. package plumbing
  2. import (
  3. "log"
  4. "gopkg.in/src-d/go-git.v4"
  5. "gopkg.in/src-d/go-git.v4/config"
  6. "gopkg.in/src-d/go-git.v4/plumbing"
  7. "gopkg.in/src-d/go-git.v4/plumbing/object"
  8. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  9. "gopkg.in/src-d/hercules.v4/internal"
  10. "gopkg.in/src-d/hercules.v4/internal/core"
  11. )
// BlobCache loads the blobs which correspond to the changed files in a commit.
// It is a PipelineItem.
// It provides both the old and the new blob of every changed file. The internal
// cache is rotated on each Consume(): blobs loaded as the "new" side of a change
// are kept so that they do not have to be loaded again when they show up as the
// "old" side in the following commit. Everything else is dropped, so the cache
// never grows big.
type BlobCache struct {
	core.NoopMerger
	// Specifies how to handle the situation when we encounter a git submodule - an object
	// without the blob. If true, we look inside .gitmodules and if we don't find it,
	// raise an error. If false, we do not look inside .gitmodules and always succeed.
	FailOnMissingSubmodules bool
	// repository is the Git repository the blobs are loaded from; set in Initialize().
	repository *git.Repository
	// cache maps blob hashes to the blobs which were loaded as the "new" side of
	// the changes in the previously consumed commit. It is replaced wholesale at
	// the end of every successful Consume().
	cache map[plumbing.Hash]*object.Blob
}
const (
	// ConfigBlobCacheFailOnMissingSubmodules is the name of the configuration option for
	// BlobCache.Configure() to check if the referenced submodules are registered in .gitmodules.
	ConfigBlobCacheFailOnMissingSubmodules = "BlobCache.FailOnMissingSubmodules"
	// DependencyBlobCache identifies the dependency provided by BlobCache.
	DependencyBlobCache = "blob_cache"
)
  32. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  33. func (blobCache *BlobCache) Name() string {
  34. return "BlobCache"
  35. }
  36. // Provides returns the list of names of entities which are produced by this PipelineItem.
  37. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  38. // to this list. Also used by core.Registry to build the global map of providers.
  39. func (blobCache *BlobCache) Provides() []string {
  40. arr := [...]string{DependencyBlobCache}
  41. return arr[:]
  42. }
  43. // Requires returns the list of names of entities which are needed by this PipelineItem.
  44. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  45. // entities are Provides() upstream.
  46. func (blobCache *BlobCache) Requires() []string {
  47. arr := [...]string{DependencyTreeChanges}
  48. return arr[:]
  49. }
  50. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  51. func (blobCache *BlobCache) ListConfigurationOptions() []core.ConfigurationOption {
  52. options := [...]core.ConfigurationOption{{
  53. Name: ConfigBlobCacheFailOnMissingSubmodules,
  54. Description: "Specifies whether to panic if any referenced submodule does " +
  55. "not exist in .gitmodules and thus the corresponding Git object cannot be loaded. " +
  56. "Override this if you want to ensure that your repository is integral. ",
  57. Flag: "fail-on-missing-submodules",
  58. Type: core.BoolConfigurationOption,
  59. Default: false}}
  60. return options[:]
  61. }
  62. // Configure sets the properties previously published by ListConfigurationOptions().
  63. func (blobCache *BlobCache) Configure(facts map[string]interface{}) {
  64. if val, exists := facts[ConfigBlobCacheFailOnMissingSubmodules].(bool); exists {
  65. blobCache.FailOnMissingSubmodules = val
  66. }
  67. }
  68. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  69. // calls. The repository which is going to be analysed is supplied as an argument.
  70. func (blobCache *BlobCache) Initialize(repository *git.Repository) {
  71. blobCache.repository = repository
  72. blobCache.cache = map[plumbing.Hash]*object.Blob{}
  73. }
  74. // Consume runs this PipelineItem on the next commit data.
  75. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  76. // Additionally, DependencyCommit is always present there and represents
  77. // the analysed *object.Commit. This function returns the mapping with analysis
  78. // results. The keys must be the same as in Provides(). If there was an error,
  79. // nil is returned.
  80. func (blobCache *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  81. commit := deps[core.DependencyCommit].(*object.Commit)
  82. changes := deps[DependencyTreeChanges].(object.Changes)
  83. cache := map[plumbing.Hash]*object.Blob{}
  84. newCache := map[plumbing.Hash]*object.Blob{}
  85. for _, change := range changes {
  86. action, err := change.Action()
  87. if err != nil {
  88. log.Printf("no action in %s\n", change.To.TreeEntry.Hash)
  89. return nil, err
  90. }
  91. var exists bool
  92. var blob *object.Blob
  93. switch action {
  94. case merkletrie.Insert:
  95. blob, err = blobCache.getBlob(&change.To, commit.File)
  96. if err != nil {
  97. log.Printf("file to %s %s\n",
  98. change.To.Name, change.To.TreeEntry.Hash)
  99. } else {
  100. cache[change.To.TreeEntry.Hash] = blob
  101. newCache[change.To.TreeEntry.Hash] = blob
  102. }
  103. case merkletrie.Delete:
  104. cache[change.From.TreeEntry.Hash], exists =
  105. blobCache.cache[change.From.TreeEntry.Hash]
  106. if !exists {
  107. cache[change.From.TreeEntry.Hash], err =
  108. blobCache.getBlob(&change.From, commit.File)
  109. if err != nil {
  110. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  111. log.Printf("file from %s %s\n", change.From.Name,
  112. change.From.TreeEntry.Hash)
  113. } else {
  114. cache[change.From.TreeEntry.Hash], err =
  115. internal.CreateDummyBlob(change.From.TreeEntry.Hash)
  116. }
  117. }
  118. }
  119. case merkletrie.Modify:
  120. blob, err = blobCache.getBlob(&change.To, commit.File)
  121. if err != nil {
  122. log.Printf("file to %s\n", change.To.Name)
  123. } else {
  124. cache[change.To.TreeEntry.Hash] = blob
  125. newCache[change.To.TreeEntry.Hash] = blob
  126. }
  127. cache[change.From.TreeEntry.Hash], exists =
  128. blobCache.cache[change.From.TreeEntry.Hash]
  129. if !exists {
  130. cache[change.From.TreeEntry.Hash], err =
  131. blobCache.getBlob(&change.From, commit.File)
  132. if err != nil {
  133. log.Printf("file from %s\n", change.From.Name)
  134. }
  135. }
  136. }
  137. if err != nil {
  138. return nil, err
  139. }
  140. }
  141. blobCache.cache = newCache
  142. return map[string]interface{}{DependencyBlobCache: cache}, nil
  143. }
  144. func (blobCache *BlobCache) Fork(n int) []core.PipelineItem {
  145. caches := make([]core.PipelineItem, n)
  146. for i := 0; i < n; i++ {
  147. cache := map[plumbing.Hash]*object.Blob{}
  148. for k, v := range blobCache.cache {
  149. cache[k] = v
  150. }
  151. caches[i] = &BlobCache{
  152. FailOnMissingSubmodules: blobCache.FailOnMissingSubmodules,
  153. repository: blobCache.repository,
  154. cache: cache,
  155. }
  156. }
  157. return caches
  158. }
// FileGetter defines a function which loads the Git file by
// the specified path. The state can be arbitrary though here it always
// corresponds to the currently processed commit: Consume() passes commit.File.
type FileGetter func(path string) (*object.File, error)
  163. // Returns the blob which corresponds to the specified ChangeEntry.
  164. func (blobCache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
  165. *object.Blob, error) {
  166. blob, err := blobCache.repository.BlobObject(entry.TreeEntry.Hash)
  167. if err != nil {
  168. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  169. log.Printf("getBlob(%s)\n", entry.TreeEntry.Hash.String())
  170. return nil, err
  171. }
  172. if entry.TreeEntry.Mode != 0160000 {
  173. // this is not a submodule
  174. return nil, err
  175. } else if !blobCache.FailOnMissingSubmodules {
  176. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  177. }
  178. file, errModules := fileGetter(".gitmodules")
  179. if errModules != nil {
  180. return nil, errModules
  181. }
  182. contents, errModules := file.Contents()
  183. if errModules != nil {
  184. return nil, errModules
  185. }
  186. modules := config.NewModules()
  187. errModules = modules.Unmarshal([]byte(contents))
  188. if errModules != nil {
  189. return nil, errModules
  190. }
  191. _, exists := modules.Submodules[entry.Name]
  192. if exists {
  193. // we found that this is a submodule
  194. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  195. }
  196. return nil, err
  197. }
  198. return blob, nil
  199. }
  200. func init() {
  201. core.Registry.Register(&BlobCache{})
  202. }