blob_cache.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. package plumbing
  2. import (
  3. "log"
  4. "gopkg.in/src-d/go-git.v4"
  5. "gopkg.in/src-d/go-git.v4/config"
  6. "gopkg.in/src-d/go-git.v4/plumbing"
  7. "gopkg.in/src-d/go-git.v4/plumbing/object"
  8. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  9. "gopkg.in/src-d/hercules.v4/internal"
  10. "gopkg.in/src-d/hercules.v4/internal/core"
  11. )
  12. // BlobCache loads the blobs which correspond to the changed files in a commit.
  13. // It is a PipelineItem.
  14. // It must provide the old and the new objects; "blobCache" rotates and allows to not load
  15. // the same blobs twice. Outdated objects are removed so "blobCache" never grows big.
  16. type BlobCache struct {
  17. // Specifies how to handle the situation when we encounter a git submodule - an object without
  18. // the blob. If false, we look inside .gitmodules and if don't find, raise an error.
  19. // If true, we do not look inside .gitmodules and always succeed.
  20. IgnoreMissingSubmodules bool
  21. repository *git.Repository
  22. cache map[plumbing.Hash]*object.Blob
  23. }
  24. const (
  25. // ConfigBlobCacheIgnoreMissingSubmodules is the name of the configuration option for
  26. // BlobCache.Configure() to not check if the referenced submodules exist.
  27. ConfigBlobCacheIgnoreMissingSubmodules = "BlobCache.IgnoreMissingSubmodules"
  28. // DependencyBlobCache identifies the dependency provided by BlobCache.
  29. DependencyBlobCache = "blob_cache"
  30. )
  31. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  32. func (blobCache *BlobCache) Name() string {
  33. return "BlobCache"
  34. }
  35. // Provides returns the list of names of entities which are produced by this PipelineItem.
  36. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  37. // to this list. Also used by core.Registry to build the global map of providers.
  38. func (blobCache *BlobCache) Provides() []string {
  39. arr := [...]string{DependencyBlobCache}
  40. return arr[:]
  41. }
  42. // Requires returns the list of names of entities which are needed by this PipelineItem.
  43. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  44. // entities are Provides() upstream.
  45. func (blobCache *BlobCache) Requires() []string {
  46. arr := [...]string{DependencyTreeChanges}
  47. return arr[:]
  48. }
  49. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  50. func (blobCache *BlobCache) ListConfigurationOptions() []core.ConfigurationOption {
  51. options := [...]core.ConfigurationOption{{
  52. Name: ConfigBlobCacheIgnoreMissingSubmodules,
  53. Description: "Specifies whether to panic if some referenced submodules do not exist and thus" +
  54. " the corresponding Git objects cannot be loaded. Override this if you know that the " +
  55. "history is dirty and you want to get things done.",
  56. Flag: "ignore-missing-submodules",
  57. Type: core.BoolConfigurationOption,
  58. Default: false}}
  59. return options[:]
  60. }
  61. // Configure sets the properties previously published by ListConfigurationOptions().
  62. func (blobCache *BlobCache) Configure(facts map[string]interface{}) {
  63. if val, exists := facts[ConfigBlobCacheIgnoreMissingSubmodules].(bool); exists {
  64. blobCache.IgnoreMissingSubmodules = val
  65. }
  66. }
  67. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  68. // calls. The repository which is going to be analysed is supplied as an argument.
  69. func (blobCache *BlobCache) Initialize(repository *git.Repository) {
  70. blobCache.repository = repository
  71. blobCache.cache = map[plumbing.Hash]*object.Blob{}
  72. }
  73. // Consume runs this PipelineItem on the next commit data.
  74. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  75. // Additionally, "commit" is always present there and represents the analysed *object.Commit.
  76. // This function returns the mapping with analysis results. The keys must be the same as
  77. // in Provides(). If there was an error, nil is returned.
  78. func (blobCache *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  79. commit := deps["commit"].(*object.Commit)
  80. changes := deps[DependencyTreeChanges].(object.Changes)
  81. cache := map[plumbing.Hash]*object.Blob{}
  82. newCache := map[plumbing.Hash]*object.Blob{}
  83. for _, change := range changes {
  84. action, err := change.Action()
  85. if err != nil {
  86. log.Printf("no action in %s\n", change.To.TreeEntry.Hash)
  87. return nil, err
  88. }
  89. var exists bool
  90. var blob *object.Blob
  91. switch action {
  92. case merkletrie.Insert:
  93. blob, err = blobCache.getBlob(&change.To, commit.File)
  94. if err != nil {
  95. log.Printf("file to %s %s\n", change.To.Name, change.To.TreeEntry.Hash)
  96. } else {
  97. cache[change.To.TreeEntry.Hash] = blob
  98. newCache[change.To.TreeEntry.Hash] = blob
  99. }
  100. case merkletrie.Delete:
  101. cache[change.From.TreeEntry.Hash], exists = blobCache.cache[change.From.TreeEntry.Hash]
  102. if !exists {
  103. cache[change.From.TreeEntry.Hash], err = blobCache.getBlob(&change.From, commit.File)
  104. if err != nil {
  105. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  106. log.Printf("file from %s %s\n", change.From.Name,
  107. change.From.TreeEntry.Hash)
  108. } else {
  109. cache[change.From.TreeEntry.Hash], err = internal.CreateDummyBlob(
  110. change.From.TreeEntry.Hash)
  111. }
  112. }
  113. }
  114. case merkletrie.Modify:
  115. blob, err = blobCache.getBlob(&change.To, commit.File)
  116. if err != nil {
  117. log.Printf("file to %s\n", change.To.Name)
  118. } else {
  119. cache[change.To.TreeEntry.Hash] = blob
  120. newCache[change.To.TreeEntry.Hash] = blob
  121. }
  122. cache[change.From.TreeEntry.Hash], exists = blobCache.cache[change.From.TreeEntry.Hash]
  123. if !exists {
  124. cache[change.From.TreeEntry.Hash], err = blobCache.getBlob(&change.From, commit.File)
  125. if err != nil {
  126. log.Printf("file from %s\n", change.From.Name)
  127. }
  128. }
  129. }
  130. if err != nil {
  131. return nil, err
  132. }
  133. }
  134. blobCache.cache = newCache
  135. return map[string]interface{}{DependencyBlobCache: cache}, nil
  136. }
  137. // FileGetter defines a function which loads the Git file by the specified path.
  138. // The state can be arbitrary though here it always corresponds to the currently processed
  139. // commit.
  140. type FileGetter func(path string) (*object.File, error)
  141. // Returns the blob which corresponds to the specified ChangeEntry.
  142. func (blobCache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
  143. *object.Blob, error) {
  144. blob, err := blobCache.repository.BlobObject(entry.TreeEntry.Hash)
  145. if err != nil {
  146. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  147. log.Printf("getBlob(%s)\n", entry.TreeEntry.Hash.String())
  148. return nil, err
  149. }
  150. if entry.TreeEntry.Mode != 0160000 {
  151. // this is not a submodule
  152. return nil, err
  153. } else if blobCache.IgnoreMissingSubmodules {
  154. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  155. }
  156. file, errModules := fileGetter(".gitmodules")
  157. if errModules != nil {
  158. return nil, errModules
  159. }
  160. contents, errModules := file.Contents()
  161. if errModules != nil {
  162. return nil, errModules
  163. }
  164. modules := config.NewModules()
  165. errModules = modules.Unmarshal([]byte(contents))
  166. if errModules != nil {
  167. return nil, errModules
  168. }
  169. _, exists := modules.Submodules[entry.Name]
  170. if exists {
  171. // we found that this is a submodule
  172. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  173. }
  174. return nil, err
  175. }
  176. return blob, nil
  177. }
  178. func init() {
  179. core.Registry.Register(&BlobCache{})
  180. }