blob_cache.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. package hercules
  2. import (
  3. "fmt"
  4. "os"
  5. "gopkg.in/src-d/go-git.v4"
  6. "gopkg.in/src-d/go-git.v4/config"
  7. "gopkg.in/src-d/go-git.v4/plumbing"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  10. )
  11. // BlobCache loads the blobs which correspond to the changed files in a commit.
  12. // It is a PipelineItem.
  13. // It must provide the old and the new objects; "blobCache" rotates and allows to not load
  14. // the same blobs twice. Outdated objects are removed so "blobCache" never grows big.
  15. type BlobCache struct {
  16. // Specifies how to handle the situation when we encounter a git submodule - an object without
  17. // the blob. If false, we look inside .gitmodules and if don't find, raise an error.
  18. // If true, we do not look inside .gitmodules and always succeed.
  19. IgnoreMissingSubmodules bool
  20. repository *git.Repository
  21. cache map[plumbing.Hash]*object.Blob
  22. }
  23. const (
  24. // ConfigBlobCacheIgnoreMissingSubmodules is the name of the configuration option for
  25. // BlobCache.Configure() to not check if the referenced submodules exist.
  26. ConfigBlobCacheIgnoreMissingSubmodules = "BlobCache.IgnoreMissingSubmodules"
  27. // DependencyBlobCache identifies the dependency provided by BlobCache.
  28. DependencyBlobCache = "blob_cache"
  29. )
  30. func (blobCache *BlobCache) Name() string {
  31. return "BlobCache"
  32. }
  33. func (blobCache *BlobCache) Provides() []string {
  34. arr := [...]string{DependencyBlobCache}
  35. return arr[:]
  36. }
  37. func (blobCache *BlobCache) Requires() []string {
  38. arr := [...]string{DependencyTreeChanges}
  39. return arr[:]
  40. }
  41. func (blobCache *BlobCache) ListConfigurationOptions() []ConfigurationOption {
  42. options := [...]ConfigurationOption{{
  43. Name: ConfigBlobCacheIgnoreMissingSubmodules,
  44. Description: "Specifies whether to panic if some referenced submodules do not exist and thus" +
  45. " the corresponding Git objects cannot be loaded. Override this if you know that the " +
  46. "history is dirty and you want to get things done.",
  47. Flag: "ignore-missing-submodules",
  48. Type: BoolConfigurationOption,
  49. Default: false}}
  50. return options[:]
  51. }
  52. func (blobCache *BlobCache) Configure(facts map[string]interface{}) {
  53. if val, exists := facts[ConfigBlobCacheIgnoreMissingSubmodules].(bool); exists {
  54. blobCache.IgnoreMissingSubmodules = val
  55. }
  56. }
  57. func (blobCache *BlobCache) Initialize(repository *git.Repository) {
  58. blobCache.repository = repository
  59. blobCache.cache = map[plumbing.Hash]*object.Blob{}
  60. }
  61. func (blobCache *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  62. commit := deps["commit"].(*object.Commit)
  63. changes := deps[DependencyTreeChanges].(object.Changes)
  64. cache := map[plumbing.Hash]*object.Blob{}
  65. newCache := map[plumbing.Hash]*object.Blob{}
  66. for _, change := range changes {
  67. action, err := change.Action()
  68. if err != nil {
  69. fmt.Fprintf(os.Stderr, "no action in %s\n", change.To.TreeEntry.Hash)
  70. return nil, err
  71. }
  72. var exists bool
  73. var blob *object.Blob
  74. switch action {
  75. case merkletrie.Insert:
  76. blob, err = blobCache.getBlob(&change.To, commit.File)
  77. if err != nil {
  78. fmt.Fprintf(os.Stderr, "file to %s %s\n", change.To.Name, change.To.TreeEntry.Hash)
  79. } else {
  80. cache[change.To.TreeEntry.Hash] = blob
  81. newCache[change.To.TreeEntry.Hash] = blob
  82. }
  83. case merkletrie.Delete:
  84. cache[change.From.TreeEntry.Hash], exists = blobCache.cache[change.From.TreeEntry.Hash]
  85. if !exists {
  86. cache[change.From.TreeEntry.Hash], err = blobCache.getBlob(&change.From, commit.File)
  87. if err != nil {
  88. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  89. fmt.Fprintf(os.Stderr, "file from %s %s\n", change.From.Name,
  90. change.From.TreeEntry.Hash)
  91. } else {
  92. cache[change.From.TreeEntry.Hash], err = createDummyBlob(
  93. change.From.TreeEntry.Hash)
  94. }
  95. }
  96. }
  97. case merkletrie.Modify:
  98. blob, err = blobCache.getBlob(&change.To, commit.File)
  99. if err != nil {
  100. fmt.Fprintf(os.Stderr, "file to %s\n", change.To.Name)
  101. } else {
  102. cache[change.To.TreeEntry.Hash] = blob
  103. newCache[change.To.TreeEntry.Hash] = blob
  104. }
  105. cache[change.From.TreeEntry.Hash], exists = blobCache.cache[change.From.TreeEntry.Hash]
  106. if !exists {
  107. cache[change.From.TreeEntry.Hash], err = blobCache.getBlob(&change.From, commit.File)
  108. if err != nil {
  109. fmt.Fprintf(os.Stderr, "file from %s\n", change.From.Name)
  110. }
  111. }
  112. }
  113. if err != nil {
  114. return nil, err
  115. }
  116. }
  117. blobCache.cache = newCache
  118. return map[string]interface{}{DependencyBlobCache: cache}, nil
  119. }
  120. // FileGetter defines a function which loads the Git file by the specified path.
  121. // The state can be arbitrary though here it always corresponds to the currently processed
  122. // commit.
  123. type FileGetter func(path string) (*object.File, error)
  124. // Returns the blob which corresponds to the specified ChangeEntry.
  125. func (blobCache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
  126. *object.Blob, error) {
  127. blob, err := blobCache.repository.BlobObject(entry.TreeEntry.Hash)
  128. if err != nil {
  129. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  130. fmt.Fprintf(os.Stderr, "getBlob(%s)\n", entry.TreeEntry.Hash.String())
  131. return nil, err
  132. }
  133. if entry.TreeEntry.Mode != 0160000 {
  134. // this is not a submodule
  135. return nil, err
  136. } else if blobCache.IgnoreMissingSubmodules {
  137. return createDummyBlob(entry.TreeEntry.Hash)
  138. }
  139. file, errModules := fileGetter(".gitmodules")
  140. if errModules != nil {
  141. return nil, errModules
  142. }
  143. contents, errModules := file.Contents()
  144. if errModules != nil {
  145. return nil, errModules
  146. }
  147. modules := config.NewModules()
  148. errModules = modules.Unmarshal([]byte(contents))
  149. if errModules != nil {
  150. return nil, errModules
  151. }
  152. _, exists := modules.Submodules[entry.Name]
  153. if exists {
  154. // we found that this is a submodule
  155. return createDummyBlob(entry.TreeEntry.Hash)
  156. }
  157. return nil, err
  158. }
  159. return blob, nil
  160. }
  161. func init() {
  162. Registry.Register(&BlobCache{})
  163. }