blob_cache.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. package plumbing
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "log"
  8. "github.com/pkg/errors"
  9. "gopkg.in/src-d/go-git.v4"
  10. "gopkg.in/src-d/go-git.v4/config"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  14. "gopkg.in/src-d/hercules.v5/internal"
  15. "gopkg.in/src-d/hercules.v5/internal/core"
  16. )
// ErrorBinary is returned by CachedBlob.CountLines() if the blob contents appear to be binary.
var ErrorBinary = errors.New("binary")

// CachedBlob allows to explicitly cache the binary data associated with the Blob object.
type CachedBlob struct {
	object.Blob
	// Data is the loaded contents of the blob, filled by Cache().
	Data []byte
}
  23. // Reader returns a reader allow the access to the content of the blob
  24. func (b *CachedBlob) Reader() (io.ReadCloser, error) {
  25. return ioutil.NopCloser(bytes.NewReader(b.Data)), nil
  26. }
  27. func (b *CachedBlob) Cache() error {
  28. reader, err := b.Blob.Reader()
  29. if err != nil {
  30. return err
  31. }
  32. defer reader.Close()
  33. buf := new(bytes.Buffer)
  34. buf.Grow(int(b.Size))
  35. size, err := buf.ReadFrom(reader)
  36. if err != nil {
  37. return err
  38. }
  39. if size != b.Size {
  40. return fmt.Errorf("incomplete read of %s: %d while the declared size is %d",
  41. b.Hash.String(), size, b.Size)
  42. }
  43. b.Data = buf.Bytes()
  44. return nil
  45. }
  46. // CountLines returns the number of lines in the blob or (0, ErrorBinary) if it is binary.
  47. func (b *CachedBlob) CountLines() (int, error) {
  48. if len(b.Data) == 0 {
  49. return 0, nil
  50. }
  51. // 8000 was taken from go-git's utils/binary.IsBinary()
  52. sniffLen := 8000
  53. sniff := b.Data
  54. if len(sniff) > sniffLen {
  55. sniff = sniff[:sniffLen]
  56. }
  57. if bytes.IndexByte(sniff, 0) >= 0 {
  58. return 0, ErrorBinary
  59. }
  60. lines := bytes.Count(b.Data, []byte{'\n'})
  61. if b.Data[len(b.Data)-1] != '\n' {
  62. lines++
  63. }
  64. return lines, nil
  65. }
// BlobCache loads the blobs which correspond to the changed files in a commit.
// It is a PipelineItem.
// It must provide the old and the new objects; "blobCache" rotates and allows to not load
// the same blobs twice. Outdated objects are removed so "blobCache" never grows big.
type BlobCache struct {
	core.NoopMerger
	// FailOnMissingSubmodules specifies how to handle the situation when we encounter a
	// git submodule - an object without the blob. If true, we look inside .gitmodules
	// and if we don't find it, raise an error. If false, we do not look inside
	// .gitmodules and always succeed.
	FailOnMissingSubmodules bool
	// repository is the Git repository under analysis, set by Initialize().
	repository *git.Repository
	// cache holds the blobs loaded by the previous Consume() so that unchanged
	// blobs are not loaded twice; it is rotated on every Consume().
	cache map[plumbing.Hash]*CachedBlob
}
const (
	// ConfigBlobCacheFailOnMissingSubmodules is the name of the configuration option for
	// BlobCache.Configure() to check if the referenced submodules are registered in .gitmodules.
	ConfigBlobCacheFailOnMissingSubmodules = "BlobCache.FailOnMissingSubmodules"
	// DependencyBlobCache identifies the dependency provided by BlobCache.
	DependencyBlobCache = "blob_cache"
)
  86. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  87. func (blobCache *BlobCache) Name() string {
  88. return "BlobCache"
  89. }
  90. // Provides returns the list of names of entities which are produced by this PipelineItem.
  91. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  92. // to this list. Also used by core.Registry to build the global map of providers.
  93. func (blobCache *BlobCache) Provides() []string {
  94. arr := [...]string{DependencyBlobCache}
  95. return arr[:]
  96. }
  97. // Requires returns the list of names of entities which are needed by this PipelineItem.
  98. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  99. // entities are Provides() upstream.
  100. func (blobCache *BlobCache) Requires() []string {
  101. arr := [...]string{DependencyTreeChanges}
  102. return arr[:]
  103. }
  104. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  105. func (blobCache *BlobCache) ListConfigurationOptions() []core.ConfigurationOption {
  106. options := [...]core.ConfigurationOption{{
  107. Name: ConfigBlobCacheFailOnMissingSubmodules,
  108. Description: "Specifies whether to panic if any referenced submodule does " +
  109. "not exist in .gitmodules and thus the corresponding Git object cannot be loaded. " +
  110. "Override this if you want to ensure that your repository is integral. ",
  111. Flag: "fail-on-missing-submodules",
  112. Type: core.BoolConfigurationOption,
  113. Default: false}}
  114. return options[:]
  115. }
  116. // Configure sets the properties previously published by ListConfigurationOptions().
  117. func (blobCache *BlobCache) Configure(facts map[string]interface{}) {
  118. if val, exists := facts[ConfigBlobCacheFailOnMissingSubmodules].(bool); exists {
  119. blobCache.FailOnMissingSubmodules = val
  120. }
  121. }
  122. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  123. // calls. The repository which is going to be analysed is supplied as an argument.
  124. func (blobCache *BlobCache) Initialize(repository *git.Repository) {
  125. blobCache.repository = repository
  126. blobCache.cache = map[plumbing.Hash]*CachedBlob{}
  127. }
// Consume runs this PipelineItem on the next commit data.
// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents
// the analysed *object.Commit. This function returns the mapping with analysis
// results. The keys must be the same as in Provides(). If there was an error,
// nil is returned.
//
// `cache` is what gets published under DependencyBlobCache (old + new blobs of
// this commit); `newCache` becomes blobCache.cache for the next Consume() and
// only keeps the blobs which may be referenced again (the "To" side of Insert
// and Modify). Errors are logged where they occur but are still fatal: `err`
// is re-checked after the switch and aborts the whole Consume().
func (blobCache *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	commit := deps[core.DependencyCommit].(*object.Commit)
	changes := deps[DependencyTreeChanges].(object.Changes)
	cache := map[plumbing.Hash]*CachedBlob{}
	newCache := map[plumbing.Hash]*CachedBlob{}
	for _, change := range changes {
		action, err := change.Action()
		if err != nil {
			log.Printf("no action in %s\n", change.To.TreeEntry.Hash)
			return nil, err
		}
		var exists bool
		var blob *object.Blob
		switch action {
		case merkletrie.Insert:
			// placeholders first so the keys exist even if loading fails below
			cache[change.To.TreeEntry.Hash] = &CachedBlob{}
			newCache[change.To.TreeEntry.Hash] = &CachedBlob{}
			blob, err = blobCache.getBlob(&change.To, commit.File)
			if err != nil {
				log.Printf("file to %s %s: %v\n", change.To.Name, change.To.TreeEntry.Hash, err)
			} else {
				cb := &CachedBlob{Blob: *blob}
				err = cb.Cache()
				if err == nil {
					cache[change.To.TreeEntry.Hash] = cb
					newCache[change.To.TreeEntry.Hash] = cb
				} else {
					log.Printf("file to %s %s: %v\n", change.To.Name, change.To.TreeEntry.Hash, err)
				}
			}
		case merkletrie.Delete:
			// a deleted file was usually seen before - reuse the previous cache entry
			cache[change.From.TreeEntry.Hash], exists =
				blobCache.cache[change.From.TreeEntry.Hash]
			if !exists {
				cache[change.From.TreeEntry.Hash] = &CachedBlob{}
				blob, err = blobCache.getBlob(&change.From, commit.File)
				if err != nil {
					if err.Error() != plumbing.ErrObjectNotFound.Error() {
						log.Printf("file from %s %s: %v\n", change.From.Name,
							change.From.TreeEntry.Hash, err)
					} else {
						// the object is gone from the odb; substitute an empty dummy blob
						// NOTE(review): this assignment also resets `err` — presumably
						// CreateDummyBlob never fails here; confirm in internal package
						blob, err = internal.CreateDummyBlob(change.From.TreeEntry.Hash)
						cache[change.From.TreeEntry.Hash] = &CachedBlob{Blob: *blob}
					}
				} else {
					cb := &CachedBlob{Blob: *blob}
					err = cb.Cache()
					if err == nil {
						cache[change.From.TreeEntry.Hash] = cb
					} else {
						log.Printf("file from %s %s: %v\n", change.From.Name,
							change.From.TreeEntry.Hash, err)
					}
				}
			}
		case merkletrie.Modify:
			// the new ("To") revision goes to both maps, like Insert
			blob, err = blobCache.getBlob(&change.To, commit.File)
			cache[change.To.TreeEntry.Hash] = &CachedBlob{}
			newCache[change.To.TreeEntry.Hash] = &CachedBlob{}
			if err != nil {
				log.Printf("file to %s: %v\n", change.To.Name, err)
			} else {
				cb := &CachedBlob{Blob: *blob}
				err = cb.Cache()
				if err == nil {
					cache[change.To.TreeEntry.Hash] = cb
					newCache[change.To.TreeEntry.Hash] = cb
				} else {
					log.Printf("file to %s: %v\n", change.To.Name, err)
				}
			}
			// the old ("From") revision only goes to `cache`, like Delete
			cache[change.From.TreeEntry.Hash], exists =
				blobCache.cache[change.From.TreeEntry.Hash]
			if !exists {
				cache[change.From.TreeEntry.Hash] = &CachedBlob{}
				blob, err = blobCache.getBlob(&change.From, commit.File)
				if err != nil {
					log.Printf("file from %s: %v\n", change.From.Name, err)
				} else {
					cb := &CachedBlob{Blob: *blob}
					err = cb.Cache()
					if err == nil {
						cache[change.From.TreeEntry.Hash] = cb
					} else {
						log.Printf("file from %s: %v\n", change.From.Name, err)
					}
				}
			}
		}
		// any error recorded inside the switch is fatal for the whole commit
		if err != nil {
			return nil, err
		}
	}
	// rotate: only the current commit's "To" blobs survive until the next call
	blobCache.cache = newCache
	return map[string]interface{}{DependencyBlobCache: cache}, nil
}
  230. // Fork clones this PipelineItem.
  231. func (blobCache *BlobCache) Fork(n int) []core.PipelineItem {
  232. caches := make([]core.PipelineItem, n)
  233. for i := 0; i < n; i++ {
  234. cache := map[plumbing.Hash]*CachedBlob{}
  235. for k, v := range blobCache.cache {
  236. cache[k] = v
  237. }
  238. caches[i] = &BlobCache{
  239. FailOnMissingSubmodules: blobCache.FailOnMissingSubmodules,
  240. repository: blobCache.repository,
  241. cache: cache,
  242. }
  243. }
  244. return caches
  245. }
// FileGetter defines a function which loads the Git file by
// the specified path. The state can be arbitrary though here it always
// corresponds to the currently processed commit.
type FileGetter func(path string) (*object.File, error)
  250. // Returns the blob which corresponds to the specified ChangeEntry.
  251. func (blobCache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
  252. *object.Blob, error) {
  253. blob, err := blobCache.repository.BlobObject(entry.TreeEntry.Hash)
  254. if err != nil {
  255. if err.Error() != plumbing.ErrObjectNotFound.Error() {
  256. log.Printf("getBlob(%s)\n", entry.TreeEntry.Hash.String())
  257. return nil, err
  258. }
  259. if entry.TreeEntry.Mode != 0160000 {
  260. // this is not a submodule
  261. return nil, err
  262. } else if !blobCache.FailOnMissingSubmodules {
  263. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  264. }
  265. file, errModules := fileGetter(".gitmodules")
  266. if errModules != nil {
  267. return nil, errModules
  268. }
  269. contents, errModules := file.Contents()
  270. if errModules != nil {
  271. return nil, errModules
  272. }
  273. modules := config.NewModules()
  274. errModules = modules.Unmarshal([]byte(contents))
  275. if errModules != nil {
  276. return nil, errModules
  277. }
  278. _, exists := modules.Submodules[entry.Name]
  279. if exists {
  280. // we found that this is a submodule
  281. return internal.CreateDummyBlob(entry.TreeEntry.Hash)
  282. }
  283. return nil, err
  284. }
  285. return blob, nil
  286. }
func init() {
	// register BlobCache in the global pipeline registry so it can be looked up
	// and instantiated by name when pipelines are assembled
	core.Registry.Register(&BlobCache{})
}