blob_cache.go

package plumbing

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"

	"github.com/pkg/errors"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/config"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/merkletrie"

	"gopkg.in/src-d/hercules.v10/internal"
	"gopkg.in/src-d/hercules.v10/internal/core"
)

// ErrorBinary is raised in CachedBlob.CountLines() if the file is binary.
var ErrorBinary = errors.New("binary")

// CachedBlob explicitly caches the binary data associated with a Blob object.
type CachedBlob struct {
	object.Blob
	// Data is the read contents of the blob object.
	Data []byte
}

// Reader returns a ReadCloser that provides access to the cached contents of the blob.
func (b *CachedBlob) Reader() (io.ReadCloser, error) {
	return ioutil.NopCloser(bytes.NewReader(b.Data)), nil
}

// Cache reads the underlying blob object and sets CachedBlob.Data.
func (b *CachedBlob) Cache() error {
	reader, err := b.Blob.Reader()
	if err != nil {
		return err
	}
	defer reader.Close()
	buf := new(bytes.Buffer)
	buf.Grow(int(b.Size))
	size, err := buf.ReadFrom(reader)
	if err != nil {
		return err
	}
	if size != b.Size {
		return fmt.Errorf("incomplete read of %s: %d while the declared size is %d",
			b.Hash.String(), size, b.Size)
	}
	b.Data = buf.Bytes()
	return nil
}

// CountLines returns the number of lines in the blob or (0, ErrorBinary) if it is binary.
func (b *CachedBlob) CountLines() (int, error) {
	if len(b.Data) == 0 {
		return 0, nil
	}
	// 8000 was taken from go-git's utils/binary.IsBinary()
	sniffLen := 8000
	sniff := b.Data
	if len(sniff) > sniffLen {
		sniff = sniff[:sniffLen]
	}
	if bytes.IndexByte(sniff, 0) >= 0 {
		return 0, ErrorBinary
	}
	lines := bytes.Count(b.Data, []byte{'\n'})
	if b.Data[len(b.Data)-1] != '\n' {
		lines++
	}
	return lines, nil
}
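
// The snippet below is an illustrative sketch, not part of the original file: it shows how
// CachedBlob could be used on its own to read and count lines, assuming a *git.Repository
// opened elsewhere and a known blob hash ("repo" and "hash" are placeholders).
//
//	blob, err := repo.BlobObject(hash)
//	if err != nil {
//		return err
//	}
//	cb := &CachedBlob{Blob: *blob}
//	if err = cb.Cache(); err != nil {
//		return err
//	}
//	lines, err := cb.CountLines()
//	if err == ErrorBinary {
//		// the blob contains a zero byte within the first 8000 bytes
//	}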

// BlobCache loads the blobs which correspond to the changed files in a commit.
// It is a PipelineItem.
// It provides both the old and the new blob for every changed file; the internal cache
// rotates between commits so that the same blobs are not loaded twice, and outdated
// entries are evicted so the cache never grows big.
type BlobCache struct {
	core.NoopMerger
	// FailOnMissingSubmodules specifies how to handle a Git submodule, that is, a tree
	// entry without a corresponding blob. If true, we look inside .gitmodules and raise
	// an error if the submodule is not registered there. If false, we do not look inside
	// .gitmodules and always succeed.
	FailOnMissingSubmodules bool

	repository *git.Repository
	cache      map[plumbing.Hash]*CachedBlob
	l          core.Logger
}

const (
	// ConfigBlobCacheFailOnMissingSubmodules is the name of the configuration option for
	// BlobCache.Configure() to check whether the referenced submodules are registered in
	// .gitmodules.
	ConfigBlobCacheFailOnMissingSubmodules = "BlobCache.FailOnMissingSubmodules"
	// DependencyBlobCache identifies the dependency provided by BlobCache.
	DependencyBlobCache = "blob_cache"
)

// Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
func (blobCache *BlobCache) Name() string {
	return "BlobCache"
}

// Provides returns the list of names of entities which are produced by this PipelineItem.
// Each produced entity will be inserted into `deps` of dependent Consume()-s according
// to this list. Also used by core.Registry to build the global map of providers.
func (blobCache *BlobCache) Provides() []string {
	arr := [...]string{DependencyBlobCache}
	return arr[:]
}

// Requires returns the list of names of entities which are needed by this PipelineItem.
// Each requested entity will be inserted into `deps` of Consume(). In turn, those
// entities are provided by the upstream PipelineItem-s' Provides().
func (blobCache *BlobCache) Requires() []string {
	arr := [...]string{DependencyTreeChanges}
	return arr[:]
}

// ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
func (blobCache *BlobCache) ListConfigurationOptions() []core.ConfigurationOption {
	options := [...]core.ConfigurationOption{{
		Name: ConfigBlobCacheFailOnMissingSubmodules,
		Description: "Specifies whether to fail with an error if a referenced submodule does " +
			"not exist in .gitmodules and thus the corresponding Git object cannot be loaded. " +
			"Override this if you want to ensure that your repository is intact.",
		Flag:    "fail-on-missing-submodules",
		Type:    core.BoolConfigurationOption,
		Default: false}}
	return options[:]
}

// Configure sets the properties previously published by ListConfigurationOptions().
func (blobCache *BlobCache) Configure(facts map[string]interface{}) error {
	if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
		blobCache.l = l
	} else {
		blobCache.l = core.NewLogger()
	}
	if val, exists := facts[ConfigBlobCacheFailOnMissingSubmodules].(bool); exists {
		blobCache.FailOnMissingSubmodules = val
	}
	return nil
}

// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
// calls. The repository which is going to be analysed is supplied as an argument.
func (blobCache *BlobCache) Initialize(repository *git.Repository) error {
	blobCache.l = core.NewLogger()
	blobCache.repository = repository
	blobCache.cache = map[plumbing.Hash]*CachedBlob{}
	return nil
}

// Consume runs this PipelineItem on the next commit data.
// `deps` contains all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents
// the analysed *object.Commit. This function returns the mapping with analysis
// results. The keys must be the same as in Provides(). If there was an error,
// nil is returned.
func (blobCache *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	commit := deps[core.DependencyCommit].(*object.Commit)
	changes := deps[DependencyTreeChanges].(object.Changes)
	// cache is handed to the consumers of this commit; newCache becomes the internal
	// cache for the next Consume() call, so blobs which are no longer referenced get dropped.
	cache := map[plumbing.Hash]*CachedBlob{}
	newCache := map[plumbing.Hash]*CachedBlob{}
	for _, change := range changes {
		action, err := change.Action()
		if err != nil {
			blobCache.l.Errorf("no action in %s\n", change.To.TreeEntry.Hash)
			return nil, err
		}
		var exists bool
		var blob *object.Blob
		switch action {
		case merkletrie.Insert:
			// a new file: load and cache the new blob
			cache[change.To.TreeEntry.Hash] = &CachedBlob{}
			newCache[change.To.TreeEntry.Hash] = &CachedBlob{}
			blob, err = blobCache.getBlob(&change.To, commit.File)
			if err != nil {
				blobCache.l.Errorf("file to %s %s: %v\n", change.To.Name, change.To.TreeEntry.Hash, err)
			} else {
				cb := &CachedBlob{Blob: *blob}
				err = cb.Cache()
				if err == nil {
					cache[change.To.TreeEntry.Hash] = cb
					newCache[change.To.TreeEntry.Hash] = cb
				} else {
					blobCache.l.Errorf("file to %s %s: %v\n", change.To.Name, change.To.TreeEntry.Hash, err)
				}
			}
		case merkletrie.Delete:
			// a deleted file: only the old blob is needed; reuse it from the previous
			// commit's cache when possible
			cache[change.From.TreeEntry.Hash], exists =
				blobCache.cache[change.From.TreeEntry.Hash]
			if !exists {
				cache[change.From.TreeEntry.Hash] = &CachedBlob{}
				blob, err = blobCache.getBlob(&change.From, commit.File)
				if err != nil {
					if err.Error() != plumbing.ErrObjectNotFound.Error() {
						blobCache.l.Errorf("file from %s %s: %v\n", change.From.Name,
							change.From.TreeEntry.Hash, err)
					} else {
						blob, err = internal.CreateDummyBlob(change.From.TreeEntry.Hash)
						cache[change.From.TreeEntry.Hash] = &CachedBlob{Blob: *blob}
					}
				} else {
					cb := &CachedBlob{Blob: *blob}
					err = cb.Cache()
					if err == nil {
						cache[change.From.TreeEntry.Hash] = cb
					} else {
						blobCache.l.Errorf("file from %s %s: %v\n", change.From.Name,
							change.From.TreeEntry.Hash, err)
					}
				}
			}
		case merkletrie.Modify:
			// a modified file: both the new and the old blob are needed
			blob, err = blobCache.getBlob(&change.To, commit.File)
			cache[change.To.TreeEntry.Hash] = &CachedBlob{}
			newCache[change.To.TreeEntry.Hash] = &CachedBlob{}
			if err != nil {
				blobCache.l.Errorf("file to %s: %v\n", change.To.Name, err)
			} else {
				cb := &CachedBlob{Blob: *blob}
				err = cb.Cache()
				if err == nil {
					cache[change.To.TreeEntry.Hash] = cb
					newCache[change.To.TreeEntry.Hash] = cb
				} else {
					blobCache.l.Errorf("file to %s: %v\n", change.To.Name, err)
				}
			}
			cache[change.From.TreeEntry.Hash], exists =
				blobCache.cache[change.From.TreeEntry.Hash]
			if !exists {
				cache[change.From.TreeEntry.Hash] = &CachedBlob{}
				blob, err = blobCache.getBlob(&change.From, commit.File)
				if err != nil {
					blobCache.l.Errorf("file from %s: %v\n", change.From.Name, err)
				} else {
					cb := &CachedBlob{Blob: *blob}
					err = cb.Cache()
					if err == nil {
						cache[change.From.TreeEntry.Hash] = cb
					} else {
						blobCache.l.Errorf("file from %s: %v\n", change.From.Name, err)
					}
				}
			}
		}
		if err != nil {
			return nil, err
		}
	}
	blobCache.cache = newCache
	return map[string]interface{}{DependencyBlobCache: cache}, nil
}
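
// Illustrative sketch, not part of the original file: a downstream PipelineItem which lists
// DependencyBlobCache in its Requires() would read the result produced above roughly like
// this ("deps" and "change" are assumed to come from that item's own Consume() and from the
// same DependencyTreeChanges dependency).
//
//	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob)
//	blob := cache[change.To.TreeEntry.Hash]
//	lines, err := blob.CountLines()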

// Fork clones this PipelineItem.
func (blobCache *BlobCache) Fork(n int) []core.PipelineItem {
	caches := make([]core.PipelineItem, n)
	for i := 0; i < n; i++ {
		cache := map[plumbing.Hash]*CachedBlob{}
		for k, v := range blobCache.cache {
			cache[k] = v
		}
		caches[i] = &BlobCache{
			FailOnMissingSubmodules: blobCache.FailOnMissingSubmodules,
			repository:              blobCache.repository,
			cache:                   cache,
			// propagate the logger so that forked copies can report errors in Consume()
			l: blobCache.l,
		}
	}
	return caches
}

// FileGetter defines a function which loads the Git file by
// the specified path. The state can be arbitrary though here it always
// corresponds to the currently processed commit.
type FileGetter func(path string) (*object.File, error)

// Returns the blob which corresponds to the specified ChangeEntry.
func (blobCache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
	*object.Blob, error) {
	blob, err := blobCache.repository.BlobObject(entry.TreeEntry.Hash)
	if err != nil {
		if err.Error() != plumbing.ErrObjectNotFound.Error() {
			blobCache.l.Errorf("getBlob(%s)\n", entry.TreeEntry.Hash.String())
			return nil, err
		}
		if entry.TreeEntry.Mode != 0160000 {
			// this is not a submodule
			return nil, err
		} else if !blobCache.FailOnMissingSubmodules {
			return internal.CreateDummyBlob(entry.TreeEntry.Hash)
		}
		file, errModules := fileGetter(".gitmodules")
		if errModules != nil {
			return nil, errModules
		}
		contents, errModules := file.Contents()
		if errModules != nil {
			return nil, errModules
		}
		modules := config.NewModules()
		errModules = modules.Unmarshal([]byte(contents))
		if errModules != nil {
			return nil, errModules
		}
		_, exists := modules.Submodules[entry.Name]
		if exists {
			// we found that this is a submodule
			return internal.CreateDummyBlob(entry.TreeEntry.Hash)
		}
		return nil, err
	}
	return blob, nil
}
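
// For reference (illustrative, not from the original file): a minimal .gitmodules entry that
// the lookup modules.Submodules[entry.Name] above could match looks like this; the submodule
// name, path, and URL are placeholders.
//
//	[submodule "vendor/lib"]
//		path = vendor/lib
//		url = https://example.com/lib.git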

func init() {
	core.Registry.Register(&BlobCache{})
}