uast.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package hercules
  2. import (
  3. "context"
  4. "errors"
  5. "runtime"
  6. "strings"
  7. "sync"
  8. "github.com/jeffail/tunny"
  9. "gopkg.in/bblfsh/client-go.v1"
  10. "gopkg.in/bblfsh/sdk.v1/protocol"
  11. "gopkg.in/bblfsh/sdk.v1/uast"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  16. "fmt"
  17. "os"
  18. )
  19. type UASTExtractor struct {
  20. Endpoint string
  21. Context func() context.Context
  22. PoolSize int
  23. Extensions map[string]bool
  24. FailOnErrors bool
  25. clients []*bblfsh.BblfshClient
  26. pool *tunny.WorkPool
  27. }
  28. type uastTask struct {
  29. Client *bblfsh.BblfshClient
  30. Lock *sync.RWMutex
  31. Dest map[plumbing.Hash]*uast.Node
  32. File *object.File
  33. Errors *[]error
  34. Status chan int
  35. }
  36. type worker struct {
  37. Client *bblfsh.BblfshClient
  38. Job func(interface{}) interface{}
  39. }
  40. func (w worker) TunnyReady() bool {
  41. return true
  42. }
  43. func (w worker) TunnyJob(data interface{}) interface{} {
  44. task := data.(uastTask)
  45. task.Client = w.Client
  46. return w.Job(task)
  47. }
  48. func (exr *UASTExtractor) Name() string {
  49. return "UAST"
  50. }
  51. func (exr *UASTExtractor) Provides() []string {
  52. arr := [...]string{"uasts"}
  53. return arr[:]
  54. }
  55. func (exr *UASTExtractor) Requires() []string {
  56. arr := [...]string{"renamed_changes", "blob_cache"}
  57. return arr[:]
  58. }
  59. func (exr *UASTExtractor) Initialize(repository *git.Repository) {
  60. if exr.Context == nil {
  61. exr.Context = func() context.Context { return context.Background() }
  62. }
  63. poolSize := exr.PoolSize
  64. if poolSize == 0 {
  65. poolSize = runtime.NumCPU()
  66. }
  67. var err error
  68. exr.clients = make([]*bblfsh.BblfshClient, poolSize)
  69. for i := 0; i < poolSize; i++ {
  70. client, err := bblfsh.NewBblfshClient(exr.Endpoint)
  71. if err != nil {
  72. panic(err)
  73. }
  74. exr.clients[i] = client
  75. }
  76. if exr.pool != nil {
  77. exr.pool.Close()
  78. }
  79. workers := make([]tunny.TunnyWorker, poolSize)
  80. for i := 0; i < poolSize; i++ {
  81. workers[i] = worker{Client: exr.clients[i], Job: exr.extractTask}
  82. }
  83. exr.pool, err = tunny.CreateCustomPool(workers).Open()
  84. if err != nil {
  85. panic(err)
  86. }
  87. }
  88. func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  89. cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
  90. treeDiffs := deps["renamed_changes"].(object.Changes)
  91. uasts := map[plumbing.Hash]*uast.Node{}
  92. lock := sync.RWMutex{}
  93. errs := make([]error, 0)
  94. status := make(chan int)
  95. pending := 0
  96. submit := func(change *object.Change) {
  97. var ext string
  98. dotpos := strings.LastIndex(change.To.Name, ".")
  99. if dotpos >= 0 {
  100. ext = change.To.Name[dotpos + 1:]
  101. } else {
  102. ext = change.To.Name
  103. }
  104. _, exists := exr.Extensions[ext]
  105. if !exists {
  106. return
  107. }
  108. pending++
  109. exr.pool.SendWorkAsync(uastTask{
  110. Lock: &lock,
  111. Dest: uasts,
  112. File: &object.File{Name: change.To.Name, Blob: *cache[change.To.TreeEntry.Hash]},
  113. Errors: &errs, Status: status}, nil)
  114. }
  115. for _, change := range treeDiffs {
  116. action, err := change.Action()
  117. if err != nil {
  118. return nil, err
  119. }
  120. switch action {
  121. case merkletrie.Insert:
  122. submit(change)
  123. case merkletrie.Delete:
  124. continue
  125. case merkletrie.Modify:
  126. submit(change)
  127. }
  128. }
  129. for i := 0; i < pending; i++ {
  130. _ = <-status
  131. }
  132. if len(errs) > 0 {
  133. msgs := make([]string, len(errs))
  134. for i, err := range errs {
  135. msgs[i] = err.Error()
  136. }
  137. joined := strings.Join(msgs, "\n")
  138. if exr.FailOnErrors {
  139. return nil, errors.New(joined)
  140. } else {
  141. fmt.Fprintln(os.Stderr, joined)
  142. }
  143. }
  144. return map[string]interface{}{"uasts": uasts}, nil
  145. }
  146. func (exr *UASTExtractor) Finalize() interface{} {
  147. return nil
  148. }
  149. func (exr *UASTExtractor) extractUAST(
  150. client *bblfsh.BblfshClient, file *object.File) (*uast.Node, error) {
  151. request := client.NewParseRequest()
  152. contents, err := file.Contents()
  153. if err != nil {
  154. return nil, err
  155. }
  156. request.Content(contents)
  157. request.Filename(file.Name)
  158. response, err := request.DoWithContext(exr.Context())
  159. if err != nil {
  160. if strings.Contains("missing driver", err.Error()) {
  161. return nil, nil
  162. }
  163. return nil, err
  164. }
  165. if response.Status != protocol.Ok {
  166. return nil, errors.New(strings.Join(response.Errors, "\n"))
  167. }
  168. if err != nil {
  169. return nil, err
  170. }
  171. return response.UAST, nil
  172. }
  173. func (exr *UASTExtractor) extractTask(data interface{}) interface{} {
  174. task := data.(uastTask)
  175. defer func() { task.Status <- 0 }()
  176. node, err := exr.extractUAST(task.Client, task.File)
  177. task.Lock.Lock()
  178. defer task.Lock.Unlock()
  179. if err != nil {
  180. *task.Errors = append(*task.Errors, errors.New(task.File.Name+": "+err.Error()))
  181. return nil
  182. }
  183. task.Dest[task.File.Hash] = node
  184. return nil
  185. }
  186. type UASTChange struct {
  187. Before *uast.Node
  188. After *uast.Node
  189. Change *object.Change
  190. }
  191. type UASTChanges struct {
  192. cache map[plumbing.Hash]*uast.Node
  193. }
  194. func (uc *UASTChanges) Name() string {
  195. return "UASTChanges"
  196. }
  197. func (uc *UASTChanges) Provides() []string {
  198. arr := [...]string{"changed_uasts"}
  199. return arr[:]
  200. }
  201. func (uc *UASTChanges) Requires() []string {
  202. arr := [...]string{"uasts", "renamed_changes"}
  203. return arr[:]
  204. }
  205. func (uc *UASTChanges) Initialize(repository *git.Repository) {
  206. uc.cache = map[plumbing.Hash]*uast.Node{}
  207. }
  208. func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  209. uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
  210. treeDiffs := deps["renamed_changes"].(object.Changes)
  211. commit := make([]UASTChange, 0, len(treeDiffs))
  212. for _, change := range treeDiffs {
  213. action, err := change.Action()
  214. if err != nil {
  215. return nil, err
  216. }
  217. switch action {
  218. case merkletrie.Insert:
  219. hashTo := change.To.TreeEntry.Hash
  220. uastTo := uasts[hashTo]
  221. commit = append(commit, UASTChange{Before: nil, After: uastTo, Change: change})
  222. uc.cache[hashTo] = uastTo
  223. case merkletrie.Delete:
  224. hashFrom := change.From.TreeEntry.Hash
  225. commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: nil, Change: change})
  226. delete(uc.cache, hashFrom)
  227. case merkletrie.Modify:
  228. hashFrom := change.From.TreeEntry.Hash
  229. hashTo := change.To.TreeEntry.Hash
  230. uastTo := uasts[hashTo]
  231. commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: uastTo, Change: change})
  232. delete(uc.cache, hashFrom)
  233. uc.cache[hashTo] = uastTo
  234. }
  235. }
  236. return map[string]interface{}{"changed_uasts": commit}, nil
  237. }
  238. func (uc *UASTChanges) Finalize() interface{} {
  239. return nil
  240. }
  241. type UASTChangesSaver struct {
  242. result [][]UASTChange
  243. }
  244. func (saver *UASTChangesSaver) Name() string {
  245. return "UASTChangesSaver"
  246. }
  247. func (saver *UASTChangesSaver) Provides() []string {
  248. return []string{}
  249. }
  250. func (saver *UASTChangesSaver) Requires() []string {
  251. arr := [...]string{"changed_uasts"}
  252. return arr[:]
  253. }
  254. func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
  255. saver.result = [][]UASTChange{}
  256. }
  257. func (saver *UASTChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  258. changes := deps["changed_uasts"].([]UASTChange)
  259. saver.result = append(saver.result, changes)
  260. return nil, nil
  261. }
  262. func (saver *UASTChangesSaver) Finalize() interface{} {
  263. return saver.result
  264. }