uast.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. package hercules
  2. import (
  3. "bytes"
  4. "context"
  5. "errors"
  6. "fmt"
  7. "os"
  8. "runtime"
  9. "strings"
  10. "sync"
  11. "time"
  12. "github.com/jeffail/tunny"
  13. "gopkg.in/bblfsh/client-go.v1"
  14. "gopkg.in/bblfsh/sdk.v1/protocol"
  15. "gopkg.in/bblfsh/sdk.v1/uast"
  16. "gopkg.in/src-d/enry.v1"
  17. "gopkg.in/src-d/go-git.v4"
  18. "gopkg.in/src-d/go-git.v4/plumbing"
  19. "gopkg.in/src-d/go-git.v4/plumbing/object"
  20. "gopkg.in/src-d/go-git.v4/utils/ioutil"
  21. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  22. )
  23. type UASTExtractor struct {
  24. Endpoint string
  25. Context func() context.Context
  26. PoolSize int
  27. Languages map[string]bool
  28. FailOnErrors bool
  29. ProcessedFiles map[string]int
  30. clients []*bblfsh.BblfshClient
  31. pool *tunny.WorkPool
  32. }
  // UAST_EXTRACTION_SKIPPED is the value stored in UASTExtractor.ProcessedFiles
  // for a file whose language is not among the configured ones.
  const UAST_EXTRACTION_SKIPPED = -(1 << 31)

  // Keys of the configuration facts read by UASTExtractor.Configure.
  const (
  	// ConfigUASTEndpoint is the fact holding the Babelfish server address.
  	ConfigUASTEndpoint = "UAST.Endpoint"
  	// ConfigUASTTimeout is the fact holding the request timeout in seconds.
  	ConfigUASTTimeout = "UAST.Timeout"
  	// ConfigUASTPoolSize is the fact holding the number of workers.
  	ConfigUASTPoolSize = "UAST.PoolSize"
  	// ConfigUASTFailOnErrors is the fact toggling failure on extraction errors.
  	ConfigUASTFailOnErrors = "UAST.FailOnErrors"
  	// ConfigUASTLanguages is the fact holding the comma-separated language list.
  	ConfigUASTLanguages = "UAST.Languages"
  )
  41. type uastTask struct {
  42. Client *bblfsh.BblfshClient
  43. Lock *sync.RWMutex
  44. Dest map[plumbing.Hash]*uast.Node
  45. File *object.File
  46. Errors *[]error
  47. Status chan int
  48. }
  49. type worker struct {
  50. Client *bblfsh.BblfshClient
  51. Job func(interface{}) interface{}
  52. }
  53. func (w worker) TunnyReady() bool {
  54. return true
  55. }
  56. func (w worker) TunnyJob(data interface{}) interface{} {
  57. task := data.(uastTask)
  58. task.Client = w.Client
  59. return w.Job(task)
  60. }
  61. func (exr *UASTExtractor) Name() string {
  62. return "UAST"
  63. }
  64. func (exr *UASTExtractor) Provides() []string {
  65. arr := [...]string{"uasts"}
  66. return arr[:]
  67. }
  68. func (exr *UASTExtractor) Requires() []string {
  69. arr := [...]string{"changes", "blob_cache"}
  70. return arr[:]
  71. }
  72. func (exr *UASTExtractor) Features() []string {
  73. arr := [...]string{"uast"}
  74. return arr[:]
  75. }
  76. func (exr *UASTExtractor) ListConfigurationOptions() []ConfigurationOption {
  77. options := [...]ConfigurationOption{{
  78. Name: ConfigUASTEndpoint,
  79. Description: "How many days there are in a single band.",
  80. Flag: "bblfsh",
  81. Type: StringConfigurationOption,
  82. Default: "0.0.0.0:9432"}, {
  83. Name: ConfigUASTTimeout,
  84. Description: "Babelfish's server timeout in seconds.",
  85. Flag: "bblfsh-timeout",
  86. Type: IntConfigurationOption,
  87. Default: 20}, {
  88. Name: ConfigUASTPoolSize,
  89. Description: "Number of goroutines to extract UASTs.",
  90. Flag: "bblfsh-pool-size",
  91. Type: IntConfigurationOption,
  92. Default: runtime.NumCPU()}, {
  93. Name: ConfigUASTFailOnErrors,
  94. Description: "Panic if there is a UAST extraction error.",
  95. Flag: "bblfsh-fail-on-error",
  96. Type: BoolConfigurationOption,
  97. Default: false}, {
  98. Name: ConfigUASTLanguages,
  99. Description: "Programming languages from which to extract UASTs. Separated by comma \",\".",
  100. Flag: "languages",
  101. Type: StringConfigurationOption,
  102. Default: "Python,Java"},
  103. }
  104. return options[:]
  105. }
  106. func (exr *UASTExtractor) Configure(facts map[string]interface{}) {
  107. if val, exists := facts[ConfigUASTEndpoint].(string); exists {
  108. exr.Endpoint = val
  109. }
  110. if val, exists := facts["UAST.Timeout"].(int); exists {
  111. exr.Context = func() context.Context {
  112. ctx, _ := context.WithTimeout(context.Background(),
  113. time.Duration(val)*time.Second)
  114. return ctx
  115. }
  116. }
  117. if val, exists := facts[ConfigUASTPoolSize].(int); exists {
  118. exr.PoolSize = val
  119. }
  120. if val, exists := facts[ConfigUASTLanguages].(string); exists {
  121. exr.Languages = map[string]bool{}
  122. for _, lang := range strings.Split(val, ",") {
  123. exr.Languages[lang] = true
  124. }
  125. }
  126. if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
  127. exr.FailOnErrors = val
  128. }
  129. }
  130. func (exr *UASTExtractor) Initialize(repository *git.Repository) {
  131. if exr.Context == nil {
  132. exr.Context = func() context.Context { return context.Background() }
  133. }
  134. poolSize := exr.PoolSize
  135. if poolSize == 0 {
  136. poolSize = runtime.NumCPU()
  137. }
  138. var err error
  139. exr.clients = make([]*bblfsh.BblfshClient, poolSize)
  140. for i := 0; i < poolSize; i++ {
  141. client, err := bblfsh.NewBblfshClient(exr.Endpoint)
  142. if err != nil {
  143. panic(err)
  144. }
  145. exr.clients[i] = client
  146. }
  147. if exr.pool != nil {
  148. exr.pool.Close()
  149. }
  150. workers := make([]tunny.TunnyWorker, poolSize)
  151. for i := 0; i < poolSize; i++ {
  152. workers[i] = worker{Client: exr.clients[i], Job: exr.extractTask}
  153. }
  154. exr.pool, err = tunny.CreateCustomPool(workers).Open()
  155. if err != nil {
  156. panic(err)
  157. }
  158. exr.ProcessedFiles = map[string]int{}
  159. }
  160. func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  161. cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
  162. treeDiffs := deps["changes"].(object.Changes)
  163. uasts := map[plumbing.Hash]*uast.Node{}
  164. lock := sync.RWMutex{}
  165. errs := make([]error, 0)
  166. status := make(chan int)
  167. pending := 0
  168. submit := func(change *object.Change) {
  169. {
  170. reader, err := cache[change.To.TreeEntry.Hash].Reader()
  171. if err != nil {
  172. errs = append(errs, err)
  173. return
  174. }
  175. defer ioutil.CheckClose(reader, &err)
  176. buf := new(bytes.Buffer)
  177. if _, err := buf.ReadFrom(reader); err != nil {
  178. errs = append(errs, err)
  179. return
  180. }
  181. lang, _ := enry.GetLanguageByContent(change.To.Name, buf.Bytes())
  182. if _, exists := exr.Languages[lang]; !exists {
  183. exr.ProcessedFiles[change.To.Name] = UAST_EXTRACTION_SKIPPED
  184. return
  185. }
  186. exr.ProcessedFiles[change.To.Name]++
  187. }
  188. pending++
  189. exr.pool.SendWorkAsync(uastTask{
  190. Lock: &lock,
  191. Dest: uasts,
  192. File: &object.File{Name: change.To.Name, Blob: *cache[change.To.TreeEntry.Hash]},
  193. Errors: &errs, Status: status}, nil)
  194. }
  195. for _, change := range treeDiffs {
  196. action, err := change.Action()
  197. if err != nil {
  198. return nil, err
  199. }
  200. switch action {
  201. case merkletrie.Insert:
  202. submit(change)
  203. case merkletrie.Delete:
  204. continue
  205. case merkletrie.Modify:
  206. submit(change)
  207. }
  208. }
  209. for i := 0; i < pending; i++ {
  210. _ = <-status
  211. }
  212. if len(errs) > 0 {
  213. msgs := make([]string, len(errs))
  214. for i, err := range errs {
  215. msgs[i] = err.Error()
  216. }
  217. joined := strings.Join(msgs, "\n")
  218. if exr.FailOnErrors {
  219. return nil, errors.New(joined)
  220. } else {
  221. fmt.Fprintln(os.Stderr, joined)
  222. }
  223. }
  224. return map[string]interface{}{"uasts": uasts}, nil
  225. }
  226. func (exr *UASTExtractor) extractUAST(
  227. client *bblfsh.BblfshClient, file *object.File) (*uast.Node, error) {
  228. request := client.NewParseRequest()
  229. contents, err := file.Contents()
  230. if err != nil {
  231. return nil, err
  232. }
  233. request.Content(contents)
  234. request.Filename(file.Name)
  235. response, err := request.DoWithContext(exr.Context())
  236. if err != nil {
  237. if strings.Contains("missing driver", err.Error()) {
  238. return nil, nil
  239. }
  240. return nil, err
  241. }
  242. if response.Status != protocol.Ok {
  243. return nil, errors.New(strings.Join(response.Errors, "\n"))
  244. }
  245. if err != nil {
  246. return nil, err
  247. }
  248. return response.UAST, nil
  249. }
  250. func (exr *UASTExtractor) extractTask(data interface{}) interface{} {
  251. task := data.(uastTask)
  252. defer func() { task.Status <- 0 }()
  253. node, err := exr.extractUAST(task.Client, task.File)
  254. task.Lock.Lock()
  255. defer task.Lock.Unlock()
  256. if err != nil {
  257. *task.Errors = append(*task.Errors, errors.New(task.File.Name+": "+err.Error()))
  258. return nil
  259. }
  260. task.Dest[task.File.Hash] = node
  261. return nil
  262. }
  263. type UASTChange struct {
  264. Before *uast.Node
  265. After *uast.Node
  266. Change *object.Change
  267. }
  268. type UASTChanges struct {
  269. cache map[plumbing.Hash]*uast.Node
  270. }
  271. func (uc *UASTChanges) Name() string {
  272. return "UASTChanges"
  273. }
  274. func (uc *UASTChanges) Provides() []string {
  275. arr := [...]string{"changed_uasts"}
  276. return arr[:]
  277. }
  278. func (uc *UASTChanges) Requires() []string {
  279. arr := [...]string{"uasts", "changes"}
  280. return arr[:]
  281. }
  282. func (uc *UASTChanges) Features() []string {
  283. arr := [...]string{"uast"}
  284. return arr[:]
  285. }
  286. func (uc *UASTChanges) ListConfigurationOptions() []ConfigurationOption {
  287. return []ConfigurationOption{}
  288. }
  289. func (uc *UASTChanges) Configure(facts map[string]interface{}) {}
  290. func (uc *UASTChanges) Initialize(repository *git.Repository) {
  291. uc.cache = map[plumbing.Hash]*uast.Node{}
  292. }
  293. func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  294. uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
  295. treeDiffs := deps["changes"].(object.Changes)
  296. commit := make([]UASTChange, 0, len(treeDiffs))
  297. for _, change := range treeDiffs {
  298. action, err := change.Action()
  299. if err != nil {
  300. return nil, err
  301. }
  302. switch action {
  303. case merkletrie.Insert:
  304. hashTo := change.To.TreeEntry.Hash
  305. uastTo := uasts[hashTo]
  306. commit = append(commit, UASTChange{Before: nil, After: uastTo, Change: change})
  307. uc.cache[hashTo] = uastTo
  308. case merkletrie.Delete:
  309. hashFrom := change.From.TreeEntry.Hash
  310. commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: nil, Change: change})
  311. delete(uc.cache, hashFrom)
  312. case merkletrie.Modify:
  313. hashFrom := change.From.TreeEntry.Hash
  314. hashTo := change.To.TreeEntry.Hash
  315. uastTo := uasts[hashTo]
  316. commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: uastTo, Change: change})
  317. delete(uc.cache, hashFrom)
  318. uc.cache[hashTo] = uastTo
  319. }
  320. }
  321. return map[string]interface{}{"changed_uasts": commit}, nil
  322. }
  323. type UASTChangesSaver struct {
  324. result [][]UASTChange
  325. }
  326. func (saver *UASTChangesSaver) Name() string {
  327. return "UASTChangesSaver"
  328. }
  329. func (saver *UASTChangesSaver) Provides() []string {
  330. return []string{}
  331. }
  332. func (saver *UASTChangesSaver) Requires() []string {
  333. arr := [...]string{"changed_uasts"}
  334. return arr[:]
  335. }
  336. func (saver *UASTChangesSaver) Features() []string {
  337. arr := [...]string{"uast"}
  338. return arr[:]
  339. }
  340. func (saver *UASTChangesSaver) ListConfigurationOptions() []ConfigurationOption {
  341. return []ConfigurationOption{}
  342. }
  343. func (saver *UASTChangesSaver) Configure(facts map[string]interface{}) {}
  344. func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
  345. saver.result = [][]UASTChange{}
  346. }
  347. func (saver *UASTChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  348. changes := deps["changed_uasts"].([]UASTChange)
  349. saver.result = append(saver.result, changes)
  350. return nil, nil
  351. }
  352. func (saver *UASTChangesSaver) Finalize() interface{} {
  353. return saver.result
  354. }
  355. func init() {
  356. Registry.Register(&UASTExtractor{})
  357. Registry.Register(&UASTChanges{})
  358. Registry.Register(&UASTChangesSaver{})
  359. }