uast.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. package uast
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "log"
  9. "os"
  10. "path"
  11. "runtime"
  12. "strings"
  13. "sync"
  14. "time"
  15. "github.com/Jeffail/tunny"
  16. "github.com/gogo/protobuf/proto"
  17. "gopkg.in/bblfsh/client-go.v3"
  18. "gopkg.in/bblfsh/sdk.v2/uast/nodes"
  19. "gopkg.in/bblfsh/sdk.v2/uast/nodes/nodesproto"
  20. "gopkg.in/src-d/go-git.v4"
  21. "gopkg.in/src-d/go-git.v4/plumbing"
  22. "gopkg.in/src-d/go-git.v4/plumbing/object"
  23. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  24. "gopkg.in/src-d/hercules.v10/internal/core"
  25. "gopkg.in/src-d/hercules.v10/internal/pb"
  26. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  27. )
  28. // Extractor retrieves UASTs from Babelfish server which correspond to changed files in a commit.
  29. // It is a PipelineItem.
  30. type Extractor struct {
  31. core.NoopMerger
  32. Endpoint string
  33. Context func() (context.Context, context.CancelFunc)
  34. PoolSize int
  35. FailOnErrors bool
  36. ProcessedFiles map[string]int
  37. IgnoredMissingDrivers map[string]bool
  38. clients []*bblfsh.Client
  39. pool *tunny.Pool
  40. }
  41. const (
  42. // ConfigUASTEndpoint is the name of the configuration option (Extractor.Configure())
  43. // which sets the Babelfish server address.
  44. ConfigUASTEndpoint = "UAST.Endpoint"
  45. // ConfigUASTTimeout is the name of the configuration option (Extractor.Configure())
  46. // which sets the maximum amount of time to wait for a Babelfish server response.
  47. ConfigUASTTimeout = "UAST.Timeout"
  48. // ConfigUASTPoolSize is the name of the configuration option (Extractor.Configure())
  49. // which sets the number of goroutines to run for UAST parse queries.
  50. ConfigUASTPoolSize = "UAST.PoolSize"
  51. // ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
  52. // which enables early exit in case of any Babelfish UAST parsing errors.
  53. ConfigUASTFailOnErrors = "UAST.FailOnErrors"
  54. // ConfigUASTIgnoreMissingDrivers is the name of the configuration option (Extractor.Configure())
  55. // which sets the ignored missing driver names.
  56. ConfigUASTIgnoreMissingDrivers = "UAST.IgnoreMissingDrivers"
  57. // DefaultBabelfishEndpoint is the default address of the Babelfish parsing server.
  58. DefaultBabelfishEndpoint = "0.0.0.0:9432"
  59. // DefaultBabelfishTimeout is the default value of the RPC timeout in seconds.
  60. DefaultBabelfishTimeout = 20
  61. // FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
  62. FeatureUast = "uast"
  63. // DependencyUasts is the name of the dependency provided by Extractor.
  64. DependencyUasts = "uasts"
  65. )
  66. var (
  67. // DefaultBabelfishWorkers is the default number of parsing RPC goroutines.
  68. DefaultBabelfishWorkers = runtime.NumCPU() * 2
  69. // DefaultIgnoredMissingDrivers is the languages which are ignored if the Babelfish driver is missing.
  70. DefaultIgnoredMissingDrivers = []string{"markdown", "text", "yaml", "json"}
  71. )
  72. type uastTask struct {
  73. Lock *sync.RWMutex
  74. Dest map[plumbing.Hash]nodes.Node
  75. Name string
  76. Hash plumbing.Hash
  77. Data []byte
  78. Errors *[]error
  79. }
  80. type worker struct {
  81. Client *bblfsh.Client
  82. Extractor *Extractor
  83. }
  84. // Process will synchronously perform a job and return the result.
  85. func (w worker) Process(data interface{}) interface{} {
  86. return w.Extractor.extractTask(w.Client, data)
  87. }
  88. func (w worker) BlockUntilReady() {}
  89. func (w worker) Interrupt() {}
  90. func (w worker) Terminate() {}
  91. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  92. func (exr *Extractor) Name() string {
  93. return "UAST"
  94. }
  95. // Provides returns the list of names of entities which are produced by this PipelineItem.
  96. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  97. // to this list. Also used by core.Registry to build the global map of providers.
  98. func (exr *Extractor) Provides() []string {
  99. arr := [...]string{DependencyUasts}
  100. return arr[:]
  101. }
  102. // Requires returns the list of names of entities which are needed by this PipelineItem.
  103. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  104. // entities are Provides() upstream.
  105. func (exr *Extractor) Requires() []string {
  106. arr := [...]string{items.DependencyTreeChanges, items.DependencyBlobCache}
  107. return arr[:]
  108. }
  109. // Features which must be enabled for this PipelineItem to be automatically inserted into the DAG.
  110. func (exr *Extractor) Features() []string {
  111. arr := [...]string{FeatureUast}
  112. return arr[:]
  113. }
  114. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  115. func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
  116. options := [...]core.ConfigurationOption{{
  117. Name: ConfigUASTEndpoint,
  118. Description: "How many days there are in a single band.",
  119. Flag: "bblfsh",
  120. Type: core.StringConfigurationOption,
  121. Default: DefaultBabelfishEndpoint}, {
  122. Name: ConfigUASTTimeout,
  123. Description: "Babelfish's server timeout in seconds.",
  124. Flag: "bblfsh-timeout",
  125. Type: core.IntConfigurationOption,
  126. Default: DefaultBabelfishTimeout}, {
  127. Name: ConfigUASTPoolSize,
  128. Description: "Number of goroutines to extract UASTs.",
  129. Flag: "bblfsh-pool-size",
  130. Type: core.IntConfigurationOption,
  131. Default: DefaultBabelfishWorkers}, {
  132. Name: ConfigUASTFailOnErrors,
  133. Description: "Panic if there is a UAST extraction error.",
  134. Flag: "bblfsh-fail-on-error",
  135. Type: core.BoolConfigurationOption,
  136. Default: false}, {
  137. Name: ConfigUASTIgnoreMissingDrivers,
  138. Description: "Do not warn about missing drivers for the specified languages.",
  139. Flag: "bblfsh-ignored-drivers",
  140. Type: core.StringsConfigurationOption,
  141. Default: DefaultIgnoredMissingDrivers},
  142. }
  143. return options[:]
  144. }
  145. // Configure sets the properties previously published by ListConfigurationOptions().
  146. func (exr *Extractor) Configure(facts map[string]interface{}) error {
  147. if val, exists := facts[ConfigUASTEndpoint].(string); exists {
  148. exr.Endpoint = val
  149. }
  150. if val, exists := facts[ConfigUASTTimeout].(int); exists {
  151. exr.Context = func() (context.Context, context.CancelFunc) {
  152. return context.WithTimeout(context.Background(),
  153. time.Duration(val)*time.Second)
  154. }
  155. }
  156. if val, exists := facts[ConfigUASTPoolSize].(int); exists {
  157. exr.PoolSize = val
  158. }
  159. if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
  160. exr.FailOnErrors = val
  161. }
  162. if val, exists := facts[ConfigUASTIgnoreMissingDrivers].([]string); exists {
  163. exr.IgnoredMissingDrivers = map[string]bool{}
  164. for _, name := range val {
  165. exr.IgnoredMissingDrivers[name] = true
  166. }
  167. }
  168. return nil
  169. }
  170. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  171. // calls. The repository which is going to be analysed is supplied as an argument.
  172. func (exr *Extractor) Initialize(repository *git.Repository) error {
  173. if exr.Context == nil {
  174. exr.Context = func() (context.Context, context.CancelFunc) {
  175. return context.WithTimeout(context.Background(),
  176. time.Duration(DefaultBabelfishTimeout)*time.Second)
  177. }
  178. }
  179. if exr.Endpoint == "" {
  180. exr.Endpoint = DefaultBabelfishEndpoint
  181. }
  182. if exr.PoolSize == 0 {
  183. exr.PoolSize = DefaultBabelfishWorkers
  184. }
  185. poolSize := exr.PoolSize
  186. if poolSize == 0 {
  187. poolSize = runtime.NumCPU()
  188. }
  189. exr.clients = make([]*bblfsh.Client, poolSize)
  190. for i := 0; i < poolSize; i++ {
  191. client, err := bblfsh.NewClient(exr.Endpoint)
  192. if err != nil {
  193. if err.Error() == "context deadline exceeded" {
  194. log.Println("Looks like the Babelfish server is not running. Please refer " +
  195. "to https://docs.sourced.tech/babelfish/using-babelfish/getting-started#running-with-docker-recommended")
  196. }
  197. return err
  198. }
  199. exr.clients[i] = client
  200. }
  201. if exr.pool != nil {
  202. exr.pool.Close()
  203. }
  204. {
  205. i := 0
  206. exr.pool = tunny.New(poolSize, func() tunny.Worker {
  207. w := worker{Client: exr.clients[i], Extractor: exr}
  208. i++
  209. return w
  210. })
  211. }
  212. if exr.pool == nil {
  213. panic("UAST goroutine pool was not created")
  214. }
  215. exr.ProcessedFiles = map[string]int{}
  216. if exr.IgnoredMissingDrivers == nil {
  217. exr.IgnoredMissingDrivers = map[string]bool{}
  218. for _, name := range DefaultIgnoredMissingDrivers {
  219. exr.IgnoredMissingDrivers[name] = true
  220. }
  221. }
  222. return nil
  223. }
  224. // Consume runs this PipelineItem on the next commit data.
  225. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  226. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  227. // This function returns the mapping with analysis results. The keys must be the same as
  228. // in Provides(). If there was an error, nil is returned.
  229. func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  230. cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
  231. treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
  232. uasts := map[plumbing.Hash]nodes.Node{}
  233. lock := sync.RWMutex{}
  234. errs := make([]error, 0)
  235. wg := sync.WaitGroup{}
  236. submit := func(change *object.Change) {
  237. exr.ProcessedFiles[change.To.Name]++
  238. wg.Add(1)
  239. go func(task interface{}) {
  240. exr.pool.Process(task)
  241. wg.Done()
  242. }(uastTask{
  243. Lock: &lock,
  244. Dest: uasts,
  245. Name: change.To.Name,
  246. Hash: change.To.TreeEntry.Hash,
  247. Data: cache[change.To.TreeEntry.Hash].Data,
  248. Errors: &errs,
  249. })
  250. }
  251. for _, change := range treeDiffs {
  252. action, err := change.Action()
  253. if err != nil {
  254. return nil, err
  255. }
  256. switch action {
  257. case merkletrie.Insert:
  258. submit(change)
  259. case merkletrie.Delete:
  260. continue
  261. case merkletrie.Modify:
  262. submit(change)
  263. }
  264. }
  265. wg.Wait()
  266. if len(errs) > 0 {
  267. msgs := make([]string, len(errs))
  268. for i, err := range errs {
  269. msgs[i] = err.Error()
  270. }
  271. joined := strings.Join(msgs, "\n")
  272. if exr.FailOnErrors {
  273. return nil, errors.New(joined)
  274. }
  275. log.Println(joined)
  276. }
  277. return map[string]interface{}{DependencyUasts: uasts}, nil
  278. }
  279. // Dispose closes the open GRPC channels.
  280. func (exr *Extractor) Dispose() {
  281. for _, client := range exr.clients {
  282. client.Close()
  283. }
  284. }
  285. // Fork clones this PipelineItem.
  286. func (exr *Extractor) Fork(n int) []core.PipelineItem {
  287. return core.ForkSamePipelineItem(exr, n)
  288. }
  289. func (exr *Extractor) extractUAST(
  290. client *bblfsh.Client, name string, data []byte) (nodes.Node, error) {
  291. ctx, cancel := exr.Context()
  292. if cancel != nil {
  293. defer cancel()
  294. }
  295. request := client.NewParseRequest().
  296. Content(string(data)).Filename(name).Mode(bblfsh.Semantic).Context(ctx)
  297. response, _, err := request.UAST()
  298. if err != nil {
  299. if strings.Contains("missing driver", err.Error()) {
  300. return nil, nil
  301. }
  302. return nil, err
  303. }
  304. return response, nil
  305. }
  306. func (exr *Extractor) extractTask(client *bblfsh.Client, data interface{}) interface{} {
  307. task := data.(uastTask)
  308. node, err := exr.extractUAST(client, task.Name, task.Data)
  309. task.Lock.Lock()
  310. defer task.Lock.Unlock()
  311. if err != nil {
  312. for lang := range exr.IgnoredMissingDrivers {
  313. if strings.HasSuffix(err.Error(), "\""+lang+"\"") {
  314. return nil
  315. }
  316. }
  317. *task.Errors = append(*task.Errors,
  318. fmt.Errorf("\nfile %s, blob %s: %v", task.Name, task.Hash.String(), err))
  319. return nil
  320. }
  321. if node != nil {
  322. task.Dest[task.Hash] = node
  323. }
  324. return nil
  325. }
  326. // Change is the type of the items in the list of changes which is provided by Changes.
  327. type Change struct {
  328. Before nodes.Node
  329. After nodes.Node
  330. Change *object.Change
  331. }
  332. const (
  333. // DependencyUastChanges is the name of the dependency provided by Changes.
  334. DependencyUastChanges = "changed_uasts"
  335. )
  336. // Changes is a structured analog of TreeDiff: it provides UASTs for every logical change
  337. // in a commit. It is a PipelineItem.
  338. type Changes struct {
  339. core.NoopMerger
  340. cache map[plumbing.Hash]nodes.Node
  341. }
  342. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  343. func (uc *Changes) Name() string {
  344. return "UASTChanges"
  345. }
  346. // Provides returns the list of names of entities which are produced by this PipelineItem.
  347. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  348. // to this list. Also used by core.Registry to build the global map of providers.
  349. func (uc *Changes) Provides() []string {
  350. arr := [...]string{DependencyUastChanges}
  351. return arr[:]
  352. }
  353. // Requires returns the list of names of entities which are needed by this PipelineItem.
  354. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  355. // entities are Provides() upstream.
  356. func (uc *Changes) Requires() []string {
  357. arr := [...]string{DependencyUasts, items.DependencyTreeChanges}
  358. return arr[:]
  359. }
  360. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  361. func (uc *Changes) ListConfigurationOptions() []core.ConfigurationOption {
  362. return []core.ConfigurationOption{}
  363. }
  364. // Configure sets the properties previously published by ListConfigurationOptions().
  365. func (uc *Changes) Configure(facts map[string]interface{}) error {
  366. return nil
  367. }
  368. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  369. // calls. The repository which is going to be analysed is supplied as an argument.
  370. func (uc *Changes) Initialize(repository *git.Repository) error {
  371. uc.cache = map[plumbing.Hash]nodes.Node{}
  372. return nil
  373. }
  374. // Consume runs this PipelineItem on the next commit data.
  375. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  376. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  377. // This function returns the mapping with analysis results. The keys must be the same as
  378. // in Provides(). If there was an error, nil is returned.
  379. func (uc *Changes) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  380. uasts := deps[DependencyUasts].(map[plumbing.Hash]nodes.Node)
  381. treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
  382. commit := make([]Change, 0, len(treeDiffs))
  383. for _, change := range treeDiffs {
  384. action, err := change.Action()
  385. if err != nil {
  386. return nil, err
  387. }
  388. switch action {
  389. case merkletrie.Insert:
  390. hashTo := change.To.TreeEntry.Hash
  391. uastTo := uasts[hashTo]
  392. commit = append(commit, Change{Before: nil, After: uastTo, Change: change})
  393. uc.cache[hashTo] = uastTo
  394. case merkletrie.Delete:
  395. hashFrom := change.From.TreeEntry.Hash
  396. commit = append(commit, Change{Before: uc.cache[hashFrom], After: nil, Change: change})
  397. delete(uc.cache, hashFrom)
  398. case merkletrie.Modify:
  399. hashFrom := change.From.TreeEntry.Hash
  400. hashTo := change.To.TreeEntry.Hash
  401. uastTo := uasts[hashTo]
  402. commit = append(commit, Change{Before: uc.cache[hashFrom], After: uastTo, Change: change})
  403. delete(uc.cache, hashFrom)
  404. uc.cache[hashTo] = uastTo
  405. }
  406. }
  407. return map[string]interface{}{DependencyUastChanges: commit}, nil
  408. }
  409. // Fork clones this PipelineItem.
  410. func (uc *Changes) Fork(n int) []core.PipelineItem {
  411. ucs := make([]core.PipelineItem, n)
  412. for i := 0; i < n; i++ {
  413. clone := &Changes{
  414. cache: map[plumbing.Hash]nodes.Node{},
  415. }
  416. for key, val := range uc.cache {
  417. clone.cache[key] = val
  418. }
  419. ucs[i] = clone
  420. }
  421. return ucs
  422. }
  423. // ChangesSaver dumps changed files and corresponding UASTs for every commit.
  424. // it is a LeafPipelineItem.
  425. type ChangesSaver struct {
  426. core.NoopMerger
  427. core.OneShotMergeProcessor
  428. // OutputPath points to the target directory with UASTs
  429. OutputPath string
  430. repository *git.Repository
  431. result [][]Change
  432. }
  433. const (
  434. // ConfigUASTChangesSaverOutputPath is the name of the configuration option
  435. // (ChangesSaver.Configure()) which sets the target directory where to save the files.
  436. ConfigUASTChangesSaverOutputPath = "ChangesSaver.OutputPath"
  437. )
  438. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  439. func (saver *ChangesSaver) Name() string {
  440. return "UASTChangesSaver"
  441. }
  442. // Provides returns the list of names of entities which are produced by this PipelineItem.
  443. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  444. // to this list. Also used by core.Registry to build the global map of providers.
  445. func (saver *ChangesSaver) Provides() []string {
  446. return []string{}
  447. }
  448. // Requires returns the list of names of entities which are needed by this PipelineItem.
  449. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  450. // entities are Provides() upstream.
  451. func (saver *ChangesSaver) Requires() []string {
  452. arr := [...]string{DependencyUastChanges}
  453. return arr[:]
  454. }
  455. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  456. func (saver *ChangesSaver) ListConfigurationOptions() []core.ConfigurationOption {
  457. options := [...]core.ConfigurationOption{{
  458. Name: ConfigUASTChangesSaverOutputPath,
  459. Description: "The target directory where to store the changed UAST files.",
  460. Flag: "changed-uast-dir",
  461. Type: core.PathConfigurationOption,
  462. Default: "."},
  463. }
  464. return options[:]
  465. }
  466. // Flag for the command line switch which enables this analysis.
  467. func (saver *ChangesSaver) Flag() string {
  468. return "dump-uast-changes"
  469. }
  470. // Description returns the text which explains what the analysis is doing.
  471. func (saver *ChangesSaver) Description() string {
  472. return "Saves UASTs and file contents on disk for each commit."
  473. }
  474. // Configure sets the properties previously published by ListConfigurationOptions().
  475. func (saver *ChangesSaver) Configure(facts map[string]interface{}) error {
  476. if val, exists := facts[ConfigUASTChangesSaverOutputPath]; exists {
  477. saver.OutputPath = val.(string)
  478. }
  479. return nil
  480. }
  481. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  482. // calls. The repository which is going to be analysed is supplied as an argument.
  483. func (saver *ChangesSaver) Initialize(repository *git.Repository) error {
  484. saver.repository = repository
  485. saver.result = [][]Change{}
  486. saver.OneShotMergeProcessor.Initialize()
  487. return nil
  488. }
  489. // Consume runs this PipelineItem on the next commit data.
  490. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  491. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  492. // This function returns the mapping with analysis results. The keys must be the same as
  493. // in Provides(). If there was an error, nil is returned.
  494. func (saver *ChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  495. if !saver.ShouldConsumeCommit(deps) {
  496. return nil, nil
  497. }
  498. changes := deps[DependencyUastChanges].([]Change)
  499. saver.result = append(saver.result, changes)
  500. return nil, nil
  501. }
  502. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  503. func (saver *ChangesSaver) Finalize() interface{} {
  504. return saver.result
  505. }
  506. // Fork clones this PipelineItem.
  507. func (saver *ChangesSaver) Fork(n int) []core.PipelineItem {
  508. return core.ForkSamePipelineItem(saver, n)
  509. }
  510. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  511. // The text format is YAML and the bytes format is Protocol Buffers.
  512. func (saver *ChangesSaver) Serialize(result interface{}, binary bool, writer io.Writer) error {
  513. saverResult := result.([][]Change)
  514. fileNames := saver.dumpFiles(saverResult)
  515. if binary {
  516. return saver.serializeBinary(fileNames, writer)
  517. }
  518. saver.serializeText(fileNames, writer)
  519. return nil
  520. }
  521. func (saver *ChangesSaver) dumpFiles(result [][]Change) []*pb.UASTChange {
  522. var fileNames []*pb.UASTChange
  523. dumpUast := func(uast nodes.Node, path string) {
  524. f, err := os.Create(path)
  525. if err != nil {
  526. panic(err)
  527. }
  528. defer f.Close()
  529. err = nodesproto.WriteTo(f, uast)
  530. if err != nil {
  531. panic(err)
  532. }
  533. }
  534. for i, changes := range result {
  535. for j, change := range changes {
  536. if change.Before == nil || change.After == nil {
  537. continue
  538. }
  539. record := &pb.UASTChange{FileName: change.Change.To.Name}
  540. record.UastBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  541. "%d_%d_before_%s.pb", i, j, change.Change.From.TreeEntry.Hash.String()))
  542. dumpUast(change.Before, record.UastBefore)
  543. blob, _ := saver.repository.BlobObject(change.Change.From.TreeEntry.Hash)
  544. s, _ := (&object.File{Blob: *blob}).Contents()
  545. record.SrcBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  546. "%d_%d_before_%s.src", i, j, change.Change.From.TreeEntry.Hash.String()))
  547. err := ioutil.WriteFile(record.SrcBefore, []byte(s), 0666)
  548. if err != nil {
  549. panic(err)
  550. }
  551. record.UastAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  552. "%d_%d_after_%s.pb", i, j, change.Change.To.TreeEntry.Hash.String()))
  553. dumpUast(change.After, record.UastAfter)
  554. blob, _ = saver.repository.BlobObject(change.Change.To.TreeEntry.Hash)
  555. s, _ = (&object.File{Blob: *blob}).Contents()
  556. record.SrcAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  557. "%d_%d_after_%s.src", i, j, change.Change.To.TreeEntry.Hash.String()))
  558. err = ioutil.WriteFile(record.SrcAfter, []byte(s), 0666)
  559. if err != nil {
  560. panic(err)
  561. }
  562. fileNames = append(fileNames, record)
  563. }
  564. }
  565. return fileNames
  566. }
  567. func (saver *ChangesSaver) serializeText(result []*pb.UASTChange, writer io.Writer) {
  568. for _, sc := range result {
  569. kv := [...]string{
  570. "file: " + sc.FileName,
  571. "src0: " + sc.SrcBefore, "src1: " + sc.SrcAfter,
  572. "uast0: " + sc.UastBefore, "uast1: " + sc.UastAfter,
  573. }
  574. fmt.Fprintf(writer, " - {%s}\n", strings.Join(kv[:], ", "))
  575. }
  576. }
  577. func (saver *ChangesSaver) serializeBinary(result []*pb.UASTChange, writer io.Writer) error {
  578. message := pb.UASTChangesSaverResults{Changes: result}
  579. serialized, err := proto.Marshal(&message)
  580. if err != nil {
  581. return err
  582. }
  583. _, err = writer.Write(serialized)
  584. return err
  585. }
  586. func init() {
  587. core.Registry.Register(&Extractor{})
  588. core.Registry.Register(&Changes{})
  589. core.Registry.Register(&ChangesSaver{})
  590. }