uast.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. package uast
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "runtime"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/Jeffail/tunny"
  15. "github.com/gogo/protobuf/proto"
  16. bblfsh "gopkg.in/bblfsh/client-go.v3"
  17. "gopkg.in/bblfsh/sdk.v2/uast/nodes"
  18. "gopkg.in/bblfsh/sdk.v2/uast/nodes/nodesproto"
  19. "gopkg.in/src-d/go-git.v4"
  20. "gopkg.in/src-d/go-git.v4/plumbing"
  21. "gopkg.in/src-d/go-git.v4/plumbing/object"
  22. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  23. "gopkg.in/src-d/hercules.v10/internal/core"
  24. "gopkg.in/src-d/hercules.v10/internal/pb"
  25. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  26. )
  27. // Extractor retrieves UASTs from Babelfish server which correspond to changed files in a commit.
  28. // It is a PipelineItem.
  29. type Extractor struct {
  30. core.NoopMerger
  31. Endpoint string
  32. Context func() (context.Context, context.CancelFunc)
  33. PoolSize int
  34. FailOnErrors bool
  35. ProcessedFiles map[string]int
  36. IgnoredMissingDrivers map[string]bool
  37. clients []*bblfsh.Client
  38. pool *tunny.Pool
  39. l core.Logger
  40. }
  41. const (
  42. // ConfigUASTEndpoint is the name of the configuration option (Extractor.Configure())
  43. // which sets the Babelfish server address.
  44. ConfigUASTEndpoint = "UAST.Endpoint"
  45. // ConfigUASTTimeout is the name of the configuration option (Extractor.Configure())
  46. // which sets the maximum amount of time to wait for a Babelfish server response.
  47. ConfigUASTTimeout = "UAST.Timeout"
  48. // ConfigUASTPoolSize is the name of the configuration option (Extractor.Configure())
  49. // which sets the number of goroutines to run for UAST parse queries.
  50. ConfigUASTPoolSize = "UAST.PoolSize"
  51. // ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
  52. // which enables early exit in case of any Babelfish UAST parsing errors.
  53. ConfigUASTFailOnErrors = "UAST.FailOnErrors"
  54. // ConfigUASTIgnoreMissingDrivers is the name of the configuration option (Extractor.Configure())
  55. // which sets the ignored missing driver names.
  56. ConfigUASTIgnoreMissingDrivers = "UAST.IgnoreMissingDrivers"
  57. // DefaultBabelfishEndpoint is the default address of the Babelfish parsing server.
  58. DefaultBabelfishEndpoint = "0.0.0.0:9432"
  59. // DefaultBabelfishTimeout is the default value of the RPC timeout in seconds.
  60. DefaultBabelfishTimeout = 20
  61. // FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
  62. FeatureUast = "uast"
  63. // DependencyUasts is the name of the dependency provided by Extractor.
  64. DependencyUasts = "uasts"
  65. )
  66. var (
  67. // DefaultBabelfishWorkers is the default number of parsing RPC goroutines.
  68. DefaultBabelfishWorkers = runtime.NumCPU() * 2
  69. // DefaultIgnoredMissingDrivers is the languages which are ignored if the Babelfish driver is missing.
  70. DefaultIgnoredMissingDrivers = []string{"markdown", "text", "yaml", "json"}
  71. )
  72. type uastTask struct {
  73. Lock *sync.RWMutex
  74. Dest map[plumbing.Hash]nodes.Node
  75. Name string
  76. Hash plumbing.Hash
  77. Data []byte
  78. Errors *[]error
  79. }
  80. type worker struct {
  81. Client *bblfsh.Client
  82. Extractor *Extractor
  83. }
  84. // Process will synchronously perform a job and return the result.
  85. func (w worker) Process(data interface{}) interface{} {
  86. return w.Extractor.extractTask(w.Client, data)
  87. }
  88. func (w worker) BlockUntilReady() {}
  89. func (w worker) Interrupt() {}
  90. func (w worker) Terminate() {}
  91. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  92. func (exr *Extractor) Name() string {
  93. return "UAST"
  94. }
  95. // Provides returns the list of names of entities which are produced by this PipelineItem.
  96. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  97. // to this list. Also used by core.Registry to build the global map of providers.
  98. func (exr *Extractor) Provides() []string {
  99. return []string{DependencyUasts}
  100. }
  101. // Requires returns the list of names of entities which are needed by this PipelineItem.
  102. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  103. // entities are Provides() upstream.
  104. func (exr *Extractor) Requires() []string {
  105. return []string{items.DependencyTreeChanges, items.DependencyBlobCache}
  106. }
  107. // Features which must be enabled for this PipelineItem to be automatically inserted into the DAG.
  108. func (exr *Extractor) Features() []string {
  109. return []string{FeatureUast}
  110. }
  111. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  112. func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
  113. options := [...]core.ConfigurationOption{{
  114. Name: ConfigUASTEndpoint,
  115. Description: "How many days there are in a single band.",
  116. Flag: "bblfsh",
  117. Type: core.StringConfigurationOption,
  118. Default: DefaultBabelfishEndpoint}, {
  119. Name: ConfigUASTTimeout,
  120. Description: "Babelfish's server timeout in seconds.",
  121. Flag: "bblfsh-timeout",
  122. Type: core.IntConfigurationOption,
  123. Default: DefaultBabelfishTimeout}, {
  124. Name: ConfigUASTPoolSize,
  125. Description: "Number of goroutines to extract UASTs.",
  126. Flag: "bblfsh-pool-size",
  127. Type: core.IntConfigurationOption,
  128. Default: DefaultBabelfishWorkers}, {
  129. Name: ConfigUASTFailOnErrors,
  130. Description: "Panic if there is a UAST extraction error.",
  131. Flag: "bblfsh-fail-on-error",
  132. Type: core.BoolConfigurationOption,
  133. Default: false}, {
  134. Name: ConfigUASTIgnoreMissingDrivers,
  135. Description: "Do not warn about missing drivers for the specified languages.",
  136. Flag: "bblfsh-ignored-drivers",
  137. Type: core.StringsConfigurationOption,
  138. Default: DefaultIgnoredMissingDrivers},
  139. }
  140. return options[:]
  141. }
  142. // Configure sets the properties previously published by ListConfigurationOptions().
  143. func (exr *Extractor) Configure(facts map[string]interface{}) error {
  144. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  145. exr.l = l
  146. }
  147. if val, exists := facts[ConfigUASTEndpoint].(string); exists {
  148. exr.Endpoint = val
  149. }
  150. if val, exists := facts[ConfigUASTTimeout].(int); exists {
  151. exr.Context = func() (context.Context, context.CancelFunc) {
  152. return context.WithTimeout(context.Background(),
  153. time.Duration(val)*time.Second)
  154. }
  155. }
  156. if val, exists := facts[ConfigUASTPoolSize].(int); exists {
  157. exr.PoolSize = val
  158. }
  159. if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
  160. exr.FailOnErrors = val
  161. }
  162. if val, exists := facts[ConfigUASTIgnoreMissingDrivers].([]string); exists {
  163. exr.IgnoredMissingDrivers = map[string]bool{}
  164. for _, name := range val {
  165. exr.IgnoredMissingDrivers[name] = true
  166. }
  167. }
  168. return nil
  169. }
  170. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  171. // calls. The repository which is going to be analysed is supplied as an argument.
  172. func (exr *Extractor) Initialize(repository *git.Repository) error {
  173. exr.l = core.NewLogger()
  174. if exr.Context == nil {
  175. exr.Context = func() (context.Context, context.CancelFunc) {
  176. return context.WithTimeout(context.Background(),
  177. time.Duration(DefaultBabelfishTimeout)*time.Second)
  178. }
  179. }
  180. if exr.Endpoint == "" {
  181. exr.Endpoint = DefaultBabelfishEndpoint
  182. }
  183. if exr.PoolSize == 0 {
  184. exr.PoolSize = DefaultBabelfishWorkers
  185. }
  186. poolSize := exr.PoolSize
  187. if poolSize == 0 {
  188. poolSize = runtime.NumCPU()
  189. }
  190. exr.clients = make([]*bblfsh.Client, poolSize)
  191. for i := 0; i < poolSize; i++ {
  192. client, err := bblfsh.NewClient(exr.Endpoint)
  193. if err != nil {
  194. if err.Error() == "context deadline exceeded" {
  195. exr.l.Error("Looks like the Babelfish server is not running. Please refer " +
  196. "to https://docs.sourced.tech/babelfish/using-babelfish/getting-started#running-with-docker-recommended")
  197. }
  198. return err
  199. }
  200. exr.clients[i] = client
  201. }
  202. if exr.pool != nil {
  203. exr.pool.Close()
  204. }
  205. {
  206. i := 0
  207. exr.pool = tunny.New(poolSize, func() tunny.Worker {
  208. w := worker{Client: exr.clients[i], Extractor: exr}
  209. i++
  210. return w
  211. })
  212. }
  213. if exr.pool == nil {
  214. panic("UAST goroutine pool was not created")
  215. }
  216. exr.ProcessedFiles = map[string]int{}
  217. if exr.IgnoredMissingDrivers == nil {
  218. exr.IgnoredMissingDrivers = map[string]bool{}
  219. for _, name := range DefaultIgnoredMissingDrivers {
  220. exr.IgnoredMissingDrivers[name] = true
  221. }
  222. }
  223. return nil
  224. }
  225. // Consume runs this PipelineItem on the next commit data.
  226. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  227. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  228. // This function returns the mapping with analysis results. The keys must be the same as
  229. // in Provides(). If there was an error, nil is returned.
  230. func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  231. cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
  232. treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
  233. uasts := map[plumbing.Hash]nodes.Node{}
  234. lock := sync.RWMutex{}
  235. errs := make([]error, 0)
  236. wg := sync.WaitGroup{}
  237. submit := func(change *object.Change) {
  238. exr.ProcessedFiles[change.To.Name]++
  239. wg.Add(1)
  240. go func(task interface{}) {
  241. exr.pool.Process(task)
  242. wg.Done()
  243. }(uastTask{
  244. Lock: &lock,
  245. Dest: uasts,
  246. Name: change.To.Name,
  247. Hash: change.To.TreeEntry.Hash,
  248. Data: cache[change.To.TreeEntry.Hash].Data,
  249. Errors: &errs,
  250. })
  251. }
  252. for _, change := range treeDiffs {
  253. action, err := change.Action()
  254. if err != nil {
  255. return nil, err
  256. }
  257. switch action {
  258. case merkletrie.Insert:
  259. submit(change)
  260. case merkletrie.Delete:
  261. continue
  262. case merkletrie.Modify:
  263. submit(change)
  264. }
  265. }
  266. wg.Wait()
  267. if len(errs) > 0 {
  268. msgs := make([]string, len(errs))
  269. for i, err := range errs {
  270. msgs[i] = err.Error()
  271. }
  272. joined := strings.Join(msgs, "\n")
  273. if exr.FailOnErrors {
  274. return nil, errors.New(joined)
  275. }
  276. exr.l.Error(joined)
  277. }
  278. return map[string]interface{}{DependencyUasts: uasts}, nil
  279. }
  280. // Dispose closes the open GRPC channels.
  281. func (exr *Extractor) Dispose() {
  282. for _, client := range exr.clients {
  283. client.Close()
  284. }
  285. }
  286. // Fork clones this PipelineItem.
  287. func (exr *Extractor) Fork(n int) []core.PipelineItem {
  288. return core.ForkSamePipelineItem(exr, n)
  289. }
  290. func (exr *Extractor) extractUAST(
  291. client *bblfsh.Client, name string, data []byte) (nodes.Node, error) {
  292. ctx, cancel := exr.Context()
  293. if cancel != nil {
  294. defer cancel()
  295. }
  296. request := client.NewParseRequest().
  297. Content(string(data)).Filename(name).Mode(bblfsh.Semantic).Context(ctx)
  298. response, _, err := request.UAST()
  299. if err != nil {
  300. if strings.Contains("missing driver", err.Error()) {
  301. return nil, nil
  302. }
  303. return nil, err
  304. }
  305. return response, nil
  306. }
  307. func (exr *Extractor) extractTask(client *bblfsh.Client, data interface{}) interface{} {
  308. task := data.(uastTask)
  309. node, err := exr.extractUAST(client, task.Name, task.Data)
  310. task.Lock.Lock()
  311. defer task.Lock.Unlock()
  312. if err != nil {
  313. for lang := range exr.IgnoredMissingDrivers {
  314. if strings.HasSuffix(err.Error(), "\""+lang+"\"") {
  315. return nil
  316. }
  317. }
  318. *task.Errors = append(*task.Errors,
  319. fmt.Errorf("\nfile %s, blob %s: %v", task.Name, task.Hash.String(), err))
  320. return nil
  321. }
  322. if node != nil {
  323. task.Dest[task.Hash] = node
  324. }
  325. return nil
  326. }
  327. // Change is the type of the items in the list of changes which is provided by Changes.
  328. type Change struct {
  329. Before nodes.Node
  330. After nodes.Node
  331. Change *object.Change
  332. }
  333. const (
  334. // DependencyUastChanges is the name of the dependency provided by Changes.
  335. DependencyUastChanges = "changed_uasts"
  336. )
  337. // Changes is a structured analog of TreeDiff: it provides UASTs for every logical change
  338. // in a commit. It is a PipelineItem.
  339. type Changes struct {
  340. core.NoopMerger
  341. cache map[plumbing.Hash]nodes.Node
  342. l core.Logger
  343. }
  344. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  345. func (uc *Changes) Name() string {
  346. return "UASTChanges"
  347. }
  348. // Provides returns the list of names of entities which are produced by this PipelineItem.
  349. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  350. // to this list. Also used by core.Registry to build the global map of providers.
  351. func (uc *Changes) Provides() []string {
  352. return []string{DependencyUastChanges}
  353. }
  354. // Requires returns the list of names of entities which are needed by this PipelineItem.
  355. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  356. // entities are Provides() upstream.
  357. func (uc *Changes) Requires() []string {
  358. return []string{DependencyUasts, items.DependencyTreeChanges}
  359. }
  360. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  361. func (uc *Changes) ListConfigurationOptions() []core.ConfigurationOption {
  362. return []core.ConfigurationOption{}
  363. }
  364. // Configure sets the properties previously published by ListConfigurationOptions().
  365. func (uc *Changes) Configure(facts map[string]interface{}) error {
  366. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  367. uc.l = l
  368. }
  369. return nil
  370. }
  371. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  372. // calls. The repository which is going to be analysed is supplied as an argument.
  373. func (uc *Changes) Initialize(repository *git.Repository) error {
  374. uc.l = core.NewLogger()
  375. uc.cache = map[plumbing.Hash]nodes.Node{}
  376. return nil
  377. }
  378. // Consume runs this PipelineItem on the next commit data.
  379. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  380. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  381. // This function returns the mapping with analysis results. The keys must be the same as
  382. // in Provides(). If there was an error, nil is returned.
  383. func (uc *Changes) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  384. uasts := deps[DependencyUasts].(map[plumbing.Hash]nodes.Node)
  385. treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
  386. commit := make([]Change, 0, len(treeDiffs))
  387. for _, change := range treeDiffs {
  388. action, err := change.Action()
  389. if err != nil {
  390. return nil, err
  391. }
  392. switch action {
  393. case merkletrie.Insert:
  394. hashTo := change.To.TreeEntry.Hash
  395. uastTo := uasts[hashTo]
  396. commit = append(commit, Change{Before: nil, After: uastTo, Change: change})
  397. uc.cache[hashTo] = uastTo
  398. case merkletrie.Delete:
  399. hashFrom := change.From.TreeEntry.Hash
  400. commit = append(commit, Change{Before: uc.cache[hashFrom], After: nil, Change: change})
  401. delete(uc.cache, hashFrom)
  402. case merkletrie.Modify:
  403. hashFrom := change.From.TreeEntry.Hash
  404. hashTo := change.To.TreeEntry.Hash
  405. uastTo := uasts[hashTo]
  406. commit = append(commit, Change{Before: uc.cache[hashFrom], After: uastTo, Change: change})
  407. delete(uc.cache, hashFrom)
  408. uc.cache[hashTo] = uastTo
  409. }
  410. }
  411. return map[string]interface{}{DependencyUastChanges: commit}, nil
  412. }
  413. // Fork clones this PipelineItem.
  414. func (uc *Changes) Fork(n int) []core.PipelineItem {
  415. ucs := make([]core.PipelineItem, n)
  416. for i := 0; i < n; i++ {
  417. clone := &Changes{
  418. cache: map[plumbing.Hash]nodes.Node{},
  419. }
  420. for key, val := range uc.cache {
  421. clone.cache[key] = val
  422. }
  423. ucs[i] = clone
  424. }
  425. return ucs
  426. }
  427. // ChangesSaver dumps changed files and corresponding UASTs for every commit.
  428. // it is a LeafPipelineItem.
  429. type ChangesSaver struct {
  430. core.NoopMerger
  431. core.OneShotMergeProcessor
  432. // OutputPath points to the target directory with UASTs
  433. OutputPath string
  434. repository *git.Repository
  435. result [][]Change
  436. l core.Logger
  437. }
  438. const (
  439. // ConfigUASTChangesSaverOutputPath is the name of the configuration option
  440. // (ChangesSaver.Configure()) which sets the target directory where to save the files.
  441. ConfigUASTChangesSaverOutputPath = "ChangesSaver.OutputPath"
  442. )
  443. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  444. func (saver *ChangesSaver) Name() string {
  445. return "UASTChangesSaver"
  446. }
  447. // Provides returns the list of names of entities which are produced by this PipelineItem.
  448. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  449. // to this list. Also used by core.Registry to build the global map of providers.
  450. func (saver *ChangesSaver) Provides() []string {
  451. return []string{}
  452. }
  453. // Requires returns the list of names of entities which are needed by this PipelineItem.
  454. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  455. // entities are Provides() upstream.
  456. func (saver *ChangesSaver) Requires() []string {
  457. return []string{DependencyUastChanges}
  458. }
  459. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  460. func (saver *ChangesSaver) ListConfigurationOptions() []core.ConfigurationOption {
  461. options := [...]core.ConfigurationOption{{
  462. Name: ConfigUASTChangesSaverOutputPath,
  463. Description: "The target directory where to store the changed UAST files.",
  464. Flag: "changed-uast-dir",
  465. Type: core.PathConfigurationOption,
  466. Default: "."},
  467. }
  468. return options[:]
  469. }
  470. // Flag for the command line switch which enables this analysis.
  471. func (saver *ChangesSaver) Flag() string {
  472. return "dump-uast-changes"
  473. }
  474. // Description returns the text which explains what the analysis is doing.
  475. func (saver *ChangesSaver) Description() string {
  476. return "Saves UASTs and file contents on disk for each commit."
  477. }
  478. // Configure sets the properties previously published by ListConfigurationOptions().
  479. func (saver *ChangesSaver) Configure(facts map[string]interface{}) error {
  480. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  481. saver.l = l
  482. }
  483. if val, exists := facts[ConfigUASTChangesSaverOutputPath]; exists {
  484. saver.OutputPath = val.(string)
  485. }
  486. return nil
  487. }
  488. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  489. // calls. The repository which is going to be analysed is supplied as an argument.
  490. func (saver *ChangesSaver) Initialize(repository *git.Repository) error {
  491. saver.l = core.NewLogger()
  492. saver.repository = repository
  493. saver.result = [][]Change{}
  494. saver.OneShotMergeProcessor.Initialize()
  495. return nil
  496. }
  497. // Consume runs this PipelineItem on the next commit data.
  498. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  499. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  500. // This function returns the mapping with analysis results. The keys must be the same as
  501. // in Provides(). If there was an error, nil is returned.
  502. func (saver *ChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  503. if !saver.ShouldConsumeCommit(deps) {
  504. return nil, nil
  505. }
  506. changes := deps[DependencyUastChanges].([]Change)
  507. saver.result = append(saver.result, changes)
  508. return nil, nil
  509. }
  510. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  511. func (saver *ChangesSaver) Finalize() interface{} {
  512. return saver.result
  513. }
  514. // Fork clones this PipelineItem.
  515. func (saver *ChangesSaver) Fork(n int) []core.PipelineItem {
  516. return core.ForkSamePipelineItem(saver, n)
  517. }
  518. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  519. // The text format is YAML and the bytes format is Protocol Buffers.
  520. func (saver *ChangesSaver) Serialize(result interface{}, binary bool, writer io.Writer) error {
  521. saverResult := result.([][]Change)
  522. fileNames := saver.dumpFiles(saverResult)
  523. if binary {
  524. return saver.serializeBinary(fileNames, writer)
  525. }
  526. saver.serializeText(fileNames, writer)
  527. return nil
  528. }
  529. func (saver *ChangesSaver) dumpFiles(result [][]Change) []*pb.UASTChange {
  530. var fileNames []*pb.UASTChange
  531. dumpUast := func(uast nodes.Node, path string) {
  532. f, err := os.Create(path)
  533. if err != nil {
  534. panic(err)
  535. }
  536. defer f.Close()
  537. err = nodesproto.WriteTo(f, uast)
  538. if err != nil {
  539. panic(err)
  540. }
  541. }
  542. for i, changes := range result {
  543. for j, change := range changes {
  544. if change.Before == nil || change.After == nil {
  545. continue
  546. }
  547. record := &pb.UASTChange{FileName: change.Change.To.Name}
  548. record.UastBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  549. "%d_%d_before_%s.pb", i, j, change.Change.From.TreeEntry.Hash.String()))
  550. dumpUast(change.Before, record.UastBefore)
  551. blob, _ := saver.repository.BlobObject(change.Change.From.TreeEntry.Hash)
  552. s, _ := (&object.File{Blob: *blob}).Contents()
  553. record.SrcBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  554. "%d_%d_before_%s.src", i, j, change.Change.From.TreeEntry.Hash.String()))
  555. err := ioutil.WriteFile(record.SrcBefore, []byte(s), 0666)
  556. if err != nil {
  557. panic(err)
  558. }
  559. record.UastAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  560. "%d_%d_after_%s.pb", i, j, change.Change.To.TreeEntry.Hash.String()))
  561. dumpUast(change.After, record.UastAfter)
  562. blob, _ = saver.repository.BlobObject(change.Change.To.TreeEntry.Hash)
  563. s, _ = (&object.File{Blob: *blob}).Contents()
  564. record.SrcAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  565. "%d_%d_after_%s.src", i, j, change.Change.To.TreeEntry.Hash.String()))
  566. err = ioutil.WriteFile(record.SrcAfter, []byte(s), 0666)
  567. if err != nil {
  568. panic(err)
  569. }
  570. fileNames = append(fileNames, record)
  571. }
  572. }
  573. return fileNames
  574. }
  575. func (saver *ChangesSaver) serializeText(result []*pb.UASTChange, writer io.Writer) {
  576. for _, sc := range result {
  577. kv := [...]string{
  578. "file: " + sc.FileName,
  579. "src0: " + sc.SrcBefore, "src1: " + sc.SrcAfter,
  580. "uast0: " + sc.UastBefore, "uast1: " + sc.UastAfter,
  581. }
  582. fmt.Fprintf(writer, " - {%s}\n", strings.Join(kv[:], ", "))
  583. }
  584. }
  585. func (saver *ChangesSaver) serializeBinary(result []*pb.UASTChange, writer io.Writer) error {
  586. message := pb.UASTChangesSaverResults{Changes: result}
  587. serialized, err := proto.Marshal(&message)
  588. if err != nil {
  589. return err
  590. }
  591. _, err = writer.Write(serialized)
  592. return err
  593. }
  594. func init() {
  595. core.Registry.Register(&Extractor{})
  596. core.Registry.Register(&Changes{})
  597. core.Registry.Register(&ChangesSaver{})
  598. }