uast.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659
  1. package uast
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "runtime"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/Jeffail/tunny"
  15. "github.com/gogo/protobuf/proto"
  16. bblfsh "gopkg.in/bblfsh/client-go.v3"
  17. "gopkg.in/bblfsh/sdk.v2/uast/nodes"
  18. "gopkg.in/bblfsh/sdk.v2/uast/nodes/nodesproto"
  19. "gopkg.in/src-d/go-git.v4"
  20. "gopkg.in/src-d/go-git.v4/plumbing"
  21. "gopkg.in/src-d/go-git.v4/plumbing/object"
  22. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  23. "gopkg.in/src-d/hercules.v10/internal/core"
  24. "gopkg.in/src-d/hercules.v10/internal/pb"
  25. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  26. )
// Extractor retrieves UASTs from Babelfish server which correspond to changed files in a commit.
// It is a PipelineItem.
type Extractor struct {
	core.NoopMerger
	// Endpoint is the Babelfish server address ("host:port").
	Endpoint string
	// Context produces the context for each parse RPC; used to enforce the timeout.
	Context func() (context.Context, context.CancelFunc)
	// PoolSize is the number of parallel parse workers (one client per worker).
	PoolSize int
	// FailOnErrors, when true, aborts Consume() on the first UAST parse error.
	FailOnErrors bool
	// ProcessedFiles counts how many times each file name was submitted for parsing.
	ProcessedFiles map[string]int
	// IgnoredMissingDrivers is the set of language names whose "missing driver"
	// parse errors are silently skipped.
	IgnoredMissingDrivers map[string]bool
	// clients holds one Babelfish connection per pool worker.
	clients []*bblfsh.Client
	// pool is the goroutine pool which runs extractTask jobs.
	pool *tunny.Pool
	l    core.Logger
}
const (
	// ConfigUASTEndpoint is the name of the configuration option (Extractor.Configure())
	// which sets the Babelfish server address.
	ConfigUASTEndpoint = "UAST.Endpoint"
	// ConfigUASTTimeout is the name of the configuration option (Extractor.Configure())
	// which sets the maximum amount of time to wait for a Babelfish server response.
	ConfigUASTTimeout = "UAST.Timeout"
	// ConfigUASTPoolSize is the name of the configuration option (Extractor.Configure())
	// which sets the number of goroutines to run for UAST parse queries.
	ConfigUASTPoolSize = "UAST.PoolSize"
	// ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
	// which enables early exit in case of any Babelfish UAST parsing errors.
	ConfigUASTFailOnErrors = "UAST.FailOnErrors"
	// ConfigUASTIgnoreMissingDrivers is the name of the configuration option (Extractor.Configure())
	// which sets the ignored missing driver names.
	ConfigUASTIgnoreMissingDrivers = "UAST.IgnoreMissingDrivers"
	// DefaultBabelfishEndpoint is the default address of the Babelfish parsing server.
	DefaultBabelfishEndpoint = "0.0.0.0:9432"
	// DefaultBabelfishTimeout is the default value of the RPC timeout in seconds.
	DefaultBabelfishTimeout = 20
	// FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
	FeatureUast = "uast"
	// DependencyUasts is the name of the dependency provided by Extractor.
	DependencyUasts = "uasts"
)
var (
	// DefaultBabelfishWorkers is the default number of parsing RPC goroutines.
	// Twice the CPU count because the workers are mostly blocked on network I/O.
	DefaultBabelfishWorkers = runtime.NumCPU() * 2
	// DefaultIgnoredMissingDrivers is the languages which are ignored if the Babelfish driver is missing.
	DefaultIgnoredMissingDrivers = []string{"markdown", "text", "yaml", "json"}
)
// uastTask is the unit of work submitted to the worker pool: parse one blob
// and store the resulting UAST under its hash.
type uastTask struct {
	Lock *sync.RWMutex                // guards Dest and Errors across workers
	Dest map[plumbing.Hash]nodes.Node // shared blob hash -> UAST result map
	Name string                       // file name, used by Babelfish to pick the driver
	Hash plumbing.Hash                // blob hash; the key written into Dest
	Data []byte                       // raw file contents to parse
	Errors *[]error                   // shared error accumulator
}
  80. type worker struct {
  81. Client *bblfsh.Client
  82. Extractor *Extractor
  83. }
  84. // Process will synchronously perform a job and return the result.
  85. func (w worker) Process(data interface{}) interface{} {
  86. return w.Extractor.extractTask(w.Client, data)
  87. }
  88. func (w worker) BlockUntilReady() {}
  89. func (w worker) Interrupt() {}
  90. func (w worker) Terminate() {}
  91. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  92. func (exr *Extractor) Name() string {
  93. return "UAST"
  94. }
  95. // Provides returns the list of names of entities which are produced by this PipelineItem.
  96. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  97. // to this list. Also used by core.Registry to build the global map of providers.
  98. func (exr *Extractor) Provides() []string {
  99. arr := [...]string{DependencyUasts}
  100. return arr[:]
  101. }
  102. // Requires returns the list of names of entities which are needed by this PipelineItem.
  103. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  104. // entities are Provides() upstream.
  105. func (exr *Extractor) Requires() []string {
  106. arr := [...]string{items.DependencyTreeChanges, items.DependencyBlobCache}
  107. return arr[:]
  108. }
  109. // Features which must be enabled for this PipelineItem to be automatically inserted into the DAG.
  110. func (exr *Extractor) Features() []string {
  111. arr := [...]string{FeatureUast}
  112. return arr[:]
  113. }
  114. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  115. func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
  116. options := [...]core.ConfigurationOption{{
  117. Name: ConfigUASTEndpoint,
  118. Description: "How many days there are in a single band.",
  119. Flag: "bblfsh",
  120. Type: core.StringConfigurationOption,
  121. Default: DefaultBabelfishEndpoint}, {
  122. Name: ConfigUASTTimeout,
  123. Description: "Babelfish's server timeout in seconds.",
  124. Flag: "bblfsh-timeout",
  125. Type: core.IntConfigurationOption,
  126. Default: DefaultBabelfishTimeout}, {
  127. Name: ConfigUASTPoolSize,
  128. Description: "Number of goroutines to extract UASTs.",
  129. Flag: "bblfsh-pool-size",
  130. Type: core.IntConfigurationOption,
  131. Default: DefaultBabelfishWorkers}, {
  132. Name: ConfigUASTFailOnErrors,
  133. Description: "Panic if there is a UAST extraction error.",
  134. Flag: "bblfsh-fail-on-error",
  135. Type: core.BoolConfigurationOption,
  136. Default: false}, {
  137. Name: ConfigUASTIgnoreMissingDrivers,
  138. Description: "Do not warn about missing drivers for the specified languages.",
  139. Flag: "bblfsh-ignored-drivers",
  140. Type: core.StringsConfigurationOption,
  141. Default: DefaultIgnoredMissingDrivers},
  142. }
  143. return options[:]
  144. }
  145. // Configure sets the properties previously published by ListConfigurationOptions().
  146. func (exr *Extractor) Configure(facts map[string]interface{}) error {
  147. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  148. exr.l = l
  149. }
  150. if val, exists := facts[ConfigUASTEndpoint].(string); exists {
  151. exr.Endpoint = val
  152. }
  153. if val, exists := facts[ConfigUASTTimeout].(int); exists {
  154. exr.Context = func() (context.Context, context.CancelFunc) {
  155. return context.WithTimeout(context.Background(),
  156. time.Duration(val)*time.Second)
  157. }
  158. }
  159. if val, exists := facts[ConfigUASTPoolSize].(int); exists {
  160. exr.PoolSize = val
  161. }
  162. if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
  163. exr.FailOnErrors = val
  164. }
  165. if val, exists := facts[ConfigUASTIgnoreMissingDrivers].([]string); exists {
  166. exr.IgnoredMissingDrivers = map[string]bool{}
  167. for _, name := range val {
  168. exr.IgnoredMissingDrivers[name] = true
  169. }
  170. }
  171. return nil
  172. }
  173. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  174. // calls. The repository which is going to be analysed is supplied as an argument.
  175. func (exr *Extractor) Initialize(repository *git.Repository) error {
  176. exr.l = core.NewLogger()
  177. if exr.Context == nil {
  178. exr.Context = func() (context.Context, context.CancelFunc) {
  179. return context.WithTimeout(context.Background(),
  180. time.Duration(DefaultBabelfishTimeout)*time.Second)
  181. }
  182. }
  183. if exr.Endpoint == "" {
  184. exr.Endpoint = DefaultBabelfishEndpoint
  185. }
  186. if exr.PoolSize == 0 {
  187. exr.PoolSize = DefaultBabelfishWorkers
  188. }
  189. poolSize := exr.PoolSize
  190. if poolSize == 0 {
  191. poolSize = runtime.NumCPU()
  192. }
  193. exr.clients = make([]*bblfsh.Client, poolSize)
  194. for i := 0; i < poolSize; i++ {
  195. client, err := bblfsh.NewClient(exr.Endpoint)
  196. if err != nil {
  197. if err.Error() == "context deadline exceeded" {
  198. exr.l.Error("Looks like the Babelfish server is not running. Please refer " +
  199. "to https://docs.sourced.tech/babelfish/using-babelfish/getting-started#running-with-docker-recommended")
  200. }
  201. return err
  202. }
  203. exr.clients[i] = client
  204. }
  205. if exr.pool != nil {
  206. exr.pool.Close()
  207. }
  208. {
  209. i := 0
  210. exr.pool = tunny.New(poolSize, func() tunny.Worker {
  211. w := worker{Client: exr.clients[i], Extractor: exr}
  212. i++
  213. return w
  214. })
  215. }
  216. if exr.pool == nil {
  217. panic("UAST goroutine pool was not created")
  218. }
  219. exr.ProcessedFiles = map[string]int{}
  220. if exr.IgnoredMissingDrivers == nil {
  221. exr.IgnoredMissingDrivers = map[string]bool{}
  222. for _, name := range DefaultIgnoredMissingDrivers {
  223. exr.IgnoredMissingDrivers[name] = true
  224. }
  225. }
  226. return nil
  227. }
// Consume runs this PipelineItem on the next commit data.
// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
// This function returns the mapping with analysis results. The keys must be the same as
// in Provides(). If there was an error, nil is returned.
func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
	treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
	uasts := map[plumbing.Hash]nodes.Node{}
	// lock guards uasts and errs, which are mutated by the pool workers.
	lock := sync.RWMutex{}
	errs := make([]error, 0)
	wg := sync.WaitGroup{}
	// submit schedules one changed file for parsing on the worker pool.
	// pool.Process blocks, so it is wrapped in a goroutine tracked by wg.
	submit := func(change *object.Change) {
		exr.ProcessedFiles[change.To.Name]++
		wg.Add(1)
		go func(task interface{}) {
			exr.pool.Process(task)
			wg.Done()
		}(uastTask{
			Lock:   &lock,
			Dest:   uasts,
			Name:   change.To.Name,
			Hash:   change.To.TreeEntry.Hash,
			Data:   cache[change.To.TreeEntry.Hash].Data,
			Errors: &errs,
		})
	}
	for _, change := range treeDiffs {
		action, err := change.Action()
		if err != nil {
			return nil, err
		}
		switch action {
		case merkletrie.Insert:
			submit(change)
		case merkletrie.Delete:
			// A deleted file has no new contents to parse.
			continue
		case merkletrie.Modify:
			submit(change)
		}
	}
	wg.Wait()
	if len(errs) > 0 {
		msgs := make([]string, len(errs))
		for i, err := range errs {
			msgs[i] = err.Error()
		}
		joined := strings.Join(msgs, "\n")
		if exr.FailOnErrors {
			return nil, errors.New(joined)
		}
		// Best-effort mode: report the failures but keep the pipeline going.
		exr.l.Error(joined)
	}
	return map[string]interface{}{DependencyUasts: uasts}, nil
}
  283. // Dispose closes the open GRPC channels.
  284. func (exr *Extractor) Dispose() {
  285. for _, client := range exr.clients {
  286. client.Close()
  287. }
  288. }
  289. // Fork clones this PipelineItem.
  290. func (exr *Extractor) Fork(n int) []core.PipelineItem {
  291. return core.ForkSamePipelineItem(exr, n)
  292. }
  293. func (exr *Extractor) extractUAST(
  294. client *bblfsh.Client, name string, data []byte) (nodes.Node, error) {
  295. ctx, cancel := exr.Context()
  296. if cancel != nil {
  297. defer cancel()
  298. }
  299. request := client.NewParseRequest().
  300. Content(string(data)).Filename(name).Mode(bblfsh.Semantic).Context(ctx)
  301. response, _, err := request.UAST()
  302. if err != nil {
  303. if strings.Contains("missing driver", err.Error()) {
  304. return nil, nil
  305. }
  306. return nil, err
  307. }
  308. return response, nil
  309. }
  310. func (exr *Extractor) extractTask(client *bblfsh.Client, data interface{}) interface{} {
  311. task := data.(uastTask)
  312. node, err := exr.extractUAST(client, task.Name, task.Data)
  313. task.Lock.Lock()
  314. defer task.Lock.Unlock()
  315. if err != nil {
  316. for lang := range exr.IgnoredMissingDrivers {
  317. if strings.HasSuffix(err.Error(), "\""+lang+"\"") {
  318. return nil
  319. }
  320. }
  321. *task.Errors = append(*task.Errors,
  322. fmt.Errorf("\nfile %s, blob %s: %v", task.Name, task.Hash.String(), err))
  323. return nil
  324. }
  325. if node != nil {
  326. task.Dest[task.Hash] = node
  327. }
  328. return nil
  329. }
// Change is the type of the items in the list of changes which is provided by Changes.
type Change struct {
	Before nodes.Node     // UAST of the file before the change; nil for insertions
	After  nodes.Node     // UAST of the file after the change; nil for deletions
	Change *object.Change // the underlying git tree change
}
const (
	// DependencyUastChanges is the name of the dependency provided by Changes.
	DependencyUastChanges = "changed_uasts"
)
// Changes is a structured analog of TreeDiff: it provides UASTs for every logical change
// in a commit. It is a PipelineItem.
type Changes struct {
	core.NoopMerger
	// cache maps blob hash -> last seen UAST, so "before" trees can be
	// recalled when a file is later modified or deleted.
	cache map[plumbing.Hash]nodes.Node
	l     core.Logger
}
  347. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  348. func (uc *Changes) Name() string {
  349. return "UASTChanges"
  350. }
  351. // Provides returns the list of names of entities which are produced by this PipelineItem.
  352. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  353. // to this list. Also used by core.Registry to build the global map of providers.
  354. func (uc *Changes) Provides() []string {
  355. arr := [...]string{DependencyUastChanges}
  356. return arr[:]
  357. }
  358. // Requires returns the list of names of entities which are needed by this PipelineItem.
  359. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  360. // entities are Provides() upstream.
  361. func (uc *Changes) Requires() []string {
  362. arr := [...]string{DependencyUasts, items.DependencyTreeChanges}
  363. return arr[:]
  364. }
  365. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  366. func (uc *Changes) ListConfigurationOptions() []core.ConfigurationOption {
  367. return []core.ConfigurationOption{}
  368. }
  369. // Configure sets the properties previously published by ListConfigurationOptions().
  370. func (uc *Changes) Configure(facts map[string]interface{}) error {
  371. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  372. uc.l = l
  373. }
  374. return nil
  375. }
  376. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  377. // calls. The repository which is going to be analysed is supplied as an argument.
  378. func (uc *Changes) Initialize(repository *git.Repository) error {
  379. uc.l = core.NewLogger()
  380. uc.cache = map[plumbing.Hash]nodes.Node{}
  381. return nil
  382. }
  383. // Consume runs this PipelineItem on the next commit data.
  384. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  385. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  386. // This function returns the mapping with analysis results. The keys must be the same as
  387. // in Provides(). If there was an error, nil is returned.
  388. func (uc *Changes) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  389. uasts := deps[DependencyUasts].(map[plumbing.Hash]nodes.Node)
  390. treeDiffs := deps[items.DependencyTreeChanges].(object.Changes)
  391. commit := make([]Change, 0, len(treeDiffs))
  392. for _, change := range treeDiffs {
  393. action, err := change.Action()
  394. if err != nil {
  395. return nil, err
  396. }
  397. switch action {
  398. case merkletrie.Insert:
  399. hashTo := change.To.TreeEntry.Hash
  400. uastTo := uasts[hashTo]
  401. commit = append(commit, Change{Before: nil, After: uastTo, Change: change})
  402. uc.cache[hashTo] = uastTo
  403. case merkletrie.Delete:
  404. hashFrom := change.From.TreeEntry.Hash
  405. commit = append(commit, Change{Before: uc.cache[hashFrom], After: nil, Change: change})
  406. delete(uc.cache, hashFrom)
  407. case merkletrie.Modify:
  408. hashFrom := change.From.TreeEntry.Hash
  409. hashTo := change.To.TreeEntry.Hash
  410. uastTo := uasts[hashTo]
  411. commit = append(commit, Change{Before: uc.cache[hashFrom], After: uastTo, Change: change})
  412. delete(uc.cache, hashFrom)
  413. uc.cache[hashTo] = uastTo
  414. }
  415. }
  416. return map[string]interface{}{DependencyUastChanges: commit}, nil
  417. }
  418. // Fork clones this PipelineItem.
  419. func (uc *Changes) Fork(n int) []core.PipelineItem {
  420. ucs := make([]core.PipelineItem, n)
  421. for i := 0; i < n; i++ {
  422. clone := &Changes{
  423. cache: map[plumbing.Hash]nodes.Node{},
  424. }
  425. for key, val := range uc.cache {
  426. clone.cache[key] = val
  427. }
  428. ucs[i] = clone
  429. }
  430. return ucs
  431. }
// ChangesSaver dumps changed files and corresponding UASTs for every commit.
// it is a LeafPipelineItem.
type ChangesSaver struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// OutputPath points to the target directory with UASTs
	OutputPath string
	// repository is the repo under analysis; used to read blob contents.
	repository *git.Repository
	// result accumulates the per-commit change lists until Finalize().
	result [][]Change
	l      core.Logger
}
const (
	// ConfigUASTChangesSaverOutputPath is the name of the configuration option
	// (ChangesSaver.Configure()) which sets the target directory where to save the files.
	ConfigUASTChangesSaverOutputPath = "ChangesSaver.OutputPath"
)
  448. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  449. func (saver *ChangesSaver) Name() string {
  450. return "UASTChangesSaver"
  451. }
  452. // Provides returns the list of names of entities which are produced by this PipelineItem.
  453. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  454. // to this list. Also used by core.Registry to build the global map of providers.
  455. func (saver *ChangesSaver) Provides() []string {
  456. return []string{}
  457. }
  458. // Requires returns the list of names of entities which are needed by this PipelineItem.
  459. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  460. // entities are Provides() upstream.
  461. func (saver *ChangesSaver) Requires() []string {
  462. arr := [...]string{DependencyUastChanges}
  463. return arr[:]
  464. }
  465. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  466. func (saver *ChangesSaver) ListConfigurationOptions() []core.ConfigurationOption {
  467. options := [...]core.ConfigurationOption{{
  468. Name: ConfigUASTChangesSaverOutputPath,
  469. Description: "The target directory where to store the changed UAST files.",
  470. Flag: "changed-uast-dir",
  471. Type: core.PathConfigurationOption,
  472. Default: "."},
  473. }
  474. return options[:]
  475. }
  476. // Flag for the command line switch which enables this analysis.
  477. func (saver *ChangesSaver) Flag() string {
  478. return "dump-uast-changes"
  479. }
  480. // Description returns the text which explains what the analysis is doing.
  481. func (saver *ChangesSaver) Description() string {
  482. return "Saves UASTs and file contents on disk for each commit."
  483. }
  484. // Configure sets the properties previously published by ListConfigurationOptions().
  485. func (saver *ChangesSaver) Configure(facts map[string]interface{}) error {
  486. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  487. saver.l = l
  488. }
  489. if val, exists := facts[ConfigUASTChangesSaverOutputPath]; exists {
  490. saver.OutputPath = val.(string)
  491. }
  492. return nil
  493. }
  494. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  495. // calls. The repository which is going to be analysed is supplied as an argument.
  496. func (saver *ChangesSaver) Initialize(repository *git.Repository) error {
  497. saver.l = core.NewLogger()
  498. saver.repository = repository
  499. saver.result = [][]Change{}
  500. saver.OneShotMergeProcessor.Initialize()
  501. return nil
  502. }
  503. // Consume runs this PipelineItem on the next commit data.
  504. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  505. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  506. // This function returns the mapping with analysis results. The keys must be the same as
  507. // in Provides(). If there was an error, nil is returned.
  508. func (saver *ChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  509. if !saver.ShouldConsumeCommit(deps) {
  510. return nil, nil
  511. }
  512. changes := deps[DependencyUastChanges].([]Change)
  513. saver.result = append(saver.result, changes)
  514. return nil, nil
  515. }
  516. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  517. func (saver *ChangesSaver) Finalize() interface{} {
  518. return saver.result
  519. }
  520. // Fork clones this PipelineItem.
  521. func (saver *ChangesSaver) Fork(n int) []core.PipelineItem {
  522. return core.ForkSamePipelineItem(saver, n)
  523. }
  524. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  525. // The text format is YAML and the bytes format is Protocol Buffers.
  526. func (saver *ChangesSaver) Serialize(result interface{}, binary bool, writer io.Writer) error {
  527. saverResult := result.([][]Change)
  528. fileNames := saver.dumpFiles(saverResult)
  529. if binary {
  530. return saver.serializeBinary(fileNames, writer)
  531. }
  532. saver.serializeText(fileNames, writer)
  533. return nil
  534. }
  535. func (saver *ChangesSaver) dumpFiles(result [][]Change) []*pb.UASTChange {
  536. var fileNames []*pb.UASTChange
  537. dumpUast := func(uast nodes.Node, path string) {
  538. f, err := os.Create(path)
  539. if err != nil {
  540. panic(err)
  541. }
  542. defer f.Close()
  543. err = nodesproto.WriteTo(f, uast)
  544. if err != nil {
  545. panic(err)
  546. }
  547. }
  548. for i, changes := range result {
  549. for j, change := range changes {
  550. if change.Before == nil || change.After == nil {
  551. continue
  552. }
  553. record := &pb.UASTChange{FileName: change.Change.To.Name}
  554. record.UastBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  555. "%d_%d_before_%s.pb", i, j, change.Change.From.TreeEntry.Hash.String()))
  556. dumpUast(change.Before, record.UastBefore)
  557. blob, _ := saver.repository.BlobObject(change.Change.From.TreeEntry.Hash)
  558. s, _ := (&object.File{Blob: *blob}).Contents()
  559. record.SrcBefore = path.Join(saver.OutputPath, fmt.Sprintf(
  560. "%d_%d_before_%s.src", i, j, change.Change.From.TreeEntry.Hash.String()))
  561. err := ioutil.WriteFile(record.SrcBefore, []byte(s), 0666)
  562. if err != nil {
  563. panic(err)
  564. }
  565. record.UastAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  566. "%d_%d_after_%s.pb", i, j, change.Change.To.TreeEntry.Hash.String()))
  567. dumpUast(change.After, record.UastAfter)
  568. blob, _ = saver.repository.BlobObject(change.Change.To.TreeEntry.Hash)
  569. s, _ = (&object.File{Blob: *blob}).Contents()
  570. record.SrcAfter = path.Join(saver.OutputPath, fmt.Sprintf(
  571. "%d_%d_after_%s.src", i, j, change.Change.To.TreeEntry.Hash.String()))
  572. err = ioutil.WriteFile(record.SrcAfter, []byte(s), 0666)
  573. if err != nil {
  574. panic(err)
  575. }
  576. fileNames = append(fileNames, record)
  577. }
  578. }
  579. return fileNames
  580. }
  581. func (saver *ChangesSaver) serializeText(result []*pb.UASTChange, writer io.Writer) {
  582. for _, sc := range result {
  583. kv := [...]string{
  584. "file: " + sc.FileName,
  585. "src0: " + sc.SrcBefore, "src1: " + sc.SrcAfter,
  586. "uast0: " + sc.UastBefore, "uast1: " + sc.UastAfter,
  587. }
  588. fmt.Fprintf(writer, " - {%s}\n", strings.Join(kv[:], ", "))
  589. }
  590. }
  591. func (saver *ChangesSaver) serializeBinary(result []*pb.UASTChange, writer io.Writer) error {
  592. message := pb.UASTChangesSaverResults{Changes: result}
  593. serialized, err := proto.Marshal(&message)
  594. if err != nil {
  595. return err
  596. }
  597. _, err = writer.Write(serialized)
  598. return err
  599. }
// init registers the three UAST pipeline items with the global registry so
// they can be discovered and wired into the DAG by name.
func init() {
	core.Registry.Register(&Extractor{})
	core.Registry.Register(&Changes{})
	core.Registry.Register(&ChangesSaver{})
}