shotness.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "unicode/utf8"
  7. "github.com/gogo/protobuf/proto"
  8. "github.com/sergi/go-diff/diffmatchpatch"
  9. "gopkg.in/bblfsh/client-go.v3/tools"
  10. "gopkg.in/bblfsh/sdk.v2/uast"
  11. uast_nodes "gopkg.in/bblfsh/sdk.v2/uast/nodes"
  12. "gopkg.in/bblfsh/sdk.v2/uast/query"
  13. "gopkg.in/src-d/go-git.v4"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/hercules.v10/internal/core"
  16. "gopkg.in/src-d/hercules.v10/internal/pb"
  17. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  18. uast_items "gopkg.in/src-d/hercules.v10/internal/plumbing/uast"
  19. )
  20. // ShotnessAnalysis contains the intermediate state which is mutated by Consume(). It should implement
  21. // LeafPipelineItem.
  22. type ShotnessAnalysis struct {
  23. core.NoopMerger
  24. core.OneShotMergeProcessor
  25. XpathStruct string
  26. XpathName string
  27. nodes map[string]*nodeShotness
  28. files map[string]map[string]*nodeShotness
  29. l core.Logger
  30. }
  31. const (
  32. // ConfigShotnessXpathStruct is the name of the configuration option (ShotnessAnalysis.Configure())
  33. // which sets the UAST XPath to choose the analysed nodes.
  34. // The format is Semantic UASTv2, see https://docs.sourced.tech/babelfish/using-babelfish/uast-querying
  35. ConfigShotnessXpathStruct = "Shotness.XpathStruct"
  36. // ConfigShotnessXpathName is the name of the configuration option (ShotnessAnalysis.Configure())
  37. // which sets the UAST XPath to find the name of the nodes chosen by ConfigShotnessXpathStruct.
  38. // The format is Semantic UASTv2, see https://docs.sourced.tech/babelfish/using-babelfish/uast-querying
  39. ConfigShotnessXpathName = "Shotness.XpathName"
  40. // DefaultShotnessXpathStruct is the default UAST XPath to choose the analysed nodes.
  41. // It extracts functions.
  42. DefaultShotnessXpathStruct = "//uast:FunctionGroup"
  43. // DefaultShotnessXpathName is the default UAST XPath to choose the names of the analysed nodes.
  44. // It looks at the current tree level and at the immediate children.
  45. DefaultShotnessXpathName = "/Nodes/uast:Alias/Name"
  46. )
  47. type nodeShotness struct {
  48. Count int
  49. Summary NodeSummary
  50. Couples map[string]int
  51. }
  52. // NodeSummary carries the node attributes which annotate the "shotness" analysis' counters.
  53. // These attributes are supposed to uniquely identify each node.
  54. type NodeSummary struct {
  55. Type string
  56. Name string
  57. File string
  58. }
  59. // ShotnessResult is returned by ShotnessAnalysis.Finalize() and represents the analysis result.
  60. type ShotnessResult struct {
  61. Nodes []NodeSummary
  62. Counters []map[int]int
  63. }
  64. func (node NodeSummary) String() string {
  65. return node.Type + "_" + node.Name + "_" + node.File
  66. }
  67. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  68. func (shotness *ShotnessAnalysis) Name() string {
  69. return "Shotness"
  70. }
  71. // Provides returns the list of names of entities which are produced by this PipelineItem.
  72. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  73. // to this list. Also used by core.Registry to build the global map of providers.
  74. func (shotness *ShotnessAnalysis) Provides() []string {
  75. return []string{}
  76. }
  77. // Requires returns the list of names of entities which are needed by this PipelineItem.
  78. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  79. // entities are Provides() upstream.
  80. func (shotness *ShotnessAnalysis) Requires() []string {
  81. return []string{items.DependencyFileDiff, uast_items.DependencyUastChanges}
  82. }
  83. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  84. func (shotness *ShotnessAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  85. opts := [...]core.ConfigurationOption{{
  86. Name: ConfigShotnessXpathStruct,
  87. Description: "Semantic UAST XPath query to use for filtering the nodes. " +
  88. "Refer to https://docs.sourced.tech/babelfish/using-babelfish/uast-querying",
  89. Flag: "shotness-xpath-struct",
  90. Type: core.StringConfigurationOption,
  91. Default: DefaultShotnessXpathStruct}, {
  92. Name: ConfigShotnessXpathName,
  93. Description: "Semantic UAST XPath query to determine the names of the filtered nodes. " +
  94. "Refer to https://docs.sourced.tech/babelfish/using-babelfish/uast-querying",
  95. Flag: "shotness-xpath-name",
  96. Type: core.StringConfigurationOption,
  97. Default: DefaultShotnessXpathName},
  98. }
  99. return opts[:]
  100. }
  101. // Flag returns the command line switch which activates the analysis.
  102. func (shotness *ShotnessAnalysis) Flag() string {
  103. return "shotness"
  104. }
  105. // Features returns the Hercules features required to deploy this leaf.
  106. func (shotness *ShotnessAnalysis) Features() []string {
  107. return []string{uast_items.FeatureUast}
  108. }
  109. // Description returns the text which explains what the analysis is doing.
  110. func (shotness *ShotnessAnalysis) Description() string {
  111. return "Structural hotness - a fine-grained alternative to --couples. " +
  112. "Given an XPath over UASTs - selecting functions by default - we build the square " +
  113. "co-occurrence matrix. The value in each cell equals to the number of times the pair " +
  114. "of selected UAST units appeared in the same commit."
  115. }
  116. // Configure sets the properties previously published by ListConfigurationOptions().
  117. func (shotness *ShotnessAnalysis) Configure(facts map[string]interface{}) error {
  118. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  119. shotness.l = l
  120. }
  121. if val, exists := facts[ConfigShotnessXpathStruct]; exists {
  122. shotness.XpathStruct = val.(string)
  123. } else {
  124. shotness.XpathStruct = DefaultShotnessXpathStruct
  125. }
  126. if val, exists := facts[ConfigShotnessXpathName]; exists {
  127. shotness.XpathName = val.(string)
  128. } else {
  129. shotness.XpathName = DefaultShotnessXpathName
  130. }
  131. return nil
  132. }
  133. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  134. // calls. The repository which is going to be analysed is supplied as an argument.
  135. func (shotness *ShotnessAnalysis) Initialize(repository *git.Repository) error {
  136. shotness.l = core.NewLogger()
  137. shotness.nodes = map[string]*nodeShotness{}
  138. shotness.files = map[string]map[string]*nodeShotness{}
  139. shotness.OneShotMergeProcessor.Initialize()
  140. return nil
  141. }
  142. // Consume runs this PipelineItem on the next commit data.
  143. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  144. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  145. // This function returns the mapping with analysis results. The keys must be the same as
  146. // in Provides(). If there was an error, nil is returned.
  147. func (shotness *ShotnessAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  148. if !shotness.ShouldConsumeCommit(deps) {
  149. return nil, nil
  150. }
  151. commit := deps[core.DependencyCommit].(*object.Commit)
  152. changesList := deps[uast_items.DependencyUastChanges].([]uast_items.Change)
  153. diffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
  154. allNodes := map[string]bool{}
  155. addNode := func(name string, node uast_nodes.Node, fileName string) {
  156. nodeSummary := NodeSummary{
  157. Type: uast.TypeOf(node),
  158. Name: name,
  159. File: fileName,
  160. }
  161. key := nodeSummary.String()
  162. exists := allNodes[key]
  163. allNodes[key] = true
  164. var count int
  165. if ns := shotness.nodes[key]; ns != nil {
  166. count = ns.Count
  167. }
  168. if count == 0 {
  169. shotness.nodes[key] = &nodeShotness{
  170. Summary: nodeSummary, Count: 1, Couples: map[string]int{}}
  171. fmap := shotness.files[nodeSummary.File]
  172. if fmap == nil {
  173. fmap = map[string]*nodeShotness{}
  174. }
  175. fmap[key] = shotness.nodes[key]
  176. shotness.files[nodeSummary.File] = fmap
  177. } else if !exists { // in case there are removals and additions in the same node
  178. shotness.nodes[key].Count = count + 1
  179. }
  180. }
  181. for _, change := range changesList {
  182. if change.After == nil {
  183. for key, summary := range shotness.files[change.Change.From.Name] {
  184. for subkey := range summary.Couples {
  185. delete(shotness.nodes[subkey].Couples, key)
  186. }
  187. }
  188. for key := range shotness.files[change.Change.From.Name] {
  189. delete(shotness.nodes, key)
  190. }
  191. delete(shotness.files, change.Change.From.Name)
  192. continue
  193. }
  194. toName := change.Change.To.Name
  195. if change.Before == nil {
  196. nodes, err := shotness.extractNodes(change.After)
  197. if err != nil {
  198. shotness.l.Warnf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  199. commit.Hash.String(), toName, err.Error())
  200. continue
  201. }
  202. for name, node := range nodes {
  203. addNode(name, node, toName)
  204. }
  205. continue
  206. }
  207. // Before -> After
  208. if change.Change.From.Name != toName {
  209. // renamed
  210. oldFile := shotness.files[change.Change.From.Name]
  211. newFile := map[string]*nodeShotness{}
  212. shotness.files[toName] = newFile
  213. for oldKey, ns := range oldFile {
  214. ns.Summary.File = toName
  215. newKey := ns.Summary.String()
  216. newFile[newKey] = ns
  217. shotness.nodes[newKey] = ns
  218. for coupleKey, count := range ns.Couples {
  219. coupleCouples := shotness.nodes[coupleKey].Couples
  220. delete(coupleCouples, oldKey)
  221. coupleCouples[newKey] = count
  222. }
  223. }
  224. // deferred cleanup is needed
  225. for key := range oldFile {
  226. delete(shotness.nodes, key)
  227. }
  228. delete(shotness.files, change.Change.From.Name)
  229. }
  230. // pass through old UAST
  231. // pass through new UAST
  232. nodesBefore, err := shotness.extractNodes(change.Before)
  233. if err != nil {
  234. shotness.l.Warnf("Shotness: commit ^%s file %s failed to filter UAST: %s\n",
  235. commit.Hash.String(), change.Change.From.Name, err.Error())
  236. continue
  237. }
  238. reversedNodesBefore := reverseNodeMap(nodesBefore)
  239. nodesAfter, err := shotness.extractNodes(change.After)
  240. if err != nil {
  241. shotness.l.Warnf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  242. commit.Hash.String(), toName, err.Error())
  243. continue
  244. }
  245. reversedNodesAfter := reverseNodeMap(nodesAfter)
  246. genLine2Node := func(nodes map[string]uast_nodes.Node, linesNum int) [][]uast_nodes.Node {
  247. res := make([][]uast_nodes.Node, linesNum)
  248. for _, node := range nodes {
  249. pos := uast.PositionsOf(node.(uast_nodes.Object))
  250. if pos.Start() == nil {
  251. continue
  252. }
  253. startLine := pos.Start().Line
  254. endLine := pos.Start().Line
  255. if pos.End() != nil && pos.End().Line > pos.Start().Line {
  256. endLine = pos.End().Line
  257. } else {
  258. // we need to determine pos.End().Line
  259. uast_items.VisitEachNode(node, func(child uast_nodes.Node) {
  260. childPos := uast.PositionsOf(child.(uast_nodes.Object))
  261. if childPos.Start() != nil {
  262. candidate := childPos.Start().Line
  263. if childPos.End() != nil {
  264. candidate = childPos.End().Line
  265. }
  266. if candidate > endLine {
  267. endLine = candidate
  268. }
  269. }
  270. })
  271. }
  272. for l := startLine; l <= endLine; l++ {
  273. lineNodes := res[l-1]
  274. if lineNodes == nil {
  275. lineNodes = []uast_nodes.Node{}
  276. }
  277. lineNodes = append(lineNodes, node)
  278. res[l-1] = lineNodes
  279. }
  280. }
  281. return res
  282. }
  283. diff := diffs[toName]
  284. line2nodeBefore := genLine2Node(nodesBefore, diff.OldLinesOfCode)
  285. line2nodeAfter := genLine2Node(nodesAfter, diff.NewLinesOfCode)
  286. // Scan through all the edits. Given the line numbers, get the list of active nodes
  287. // and add them.
  288. var lineNumBefore, lineNumAfter int
  289. for _, edit := range diff.Diffs {
  290. size := utf8.RuneCountInString(edit.Text)
  291. switch edit.Type {
  292. case diffmatchpatch.DiffDelete:
  293. for l := lineNumBefore; l < lineNumBefore+size; l++ {
  294. nodes := line2nodeBefore[l]
  295. for _, node := range nodes {
  296. // toName because we handled a possible rename before
  297. addNode(reversedNodesBefore[uast_nodes.UniqueKey(node)], node, toName)
  298. }
  299. }
  300. lineNumBefore += size
  301. case diffmatchpatch.DiffInsert:
  302. for l := lineNumAfter; l < lineNumAfter+size; l++ {
  303. nodes := line2nodeAfter[l]
  304. for _, node := range nodes {
  305. addNode(reversedNodesAfter[uast_nodes.UniqueKey(node)], node, toName)
  306. }
  307. }
  308. lineNumAfter += size
  309. case diffmatchpatch.DiffEqual:
  310. lineNumBefore += size
  311. lineNumAfter += size
  312. }
  313. }
  314. }
  315. for keyi := range allNodes {
  316. for keyj := range allNodes {
  317. if keyi == keyj {
  318. continue
  319. }
  320. shotness.nodes[keyi].Couples[keyj]++
  321. }
  322. }
  323. return nil, nil
  324. }
  325. // Fork clones this PipelineItem.
  326. func (shotness *ShotnessAnalysis) Fork(n int) []core.PipelineItem {
  327. return core.ForkSamePipelineItem(shotness, n)
  328. }
  329. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  330. func (shotness *ShotnessAnalysis) Finalize() interface{} {
  331. result := ShotnessResult{
  332. Nodes: make([]NodeSummary, len(shotness.nodes)),
  333. Counters: make([]map[int]int, len(shotness.nodes)),
  334. }
  335. keys := make([]string, len(shotness.nodes))
  336. i := 0
  337. for key := range shotness.nodes {
  338. keys[i] = key
  339. i++
  340. }
  341. sort.Strings(keys)
  342. reverseKeys := map[string]int{}
  343. for i, key := range keys {
  344. reverseKeys[key] = i
  345. }
  346. for i, key := range keys {
  347. node := shotness.nodes[key]
  348. result.Nodes[i] = node.Summary
  349. counter := map[int]int{}
  350. result.Counters[i] = counter
  351. counter[i] = node.Count
  352. for ck, val := range node.Couples {
  353. counter[reverseKeys[ck]] = val
  354. }
  355. }
  356. return result
  357. }
  358. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  359. // The text format is YAML and the bytes format is Protocol Buffers.
  360. func (shotness *ShotnessAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  361. shotnessResult := result.(ShotnessResult)
  362. if binary {
  363. return shotness.serializeBinary(&shotnessResult, writer)
  364. }
  365. shotness.serializeText(&shotnessResult, writer)
  366. return nil
  367. }
  368. func (shotness *ShotnessAnalysis) serializeText(result *ShotnessResult, writer io.Writer) {
  369. for i, summary := range result.Nodes {
  370. fmt.Fprintf(writer, " - name: %s\n file: %s\n internal_role: %s\n counters: {",
  371. summary.Name, summary.File, summary.Type)
  372. keys := make([]int, len(result.Counters[i]))
  373. j := 0
  374. for key := range result.Counters[i] {
  375. keys[j] = key
  376. j++
  377. }
  378. sort.Ints(keys)
  379. j = 0
  380. for _, key := range keys {
  381. val := result.Counters[i][key]
  382. if j < len(result.Counters[i])-1 {
  383. fmt.Fprintf(writer, "\"%d\":%d,", key, val)
  384. } else {
  385. fmt.Fprintf(writer, "\"%d\":%d}\n", key, val)
  386. }
  387. j++
  388. }
  389. }
  390. }
  391. func (shotness *ShotnessAnalysis) serializeBinary(result *ShotnessResult, writer io.Writer) error {
  392. message := pb.ShotnessAnalysisResults{
  393. Records: make([]*pb.ShotnessRecord, len(result.Nodes)),
  394. }
  395. for i, summary := range result.Nodes {
  396. record := &pb.ShotnessRecord{
  397. Name: summary.Name,
  398. File: summary.File,
  399. Type: summary.Type,
  400. Counters: map[int32]int32{},
  401. }
  402. for key, val := range result.Counters[i] {
  403. record.Counters[int32(key)] = int32(val)
  404. }
  405. message.Records[i] = record
  406. }
  407. serialized, err := proto.Marshal(&message)
  408. if err != nil {
  409. return err
  410. }
  411. _, err = writer.Write(serialized)
  412. return err
  413. }
  414. func (shotness *ShotnessAnalysis) extractNodes(root uast_nodes.Node) (map[string]uast_nodes.Node, error) {
  415. it, err := tools.Filter(root, shotness.XpathStruct)
  416. if err != nil {
  417. return nil, err
  418. }
  419. structs := query.AllNodes(it)
  420. // some structs may be inside other structs; we pick the outermost
  421. // otherwise due to UAST quirks there may be false positives
  422. internal := map[uast_nodes.Comparable]bool{}
  423. for _, ext := range structs {
  424. mainNode := ext.(uast_nodes.Node)
  425. if internal[uast_nodes.UniqueKey(mainNode)] {
  426. continue
  427. }
  428. subs, err := tools.Filter(mainNode, shotness.XpathStruct)
  429. if err != nil {
  430. return nil, err
  431. }
  432. for subs.Next() {
  433. sub := subs.Node().(uast_nodes.Node)
  434. if uast_nodes.UniqueKey(sub) != uast_nodes.UniqueKey(mainNode) {
  435. internal[uast_nodes.UniqueKey(sub)] = true
  436. }
  437. }
  438. }
  439. res := map[string]uast_nodes.Node{}
  440. for _, ext := range structs {
  441. node := ext.(uast_nodes.Node)
  442. if internal[uast_nodes.UniqueKey(node)] {
  443. continue
  444. }
  445. nodeName, err := tools.FilterNode(node, "/*"+shotness.XpathName)
  446. if err != nil {
  447. return nil, err
  448. }
  449. res[string(nodeName.(uast_nodes.Object)["Name"].(uast_nodes.String))] = node
  450. }
  451. return res, nil
  452. }
  453. func reverseNodeMap(nodes map[string]uast_nodes.Node) map[uast_nodes.Comparable]string {
  454. res := map[uast_nodes.Comparable]string{}
  455. for key, node := range nodes {
  456. res[uast_nodes.UniqueKey(node)] = key
  457. }
  458. return res
  459. }
  460. func init() {
  461. core.Registry.Register(&ShotnessAnalysis{})
  462. }