shotness.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "log"
  6. "sort"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/bblfsh/client-go.v2/tools"
  11. "gopkg.in/bblfsh/sdk.v1/uast"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing/object"
  14. "gopkg.in/src-d/hercules.v4/internal/core"
  15. "gopkg.in/src-d/hercules.v4/internal/pb"
  16. items "gopkg.in/src-d/hercules.v4/internal/plumbing"
  17. uast_items "gopkg.in/src-d/hercules.v4/internal/plumbing/uast"
  18. )
// ShotnessAnalysis contains the intermediate state which is mutated by Consume(). It should implement
// LeafPipelineItem.
type ShotnessAnalysis struct {
	core.NoopMerger
	core.OneShotMergeProcessor
	// XpathStruct is the UAST XPath query which selects the analysed nodes. Set by Configure().
	XpathStruct string
	// XpathName is the UAST XPath query which finds the name of a selected node. Set by Configure().
	XpathName string
	// nodes maps a node key (NodeSummary.String()) to the statistics gathered for that node.
	nodes map[string]*nodeShotness
	// files maps a file name to the nodes registered for that file; the inner map uses the same
	// keys and the same pointers as nodes.
	files map[string]map[string]*nodeShotness
}
const (
	// ConfigShotnessXpathStruct is the name of the configuration option (ShotnessAnalysis.Configure())
	// which sets the UAST XPath to choose the analysed nodes.
	ConfigShotnessXpathStruct = "Shotness.XpathStruct"
	// ConfigShotnessXpathName is the name of the configuration option (ShotnessAnalysis.Configure())
	// which sets the UAST XPath to find the name of the nodes chosen by ConfigShotnessXpathStruct.
	// These XPaths can be different for some languages.
	ConfigShotnessXpathName = "Shotness.XpathName"
	// DefaultShotnessXpathStruct is the default UAST XPath to choose the analysed nodes.
	// It extracts functions.
	DefaultShotnessXpathStruct = "//*[@roleFunction and @roleDeclaration]"
	// DefaultShotnessXpathName is the default UAST XPath to choose the names of the analysed nodes.
	// It looks at the current tree level and at the immediate children.
	DefaultShotnessXpathName = "/*[@roleFunction and @roleIdentifier and @roleName] | /*/*[@roleFunction and @roleIdentifier and @roleName]"
)
// nodeShotness is the per-node accumulator kept by ShotnessAnalysis between Consume() calls.
type nodeShotness struct {
	// Count is the number of commits in which the node was touched; Consume() increases it
	// at most once per commit.
	Count int
	// Summary holds the attributes which identify the node.
	Summary NodeSummary
	// Couples maps the key of another node to the number of commits which touched both nodes.
	Couples map[string]int
}
// NodeSummary carries the node attributes which annotate the "shotness" analysis' counters.
// These attributes are supposed to uniquely identify each node.
type NodeSummary struct {
	// InternalRole is copied from the UAST node's InternalType.
	InternalRole string
	// Roles is copied from the UAST node's Roles.
	Roles []uast.Role
	// Name is the node name found with the XpathName query.
	Name string
	// File is the name of the file which contains the node; it is updated when the file is renamed.
	File string
}
// ShotnessResult is returned by ShotnessAnalysis.Finalize() and represents the analysis result.
type ShotnessResult struct {
	// Nodes lists the node summaries ordered by their sorted string keys.
	Nodes []NodeSummary
	// Counters is parallel to Nodes. Counters[i] maps an index in Nodes to a counter: the entry
	// at i itself is the node's own count, the other entries are the coupling counts.
	Counters []map[int]int
}
  62. func (node NodeSummary) String() string {
  63. return node.InternalRole + "_" + node.Name + "_" + node.File
  64. }
  65. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  66. func (shotness *ShotnessAnalysis) Name() string {
  67. return "Shotness"
  68. }
  69. // Provides returns the list of names of entities which are produced by this PipelineItem.
  70. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  71. // to this list. Also used by core.Registry to build the global map of providers.
  72. func (shotness *ShotnessAnalysis) Provides() []string {
  73. return []string{}
  74. }
  75. // Requires returns the list of names of entities which are needed by this PipelineItem.
  76. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  77. // entities are Provides() upstream.
  78. func (shotness *ShotnessAnalysis) Requires() []string {
  79. arr := [...]string{items.DependencyFileDiff, uast_items.DependencyUastChanges}
  80. return arr[:]
  81. }
  82. // Features which must be enabled for this PipelineItem to be automatically inserted into the DAG.
  83. func (shotness *ShotnessAnalysis) Features() []string {
  84. arr := [...]string{uast_items.FeatureUast}
  85. return arr[:]
  86. }
  87. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  88. func (shotness *ShotnessAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  89. opts := [...]core.ConfigurationOption{{
  90. Name: ConfigShotnessXpathStruct,
  91. Description: "UAST XPath query to use for filtering the nodes.",
  92. Flag: "shotness-xpath-struct",
  93. Type: core.StringConfigurationOption,
  94. Default: DefaultShotnessXpathStruct}, {
  95. Name: ConfigShotnessXpathName,
  96. Description: "UAST XPath query to determine the names of the filtered nodes.",
  97. Flag: "shotness-xpath-name",
  98. Type: core.StringConfigurationOption,
  99. Default: DefaultShotnessXpathName},
  100. }
  101. return opts[:]
  102. }
  103. // Flag returns the command line switch which activates the analysis.
  104. func (shotness *ShotnessAnalysis) Flag() string {
  105. return "shotness"
  106. }
  107. // Configure sets the properties previously published by ListConfigurationOptions().
  108. func (shotness *ShotnessAnalysis) Configure(facts map[string]interface{}) {
  109. if val, exists := facts[ConfigShotnessXpathStruct]; exists {
  110. shotness.XpathStruct = val.(string)
  111. } else {
  112. shotness.XpathStruct = DefaultShotnessXpathStruct
  113. }
  114. if val, exists := facts[ConfigShotnessXpathName]; exists {
  115. shotness.XpathName = val.(string)
  116. } else {
  117. shotness.XpathName = DefaultShotnessXpathName
  118. }
  119. }
  120. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  121. // calls. The repository which is going to be analysed is supplied as an argument.
  122. func (shotness *ShotnessAnalysis) Initialize(repository *git.Repository) {
  123. shotness.nodes = map[string]*nodeShotness{}
  124. shotness.files = map[string]map[string]*nodeShotness{}
  125. shotness.OneShotMergeProcessor.Initialize()
  126. }
  127. // Consume runs this PipelineItem on the next commit data.
  128. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  129. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  130. // This function returns the mapping with analysis results. The keys must be the same as
  131. // in Provides(). If there was an error, nil is returned.
  132. func (shotness *ShotnessAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  133. if !shotness.ShouldConsumeCommit(deps) {
  134. return nil, nil
  135. }
  136. commit := deps[core.DependencyCommit].(*object.Commit)
  137. changesList := deps[uast_items.DependencyUastChanges].([]uast_items.Change)
  138. diffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
  139. allNodes := map[string]bool{}
  140. addNode := func(name string, node *uast.Node, fileName string) {
  141. nodeSummary := NodeSummary{
  142. InternalRole: node.InternalType,
  143. Roles: node.Roles,
  144. Name: name,
  145. File: fileName,
  146. }
  147. key := nodeSummary.String()
  148. exists := allNodes[key]
  149. allNodes[key] = true
  150. var count int
  151. if ns := shotness.nodes[key]; ns != nil {
  152. count = ns.Count
  153. }
  154. if count == 0 {
  155. shotness.nodes[key] = &nodeShotness{
  156. Summary: nodeSummary, Count: 1, Couples: map[string]int{}}
  157. fmap := shotness.files[nodeSummary.File]
  158. if fmap == nil {
  159. fmap = map[string]*nodeShotness{}
  160. }
  161. fmap[key] = shotness.nodes[key]
  162. shotness.files[nodeSummary.File] = fmap
  163. } else if !exists { // in case there are removals and additions in the same node
  164. shotness.nodes[key].Count = count + 1
  165. }
  166. }
  167. for _, change := range changesList {
  168. if change.After == nil {
  169. for key, summary := range shotness.files[change.Change.From.Name] {
  170. for subkey := range summary.Couples {
  171. delete(shotness.nodes[subkey].Couples, key)
  172. }
  173. }
  174. for key := range shotness.files[change.Change.From.Name] {
  175. delete(shotness.nodes, key)
  176. }
  177. delete(shotness.files, change.Change.From.Name)
  178. continue
  179. }
  180. toName := change.Change.To.Name
  181. if change.Before == nil {
  182. nodes, err := shotness.extractNodes(change.After)
  183. if err != nil {
  184. log.Printf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  185. commit.Hash.String(), toName, err.Error())
  186. continue
  187. }
  188. for name, node := range nodes {
  189. addNode(name, node, toName)
  190. }
  191. continue
  192. }
  193. // Before -> After
  194. if change.Change.From.Name != toName {
  195. // renamed
  196. oldFile := shotness.files[change.Change.From.Name]
  197. newFile := map[string]*nodeShotness{}
  198. shotness.files[toName] = newFile
  199. for oldKey, ns := range oldFile {
  200. ns.Summary.File = toName
  201. newKey := ns.Summary.String()
  202. newFile[newKey] = ns
  203. shotness.nodes[newKey] = ns
  204. for coupleKey, count := range ns.Couples {
  205. coupleCouples := shotness.nodes[coupleKey].Couples
  206. delete(coupleCouples, oldKey)
  207. coupleCouples[newKey] = count
  208. }
  209. }
  210. // deferred cleanup is needed
  211. for key := range oldFile {
  212. delete(shotness.nodes, key)
  213. }
  214. delete(shotness.files, change.Change.From.Name)
  215. }
  216. // pass through old UAST
  217. // pass through new UAST
  218. nodesBefore, err := shotness.extractNodes(change.Before)
  219. if err != nil {
  220. log.Printf("Shotness: commit ^%s file %s failed to filter UAST: %s\n",
  221. commit.Hash.String(), change.Change.From.Name, err.Error())
  222. continue
  223. }
  224. reversedNodesBefore := reverseNodeMap(nodesBefore)
  225. nodesAfter, err := shotness.extractNodes(change.After)
  226. if err != nil {
  227. log.Printf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  228. commit.Hash.String(), toName, err.Error())
  229. continue
  230. }
  231. reversedNodesAfter := reverseNodeMap(nodesAfter)
  232. genLine2Node := func(nodes map[string]*uast.Node, linesNum int) [][]*uast.Node {
  233. res := make([][]*uast.Node, linesNum)
  234. for _, node := range nodes {
  235. if node.StartPosition == nil {
  236. continue
  237. }
  238. startLine := node.StartPosition.Line
  239. endLine := node.StartPosition.Line
  240. if node.EndPosition != nil && node.EndPosition.Line > node.StartPosition.Line {
  241. endLine = node.EndPosition.Line
  242. } else {
  243. // we need to determine node.EndPosition.Line
  244. uast_items.VisitEachNode(node, func(child *uast.Node) {
  245. if child.StartPosition != nil {
  246. candidate := child.StartPosition.Line
  247. if child.EndPosition != nil {
  248. candidate = child.EndPosition.Line
  249. }
  250. if candidate > endLine {
  251. endLine = candidate
  252. }
  253. }
  254. })
  255. }
  256. for l := startLine; l <= endLine; l++ {
  257. lineNodes := res[l-1]
  258. if lineNodes == nil {
  259. lineNodes = []*uast.Node{}
  260. }
  261. lineNodes = append(lineNodes, node)
  262. res[l-1] = lineNodes
  263. }
  264. }
  265. return res
  266. }
  267. diff := diffs[toName]
  268. line2nodeBefore := genLine2Node(nodesBefore, diff.OldLinesOfCode)
  269. line2nodeAfter := genLine2Node(nodesAfter, diff.NewLinesOfCode)
  270. // Scan through all the edits. Given the line numbers, get the list of active nodes
  271. // and add them.
  272. var lineNumBefore, lineNumAfter int
  273. for _, edit := range diff.Diffs {
  274. size := utf8.RuneCountInString(edit.Text)
  275. switch edit.Type {
  276. case diffmatchpatch.DiffDelete:
  277. for l := lineNumBefore; l < lineNumBefore+size; l++ {
  278. nodes := line2nodeBefore[l]
  279. for _, node := range nodes {
  280. // toName because we handled a possible rename before
  281. addNode(reversedNodesBefore[node], node, toName)
  282. }
  283. }
  284. lineNumBefore += size
  285. case diffmatchpatch.DiffInsert:
  286. for l := lineNumAfter; l < lineNumAfter+size; l++ {
  287. nodes := line2nodeAfter[l]
  288. for _, node := range nodes {
  289. addNode(reversedNodesAfter[node], node, toName)
  290. }
  291. }
  292. lineNumAfter += size
  293. case diffmatchpatch.DiffEqual:
  294. lineNumBefore += size
  295. lineNumAfter += size
  296. }
  297. }
  298. }
  299. for keyi := range allNodes {
  300. for keyj := range allNodes {
  301. if keyi == keyj {
  302. continue
  303. }
  304. shotness.nodes[keyi].Couples[keyj]++
  305. }
  306. }
  307. return nil, nil
  308. }
  309. // Fork clones this PipelineItem.
  310. func (shotness *ShotnessAnalysis) Fork(n int) []core.PipelineItem {
  311. return core.ForkSamePipelineItem(shotness, n)
  312. }
  313. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  314. func (shotness *ShotnessAnalysis) Finalize() interface{} {
  315. result := ShotnessResult{
  316. Nodes: make([]NodeSummary, len(shotness.nodes)),
  317. Counters: make([]map[int]int, len(shotness.nodes)),
  318. }
  319. keys := make([]string, len(shotness.nodes))
  320. i := 0
  321. for key := range shotness.nodes {
  322. keys[i] = key
  323. i++
  324. }
  325. sort.Strings(keys)
  326. reverseKeys := map[string]int{}
  327. for i, key := range keys {
  328. reverseKeys[key] = i
  329. }
  330. for i, key := range keys {
  331. node := shotness.nodes[key]
  332. result.Nodes[i] = node.Summary
  333. counter := map[int]int{}
  334. result.Counters[i] = counter
  335. counter[i] = node.Count
  336. for ck, val := range node.Couples {
  337. counter[reverseKeys[ck]] = val
  338. }
  339. }
  340. return result
  341. }
  342. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  343. // The text format is YAML and the bytes format is Protocol Buffers.
  344. func (shotness *ShotnessAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  345. shotnessResult := result.(ShotnessResult)
  346. if binary {
  347. return shotness.serializeBinary(&shotnessResult, writer)
  348. }
  349. shotness.serializeText(&shotnessResult, writer)
  350. return nil
  351. }
  352. func (shotness *ShotnessAnalysis) serializeText(result *ShotnessResult, writer io.Writer) {
  353. for i, summary := range result.Nodes {
  354. fmt.Fprintf(writer, " - name: %s\n file: %s\n internal_role: %s\n roles: [",
  355. summary.Name, summary.File, summary.InternalRole)
  356. for j, r := range summary.Roles {
  357. if j < len(summary.Roles)-1 {
  358. fmt.Fprintf(writer, "%d,", r)
  359. } else {
  360. fmt.Fprintf(writer, "%d]\n counters: {", r)
  361. }
  362. }
  363. keys := make([]int, len(result.Counters[i]))
  364. j := 0
  365. for key := range result.Counters[i] {
  366. keys[j] = key
  367. j++
  368. }
  369. sort.Ints(keys)
  370. j = 0
  371. for _, key := range keys {
  372. val := result.Counters[i][key]
  373. if j < len(result.Counters[i])-1 {
  374. fmt.Fprintf(writer, "\"%d\":%d,", key, val)
  375. } else {
  376. fmt.Fprintf(writer, "\"%d\":%d}\n", key, val)
  377. }
  378. j++
  379. }
  380. }
  381. }
  382. func (shotness *ShotnessAnalysis) serializeBinary(result *ShotnessResult, writer io.Writer) error {
  383. message := pb.ShotnessAnalysisResults{
  384. Records: make([]*pb.ShotnessRecord, len(result.Nodes)),
  385. }
  386. for i, summary := range result.Nodes {
  387. record := &pb.ShotnessRecord{
  388. Name: summary.Name,
  389. File: summary.File,
  390. InternalRole: summary.InternalRole,
  391. Roles: make([]int32, len(summary.Roles)),
  392. Counters: map[int32]int32{},
  393. }
  394. for j, r := range summary.Roles {
  395. record.Roles[j] = int32(r)
  396. }
  397. for key, val := range result.Counters[i] {
  398. record.Counters[int32(key)] = int32(val)
  399. }
  400. message.Records[i] = record
  401. }
  402. serialized, err := proto.Marshal(&message)
  403. if err != nil {
  404. return err
  405. }
  406. writer.Write(serialized)
  407. return nil
  408. }
  409. func (shotness *ShotnessAnalysis) extractNodes(root *uast.Node) (map[string]*uast.Node, error) {
  410. structs, err := tools.Filter(root, shotness.XpathStruct)
  411. if err != nil {
  412. return nil, err
  413. }
  414. // some structs may be inside other structs; we pick the outermost
  415. // otherwise due to UAST quirks there may be false positives
  416. internal := map[*uast.Node]bool{}
  417. for _, mainNode := range structs {
  418. if internal[mainNode] {
  419. continue
  420. }
  421. subs, err := tools.Filter(mainNode, shotness.XpathStruct)
  422. if err != nil {
  423. return nil, err
  424. }
  425. for _, sub := range subs {
  426. if sub != mainNode {
  427. internal[sub] = true
  428. }
  429. }
  430. }
  431. res := map[string]*uast.Node{}
  432. for _, node := range structs {
  433. if internal[node] {
  434. continue
  435. }
  436. nodeNames, err := tools.Filter(node, shotness.XpathName)
  437. if err != nil {
  438. return nil, err
  439. }
  440. if len(nodeNames) == 0 {
  441. continue
  442. }
  443. res[nodeNames[0].Token] = node
  444. }
  445. return res, nil
  446. }
  447. func reverseNodeMap(nodes map[string]*uast.Node) map[*uast.Node]string {
  448. res := map[*uast.Node]string{}
  449. for key, node := range nodes {
  450. res[node] = key
  451. }
  452. return res
  453. }
  454. func init() {
  455. core.Registry.Register(&ShotnessAnalysis{})
  456. }