shotness.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. package hercules
  2. import (
  3. "fmt"
  4. "io"
  5. "log"
  6. "sort"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/bblfsh/client-go.v2/tools"
  11. "gopkg.in/bblfsh/sdk.v1/uast"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing/object"
  14. "gopkg.in/src-d/hercules.v3/pb"
  15. )
  16. // ShotnessAnalysis contains the intermediate state which is mutated by Consume(). It should implement
  17. // LeafPipelineItem.
  18. type ShotnessAnalysis struct {
  19. XpathStruct string
  20. XpathName string
  21. nodes map[string]*nodeShotness
  22. files map[string]map[string]*nodeShotness
  23. }
  24. const (
  25. // ConfigShotnessXpathStruct is the name of the configuration option (ShotnessAnalysis.Configure())
  26. // which sets the UAST XPath to choose the analysed nodes.
  27. ConfigShotnessXpathStruct = "Shotness.XpathStruct"
  28. // ConfigShotnessXpathName is the name of the configuration option (ShotnessAnalysis.Configure())
  29. // which sets the UAST XPath to find the name of the nodes chosen by ConfigShotnessXpathStruct.
  30. // These XPath-s can be different for some languages.
  31. ConfigShotnessXpathName = "Shotness.XpathName"
  32. // DefaultShotnessXpathStruct is the default UAST XPath to choose the analysed nodes.
  33. // It extracts functions.
  34. DefaultShotnessXpathStruct = "//*[@roleFunction and @roleDeclaration]"
  35. // DefaultShotnessXpathName is the default UAST XPath to choose the names of the analysed nodes.
  36. // It looks at the current tree level and at the immediate children.
  37. DefaultShotnessXpathName = "/*[@roleFunction and @roleIdentifier and @roleName] | /*/*[@roleFunction and @roleIdentifier and @roleName]"
  38. )
  39. type nodeShotness struct {
  40. Count int
  41. Summary NodeSummary
  42. Couples map[string]int
  43. }
  44. // NodeSummary carries the node attributes which annotate the "shotness" analysis' counters.
  45. // These attributes are supposed to uniquely identify each node.
  46. type NodeSummary struct {
  47. InternalRole string
  48. Roles []uast.Role
  49. Name string
  50. File string
  51. }
  52. // ShotnessResult is returned by ShotnessAnalysis.Finalize() and represents the analysis result.
  53. type ShotnessResult struct {
  54. Nodes []NodeSummary
  55. Counters []map[int]int
  56. }
  57. func (node NodeSummary) String() string {
  58. return node.InternalRole + "_" + node.Name + "_" + node.File
  59. }
  60. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  61. func (shotness *ShotnessAnalysis) Name() string {
  62. return "Shotness"
  63. }
  64. // Provides returns the list of names of entities which are produced by this PipelineItem.
  65. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  66. // to this list. Also used by hercules.Registry to build the global map of providers.
  67. func (shotness *ShotnessAnalysis) Provides() []string {
  68. return []string{}
  69. }
  70. // Requires returns the list of names of entities which are needed by this PipelineItem.
  71. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  72. // entities are Provides() upstream.
  73. func (shotness *ShotnessAnalysis) Requires() []string {
  74. arr := [...]string{DependencyFileDiff, DependencyUastChanges}
  75. return arr[:]
  76. }
  77. // Features which must be enabled for this PipelineItem to be automatically inserted into the DAG.
  78. func (shotness *ShotnessAnalysis) Features() []string {
  79. arr := [...]string{FeatureUast}
  80. return arr[:]
  81. }
  82. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  83. func (shotness *ShotnessAnalysis) ListConfigurationOptions() []ConfigurationOption {
  84. opts := [...]ConfigurationOption{{
  85. Name: ConfigShotnessXpathStruct,
  86. Description: "UAST XPath query to use for filtering the nodes.",
  87. Flag: "shotness-xpath-struct",
  88. Type: StringConfigurationOption,
  89. Default: DefaultShotnessXpathStruct}, {
  90. Name: ConfigShotnessXpathName,
  91. Description: "UAST XPath query to determine the names of the filtered nodes.",
  92. Flag: "shotness-xpath-name",
  93. Type: StringConfigurationOption,
  94. Default: DefaultShotnessXpathName},
  95. }
  96. return opts[:]
  97. }
  98. // Flag returns the command line switch which activates the analysis.
  99. func (shotness *ShotnessAnalysis) Flag() string {
  100. return "shotness"
  101. }
  102. // Configure sets the properties previously published by ListConfigurationOptions().
  103. func (shotness *ShotnessAnalysis) Configure(facts map[string]interface{}) {
  104. if val, exists := facts[ConfigShotnessXpathStruct]; exists {
  105. shotness.XpathStruct = val.(string)
  106. } else {
  107. shotness.XpathStruct = DefaultShotnessXpathStruct
  108. }
  109. if val, exists := facts[ConfigShotnessXpathName]; exists {
  110. shotness.XpathName = val.(string)
  111. } else {
  112. shotness.XpathName = DefaultShotnessXpathName
  113. }
  114. }
  115. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  116. // calls. The repository which is going to be analysed is supplied as an argument.
  117. func (shotness *ShotnessAnalysis) Initialize(repository *git.Repository) {
  118. shotness.nodes = map[string]*nodeShotness{}
  119. shotness.files = map[string]map[string]*nodeShotness{}
  120. }
  121. // Consume runs this PipelineItem on the next commit data.
  122. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  123. // Additionally, "commit" is always present there and represents the analysed *object.Commit.
  124. // This function returns the mapping with analysis results. The keys must be the same as
  125. // in Provides(). If there was an error, nil is returned.
  126. func (shotness *ShotnessAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  127. commit := deps["commit"].(*object.Commit)
  128. changesList := deps[DependencyUastChanges].([]UASTChange)
  129. diffs := deps[DependencyFileDiff].(map[string]FileDiffData)
  130. allNodes := map[string]bool{}
  131. addNode := func(name string, node *uast.Node, fileName string) {
  132. nodeSummary := NodeSummary{
  133. InternalRole: node.InternalType,
  134. Roles: node.Roles,
  135. Name: name,
  136. File: fileName,
  137. }
  138. key := nodeSummary.String()
  139. exists := allNodes[key]
  140. allNodes[key] = true
  141. var count int
  142. if ns := shotness.nodes[key]; ns != nil {
  143. count = ns.Count
  144. }
  145. if count == 0 {
  146. shotness.nodes[key] = &nodeShotness{
  147. Summary: nodeSummary, Count: 1, Couples: map[string]int{}}
  148. fmap := shotness.files[nodeSummary.File]
  149. if fmap == nil {
  150. fmap = map[string]*nodeShotness{}
  151. }
  152. fmap[key] = shotness.nodes[key]
  153. shotness.files[nodeSummary.File] = fmap
  154. } else if !exists { // in case there are removals and additions in the same node
  155. shotness.nodes[key].Count = count + 1
  156. }
  157. }
  158. for _, change := range changesList {
  159. if change.After == nil {
  160. for key, summary := range shotness.files[change.Change.From.Name] {
  161. for subkey := range summary.Couples {
  162. delete(shotness.nodes[subkey].Couples, key)
  163. }
  164. }
  165. for key := range shotness.files[change.Change.From.Name] {
  166. delete(shotness.nodes, key)
  167. }
  168. delete(shotness.files, change.Change.From.Name)
  169. continue
  170. }
  171. toName := change.Change.To.Name
  172. if change.Before == nil {
  173. nodes, err := shotness.extractNodes(change.After)
  174. if err != nil {
  175. log.Printf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  176. commit.Hash.String(), toName, err.Error())
  177. continue
  178. }
  179. for name, node := range nodes {
  180. addNode(name, node, toName)
  181. }
  182. continue
  183. }
  184. // Before -> After
  185. if change.Change.From.Name != toName {
  186. // renamed
  187. oldFile := shotness.files[change.Change.From.Name]
  188. newFile := map[string]*nodeShotness{}
  189. shotness.files[toName] = newFile
  190. for oldKey, ns := range oldFile {
  191. ns.Summary.File = toName
  192. newKey := ns.Summary.String()
  193. newFile[newKey] = ns
  194. shotness.nodes[newKey] = ns
  195. for coupleKey, count := range ns.Couples {
  196. coupleCouples := shotness.nodes[coupleKey].Couples
  197. delete(coupleCouples, oldKey)
  198. coupleCouples[newKey] = count
  199. }
  200. }
  201. // deferred cleanup is needed
  202. for key := range oldFile {
  203. delete(shotness.nodes, key)
  204. }
  205. delete(shotness.files, change.Change.From.Name)
  206. }
  207. // pass through old UAST
  208. // pass through new UAST
  209. nodesBefore, err := shotness.extractNodes(change.Before)
  210. if err != nil {
  211. log.Printf("Shotness: commit ^%s file %s failed to filter UAST: %s\n",
  212. commit.Hash.String(), change.Change.From.Name, err.Error())
  213. continue
  214. }
  215. reversedNodesBefore := reverseNodeMap(nodesBefore)
  216. nodesAfter, err := shotness.extractNodes(change.After)
  217. if err != nil {
  218. log.Printf("Shotness: commit %s file %s failed to filter UAST: %s\n",
  219. commit.Hash.String(), toName, err.Error())
  220. continue
  221. }
  222. reversedNodesAfter := reverseNodeMap(nodesAfter)
  223. genLine2Node := func(nodes map[string]*uast.Node, linesNum int) [][]*uast.Node {
  224. res := make([][]*uast.Node, linesNum)
  225. for _, node := range nodes {
  226. if node.StartPosition == nil {
  227. continue
  228. }
  229. startLine := node.StartPosition.Line
  230. endLine := node.StartPosition.Line
  231. if node.EndPosition != nil && node.EndPosition.Line > node.StartPosition.Line {
  232. endLine = node.EndPosition.Line
  233. } else {
  234. // we need to determine node.EndPosition.Line
  235. VisitEachNode(node, func(child *uast.Node) {
  236. if child.StartPosition != nil {
  237. candidate := child.StartPosition.Line
  238. if child.EndPosition != nil {
  239. candidate = child.EndPosition.Line
  240. }
  241. if candidate > endLine {
  242. endLine = candidate
  243. }
  244. }
  245. })
  246. }
  247. for l := startLine; l <= endLine; l++ {
  248. lineNodes := res[l-1]
  249. if lineNodes == nil {
  250. lineNodes = []*uast.Node{}
  251. }
  252. lineNodes = append(lineNodes, node)
  253. res[l-1] = lineNodes
  254. }
  255. }
  256. return res
  257. }
  258. diff := diffs[toName]
  259. line2nodeBefore := genLine2Node(nodesBefore, diff.OldLinesOfCode)
  260. line2nodeAfter := genLine2Node(nodesAfter, diff.NewLinesOfCode)
  261. // Scan through all the edits. Given the line numbers, get the list of active nodes
  262. // and add them.
  263. var lineNumBefore, lineNumAfter int
  264. for _, edit := range diff.Diffs {
  265. size := utf8.RuneCountInString(edit.Text)
  266. switch edit.Type {
  267. case diffmatchpatch.DiffDelete:
  268. for l := lineNumBefore; l < lineNumBefore+size; l++ {
  269. nodes := line2nodeBefore[l]
  270. for _, node := range nodes {
  271. // toName because we handled a possible rename before
  272. addNode(reversedNodesBefore[node], node, toName)
  273. }
  274. }
  275. lineNumBefore += size
  276. case diffmatchpatch.DiffInsert:
  277. for l := lineNumAfter; l < lineNumAfter+size; l++ {
  278. nodes := line2nodeAfter[l]
  279. for _, node := range nodes {
  280. addNode(reversedNodesAfter[node], node, toName)
  281. }
  282. }
  283. lineNumAfter += size
  284. case diffmatchpatch.DiffEqual:
  285. lineNumBefore += size
  286. lineNumAfter += size
  287. }
  288. }
  289. }
  290. for keyi := range allNodes {
  291. for keyj := range allNodes {
  292. if keyi == keyj {
  293. continue
  294. }
  295. shotness.nodes[keyi].Couples[keyj]++
  296. }
  297. }
  298. return nil, nil
  299. }
  300. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  301. func (shotness *ShotnessAnalysis) Finalize() interface{} {
  302. result := ShotnessResult{
  303. Nodes: make([]NodeSummary, len(shotness.nodes)),
  304. Counters: make([]map[int]int, len(shotness.nodes)),
  305. }
  306. keys := make([]string, len(shotness.nodes))
  307. i := 0
  308. for key := range shotness.nodes {
  309. keys[i] = key
  310. i++
  311. }
  312. sort.Strings(keys)
  313. reverseKeys := map[string]int{}
  314. for i, key := range keys {
  315. reverseKeys[key] = i
  316. }
  317. for i, key := range keys {
  318. node := shotness.nodes[key]
  319. result.Nodes[i] = node.Summary
  320. counter := map[int]int{}
  321. result.Counters[i] = counter
  322. counter[i] = node.Count
  323. for ck, val := range node.Couples {
  324. counter[reverseKeys[ck]] = val
  325. }
  326. }
  327. return result
  328. }
  329. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  330. // The text format is YAML and the bytes format is Protocol Buffers.
  331. func (shotness *ShotnessAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  332. shotnessResult := result.(ShotnessResult)
  333. if binary {
  334. return shotness.serializeBinary(&shotnessResult, writer)
  335. }
  336. shotness.serializeText(&shotnessResult, writer)
  337. return nil
  338. }
  339. func (shotness *ShotnessAnalysis) serializeText(result *ShotnessResult, writer io.Writer) {
  340. for i, summary := range result.Nodes {
  341. fmt.Fprintf(writer, " - name: %s\n file: %s\n internal_role: %s\n roles: [",
  342. summary.Name, summary.File, summary.InternalRole)
  343. for j, r := range summary.Roles {
  344. if j < len(summary.Roles)-1 {
  345. fmt.Fprintf(writer, "%d,", r)
  346. } else {
  347. fmt.Fprintf(writer, "%d]\n counters: {", r)
  348. }
  349. }
  350. keys := make([]int, len(result.Counters[i]))
  351. j := 0
  352. for key := range result.Counters[i] {
  353. keys[j] = key
  354. j++
  355. }
  356. sort.Ints(keys)
  357. j = 0
  358. for _, key := range keys {
  359. val := result.Counters[i][key]
  360. if j < len(result.Counters[i])-1 {
  361. fmt.Fprintf(writer, "\"%d\":%d,", key, val)
  362. } else {
  363. fmt.Fprintf(writer, "\"%d\":%d}\n", key, val)
  364. }
  365. j++
  366. }
  367. }
  368. }
  369. func (shotness *ShotnessAnalysis) serializeBinary(result *ShotnessResult, writer io.Writer) error {
  370. message := pb.ShotnessAnalysisResults{
  371. Records: make([]*pb.ShotnessRecord, len(result.Nodes)),
  372. }
  373. for i, summary := range result.Nodes {
  374. record := &pb.ShotnessRecord{
  375. Name: summary.Name,
  376. File: summary.File,
  377. InternalRole: summary.InternalRole,
  378. Roles: make([]int32, len(summary.Roles)),
  379. Counters: map[int32]int32{},
  380. }
  381. for j, r := range summary.Roles {
  382. record.Roles[j] = int32(r)
  383. }
  384. for key, val := range result.Counters[i] {
  385. record.Counters[int32(key)] = int32(val)
  386. }
  387. message.Records[i] = record
  388. }
  389. serialized, err := proto.Marshal(&message)
  390. if err != nil {
  391. return err
  392. }
  393. writer.Write(serialized)
  394. return nil
  395. }
  396. func (shotness *ShotnessAnalysis) extractNodes(root *uast.Node) (map[string]*uast.Node, error) {
  397. structs, err := tools.Filter(root, shotness.XpathStruct)
  398. if err != nil {
  399. return nil, err
  400. }
  401. // some structs may be inside other structs; we pick the outermost
  402. // otherwise due to UAST quirks there may be false positives
  403. internal := map[*uast.Node]bool{}
  404. for _, mainNode := range structs {
  405. if internal[mainNode] {
  406. continue
  407. }
  408. subs, err := tools.Filter(mainNode, shotness.XpathStruct)
  409. if err != nil {
  410. return nil, err
  411. }
  412. for _, sub := range subs {
  413. if sub != mainNode {
  414. internal[sub] = true
  415. }
  416. }
  417. }
  418. res := map[string]*uast.Node{}
  419. for _, node := range structs {
  420. if internal[node] {
  421. continue
  422. }
  423. nodeNames, err := tools.Filter(node, shotness.XpathName)
  424. if err != nil {
  425. return nil, err
  426. }
  427. if len(nodeNames) == 0 {
  428. continue
  429. }
  430. res[nodeNames[0].Token] = node
  431. }
  432. return res, nil
  433. }
  434. func reverseNodeMap(nodes map[string]*uast.Node) map[*uast.Node]string {
  435. res := map[*uast.Node]string{}
  436. for key, node := range nodes {
  437. res[node] = key
  438. }
  439. return res
  440. }
  441. func init() {
  442. Registry.Register(&ShotnessAnalysis{})
  443. }