shotness.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. package hercules
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "sort"
  7. "unicode/utf8"
  8. "github.com/gogo/protobuf/proto"
  9. "github.com/sergi/go-diff/diffmatchpatch"
  10. "gopkg.in/bblfsh/client-go.v2/tools"
  11. "gopkg.in/bblfsh/sdk.v1/uast"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing/object"
  14. "gopkg.in/src-d/hercules.v3/pb"
  15. )
  16. // ShotnessAnalysis contains the intermediate state which is mutated by Consume(). It should implement
  17. // LeafPipelineItem.
  18. type ShotnessAnalysis struct {
  19. XpathStruct string
  20. XpathName string
  21. nodes map[string]*nodeShotness
  22. files map[string]map[string]*nodeShotness
  23. }
  24. const (
  25. ConfigShotnessXpathStruct = "Shotness.XpathStruct"
  26. ConfigShotnessXpathName = "Shotness.XpathName"
  27. DefaultShotnessXpathStruct = "//*[@roleFunction and @roleDeclaration]"
  28. DefaultShotnessXpathName = "/*[@roleFunction and @roleIdentifier and @roleName] | /*/*[@roleFunction and @roleIdentifier and @roleName]"
  29. )
  30. type nodeShotness struct {
  31. Count int
  32. Summary NodeSummary
  33. Couples map[string]int
  34. }
  35. type NodeSummary struct {
  36. InternalRole string
  37. Roles []uast.Role
  38. Name string
  39. File string
  40. }
  41. // ShotnessAnalysisResult is returned by Finalize() and represents the analysis result.
  42. type ShotnessAnalysisResult struct {
  43. Nodes []NodeSummary
  44. Counters []map[int]int
  45. }
  46. func (node NodeSummary) String() string {
  47. return node.InternalRole + "_" + node.Name + "_" + node.File
  48. }
  49. func (shotness *ShotnessAnalysis) Name() string {
  50. return "Shotness"
  51. }
  52. func (shotness *ShotnessAnalysis) Provides() []string {
  53. return []string{}
  54. }
  55. func (ref *ShotnessAnalysis) Features() []string {
  56. arr := [...]string{FeatureUast}
  57. return arr[:]
  58. }
  59. func (shotness *ShotnessAnalysis) Requires() []string {
  60. arr := [...]string{DependencyFileDiff, DependencyUastChanges}
  61. return arr[:]
  62. }
  63. // ListConfigurationOptions tells the engine which parameters can be changed through the command
  64. // line.
  65. func (shotness *ShotnessAnalysis) ListConfigurationOptions() []ConfigurationOption {
  66. opts := [...]ConfigurationOption{{
  67. Name: ConfigShotnessXpathStruct,
  68. Description: "UAST XPath query to use for filtering the nodes.",
  69. Flag: "shotness-xpath-struct",
  70. Type: StringConfigurationOption,
  71. Default: DefaultShotnessXpathStruct}, {
  72. Name: ConfigShotnessXpathName,
  73. Description: "UAST XPath query to determine the names of the filtered nodes.",
  74. Flag: "shotness-xpath-name",
  75. Type: StringConfigurationOption,
  76. Default: DefaultShotnessXpathName},
  77. }
  78. return opts[:]
  79. }
  80. // Flag returns the command line switch which activates the analysis.
  81. func (shotness *ShotnessAnalysis) Flag() string {
  82. return "shotness"
  83. }
  84. // Configure applies the parameters specified in the command line.
  85. func (shotness *ShotnessAnalysis) Configure(facts map[string]interface{}) {
  86. if val, exists := facts[ConfigShotnessXpathStruct]; exists {
  87. shotness.XpathStruct = val.(string)
  88. } else {
  89. shotness.XpathStruct = DefaultShotnessXpathStruct
  90. }
  91. if val, exists := facts[ConfigShotnessXpathName]; exists {
  92. shotness.XpathName = val.(string)
  93. } else {
  94. shotness.XpathName = DefaultShotnessXpathName
  95. }
  96. }
  97. // Initialize resets the internal temporary data structures and prepares the object for Consume().
  98. func (shotness *ShotnessAnalysis) Initialize(repository *git.Repository) {
  99. shotness.nodes = map[string]*nodeShotness{}
  100. shotness.files = map[string]map[string]*nodeShotness{}
  101. }
  102. // Consume is called for every commit in the sequence.
  103. func (shotness *ShotnessAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  104. commit := deps["commit"].(*object.Commit)
  105. changesList := deps[DependencyUastChanges].([]UASTChange)
  106. diffs := deps[DependencyFileDiff].(map[string]FileDiffData)
  107. allNodes := map[string]bool{}
  108. addNode := func(name string, node *uast.Node, fileName string) {
  109. nodeSummary := NodeSummary{
  110. InternalRole: node.InternalType,
  111. Roles: node.Roles,
  112. Name: name,
  113. File: fileName,
  114. }
  115. key := nodeSummary.String()
  116. exists := allNodes[key]
  117. allNodes[key] = true
  118. var count int
  119. if ns := shotness.nodes[key]; ns != nil {
  120. count = ns.Count
  121. }
  122. if count == 0 {
  123. shotness.nodes[key] = &nodeShotness{
  124. Summary: nodeSummary, Count: 1, Couples: map[string]int{}}
  125. fmap := shotness.files[nodeSummary.File]
  126. if fmap == nil {
  127. fmap = map[string]*nodeShotness{}
  128. }
  129. fmap[key] = shotness.nodes[key]
  130. shotness.files[nodeSummary.File] = fmap
  131. } else if !exists { // in case there are removals and additions in the same node
  132. shotness.nodes[key].Count = count + 1
  133. }
  134. }
  135. for _, change := range changesList {
  136. if change.After == nil {
  137. for key, summary := range shotness.files[change.Change.From.Name] {
  138. for subkey := range summary.Couples {
  139. delete(shotness.nodes[subkey].Couples, key)
  140. }
  141. }
  142. for key := range shotness.files[change.Change.From.Name] {
  143. delete(shotness.nodes, key)
  144. }
  145. delete(shotness.files, change.Change.From.Name)
  146. continue
  147. }
  148. toName := change.Change.To.Name
  149. if change.Before == nil {
  150. nodes, err := shotness.extractNodes(change.After)
  151. if err != nil {
  152. fmt.Fprintf(os.Stderr, "Shotness: commit %s file %s failed to filter UAST: %s\n",
  153. commit.Hash.String(), toName, err.Error())
  154. continue
  155. }
  156. for name, node := range nodes {
  157. addNode(name, node, toName)
  158. }
  159. continue
  160. }
  161. // Before -> After
  162. if change.Change.From.Name != toName {
  163. // renamed
  164. oldFile := shotness.files[change.Change.From.Name]
  165. newFile := map[string]*nodeShotness{}
  166. shotness.files[toName] = newFile
  167. for oldKey, ns := range oldFile {
  168. ns.Summary.File = toName
  169. newKey := ns.Summary.String()
  170. newFile[newKey] = ns
  171. shotness.nodes[newKey] = ns
  172. for coupleKey, count := range ns.Couples {
  173. coupleCouples := shotness.nodes[coupleKey].Couples
  174. delete(coupleCouples, oldKey)
  175. coupleCouples[newKey] = count
  176. }
  177. }
  178. // deferred cleanup is needed
  179. for key := range oldFile {
  180. delete(shotness.nodes, key)
  181. }
  182. delete(shotness.files, change.Change.From.Name)
  183. }
  184. // pass through old UAST
  185. // pass through new UAST
  186. nodesBefore, err := shotness.extractNodes(change.Before)
  187. if err != nil {
  188. fmt.Fprintf(os.Stderr, "Shotness: commit ^%s file %s failed to filter UAST: %s\n",
  189. commit.Hash.String(), change.Change.From.Name, err.Error())
  190. continue
  191. }
  192. reversedNodesBefore := reverseNodeMap(nodesBefore)
  193. nodesAfter, err := shotness.extractNodes(change.After)
  194. if err != nil {
  195. fmt.Fprintf(os.Stderr, "Shotness: commit %s file %s failed to filter UAST: %s\n",
  196. commit.Hash.String(), toName, err.Error())
  197. continue
  198. }
  199. reversedNodesAfter := reverseNodeMap(nodesAfter)
  200. genLine2Node := func(nodes map[string]*uast.Node, linesNum int) [][]*uast.Node {
  201. res := make([][]*uast.Node, linesNum)
  202. for _, node := range nodes {
  203. if node.StartPosition != nil && node.EndPosition != nil {
  204. for l := node.StartPosition.Line; l <= node.EndPosition.Line; l++ {
  205. lineNodes := res[l-1]
  206. if lineNodes == nil {
  207. lineNodes = []*uast.Node{}
  208. }
  209. lineNodes = append(lineNodes, node)
  210. res[l-1] = lineNodes
  211. }
  212. }
  213. }
  214. return res
  215. }
  216. diff := diffs[toName]
  217. line2nodeBefore := genLine2Node(nodesBefore, diff.OldLinesOfCode)
  218. line2nodeAfter := genLine2Node(nodesAfter, diff.NewLinesOfCode)
  219. // Scan through all the edits. Given the line numbers, get the list of active nodes
  220. // and add them.
  221. var lineNumBefore, lineNumAfter int
  222. for _, edit := range diff.Diffs {
  223. size := utf8.RuneCountInString(edit.Text)
  224. switch edit.Type {
  225. case diffmatchpatch.DiffDelete:
  226. for l := lineNumBefore; l < lineNumBefore+size; l++ {
  227. nodes := line2nodeBefore[l]
  228. for _, node := range nodes {
  229. // toName because we handled a possible rename before
  230. addNode(reversedNodesBefore[node], node, toName)
  231. }
  232. }
  233. lineNumBefore += size
  234. case diffmatchpatch.DiffInsert:
  235. for l := lineNumAfter; l < lineNumAfter+size; l++ {
  236. nodes := line2nodeAfter[l]
  237. for _, node := range nodes {
  238. addNode(reversedNodesAfter[node], node, toName)
  239. }
  240. }
  241. lineNumAfter += size
  242. case diffmatchpatch.DiffEqual:
  243. lineNumBefore += size
  244. lineNumAfter += size
  245. }
  246. }
  247. }
  248. for keyi := range allNodes {
  249. for keyj := range allNodes {
  250. if keyi == keyj {
  251. continue
  252. }
  253. shotness.nodes[keyi].Couples[keyj]++
  254. }
  255. }
  256. return nil, nil
  257. }
  258. // Finalize produces the result of the analysis. No more Consume() calls are expected afterwards.
  259. func (shotness *ShotnessAnalysis) Finalize() interface{} {
  260. result := ShotnessAnalysisResult{
  261. Nodes: make([]NodeSummary, len(shotness.nodes)),
  262. Counters: make([]map[int]int, len(shotness.nodes)),
  263. }
  264. keys := make([]string, len(shotness.nodes))
  265. i := 0
  266. for key := range shotness.nodes {
  267. keys[i] = key
  268. i++
  269. }
  270. sort.Strings(keys)
  271. reverseKeys := map[string]int{}
  272. for i, key := range keys {
  273. reverseKeys[key] = i
  274. }
  275. for i, key := range keys {
  276. node := shotness.nodes[key]
  277. result.Nodes[i] = node.Summary
  278. counter := map[int]int{}
  279. result.Counters[i] = counter
  280. counter[i] = node.Count
  281. for ck, val := range node.Couples {
  282. counter[reverseKeys[ck]] = val
  283. }
  284. }
  285. return result
  286. }
  287. // Serialize converts the result from Finalize() to either Protocol Buffers or YAML.
  288. func (shotness *ShotnessAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  289. shotnessResult := result.(ShotnessAnalysisResult)
  290. if binary {
  291. return shotness.serializeBinary(&shotnessResult, writer)
  292. }
  293. shotness.serializeText(&shotnessResult, writer)
  294. return nil
  295. }
  296. func (shotness *ShotnessAnalysis) serializeText(result *ShotnessAnalysisResult, writer io.Writer) {
  297. for i, summary := range result.Nodes {
  298. fmt.Fprintf(writer, " - name: %s\n file: %s\n ir: %s\n roles: [",
  299. summary.Name, summary.File, summary.InternalRole)
  300. for j, r := range summary.Roles {
  301. if j < len(summary.Roles)-1 {
  302. fmt.Fprintf(writer, "%d,", r)
  303. } else {
  304. fmt.Fprintf(writer, "%d]\n counters: {", r)
  305. }
  306. }
  307. keys := make([]int, len(result.Counters[i]))
  308. j := 0
  309. for key := range result.Counters[i] {
  310. keys[j] = key
  311. j++
  312. }
  313. sort.Ints(keys)
  314. j = 0
  315. for _, key := range keys {
  316. val := result.Counters[i][key]
  317. if j < len(result.Counters[i])-1 {
  318. fmt.Fprintf(writer, "%d:%d,", key, val)
  319. } else {
  320. fmt.Fprintf(writer, "%d:%d}\n", key, val)
  321. }
  322. j++
  323. }
  324. }
  325. }
  326. func (shotness *ShotnessAnalysis) serializeBinary(result *ShotnessAnalysisResult, writer io.Writer) error {
  327. message := pb.ShotnessAnalysisResultMessage{
  328. Records: make([]*pb.ShotnessRecord, len(result.Nodes)),
  329. }
  330. for i, summary := range result.Nodes {
  331. record := &pb.ShotnessRecord{
  332. Name: summary.Name,
  333. File: summary.File,
  334. InternalRole: summary.InternalRole,
  335. Roles: make([]int32, len(summary.Roles)),
  336. Counters: map[int32]int32{},
  337. }
  338. for j, r := range summary.Roles {
  339. record.Roles[j] = int32(r)
  340. }
  341. for key, val := range result.Counters[i] {
  342. record.Counters[int32(key)] = int32(val)
  343. }
  344. message.Records[i] = record
  345. }
  346. serialized, err := proto.Marshal(&message)
  347. if err != nil {
  348. return err
  349. }
  350. writer.Write(serialized)
  351. return nil
  352. }
  353. func (shotness *ShotnessAnalysis) extractNodes(root *uast.Node) (map[string]*uast.Node, error) {
  354. structs, err := tools.Filter(root, shotness.XpathStruct)
  355. if err != nil {
  356. return nil, err
  357. }
  358. // some structs may be inside other structs; we pick the outermost
  359. // otherwise due to UAST quirks there may be false positives
  360. internal := map[*uast.Node]bool{}
  361. for _, mainNode := range structs {
  362. subs, err := tools.Filter(mainNode, shotness.XpathStruct)
  363. if err != nil {
  364. return nil, err
  365. }
  366. for _, sub := range subs {
  367. if sub != mainNode {
  368. internal[sub] = true
  369. }
  370. }
  371. }
  372. res := map[string]*uast.Node{}
  373. for _, node := range structs {
  374. if internal[node] {
  375. continue
  376. }
  377. nodeNames, err := tools.Filter(node, shotness.XpathName)
  378. if err != nil {
  379. return nil, err
  380. }
  381. if len(nodeNames) == 0 {
  382. continue
  383. }
  384. res[nodeNames[0].Token] = node
  385. }
  386. return res, nil
  387. }
  388. func reverseNodeMap(nodes map[string]*uast.Node) map[*uast.Node]string {
  389. res := map[*uast.Node]string{}
  390. for key, node := range nodes {
  391. res[node] = key
  392. }
  393. return res
  394. }
  395. func init() {
  396. Registry.Register(&ShotnessAnalysis{})
  397. }