diff_refiner.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. package hercules
  2. import (
  3. "unicode/utf8"
  4. "github.com/sergi/go-diff/diffmatchpatch"
  5. "gopkg.in/bblfsh/sdk.v1/uast"
  6. "gopkg.in/src-d/go-git.v4"
  7. )
  8. // FileDiffRefiner uses UASTs to improve the human interpretability of diffs.
  9. // It is a PipelineItem.
  10. // The idea behind this algorithm is simple: in case of multiple choices which are equally
  11. // optimal, choose the one which touches less AST nodes.
  12. type FileDiffRefiner struct {
  13. }
  14. func (ref *FileDiffRefiner) Name() string {
  15. return "FileDiffRefiner"
  16. }
  17. func (ref *FileDiffRefiner) Provides() []string {
  18. arr := [...]string{DependencyFileDiff}
  19. return arr[:]
  20. }
  21. func (ref *FileDiffRefiner) Requires() []string {
  22. arr := [...]string{DependencyFileDiff, DependencyUastChanges}
  23. return arr[:]
  24. }
  25. func (ref *FileDiffRefiner) Features() []string {
  26. arr := [...]string{FeatureUast}
  27. return arr[:]
  28. }
  29. func (ref *FileDiffRefiner) ListConfigurationOptions() []ConfigurationOption {
  30. return []ConfigurationOption{}
  31. }
  32. func (ref *FileDiffRefiner) Configure(facts map[string]interface{}) {}
  33. func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {
  34. }
  35. func (ref *FileDiffRefiner) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  36. changesList := deps[DependencyUastChanges].([]UASTChange)
  37. changes := map[string]UASTChange{}
  38. for _, change := range changesList {
  39. if change.Before != nil && change.After != nil {
  40. changes[change.Change.To.Name] = change
  41. }
  42. }
  43. diffs := deps[DependencyFileDiff].(map[string]FileDiffData)
  44. result := map[string]FileDiffData{}
  45. for fileName, oldDiff := range diffs {
  46. uastChange, exists := changes[fileName]
  47. if !exists {
  48. result[fileName] = oldDiff
  49. continue
  50. }
  51. suspicious := map[int][2]int{}
  52. line := 0
  53. for i, diff := range oldDiff.Diffs {
  54. if i == len(oldDiff.Diffs)-1 {
  55. break
  56. }
  57. if diff.Type == diffmatchpatch.DiffInsert &&
  58. oldDiff.Diffs[i+1].Type == diffmatchpatch.DiffEqual {
  59. matched := 0
  60. runesAdded := []rune(diff.Text)
  61. runesEqual := []rune(oldDiff.Diffs[i+1].Text)
  62. for ; matched < len(runesAdded) && matched < len(runesEqual) &&
  63. runesAdded[matched] == runesEqual[matched]; matched++ {
  64. }
  65. if matched > 0 {
  66. suspicious[i] = [2]int{line, matched}
  67. }
  68. }
  69. if diff.Type != diffmatchpatch.DiffDelete {
  70. line += utf8.RuneCountInString(diff.Text)
  71. }
  72. }
  73. if len(suspicious) == 0 {
  74. result[fileName] = oldDiff
  75. continue
  76. }
  77. line2node := make([][]*uast.Node, oldDiff.NewLinesOfCode)
  78. VisitEachNode(uastChange.After, func(node *uast.Node) {
  79. if node.StartPosition != nil && node.EndPosition != nil {
  80. for l := node.StartPosition.Line; l <= node.EndPosition.Line; l++ {
  81. nodes := line2node[l-1] // line starts with 1
  82. if nodes == nil {
  83. nodes = []*uast.Node{}
  84. }
  85. line2node[l-1] = append(nodes, node)
  86. }
  87. }
  88. })
  89. newDiff := FileDiffData{
  90. OldLinesOfCode: oldDiff.OldLinesOfCode,
  91. NewLinesOfCode: oldDiff.NewLinesOfCode,
  92. Diffs: []diffmatchpatch.Diff{},
  93. }
  94. skipNext := false
  95. for i, diff := range oldDiff.Diffs {
  96. if skipNext {
  97. skipNext = false
  98. continue
  99. }
  100. info, exists := suspicious[i]
  101. if !exists {
  102. newDiff.Diffs = append(newDiff.Diffs, diff)
  103. continue
  104. }
  105. line := info[0]
  106. matched := info[1]
  107. size := utf8.RuneCountInString(diff.Text)
  108. n1 := countNodesInInterval(line2node, line, line+size)
  109. n2 := countNodesInInterval(line2node, line+matched, line+size+matched)
  110. if n1 <= n2 {
  111. newDiff.Diffs = append(newDiff.Diffs, diff)
  112. continue
  113. }
  114. skipNext = true
  115. runes := []rune(diff.Text)
  116. newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
  117. Type: diffmatchpatch.DiffEqual, Text: string(runes[:matched]),
  118. })
  119. newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
  120. Type: diffmatchpatch.DiffInsert, Text: string(runes[matched:]) + string(runes[:matched]),
  121. })
  122. runes = []rune(oldDiff.Diffs[i+1].Text)
  123. if len(runes) > matched {
  124. newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
  125. Type: diffmatchpatch.DiffEqual, Text: string(runes[matched:]),
  126. })
  127. }
  128. }
  129. result[fileName] = newDiff
  130. }
  131. return map[string]interface{}{DependencyFileDiff: result}, nil
  132. }
  133. // VisitEachNode is a handy routine to execute a callback on every node in the subtree,
  134. // including the root itself. Depth first tree traversal.
  135. func VisitEachNode(root *uast.Node, payload func(*uast.Node)) {
  136. queue := []*uast.Node{}
  137. queue = append(queue, root)
  138. for len(queue) > 0 {
  139. node := queue[len(queue)-1]
  140. queue = queue[:len(queue)-1]
  141. payload(node)
  142. for _, child := range node.Children {
  143. queue = append(queue, child)
  144. }
  145. }
  146. }
  147. func countNodesInInterval(occupiedMap [][]*uast.Node, start, end int) int {
  148. nodes := map[*uast.Node]bool{}
  149. for i := start; i < end; i++ {
  150. for _, node := range occupiedMap[i] {
  151. nodes[node] = true
  152. }
  153. }
  154. return len(nodes)
  155. }
  156. func init() {
  157. Registry.Register(&FileDiffRefiner{})
  158. }