瀏覽代碼

Merge pull request #242 from vmarkovtsev/master

Refactor levenshtein and xpather
Vadim Markovtsev 6 年之前
父節點
當前提交
b6d8e21483

+ 5 - 5
internal/plumbing/levenshtein.go

@@ -2,15 +2,15 @@
 // Use of this source code is governed by a BSD-style
 // license.
 
-package plumbing
+package levenshtein
 
-// LevenshteinContext is the object which allows to calculate the Levenshtein distance
+// Context is the object which allows calculating the Levenshtein distance
 // with Distance() method. It is needed to ensure 0 memory allocations.
-type LevenshteinContext struct {
+type Context struct {
 	intSlice []int
 }
 
-func (c *LevenshteinContext) getIntSlice(l int) []int {
+func (c *Context) getIntSlice(l int) []int {
 	if cap(c.intSlice) < l {
 		c.intSlice = make([]int, l)
 	}
@@ -26,7 +26,7 @@ func (c *LevenshteinContext) getIntSlice(l int) []int {
 // This implementation is optimized to use O(min(m,n)) space.
 // It is based on the optimized C version found here:
 // http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
-func (c *LevenshteinContext) Distance(str1, str2 string) int {
+func (c *Context) Distance(str1, str2 string) int {
 	s1 := []rune(str1)
 	s2 := []rune(str2)
 

+ 4 - 4
internal/plumbing/levenshtein_test.go

@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license.
 
-package plumbing
+package levenshtein
 
 import (
 	"fmt"
@@ -29,7 +29,7 @@ var distanceTests = []struct {
 
 func TestDistance(t *testing.T) {
 
-	lev := &LevenshteinContext{}
+	lev := &Context{}
 
 	for index, distanceTest := range distanceTests {
 		result := lev.Distance(distanceTest.first, distanceTest.second)
@@ -49,7 +49,7 @@ func BenchmarkDistance(b *testing.B) {
 	b.ReportAllocs()
 	b.ResetTimer()
 
-	c := &LevenshteinContext{}
+	c := &Context{}
 
 	for i := 0; i < b.N; i++ {
 		total += c.Distance(s1, s2)
@@ -68,7 +68,7 @@ func BenchmarkDistanceOriginal(b *testing.B) {
 	b.ReportAllocs()
 	b.ResetTimer()
 
-	ctx := LevenshteinContext{}
+	ctx := Context{}
 	for i := 0; i < b.N; i++ {
 		total += ctx.Distance(s1, s2)
 	}

+ 2 - 1
internal/plumbing/renames.go

@@ -16,6 +16,7 @@ import (
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 	"gopkg.in/src-d/hercules.v9/internal"
 	"gopkg.in/src-d/hercules.v9/internal/core"
+	"gopkg.in/src-d/hercules.v9/internal/levenshtein"
 )
 
 // RenameAnalysis improves TreeDiff's results by searching for changed blobs under different
@@ -562,7 +563,7 @@ type candidateDistance struct {
 
 func sortRenameCandidates(candidates []int, origin string, nameGetter func(int) string) {
 	distances := make([]candidateDistance, len(candidates))
-	ctx := LevenshteinContext{}
+	ctx := levenshtein.Context{}
 	for i, x := range candidates {
 		name := filepath.Base(nameGetter(x))
 		distances[i] = candidateDistance{x, ctx.Distance(origin, name)}

+ 13 - 10
internal/plumbing/uast/changes_xpather.go

@@ -22,9 +22,9 @@ var hashKey = []byte{
 	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 }
 
-// Extract returns the list of new or changed UAST nodes filtered by XPath.
-func (xpather ChangesXPather) Extract(changes []Change) []nodes.Node {
-	var result []nodes.Node
+// Extract returns two lists of UAST nodes filtered by XPath: the inserted ones and the removed ones.
+func (xpather ChangesXPather) Extract(changes []Change) ([]nodes.Node, []nodes.Node) {
+	var resultAdded, resultRemoved []nodes.Node
 	for _, change := range changes {
 		if change.After == nil {
 			continue
@@ -33,16 +33,19 @@ func (xpather ChangesXPather) Extract(changes []Change) []nodes.Node {
 		newNodes := xpather.filter(change.After, change.Change.To.TreeEntry.Hash)
 		oldHashes := xpather.hash(oldNodes)
 		newHashes := xpather.hash(newNodes)
-		// remove any untouched nodes
-		for hash := range oldHashes {
-			delete(newHashes, hash)
-		}
 		// there can be hash collisions; we ignore them
-		for _, node := range newHashes {
-			result = append(result, node)
+		for hash, node := range newHashes {
+			if _, exists := oldHashes[hash]; !exists {
+				resultAdded = append(resultAdded, node)
+			}
+		}
+		for hash, node := range oldHashes {
+			if _, exists := newHashes[hash]; !exists {
+				resultRemoved = append(resultRemoved, node)
+			}
 		}
 	}
-	return result
+	return resultAdded, resultRemoved
 }
 
 func (xpather ChangesXPather) filter(root nodes.Node, origin plumbing.Hash) []nodes.Node {

+ 16 - 3
internal/plumbing/uast/changes_xpather_test.go

@@ -4,10 +4,12 @@ package uast
 
 import (
 	"log"
+	"sort"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/bblfsh/client-go.v3"
+	"gopkg.in/bblfsh/sdk.v2/uast/nodes"
 	uast_test "gopkg.in/src-d/hercules.v9/internal/plumbing/uast/test"
 	"gopkg.in/src-d/hercules.v9/internal/test"
 )
@@ -27,7 +29,18 @@ func TestChangesXPatherExtractChanged(t *testing.T) {
 		{Before: nil, After: root2, Change: gitChange},
 		{Before: root1, After: nil, Change: gitChange},
 	}
-	xpather := ChangesXPather{XPath: "//*[@role='Comment']"}
-	nodes := xpather.Extract(uastChanges)
-	assert.True(t, len(nodes) > 0)
+	xpather := ChangesXPather{XPath: "//uast:Comment"}
+	nodesAdded, nodesRemoved := xpather.Extract(uastChanges)
+	sort.Slice(nodesRemoved, func(i, j int) bool {
+		return nodesRemoved[i].(nodes.Object)["Text"].(nodes.String) <
+			nodesRemoved[j].(nodes.Object)["Text"].(nodes.String)
+	})
+	for _, n := range nodesAdded {
+		assert.True(t, len(n.(nodes.Object)["Text"].(nodes.String)) > 0)
+	}
+	for _, n := range nodesRemoved[1:] {
+		assert.True(t, len(n.(nodes.Object)["Text"].(nodes.String)) > 0)
+	}
+	assert.True(t, len(nodesAdded) > 0)
+	assert.True(t, len(nodesRemoved) > 0)
 }

+ 1 - 1
leaves/comment_sentiment.go

@@ -161,7 +161,7 @@ func (sent *CommentSentimentAnalysis) Consume(deps map[string]interface{}) (map[
 	}
 	changes := deps[uast_items.DependencyUastChanges].([]uast_items.Change)
 	day := deps[items.DependencyDay].(int)
-	commentNodes := sent.xpather.Extract(changes)
+	commentNodes, _ := sent.xpather.Extract(changes)
 	comments := sent.mergeComments(commentNodes)
 	dayComments := sent.commentsByDay[day]
 	if dayComments == nil {