소스 검색

Fix Couples to work properly with branches

Vadim Markovtsev 6 년 전
부모
커밋
f6d933da12
2개의 변경된 파일176개의 추가작업 그리고 70개의 파일을 삭제
  1. 126 69
      leaves/couples.go
  2. 50 1
      leaves/couples_test.go

+ 126 - 69
leaves/couples.go

@@ -3,8 +3,7 @@ package leaves
 import (
 	"fmt"
 	"io"
-	"log"
-	"sort"
+		"sort"
 
 	"github.com/gogo/protobuf/proto"
 	"gopkg.in/src-d/go-git.v4"
@@ -21,6 +20,7 @@ import (
 // The results are matrices, where cell at row X and column Y is the number of commits which
 // changed X and Y together. In case with people, the numbers are summed for every common file.
 type CouplesAnalysis struct {
+	core.NoopMerger
 	core.OneShotMergeProcessor
 	// PeopleNumber is the number of developers for which to build the matrix. 0 disables this analysis.
 	PeopleNumber int
@@ -31,6 +31,10 @@ type CouplesAnalysis struct {
 	peopleCommits []int
 	// files store every file occurred in the same commit with every other file.
 	files map[string]map[string]int
+	// renames point from new file name to old file name.
+	renames *[]rename
+	// lastCommit is the last commit which was consumed.
+	lastCommit *object.Commit
 	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict
 	reversedPeopleDict []string
 }
@@ -47,6 +51,11 @@ type CouplesResult struct {
 	reversedPeopleDict []string
 }
 
+type rename struct {
+	FromName string
+	ToName string
+}
+
 // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
 func (couples *CouplesAnalysis) Name() string {
 	return "Couples"
@@ -94,6 +103,7 @@ func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
 	}
 	couples.peopleCommits = make([]int, couples.PeopleNumber+1)
 	couples.files = map[string]map[string]int{}
+	couples.renames = &[]rename{}
 	couples.OneShotMergeProcessor.Initialize()
 }
 
@@ -105,6 +115,7 @@ func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
 func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	firstMerge := couples.ShouldConsumeCommit(deps)
 	mergeMode := core.IsMergeCommit(deps)
+	couples.lastCommit = deps[core.DependencyCommit].(*object.Commit)
 	author := deps[identity.DependencyAuthor].(int)
 	if author == identity.AuthorMissing {
 		author = couples.PeopleNumber
@@ -113,14 +124,7 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
 		couples.peopleCommits[author]++
 	}
 	treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
-	context := make([]string, 0)
-	deleteFile := func(name string) {
-		// we do not remove the file from people - the context does not expire
-		delete(couples.files, name)
-		for _, otherFiles := range couples.files {
-			delete(otherFiles, name)
-		}
-	}
+	context := make([]string, 0, len(treeDiff))
 	for _, change := range treeDiff {
 		action, err := change.Action()
 		if err != nil {
@@ -133,37 +137,20 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
 			if !mergeMode {
 				context = append(context, toName)
 				couples.people[author][toName]++
-			} else {
-				couples.files[toName] = map[string]int{}
+			} else if couples.people[author][toName] == 0 {
+				couples.people[author][toName] = 1
 			}
 		case merkletrie.Delete:
-			deleteFile(fromName)
 			if !mergeMode {
 				couples.people[author][fromName]++
+			} else if couples.people[author][fromName] == 0 {
+				couples.people[author][fromName] = 1
 			}
 		case merkletrie.Modify:
 			if fromName != toName {
 				// renamed
-				fromFiles := couples.files[fromName]
-				if fromFiles == nil {
-					log.Panicf("CouplesAnalysis corruption: %s %s",
-						fromName, deps[core.DependencyCommit].(*object.Commit).Hash.String())
-				}
-				couples.files[toName] = fromFiles
-				for _, otherFiles := range couples.files {
-					val, exists := otherFiles[fromName]
-					if exists {
-						otherFiles[toName] = val
-					}
-				}
-				deleteFile(fromName)
-				for _, authorFiles := range couples.people {
-					val, exists := authorFiles[fromName]
-					if exists {
-						authorFiles[toName] = val
-						delete(authorFiles, fromName)
-					}
-				}
+				*couples.renames = append(
+					*couples.renames, rename{ToName: toName, FromName: fromName})
 			}
 			if !mergeMode {
 				context = append(context, toName)
@@ -186,9 +173,10 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
 
 // Finalize returns the result of the analysis. Further Consume() calls are not expected.
 func (couples *CouplesAnalysis) Finalize() interface{} {
-	filesSequence := make([]string, len(couples.files))
+	files, people := couples.propagateRenames(couples.currentFiles())
+	filesSequence := make([]string, len(files))
 	i := 0
-	for file := range couples.files {
+	for file := range files {
 		filesSequence[i] = file
 		i++
 	}
@@ -202,12 +190,11 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
 	peopleFiles := make([][]int, couples.PeopleNumber+1)
 	for i := range peopleMatrix {
 		peopleMatrix[i] = map[int]int64{}
-		for file, commits := range couples.people[i] {
-			fi, exists := filesIndex[file]
-			if exists {
+		for file, commits := range people[i] {
+			if fi, exists := filesIndex[file]; exists {
 				peopleFiles[i] = append(peopleFiles[i], fi)
 			}
-			for j, otherFiles := range couples.people {
+			for j, otherFiles := range people {
 				otherCommits := otherFiles[file]
 				delta := otherCommits
 				if otherCommits > commits {
@@ -224,7 +211,7 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
 	filesMatrix := make([]map[int]int64, len(filesIndex))
 	for i := range filesMatrix {
 		filesMatrix[i] = map[int]int64{}
-		for otherFile, cooccs := range couples.files[filesSequence[i]] {
+		for otherFile, cooccs := range files[filesSequence[i]] {
 			filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
 		}
 	}
@@ -239,33 +226,7 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
 
 // Fork clones this pipeline item.
 func (couples *CouplesAnalysis) Fork(n int) []core.PipelineItem {
-	clones := core.ForkCopyPipelineItem(couples, n)
-	for i := range clones {
-		files := map[string]map[string]int{}
-		clones[i].(*CouplesAnalysis).files = files
-		for key1, val1 := range couples.files {
-			subfiles := map[string]int{}
-			files[key1] = subfiles
-			for key2 := range val1 {
-				subfiles[key2] = 0
-			}
-		}
-	}
-	return clones
-}
-
-// Merge combines several couples together after a merge commit.
-func (couples *CouplesAnalysis) Merge(clones []core.PipelineItem) {
-	for i := range clones {
-		files := clones[i].(*CouplesAnalysis).files
-		for key1, vals := range files {
-			subfiles := couples.files[key1]
-			for key2, delta := range vals {
-				subfiles[key2] += delta
-				vals[key2] = 0
-			}
-		}
-	}
+	return core.ForkCopyPipelineItem(couples, n)
 }
 
 // Serialize converts the analysis result as returned by Finalize() to text or bytes.
@@ -402,7 +363,7 @@ func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.W
 	fmt.Fprintln(writer, "    matrix:")
 	for _, files := range result.FilesMatrix {
 		fmt.Fprint(writer, "      - {")
-		indices := []int{}
+		var indices []int
 		for file := range files {
 			indices = append(indices, file)
 		}
@@ -425,7 +386,7 @@ func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.W
 	fmt.Fprintln(writer, "    matrix:")
 	for _, people := range result.PeopleMatrix {
 		fmt.Fprint(writer, "      - {")
-		indices := []int{}
+		var indices []int
 		for file := range people {
 			indices = append(indices, file)
 		}
@@ -514,6 +475,102 @@ func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io
 	return nil
 }
 
+// currentFiles return the list of files in the last consumed commit.
+func (couples *CouplesAnalysis) currentFiles() map[string]bool {
+	tree, _ := couples.lastCommit.Tree()
+	fileIter := tree.Files()
+	files := map[string]bool{}
+	fileIter.ForEach(func(fobj *object.File) error {
+		files[fobj.Name] = true
+		return nil
+	})
+	return files
+}
+
+// propagateRenames applies `renames` over the files from `lastCommit`.
+func (couples *CouplesAnalysis) propagateRenames(files map[string]bool) (
+	map[string]map[string]int, []map[string]int) {
+
+	renames := *couples.renames
+	reducedFiles := map[string]map[string]int{}
+	for file := range files {
+		fmap := map[string]int{}
+		reducedFiles[file] = fmap
+		refmap := couples.files[file]
+		for other := range files {
+			refval := refmap[other]
+			if refval > 0 {
+				fmap[other] = refval
+			}
+		}
+	}
+	// propagate renames
+	aliases := map[string]map[string]bool{}
+	{
+		pointers := map[string]string{}
+		for i := range renames {
+			rename := renames[len(renames)-i-1]
+			toName := rename.ToName
+			if newTo, exists := pointers[toName]; exists {
+				toName = newTo
+			}
+			if _, exists := reducedFiles[toName]; exists {
+				if rename.FromName != toName {
+					var set map[string]bool
+					if set, exists = aliases[toName]; !exists {
+						set = map[string]bool{}
+						aliases[toName] = set
+					}
+					set[rename.FromName] = true
+					pointers[rename.FromName] = toName
+				}
+				continue
+			}
+		}
+	}
+	adjustments := map[string]map[string]int{}
+	for final, set := range aliases {
+		adjustment := map[string]int{}
+		for alias := range set {
+			for k, v := range couples.files[alias] {
+				adjustment[k] += v
+			}
+		}
+		adjustments[final] = adjustment
+	}
+	for _, adjustment := range adjustments {
+		for final, set := range aliases {
+			for alias := range set {
+				adjustment[final] += adjustment[alias]
+				delete(adjustment, alias)
+			}
+		}
+	}
+	for final, adjustment := range adjustments {
+		for key, val := range adjustment {
+			if coocc, exists := reducedFiles[final][key]; exists {
+				reducedFiles[final][key] = coocc + val
+				reducedFiles[key][final] = coocc + val
+			}
+		}
+	}
+	people := make([]map[string]int, len(couples.people))
+	for i, counts := range couples.people {
+		reducedCounts := map[string]int{}
+		people[i] = reducedCounts
+		for file := range files {
+			count := counts[file]
+			for alias := range aliases[file] {
+				count += counts[alias]
+			}
+			if count > 0 {
+				reducedCounts[file] = count
+			}
+		}
+	}
+	return reducedFiles, people
+}
+
 func init() {
 	core.Registry.Register(&CouplesAnalysis{})
 }

+ 50 - 1
leaves/couples_test.go

@@ -16,7 +16,7 @@ import (
 	"gopkg.in/src-d/hercules.v4/internal/plumbing"
 	"gopkg.in/src-d/hercules.v4/internal/plumbing/identity"
 	"gopkg.in/src-d/hercules.v4/internal/test"
-)
+		)
 
 func fixtureCouples() *CouplesAnalysis {
 	c := CouplesAnalysis{PeopleNumber: 3}
@@ -353,6 +353,55 @@ func TestCouplesMerge(t *testing.T) {
 	assert.Equal(t, merged.FilesMatrix[2], getCouplesMap(1, 200))
 }
 
+func TestCouplesCurrentFiles(t *testing.T) {
+	c := fixtureCouples()
+	c.lastCommit, _ = test.Repository.CommitObject(gitplumbing.NewHash(
+		"cce947b98a050c6d356bc6ba95030254914027b1"))
+	files := c.currentFiles()
+	assert.Equal(t, files, map[string]bool{".gitignore": true, "LICENSE": true})
+}
+
+func TestCouplesPropagateRenames(t *testing.T) {
+	c := fixtureCouples()
+	c.files["one"] = map[string]int{
+		"one": 1,
+		"two": 2,
+		"three": 3,
+	}
+	c.files["two"] = map[string]int{
+		"one": 2,
+		"two": 10,
+		"three": 1,
+		"four": 7,
+	}
+	c.files["three"] = map[string]int{
+		"one": 3,
+		"two": 1,
+		"three": 3,
+		"four": 2,
+	}
+	c.files["four"] = map[string]int{
+		"two": 7,
+		"three": 3,
+		"four": 1,
+	}
+	c.PeopleNumber = 1
+	c.people = make([]map[string]int, 1)
+	c.people[0] = map[string]int{}
+	c.people[0]["one"] = 1
+	c.people[0]["two"] = 2
+	c.people[0]["three"] = 3
+	c.people[0]["four"] = 4
+	*c.renames = []rename{{ToName: "four", FromName: "one"}}
+	files, people := c.propagateRenames(map[string]bool{"two": true, "three": true, "four": true})
+	assert.Len(t, files, 3)
+	assert.Len(t, people, 1)
+	assert.Equal(t, files["two"], map[string]int{"two": 10, "three": 1, "four": 9})
+	assert.Equal(t, files["three"], map[string]int{"two": 1, "three": 3, "four": 6})
+	assert.Equal(t, files["four"], map[string]int{"two": 9, "three": 6, "four": 2})
+	assert.Equal(t, people[0], map[string]int{"two": 2, "three": 3, "four": 5})
+}
+
 func getSlice(vals ...int) []int {
 	return vals
 }