|
@@ -3,8 +3,7 @@ package leaves
|
|
|
import (
|
|
|
"fmt"
|
|
|
"io"
|
|
|
- "log"
|
|
|
- "sort"
|
|
|
+ "sort"
|
|
|
|
|
|
"github.com/gogo/protobuf/proto"
|
|
|
"gopkg.in/src-d/go-git.v4"
|
|
@@ -21,6 +20,7 @@ import (
|
|
|
// The results are matrices, where cell at row X and column Y is the number of commits which
|
|
|
// changed X and Y together. In case with people, the numbers are summed for every common file.
|
|
|
type CouplesAnalysis struct {
|
|
|
+ core.NoopMerger
|
|
|
core.OneShotMergeProcessor
|
|
|
// PeopleNumber is the number of developers for which to build the matrix. 0 disables this analysis.
|
|
|
PeopleNumber int
|
|
@@ -31,6 +31,10 @@ type CouplesAnalysis struct {
|
|
|
peopleCommits []int
|
|
|
// files store every file occurred in the same commit with every other file.
|
|
|
files map[string]map[string]int
|
|
|
+ // renames point from new file name to old file name.
|
|
|
+ renames *[]rename
|
|
|
+ // lastCommit is the last commit which was consumed.
|
|
|
+ lastCommit *object.Commit
|
|
|
// reversedPeopleDict references IdentityDetector.ReversedPeopleDict
|
|
|
reversedPeopleDict []string
|
|
|
}
|
|
@@ -47,6 +51,11 @@ type CouplesResult struct {
|
|
|
reversedPeopleDict []string
|
|
|
}
|
|
|
|
|
|
+type rename struct {
|
|
|
+ FromName string
|
|
|
+ ToName string
|
|
|
+}
|
|
|
+
|
|
|
// Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
|
|
|
func (couples *CouplesAnalysis) Name() string {
|
|
|
return "Couples"
|
|
@@ -94,6 +103,7 @@ func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
|
|
|
}
|
|
|
couples.peopleCommits = make([]int, couples.PeopleNumber+1)
|
|
|
couples.files = map[string]map[string]int{}
|
|
|
+ couples.renames = &[]rename{}
|
|
|
couples.OneShotMergeProcessor.Initialize()
|
|
|
}
|
|
|
|
|
@@ -105,6 +115,7 @@ func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
|
|
|
func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
|
|
|
firstMerge := couples.ShouldConsumeCommit(deps)
|
|
|
mergeMode := core.IsMergeCommit(deps)
|
|
|
+ couples.lastCommit = deps[core.DependencyCommit].(*object.Commit)
|
|
|
author := deps[identity.DependencyAuthor].(int)
|
|
|
if author == identity.AuthorMissing {
|
|
|
author = couples.PeopleNumber
|
|
@@ -113,14 +124,7 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
|
|
|
couples.peopleCommits[author]++
|
|
|
}
|
|
|
treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
|
|
|
- context := make([]string, 0)
|
|
|
- deleteFile := func(name string) {
|
|
|
- // we do not remove the file from people - the context does not expire
|
|
|
- delete(couples.files, name)
|
|
|
- for _, otherFiles := range couples.files {
|
|
|
- delete(otherFiles, name)
|
|
|
- }
|
|
|
- }
|
|
|
+ context := make([]string, 0, len(treeDiff))
|
|
|
for _, change := range treeDiff {
|
|
|
action, err := change.Action()
|
|
|
if err != nil {
|
|
@@ -133,37 +137,20 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
|
|
|
if !mergeMode {
|
|
|
context = append(context, toName)
|
|
|
couples.people[author][toName]++
|
|
|
- } else {
|
|
|
- couples.files[toName] = map[string]int{}
|
|
|
+ } else if couples.people[author][toName] == 0 {
|
|
|
+ couples.people[author][toName] = 1
|
|
|
}
|
|
|
case merkletrie.Delete:
|
|
|
- deleteFile(fromName)
|
|
|
if !mergeMode {
|
|
|
couples.people[author][fromName]++
|
|
|
+ } else if couples.people[author][fromName] == 0 {
|
|
|
+ couples.people[author][fromName] = 1
|
|
|
}
|
|
|
case merkletrie.Modify:
|
|
|
if fromName != toName {
|
|
|
// renamed
|
|
|
- fromFiles := couples.files[fromName]
|
|
|
- if fromFiles == nil {
|
|
|
- log.Panicf("CouplesAnalysis corruption: %s %s",
|
|
|
- fromName, deps[core.DependencyCommit].(*object.Commit).Hash.String())
|
|
|
- }
|
|
|
- couples.files[toName] = fromFiles
|
|
|
- for _, otherFiles := range couples.files {
|
|
|
- val, exists := otherFiles[fromName]
|
|
|
- if exists {
|
|
|
- otherFiles[toName] = val
|
|
|
- }
|
|
|
- }
|
|
|
- deleteFile(fromName)
|
|
|
- for _, authorFiles := range couples.people {
|
|
|
- val, exists := authorFiles[fromName]
|
|
|
- if exists {
|
|
|
- authorFiles[toName] = val
|
|
|
- delete(authorFiles, fromName)
|
|
|
- }
|
|
|
- }
|
|
|
+ *couples.renames = append(
|
|
|
+ *couples.renames, rename{ToName: toName, FromName: fromName})
|
|
|
}
|
|
|
if !mergeMode {
|
|
|
context = append(context, toName)
|
|
@@ -186,9 +173,10 @@ func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string
|
|
|
|
|
|
// Finalize returns the result of the analysis. Further Consume() calls are not expected.
|
|
|
func (couples *CouplesAnalysis) Finalize() interface{} {
|
|
|
- filesSequence := make([]string, len(couples.files))
|
|
|
+ files, people := couples.propagateRenames(couples.currentFiles())
|
|
|
+ filesSequence := make([]string, len(files))
|
|
|
i := 0
|
|
|
- for file := range couples.files {
|
|
|
+ for file := range files {
|
|
|
filesSequence[i] = file
|
|
|
i++
|
|
|
}
|
|
@@ -202,12 +190,11 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
|
|
|
peopleFiles := make([][]int, couples.PeopleNumber+1)
|
|
|
for i := range peopleMatrix {
|
|
|
peopleMatrix[i] = map[int]int64{}
|
|
|
- for file, commits := range couples.people[i] {
|
|
|
- fi, exists := filesIndex[file]
|
|
|
- if exists {
|
|
|
+ for file, commits := range people[i] {
|
|
|
+ if fi, exists := filesIndex[file]; exists {
|
|
|
peopleFiles[i] = append(peopleFiles[i], fi)
|
|
|
}
|
|
|
- for j, otherFiles := range couples.people {
|
|
|
+ for j, otherFiles := range people {
|
|
|
otherCommits := otherFiles[file]
|
|
|
delta := otherCommits
|
|
|
if otherCommits > commits {
|
|
@@ -224,7 +211,7 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
|
|
|
filesMatrix := make([]map[int]int64, len(filesIndex))
|
|
|
for i := range filesMatrix {
|
|
|
filesMatrix[i] = map[int]int64{}
|
|
|
- for otherFile, cooccs := range couples.files[filesSequence[i]] {
|
|
|
+ for otherFile, cooccs := range files[filesSequence[i]] {
|
|
|
filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
|
|
|
}
|
|
|
}
|
|
@@ -239,33 +226,7 @@ func (couples *CouplesAnalysis) Finalize() interface{} {
|
|
|
|
|
|
// Fork clones this pipeline item.
|
|
|
func (couples *CouplesAnalysis) Fork(n int) []core.PipelineItem {
|
|
|
- clones := core.ForkCopyPipelineItem(couples, n)
|
|
|
- for i := range clones {
|
|
|
- files := map[string]map[string]int{}
|
|
|
- clones[i].(*CouplesAnalysis).files = files
|
|
|
- for key1, val1 := range couples.files {
|
|
|
- subfiles := map[string]int{}
|
|
|
- files[key1] = subfiles
|
|
|
- for key2 := range val1 {
|
|
|
- subfiles[key2] = 0
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return clones
|
|
|
-}
|
|
|
-
|
|
|
-// Merge combines several couples together after a merge commit.
|
|
|
-func (couples *CouplesAnalysis) Merge(clones []core.PipelineItem) {
|
|
|
- for i := range clones {
|
|
|
- files := clones[i].(*CouplesAnalysis).files
|
|
|
- for key1, vals := range files {
|
|
|
- subfiles := couples.files[key1]
|
|
|
- for key2, delta := range vals {
|
|
|
- subfiles[key2] += delta
|
|
|
- vals[key2] = 0
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ return core.ForkCopyPipelineItem(couples, n)
|
|
|
}
|
|
|
|
|
|
// Serialize converts the analysis result as returned by Finalize() to text or bytes.
|
|
@@ -402,7 +363,7 @@ func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.W
|
|
|
fmt.Fprintln(writer, " matrix:")
|
|
|
for _, files := range result.FilesMatrix {
|
|
|
fmt.Fprint(writer, " - {")
|
|
|
- indices := []int{}
|
|
|
+ var indices []int
|
|
|
for file := range files {
|
|
|
indices = append(indices, file)
|
|
|
}
|
|
@@ -425,7 +386,7 @@ func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.W
|
|
|
fmt.Fprintln(writer, " matrix:")
|
|
|
for _, people := range result.PeopleMatrix {
|
|
|
fmt.Fprint(writer, " - {")
|
|
|
- indices := []int{}
|
|
|
+ var indices []int
|
|
|
for file := range people {
|
|
|
indices = append(indices, file)
|
|
|
}
|
|
@@ -514,6 +475,102 @@ func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
+// currentFiles return the list of files in the last consumed commit.
|
|
|
+func (couples *CouplesAnalysis) currentFiles() map[string]bool {
|
|
|
+ tree, _ := couples.lastCommit.Tree()
|
|
|
+ fileIter := tree.Files()
|
|
|
+ files := map[string]bool{}
|
|
|
+ fileIter.ForEach(func(fobj *object.File) error {
|
|
|
+ files[fobj.Name] = true
|
|
|
+ return nil
|
|
|
+ })
|
|
|
+ return files
|
|
|
+}
|
|
|
+
|
|
|
+// propagateRenames applies `renames` over the files from `lastCommit`.
|
|
|
+func (couples *CouplesAnalysis) propagateRenames(files map[string]bool) (
|
|
|
+ map[string]map[string]int, []map[string]int) {
|
|
|
+
|
|
|
+ renames := *couples.renames
|
|
|
+ reducedFiles := map[string]map[string]int{}
|
|
|
+ for file := range files {
|
|
|
+ fmap := map[string]int{}
|
|
|
+ reducedFiles[file] = fmap
|
|
|
+ refmap := couples.files[file]
|
|
|
+ for other := range files {
|
|
|
+ refval := refmap[other]
|
|
|
+ if refval > 0 {
|
|
|
+ fmap[other] = refval
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // propagate renames
|
|
|
+ aliases := map[string]map[string]bool{}
|
|
|
+ {
|
|
|
+ pointers := map[string]string{}
|
|
|
+ for i := range renames {
|
|
|
+ rename := renames[len(renames)-i-1]
|
|
|
+ toName := rename.ToName
|
|
|
+ if newTo, exists := pointers[toName]; exists {
|
|
|
+ toName = newTo
|
|
|
+ }
|
|
|
+ if _, exists := reducedFiles[toName]; exists {
|
|
|
+ if rename.FromName != toName {
|
|
|
+ var set map[string]bool
|
|
|
+ if set, exists = aliases[toName]; !exists {
|
|
|
+ set = map[string]bool{}
|
|
|
+ aliases[toName] = set
|
|
|
+ }
|
|
|
+ set[rename.FromName] = true
|
|
|
+ pointers[rename.FromName] = toName
|
|
|
+ }
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ adjustments := map[string]map[string]int{}
|
|
|
+ for final, set := range aliases {
|
|
|
+ adjustment := map[string]int{}
|
|
|
+ for alias := range set {
|
|
|
+ for k, v := range couples.files[alias] {
|
|
|
+ adjustment[k] += v
|
|
|
+ }
|
|
|
+ }
|
|
|
+ adjustments[final] = adjustment
|
|
|
+ }
|
|
|
+ for _, adjustment := range adjustments {
|
|
|
+ for final, set := range aliases {
|
|
|
+ for alias := range set {
|
|
|
+ adjustment[final] += adjustment[alias]
|
|
|
+ delete(adjustment, alias)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for final, adjustment := range adjustments {
|
|
|
+ for key, val := range adjustment {
|
|
|
+ if coocc, exists := reducedFiles[final][key]; exists {
|
|
|
+ reducedFiles[final][key] = coocc + val
|
|
|
+ reducedFiles[key][final] = coocc + val
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ people := make([]map[string]int, len(couples.people))
|
|
|
+ for i, counts := range couples.people {
|
|
|
+ reducedCounts := map[string]int{}
|
|
|
+ people[i] = reducedCounts
|
|
|
+ for file := range files {
|
|
|
+ count := counts[file]
|
|
|
+ for alias := range aliases[file] {
|
|
|
+ count += counts[alias]
|
|
|
+ }
|
|
|
+ if count > 0 {
|
|
|
+ reducedCounts[file] = count
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return reducedFiles, people
|
|
|
+}
|
|
|
+
|
|
|
func init() {
|
|
|
core.Registry.Register(&CouplesAnalysis{})
|
|
|
}
|