Ver código fonte

Merge branch 'master' of https://github.com/src-d/hercules

Robert Lin 6 anos atrás
pai
commit
3d6ec73047

+ 4 - 5
.travis.yml

@@ -9,6 +9,7 @@ go_import_path: gopkg.in/src-d/hercules.v10
 go:
   - 1.10.x
   - 1.11.x
+  - 1.12.x
 
 services:
   - docker
@@ -26,8 +27,6 @@ before_cache:
 
 matrix:
   fast_finish: true
-  allow_failures:
-    - go: tip
 
 stages:
   - test
@@ -60,7 +59,7 @@ install:
   - travis_retry make TAGS=tensorflow
 script:
   - set -e
-  - if [ $TRAVIS_GO_VERSION == "1.11.x" ]; then test -z "$(gofmt -s -l . | grep -v vendor/)"; fi
+  - if [ $TRAVIS_GO_VERSION != "1.10.x" ]; then test -z "$(gofmt -s -l . | grep -v vendor/)"; fi
   - go vet -tags tensorflow ./...
   - golint -set_exit_status $(go list ./... | grep -v /vendor/)
   - cd python && flake8 && cd ..
@@ -90,7 +89,7 @@ jobs:
     - stage: deploy
       os: osx
       osx_image: xcode9.3
-      go: 1.11.x
+      go: 1.12.x
       go_import_path: gopkg.in/src-d/hercules.v10
       before_install:
         - wget -O protoc.zip https://github.com/google/protobuf/releases/download/v$PROTOC_VERSION/protoc-$PROTOC_VERSION-osx-x86_64.zip
@@ -112,7 +111,7 @@ jobs:
           tags: true
     - stage: deploy
       os: linux
-      go: 1.11.x
+      go: 1.12.x
       go_import_path: gopkg.in/src-d/hercules.v10
       before_install:
         - wget -O protoc.zip https://github.com/google/protobuf/releases/download/v$PROTOC_VERSION/protoc-$PROTOC_VERSION-linux-x86_64.zip

+ 175 - 16
internal/plumbing/identity/identity.go

@@ -273,33 +273,192 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
 	detector.ReversedPeopleDict = reverseDict
 }
 
-// MergeReversedDicts joins two identity lists together, excluding duplicates, in-order.
-// The returned mapping's values are: final index, index in first, index in second (-1 for a miss).
-func (detector Detector) MergeReversedDicts(rd1, rd2 []string) (map[string][3]int, []string) {
-	people := map[string][3]int{}
+// MergedIndex is the result of merging `rd1[First]` and `rd2[Second]`: `Final` is the index in the
+// merged reversed dictionary. -1 for `First` or `Second` means that the corresponding string does
+// not exist in `rd1` or `rd2`, respectively.
+// See also:
+// * MergeReversedDictsLiteral()
+// * MergeReversedDictsIdentities()
+type MergedIndex struct {
+	Final  int
+	First  int
+	Second int
+}
+
+// MergeReversedDictsLiteral joins two string lists together, excluding duplicates, in-order.
+// The string comparisons are the usual ones.
+// The returned mapping's keys are the unique strings in `rd1 ∪ rd2`, and the values are:
+// 1. Index after merging.
+// 2. Corresponding index in the first array - `rd1`. -1 means that it does not exist.
+// 3. Corresponding index in the second array - `rd2`. -1 means that it does not exist.
+func MergeReversedDictsLiteral(rd1, rd2 []string) (map[string]MergedIndex, []string) {
+
+	people := map[string]MergedIndex{}
 	for i, pid := range rd1 {
-		ptrs := people[pid]
-		ptrs[0] = len(people)
-		ptrs[1] = i
-		ptrs[2] = -1
-		people[pid] = ptrs
+		people[pid] = MergedIndex{len(people), i, -1}
 	}
 	for i, pid := range rd2 {
-		ptrs, exists := people[pid]
-		if !exists {
-			ptrs[0] = len(people)
-			ptrs[1] = -1
+		if ptrs, exists := people[pid]; !exists {
+			people[pid] = MergedIndex{len(people), -1, i}
+		} else {
+			people[pid] = MergedIndex{ptrs.Final, ptrs.First, i}
 		}
-		ptrs[2] = i
-		people[pid] = ptrs
 	}
 	mrd := make([]string, len(people))
 	for name, ptrs := range people {
-		mrd[ptrs[0]] = name
+		mrd[ptrs.Final] = name
 	}
 	return people, mrd
 }
 
+type identityPair struct {
+	Index1 int
+	Index2 int
+}
+
+// MergeReversedDictsIdentities joins two identity lists together, excluding duplicates.
+// The strings are split by "|", and each connected component of the resulting graph becomes one merged identity.
+// The returned mapping's keys are the unique strings in `rd1 ∪ rd2`, and the values are:
+// 1. Index after merging.
+// 2. Corresponding index in the first array - `rd1`. -1 means that it does not exist.
+// 3. Corresponding index in the second array - `rd2`. -1 means that it does not exist.
+func MergeReversedDictsIdentities(rd1, rd2 []string) (map[string]MergedIndex, []string) {
+
+	vocabulary := map[string]identityPair{}
+	vertices1 := make([][]string, len(rd1))
+	for i, s := range rd1 {
+		parts := strings.Split(s, "|")
+		vertices1[i] = parts
+		for _, p := range parts {
+			vocabulary[p] = identityPair{i, -1}
+		}
+	}
+	vertices2 := make([][]string, len(rd2))
+	for i, s := range rd2 {
+		parts := strings.Split(s, "|")
+		vertices2[i] = parts
+		for _, p := range parts {
+			if ip, exists := vocabulary[p]; !exists {
+				vocabulary[p] = identityPair{-1, i}
+			} else {
+				ip.Index2 = i
+				vocabulary[p] = ip
+			}
+		}
+	}
+
+	// find the connected components by walking the graph
+	var walks []map[string]bool
+	visited := map[string]bool{}
+
+	walkFromVertex := func(root []string) {
+		walk := map[string]bool{}
+		pending := map[string]bool{}
+		for _, p := range root {
+			pending[p] = true
+		}
+		for len(pending) > 0 {
+			var element string
+			for e := range pending {
+				element = e
+				delete(pending, e)
+				break
+			}
+			if !walk[element] {
+				walk[element] = true
+				ip := vocabulary[element]
+				if ip.Index1 >= 0 {
+					for _, p := range vertices1[ip.Index1] {
+						if !walk[p] {
+							pending[p] = true
+						}
+					}
+				}
+				if ip.Index2 >= 0 {
+					for _, p := range vertices2[ip.Index2] {
+						if !walk[p] {
+							pending[p] = true
+						}
+					}
+				}
+			}
+		}
+		for e := range walk {
+			visited[e] = true
+		}
+		walks = append(walks, walk)
+	}
+
+	for i1 := range rd1 {
+		var skip bool
+		for _, p := range vertices1[i1] {
+			if visited[p] {
+				skip = true
+				break
+			}
+		}
+		if skip {
+			continue
+		}
+		walkFromVertex(vertices1[i1])
+	}
+	for i2 := range rd2 {
+		var skip bool
+		for _, p := range vertices2[i2] {
+			if visited[p] {
+				skip = true
+				break
+			}
+		}
+		if skip {
+			continue
+		}
+		walkFromVertex(vertices2[i2])
+	}
+
+	mergedStrings := make([]string, 0, len(walks))
+	mergedIndex := map[string]MergedIndex{}
+	// convert each walk from strings to indexes
+	for walkIndex, walk := range walks {
+		ids := make([]string, 0, len(walk))
+		for key := range walk {
+			ids = append(ids, key)
+		}
+		// place emails after names
+		sort.Slice(ids, func(i, j int) bool {
+			iid := ids[i]
+			jid := ids[j]
+			iHasAt := strings.ContainsRune(iid, '@')
+			jHasAt := strings.ContainsRune(jid, '@')
+			if iHasAt == jHasAt {
+				return iid < jid
+			}
+			return jHasAt
+		})
+		mergedStrings = append(mergedStrings, strings.Join(ids, "|"))
+		for _, key := range ids {
+			ipair := vocabulary[key]
+			if ipair.Index1 >= 0 {
+				s1 := rd1[ipair.Index1]
+				if mi, exists := mergedIndex[s1]; !exists {
+					mergedIndex[s1] = MergedIndex{walkIndex, ipair.Index1, -1}
+				} else {
+					mergedIndex[s1] = MergedIndex{walkIndex, ipair.Index1, mi.Second}
+				}
+			}
+			if ipair.Index2 >= 0 {
+				s2 := rd2[ipair.Index2]
+				if mi, exists := mergedIndex[s2]; !exists {
+					mergedIndex[s2] = MergedIndex{walkIndex, -1, ipair.Index2}
+				} else {
+					mergedIndex[s2] = MergedIndex{walkIndex, mi.First, ipair.Index2}
+				}
+			}
+		}
+	}
+	return mergedIndex, mergedStrings
+}
+
 func init() {
 	core.Registry.Register(&Detector{})
 }

+ 56 - 23
internal/plumbing/identity/identity_test.go

@@ -46,11 +46,11 @@ func TestIdentityDetectorMeta(t *testing.T) {
 func TestIdentityDetectorConfigure(t *testing.T) {
 	id := fixtureIdentityDetector()
 	facts := map[string]interface{}{}
-	m1 := map[string]int{}
-	m2 := []string{}
+	m1 := map[string]int{"one": 0}
+	m2 := []string{"one"}
 	facts[FactIdentityDetectorPeopleDict] = m1
 	facts[FactIdentityDetectorReversedPeopleDict] = m2
-	id.Configure(facts)
+	assert.Nil(t, id.Configure(facts))
 	assert.Equal(t, m1, facts[FactIdentityDetectorPeopleDict])
 	assert.Equal(t, m2, facts[FactIdentityDetectorReversedPeopleDict])
 	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
@@ -66,7 +66,7 @@ Vadim|vadim@sourced.tech`)
 	delete(facts, FactIdentityDetectorPeopleDict)
 	delete(facts, FactIdentityDetectorReversedPeopleDict)
 	facts[ConfigIdentityDetectorPeopleDictPath] = tmpf.Name()
-	id.Configure(facts)
+	assert.Nil(t, id.Configure(facts))
 	assert.Len(t, id.PeopleDict, 2)
 	assert.Len(t, id.ReversedPeopleDict, 1)
 	assert.Equal(t, id.ReversedPeopleDict[0], "Vadim")
@@ -74,7 +74,7 @@ Vadim|vadim@sourced.tech`)
 	delete(facts, FactIdentityDetectorReversedPeopleDict)
 	id = fixtureIdentityDetector()
 	id.PeopleDict = nil
-	id.Configure(facts)
+	assert.Nil(t, id.Configure(facts))
 	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
 	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
 	assert.Len(t, id.PeopleDict, 4)
@@ -85,7 +85,7 @@ Vadim|vadim@sourced.tech`)
 	delete(facts, FactIdentityDetectorReversedPeopleDict)
 	id = fixtureIdentityDetector()
 	id.ReversedPeopleDict = nil
-	id.Configure(facts)
+	assert.Nil(t, id.Configure(facts))
 	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
 	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
 	assert.Len(t, id.PeopleDict, 4)
@@ -108,7 +108,7 @@ Vadim|vadim@sourced.tech`)
 	id = fixtureIdentityDetector()
 	id.PeopleDict = nil
 	id.ReversedPeopleDict = nil
-	id.Configure(facts)
+	assert.Nil(t, id.Configure(facts))
 	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
 	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
 	assert.True(t, len(id.PeopleDict) >= 3)
@@ -381,26 +381,59 @@ func TestIdentityDetectorGeneratePeopleDictMailmap(t *testing.T) {
 		"strange guy|vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
 }
 
-func TestIdentityDetectorMergeReversedDicts(t *testing.T) {
-	pa1 := [...]string{"one", "two"}
-	pa2 := [...]string{"two", "three"}
-	people, merged := Detector{}.MergeReversedDicts(pa1[:], pa2[:])
+func TestIdentityDetectorMergeReversedDictsLiteral(t *testing.T) {
+	pa1 := [...]string{"one|one@one", "two|aaa@two"}
+	pa2 := [...]string{"two|aaa@two", "three|one@one"}
+	people, merged := MergeReversedDictsLiteral(pa1[:], pa2[:])
 	assert.Len(t, people, 3)
 	assert.Len(t, merged, 3)
-	assert.Equal(t, people["one"], [3]int{0, 0, -1})
-	assert.Equal(t, people["two"], [3]int{1, 1, 0})
-	assert.Equal(t, people["three"], [3]int{2, -1, 1})
-	vm := [...]string{"one", "two", "three"}
-	assert.Equal(t, merged, vm[:])
-	pa1 = [...]string{"two", "one"}
-	people, merged = Detector{}.MergeReversedDicts(pa1[:], pa2[:])
+	assert.Equal(t, people["one|one@one"], MergedIndex{0, 0, -1})
+	assert.Equal(t, people["two|aaa@two"], MergedIndex{1, 1, 0})
+	assert.Equal(t, people["three|one@one"], MergedIndex{2, -1, 1})
+	assert.Equal(t, merged, []string{"one|one@one", "two|aaa@two", "three|one@one"})
+	pa1 = [...]string{"two|aaa@two", "one|one@one"}
+	people, merged = MergeReversedDictsLiteral(pa1[:], pa2[:])
 	assert.Len(t, people, 3)
 	assert.Len(t, merged, 3)
-	assert.Equal(t, people["one"], [3]int{1, 1, -1})
-	assert.Equal(t, people["two"], [3]int{0, 0, 0})
-	assert.Equal(t, people["three"], [3]int{2, -1, 1})
-	vm = [...]string{"two", "one", "three"}
-	assert.Equal(t, merged, vm[:])
+	assert.Equal(t, people["one|one@one"], MergedIndex{1, 1, -1})
+	assert.Equal(t, people["two|aaa@two"], MergedIndex{0, 0, 0})
+	assert.Equal(t, people["three|one@one"], MergedIndex{2, -1, 1})
+	assert.Equal(t, merged, []string{"two|aaa@two", "one|one@one", "three|one@one"})
+}
+
+func TestIdentityDetectorMergeReversedDictsIdentities(t *testing.T) {
+	pa1 := [...]string{"one|one@one", "two|aaa@two"}
+	pa2 := [...]string{"two|aaa@two", "three|one@one"}
+	people, merged := MergeReversedDictsIdentities(pa1[:], pa2[:])
+	assert.Len(t, people, 3)
+	assert.Len(t, merged, 2)
+	assert.Equal(t, people["one|one@one"], MergedIndex{0, 0, -1})
+	assert.Equal(t, people["two|aaa@two"], MergedIndex{1, 1, 0})
+	assert.Equal(t, people["three|one@one"], MergedIndex{0, -1, 1})
+	assert.Equal(t, merged, []string{"one|three|one@one", "two|aaa@two"})
+}
+
+func TestIdentityDetectorMergeReversedDictsIdentitiesStrikeBack(t *testing.T) {
+	pa1 := [...]string{"one|one@one", "two|aaa@two", "three|three@three"}
+	pa2 := [...]string{"two|aaa@two", "three|one@one"}
+	people, merged := MergeReversedDictsIdentities(pa1[:], pa2[:])
+	assert.Len(t, people, 4)
+	assert.Len(t, merged, 2)
+	assert.Equal(t, people["one|one@one"], MergedIndex{0, 0, -1})
+	assert.Equal(t, people["two|aaa@two"], MergedIndex{1, 1, 0})
+	assert.Equal(t, people["three|one@one"], MergedIndex{0, -1, 1})
+	assert.Equal(t, people["three|three@three"], MergedIndex{0, 2, -1})
+	assert.Equal(t, merged, []string{"one|three|one@one|three@three", "two|aaa@two"})
+
+	pa1 = [...]string{"one|one@one", "two|aaa@two", "three|aaa@two"}
+	people, merged = MergeReversedDictsIdentities(pa1[:], pa2[:])
+	assert.Len(t, people, 4)
+	assert.Len(t, merged, 1)
+	assert.Equal(t, people["one|one@one"], MergedIndex{0, 0, -1})
+	assert.Equal(t, people["two|aaa@two"], MergedIndex{0, 1, 0})
+	assert.Equal(t, people["three|one@one"], MergedIndex{0, -1, 1})
+	assert.Equal(t, people["three|aaa@two"], MergedIndex{0, 2, -1})
+	assert.Equal(t, merged, []string{"one|three|two|aaa@two|one@one"})
 }
 
 func TestIdentityDetectorFork(t *testing.T) {

+ 10 - 10
leaves/burndown.go

@@ -668,8 +668,8 @@ func (analyser *BurndownAnalysis) MergeResults(
 	} else {
 		merged.granularity = bar2.granularity
 	}
-	var people map[string][3]int
-	people, merged.reversedPeopleDict = identity.Detector{}.MergeReversedDicts(
+	var people map[string]identity.MergedIndex
+	people, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
 		bar1.reversedPeopleDict, bar2.reversedPeopleDict)
 	var wg sync.WaitGroup
 	if len(bar1.GlobalHistory) > 0 || len(bar2.GlobalHistory) > 0 {
@@ -693,11 +693,11 @@ func (analyser *BurndownAnalysis) MergeResults(
 				go func(i int) {
 					defer wg.Done()
 					var m1, m2 DenseHistory
-					if ptrs[1] >= 0 {
-						m1 = bar1.PeopleHistories[ptrs[1]]
+					if ptrs.First >= 0 {
+						m1 = bar1.PeopleHistories[ptrs.First]
 					}
-					if ptrs[2] >= 0 {
-						m2 = bar2.PeopleHistories[ptrs[2]]
+					if ptrs.Second >= 0 {
+						m2 = bar2.PeopleHistories[ptrs.Second]
 					}
 					merged.PeopleHistories[i] = analyser.mergeMatrices(
 						m1, m2,
@@ -731,18 +731,18 @@ func (analyser *BurndownAnalysis) MergeResults(
 					merged.PeopleMatrix[i] = make([]int64, len(merged.reversedPeopleDict)+2)
 				}
 				for i, key := range bar1.reversedPeopleDict {
-					mi := people[key][0] // index in merged.reversedPeopleDict
+					mi := people[key].Final // index in merged.reversedPeopleDict
 					copy(merged.PeopleMatrix[mi][:2], bar1.PeopleMatrix[i][:2])
 					for j, val := range bar1.PeopleMatrix[i][2:] {
-						merged.PeopleMatrix[mi][2+people[bar1.reversedPeopleDict[j]][0]] = val
+						merged.PeopleMatrix[mi][2+people[bar1.reversedPeopleDict[j]].Final] = val
 					}
 				}
 				for i, key := range bar2.reversedPeopleDict {
-					mi := people[key][0] // index in merged.reversedPeopleDict
+					mi := people[key].Final // index in merged.reversedPeopleDict
 					merged.PeopleMatrix[mi][0] += bar2.PeopleMatrix[i][0]
 					merged.PeopleMatrix[mi][1] += bar2.PeopleMatrix[i][1]
 					for j, val := range bar2.PeopleMatrix[i][2:] {
-						merged.PeopleMatrix[mi][2+people[bar2.reversedPeopleDict[j]][0]] += val
+						merged.PeopleMatrix[mi][2+people[bar2.reversedPeopleDict[j]].Final] += val
 					}
 				}
 			}

+ 14 - 14
leaves/couples.go

@@ -339,18 +339,18 @@ func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.Co
 	cr1 := r1.(CouplesResult)
 	cr2 := r2.(CouplesResult)
 	merged := CouplesResult{}
-	var people, files map[string][3]int
-	id := identity.Detector{}
-	people, merged.reversedPeopleDict = id.MergeReversedDicts(cr1.reversedPeopleDict, cr2.reversedPeopleDict)
-	files, merged.Files = id.MergeReversedDicts(cr1.Files, cr2.Files)
+	var people, files map[string]identity.MergedIndex
+	people, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
+		cr1.reversedPeopleDict, cr2.reversedPeopleDict)
+	files, merged.Files = identity.MergeReversedDictsLiteral(cr1.Files, cr2.Files)
 	merged.FilesLines = make([]int, len(merged.Files))
 	for i, name := range merged.Files {
 		idxs := files[name]
-		if idxs[1] >= 0 {
-			merged.FilesLines[i] += cr1.FilesLines[idxs[1]]
+		if idxs.First >= 0 {
+			merged.FilesLines[i] += cr1.FilesLines[idxs.First]
 		}
-		if idxs[2] >= 0 {
-			merged.FilesLines[i] += cr2.FilesLines[idxs[2]]
+		if idxs.Second >= 0 {
+			merged.FilesLines[i] += cr2.FilesLines[idxs.Second]
 		}
 	}
 	merged.PeopleFiles = make([][]int, len(merged.reversedPeopleDict))
@@ -358,14 +358,14 @@ func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.Co
 	addPeopleFiles := func(peopleFiles [][]int, reversedPeopleDict []string,
 		reversedFilesDict []string) {
 		for pi, fs := range peopleFiles {
-			idx := people[reversedPeopleDict[pi]][0]
+			idx := people[reversedPeopleDict[pi]].Final
 			m := peopleFilesDicts[idx]
 			if m == nil {
 				m = map[int]bool{}
 				peopleFilesDicts[idx] = m
 			}
 			for _, f := range fs {
-				m[files[reversedFilesDict[f]][0]] = true
+				m[files[reversedFilesDict[f]].Final] = true
 			}
 		}
 	}
@@ -385,7 +385,7 @@ func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.Co
 		for pi, pc := range peopleMatrix {
 			var idx int
 			if pi < len(reversedPeopleDict) {
-				idx = people[reversedPeopleDict[pi]][0]
+				idx = people[reversedPeopleDict[pi]].Final
 			} else {
 				idx = len(merged.reversedPeopleDict)
 			}
@@ -397,7 +397,7 @@ func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.Co
 			for otherDev, val := range pc {
 				var otherIdx int
 				if otherDev < len(reversedPeopleDict) {
-					otherIdx = people[reversedPeopleDict[otherDev]][0]
+					otherIdx = people[reversedPeopleDict[otherDev]].Final
 				} else {
 					otherIdx = len(merged.reversedPeopleDict)
 				}
@@ -410,14 +410,14 @@ func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.Co
 	merged.FilesMatrix = make([]map[int]int64, len(merged.Files))
 	addFiles := func(filesMatrix []map[int]int64, reversedFilesDict []string) {
 		for fi, fc := range filesMatrix {
-			idx := people[reversedFilesDict[fi]][0]
+			idx := people[reversedFilesDict[fi]].Final
 			m := merged.FilesMatrix[idx]
 			if m == nil {
 				m = map[int]int64{}
 				merged.FilesMatrix[idx] = m
 			}
 			for file, val := range fc {
-				m[files[reversedFilesDict[file]][0]] += val
+				m[files[reversedFilesDict[file]].Final] += val
 			}
 		}
 	}

+ 5 - 34
leaves/devs.go

@@ -243,38 +243,9 @@ func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAn
 	cr1 := r1.(DevsResult)
 	cr2 := r2.(DevsResult)
 	merged := DevsResult{}
-	type devIndexPair struct {
-		Index1 int
-		Index2 int
-	}
-	devIndex := map[string]devIndexPair{}
-	for dev, devName := range cr1.reversedPeopleDict {
-		devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
-	}
-	for dev, devName := range cr2.reversedPeopleDict {
-		devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
-	}
-	jointDevSeq := make([]string, len(devIndex))
-	{
-		i := 0
-		for dev := range devIndex {
-			jointDevSeq[i] = dev
-			i++
-		}
-	}
-	sort.Strings(jointDevSeq)
-	merged.reversedPeopleDict = jointDevSeq
-	invDevIndex1 := map[int]int{}
-	invDevIndex2 := map[int]int{}
-	for i, dev := range jointDevSeq {
-		pair := devIndex[dev]
-		if pair.Index1 > 0 {
-			invDevIndex1[pair.Index1-1] = i
-		}
-		if pair.Index2 > 0 {
-			invDevIndex2[pair.Index2-1] = i
-		}
-	}
+	var mergedIndex map[string]identity.MergedIndex
+	mergedIndex, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
+		cr1.reversedPeopleDict, cr2.reversedPeopleDict)
 	newticks := map[int]map[int]*DevTick{}
 	merged.Ticks = newticks
 	for tick, dd := range cr1.Ticks {
@@ -286,7 +257,7 @@ func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAn
 		for dev, stats := range dd {
 			newdev := dev
 			if newdev != identity.AuthorMissing {
-				newdev = invDevIndex1[dev]
+				newdev = mergedIndex[cr1.reversedPeopleDict[dev]].Final
 			}
 			newstats, exists := newdd[newdev]
 			if !exists {
@@ -316,7 +287,7 @@ func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAn
 		for dev, stats := range dd {
 			newdev := dev
 			if newdev != identity.AuthorMissing {
-				newdev = invDevIndex2[dev]
+				newdev = mergedIndex[cr2.reversedPeopleDict[dev]].Final
 			}
 			newstats, exists := newdd[newdev]
 			if !exists {

+ 19 - 6
python/labours/labours.py

@@ -240,7 +240,7 @@ class YamlReader(Reader):
         people = self.data["Devs"]["people"]
         days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
                          for dev, day in devs.items()}
-                for d, devs in self.data["Devs"]["days"].items()}
+                for d, devs in self.data["Devs"]["ticks"].items()}
         return people, days
 
     def _parse_burndown_matrix(self, matrix):
@@ -1349,6 +1349,13 @@ def order_commits(chosen_people, days, people):
     except ImportError as e:
         print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
         sys.exit(1)
+    # FIXME(vmarkovtsev): remove once https://github.com/slaypni/fastdtw/pull/28 is merged&released
+    try:
+        sys.modules["fastdtw.fastdtw"].__norm = lambda p: lambda a, b: numpy.linalg.norm(
+            numpy.atleast_1d(a) - numpy.atleast_1d(b), p)
+    except KeyError:
+        # the native extension does not have this bug
+        pass
 
     devseries = defaultdict(list)
     devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
@@ -1377,15 +1384,21 @@ def order_commits(chosen_people, days, people):
                  windows[-1, 5] / windows[-1].max(),
                  windows[-1, 6] / windows[-1].max()]
             ))
-        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to window size
-        series[i] = list(arr.transpose())
+        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to the window size
+        series[i] = arr.transpose()
     # calculate the distance matrix using dynamic time warping metric
     dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
-    for x in range(len(series)):
+    for x, serx in enumerate(series):
         dists[x, x] = 0
-        for y in range(x + 1, len(series)):
+        for y, sery in enumerate(series[x + 1:], start=x + 1):
+            min_day = int(min(serx[0][0], sery[0][0]))
+            max_day = int(max(serx[-1][0], sery[-1][0]))
+            arrx = numpy.zeros(max_day - min_day + 1, dtype=numpy.float32)
+            arry = numpy.zeros_like(arrx)
+            arrx[serx[:, 0].astype(int) - min_day] = serx[:, 1]
+            arry[sery[:, 0].astype(int) - min_day] = sery[:, 1]
             # L1 norm
-            dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
+            dist, _ = fastdtw(arrx, arry, radius=5, dist=1)
             dists[x, y] = dists[y, x] = dist
     print("Ordering the series")
     route = seriate(dists)