Browse Source

Fix the fastdtw evaluation

Signed-off-by: Vadim Markovtsev <vadim@sourced.tech>
Vadim Markovtsev 6 years ago
parent
commit
94c23b6ce9
2 changed files with 22 additions and 7 deletions
  1. 11 2
      internal/plumbing/identity/identity.go
  2. 11 5
      python/labours/labours.py

+ 11 - 2
internal/plumbing/identity/identity.go

@@ -265,9 +265,10 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
 	detector.ReversedPeopleDict = reverseDict
 }
 
-// MergeReversedDicts joins two identity lists together, excluding duplicates, in-order.
+// MergeReversedDictsLiteral joins two string lists together, excluding duplicates, in-order.
+// The string comparisons are the usual ones.
 // The returned mapping's values are: final index, index in first, index in second (-1 for a miss).
-func (detector Detector) MergeReversedDicts(rd1, rd2 []string) (map[string][3]int, []string) {
+func (detector Detector) MergeReversedDictsLiteral(rd1, rd2 []string) (map[string][3]int, []string) {
 	people := map[string][3]int{}
 	for i, pid := range rd1 {
 		ptrs := people[pid]
@@ -292,6 +293,14 @@ func (detector Detector) MergeReversedDicts(rd1, rd2 []string) (map[string][3]in
 	return people, mrd
 }
 
+// MergeReversedDictsIdentities joins two identity lists together, excluding duplicates.
+// The strings are split by "|" and we find the connected components.
+// The returned mapping's values are: final index, index in first, index in second (-1 for a miss).
+func (detector Detector) MergeReversedDictsIdentities(rd1, rd2 []string) (map[string][3]int, []string) {
+
+	return nil, nil
+}
+
 func init() {
 	core.Registry.Register(&Detector{})
 }

+ 11 - 5
python/labours/labours.py

@@ -1377,15 +1377,21 @@ def order_commits(chosen_people, days, people):
                  windows[-1, 5] / windows[-1].max(),
                  windows[-1, 6] / windows[-1].max()]
             ))
-        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to window size
-        series[i] = list(arr.transpose())
+        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to the window size
+        series[i] = arr.transpose()
     # calculate the distance matrix using dynamic time warping metric
     dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
-    for x in range(len(series)):
+    for x, serx in enumerate(series):
         dists[x, x] = 0
-        for y in range(x + 1, len(series)):
+        for y, sery in enumerate(series[x + 1:], start=x + 1):
+            min_day = int(min(serx[0][0], sery[0][0]))
+            max_day = int(max(serx[-1][0], sery[-1][0]))
+            arrx = numpy.zeros(max_day - min_day + 1, dtype=numpy.float32)
+            arry = numpy.zeros_like(arrx)
+            arrx[serx[:, 0].astype(int) - min_day] = serx[:, 1]
+            arry[sery[:, 0].astype(int) - min_day] = sery[:, 1]
             # L1 norm
-            dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
+            dist, _ = fastdtw(arrx, arry, radius=5, dist=1)
             dists[x, y] = dists[y, x] = dist
     print("Ordering the series")
     route = seriate(dists)