@@ -58,13 +58,13 @@ def parse_args():
     parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
     parser.add_argument("--relative", action="store_true",
                         help="Occupy 100%% height for every measurement.")
-    parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
+    parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
     parser.add_argument("-m", "--mode",
                         choices=["burndown-project", "burndown-file", "burndown-person",
-                                 "churn-matrix", "ownership", "couples-files", "couples-people",
-                                 "couples-shotness", "shotness", "sentiment", "devs",
-                                 "devs-efforts", "old-vs-new", "all", "run-times", "languages",
-                                 "devs-parallel"],
+                                 "overwrites-matrix", "ownership", "couples-files",
+                                 "couples-people", "couples-shotness", "shotness", "sentiment",
+                                 "devs", "devs-efforts", "old-vs-new", "all", "run-times",
+                                 "languages", "devs-parallel"],
                         help="What to plot.")
     parser.add_argument(
         "--resample", default="year",
@@ -82,7 +82,7 @@ def parse_args():
     parser.add_argument("--disable-projector", action="store_true",
                         help="Do not run Tensorflow Projector on couples.")
     parser.add_argument("--max-people", default=20, type=int,
-                        help="Maximum number of developers in churn matrix and people plots.")
+                        help="Maximum number of developers in overwrites matrix and people plots.")
     args = parser.parse_args()
     return args
 
@@ -716,18 +716,19 @@ def load_ownership(header, sequence, contents, max_people):
     return sequence, people, date_range_sampling, last
 
 
-def load_churn_matrix(people, matrix, max_people):
+def load_overwrites_matrix(people, matrix, max_people, normalize=True):
     matrix = matrix.astype(float)
     if matrix.shape[0] > max_people:
         order = numpy.argsort(-matrix[:, 0])
         matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
         people = [people[i] for i in order[:max_people]]
         print("Warning: truncated people to most productive %d" % max_people)
-    zeros = matrix[:, 0] == 0
-    matrix[zeros, :] = 1
-    matrix /= matrix[:, 0][:, None]
+    if normalize:
+        zeros = matrix[:, 0] == 0
+        matrix[zeros, :] = 1
+        matrix /= matrix[:, 0][:, None]
+        matrix[zeros, :] = 0
     matrix = -matrix[:, 1:]
-    matrix[zeros, :] = 0
     for i, name in enumerate(people):
         if len(name) > 40:
             people[i] = name[:37] + "..."
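A minimal sketch of what the normalize=True branch of load_overwrites_matrix does, run on a hypothetical toy matrix; the column layout (column 0 as a developer's own line count, the remaining columns as interactions with others) is assumed here for illustration only:

    import numpy

    matrix = numpy.array([
        [100.0, 0.0, 10.0, 0.0],
        [50.0, 0.0, 0.0, 5.0],
        [0.0, 0.0, 0.0, 0.0],        # a developer with an all-zero first column
    ])

    zeros = matrix[:, 0] == 0        # rows that would otherwise divide by zero
    matrix[zeros, :] = 1             # temporary placeholder
    matrix /= matrix[:, 0][:, None]  # scale each row by its own column 0
    matrix[zeros, :] = 0             # clear the placeholder rows again
    matrix = -matrix[:, 1:]          # drop column 0 and negate, as the function does next
    print(matrix)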
@@ -907,11 +908,11 @@ def plot_many_burndown(args, target, header, parts):
     sys.stdout.write(stdout.getvalue())
 
 
-def plot_churn_matrix(args, repo, people, matrix):
+def plot_overwrites_matrix(args, repo, people, matrix):
     if args.output and args.output.endswith(".json"):
         data = locals().copy()
         del data["args"]
-        data["type"] = "churn_matrix"
+        data["type"] = "overwrites_matrix"
         if args.mode == "all":
             output = get_plot_path(args.output, "matrix")
         else:
@@ -1410,24 +1411,9 @@ def order_commits(chosen_people, days, people):
     series = list(devseries.values())
     for i, s in enumerate(series):
         arr = numpy.array(s).transpose().astype(numpy.float32)
-        commits = arr[1]
-        if len(commits) < 7:
-            commits /= commits.max()
-        else:
-            # 4 is sizeof(float32)
-            windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
-            commits = numpy.concatenate((
-                [windows[0, 0] / windows[0].max(),
-                 windows[0, 1] / windows[0].max(),
-                 windows[0, 2] / windows[0].max()],
-                windows[:, 3] / windows.max(axis=1),
-                [windows[-1, 4] / windows[-1].max(),
-                 windows[-1, 5] / windows[-1].max(),
-                 windows[-1, 6] / windows[-1].max()]
-            ))
-        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to the window size
+        arr[1] /= arr[1].sum()
         series[i] = arr.transpose()
-    # calculate the distance matrix using dynamic time warping metric
+    # calculate the distance matrix using dynamic time warping
     dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
     for x, serx in enumerate(series):
         dists[x, x] = 0
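The series handed to the dynamic time warping distance are per-developer arrays whose second row holds commit counts, and each is scaled to unit sum instead of being smoothed with a sliding window. A minimal sketch on a hypothetical four-day series:

    import numpy

    # Row 0: day indices, row 1: commits on each day (toy values).
    arr = numpy.array([[0, 1, 2, 3],
                       [4, 2, 6, 8]], dtype=numpy.float32)
    arr[1] /= arr[1].sum()  # commit counts now sum to 1 per developer
    print(arr[1])           # [0.2 0.1 0.3 0.4]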
@@ -1450,8 +1436,7 @@ def hdbscan_cluster_routed_series(dists, route):
     try:
         from hdbscan import HDBSCAN
     except ImportError as e:
-        print("Cannot import ortools: %s\nInstall it from "
-              "https://developers.google.com/optimization/install/python/" % e)
+        print("Cannot import hdbscan: %s" % e)
         sys.exit(1)
 
     opt_dist_chain = numpy.cumsum(numpy.array(
@@ -1799,12 +1784,22 @@ def main():
         except KeyError:
             print("people: " + burndown_people_warning)
 
-    def churn_matrix():
+    def overwrites_matrix():
         try:
-            plot_churn_matrix(args, name, *load_churn_matrix(
+
+            plot_overwrites_matrix(args, name, *load_overwrites_matrix(
                 *reader.get_people_interaction(), max_people=args.max_people))
+            people, matrix = load_overwrites_matrix(
+                *reader.get_people_interaction(), max_people=1000000, normalize=False)
+            from scipy.sparse import csr_matrix
+            matrix = matrix[:, 1:]
+            matrix = numpy.triu(matrix) + numpy.tril(matrix).T
+            matrix = matrix + matrix.T
+            matrix = csr_matrix(matrix)
+            write_embeddings("overwrites", args.output, not args.disable_projector,
+                             *train_embeddings(people, matrix, tmpdir=args.tmpdir))
         except KeyError:
-            print("churn_matrix: " + burndown_people_warning)
+            print("overwrites_matrix: " + burndown_people_warning)
 
     def ownership_burndown():
         try:
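A minimal sketch of the triangle fold that overwrites_matrix applies to the raw, un-normalized matrix before training the embeddings; the 3x3 values are hypothetical and the self column is assumed to be already dropped:

    import numpy
    from scipy.sparse import csr_matrix

    raw = numpy.array([[0.0, 3.0, 1.0],
                       [2.0, 0.0, 0.0],
                       [4.0, 5.0, 0.0]])
    folded = numpy.triu(raw) + numpy.tril(raw).T  # merge "A overwrote B" with "B overwrote A"
    coocc = csr_matrix(folded + folded.T)         # symmetric sparse co-occurrence matrix
    print(coocc.toarray())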
@@ -1822,7 +1817,7 @@ def main():
         try:
             write_embeddings("files", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_files_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(couples_warning)
 
@@ -1830,7 +1825,7 @@ def main():
         try:
             write_embeddings("people", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_people_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(couples_warning)
 
@@ -1838,7 +1833,7 @@ def main():
         try:
             write_embeddings("shotness", args.output, not args.disable_projector,
                              *train_embeddings(*reader.get_shotness_coocc(),
-                                               tmpdir=args.couples_tmp_dir))
+                                               tmpdir=args.tmpdir))
         except KeyError:
             print(shotness_warning)
 
@@ -1916,7 +1911,7 @@ def main():
         "burndown-project": project_burndown,
         "burndown-file": files_burndown,
         "burndown-person": people_burndown,
-        "churn-matrix": churn_matrix,
+        "overwrites-matrix": overwrites_matrix,
         "ownership": ownership_burndown,
         "couples-files": couples_files,
         "couples-people": couples_people,
@@ -1936,7 +1931,7 @@ def main():
         project_burndown()
         files_burndown()
         people_burndown()
-        churn_matrix()
+        overwrites_matrix()
         ownership_burndown()
         couples_files()
         couples_people()