|
@@ -63,7 +63,8 @@ def parse_args():
|
|
|
choices=["burndown-project", "burndown-file", "burndown-person",
|
|
|
"churn-matrix", "ownership", "couples-files", "couples-people",
|
|
|
"couples-shotness", "shotness", "sentiment", "devs",
|
|
|
- "devs-efforts", "old-vs-new", "all", "run-times", "languages"],
|
|
|
+ "devs-efforts", "old-vs-new", "all", "run-times", "languages",
|
|
|
+ "devs-parallel"],
|
|
|
help="What to plot.")
|
|
|
parser.add_argument(
|
|
|
"--resample", default="year",
|
|
@@ -240,7 +241,7 @@ class YamlReader(Reader):
|
|
|
days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
|
|
|
for dev, day in devs.items()}
|
|
|
for d, devs in self.data["Devs"]["days"].items()}
|
|
|
- return days, people
|
|
|
+ return people, days
|
|
|
|
|
|
def _parse_burndown_matrix(self, matrix):
|
|
|
return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
|
|
@@ -364,7 +365,7 @@ class ProtobufReader(Reader):
|
|
|
for k, v in stats.languages.items()})
|
|
|
for dev, stats in day.devs.items()}
|
|
|
for d, day in self.contents["Devs"].days.items()}
|
|
|
- return days, people
|
|
|
+ return people, days
|
|
|
|
|
|
def _parse_burndown_matrix(self, matrix):
|
|
|
dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
|
|
@@ -698,6 +699,7 @@ def import_pyplot(backend, style):
|
|
|
matplotlib.use(backend)
|
|
|
from matplotlib import pyplot
|
|
|
pyplot.style.use(style)
|
|
|
+ print("matplotlib: backend is", matplotlib.get_backend())
|
|
|
return matplotlib, pyplot
|
|
|
|
|
|
|
|
@@ -738,7 +740,7 @@ def get_plot_path(base, name):
|
|
|
return output
|
|
|
|
|
|
|
|
|
-def deploy_plot(title, output, background):
|
|
|
+def deploy_plot(title, output, background, tight=True):
|
|
|
import matplotlib.pyplot as pyplot
|
|
|
|
|
|
if not output:
|
|
@@ -747,10 +749,11 @@ def deploy_plot(title, output, background):
|
|
|
else:
|
|
|
if title:
|
|
|
pyplot.title(title, color="black" if background == "white" else "white")
|
|
|
- try:
|
|
|
- pyplot.tight_layout()
|
|
|
- except: # noqa: E722
|
|
|
- print("Warning: failed to set the tight layout")
|
|
|
+ if tight:
|
|
|
+ try:
|
|
|
+ pyplot.tight_layout()
|
|
|
+ except: # noqa: E722
|
|
|
+ print("Warning: failed to set the tight layout")
|
|
|
pyplot.savefig(output, transparent=True)
|
|
|
pyplot.clf()
|
|
|
|
|
@@ -846,7 +849,7 @@ def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granu
|
|
|
if target == "project":
|
|
|
name = "project"
|
|
|
output = get_plot_path(args.output, name)
|
|
|
- deploy_plot(title, output, args.style)
|
|
|
+ deploy_plot(title, output, args.background)
|
|
|
|
|
|
|
|
|
def plot_many_burndown(args, target, header, parts):
|
|
@@ -905,7 +908,7 @@ def plot_churn_matrix(args, repo, people, matrix):
|
|
|
if args.output:
|
|
|
# FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
|
|
|
title = ""
|
|
|
- deploy_plot(title, output, args.style)
|
|
|
+ deploy_plot(title, output, args.background)
|
|
|
|
|
|
|
|
|
def plot_ownership(args, repo, names, people, date_range, last):
|
|
@@ -942,7 +945,7 @@ def plot_ownership(args, repo, names, people, date_range, last):
|
|
|
output = get_plot_path(args.output, "people")
|
|
|
else:
|
|
|
output = args.output
|
|
|
- deploy_plot("%s code ownership through time" % repo, output, args.style)
|
|
|
+ deploy_plot("%s code ownership through time" % repo, output, args.background)
|
|
|
|
|
|
|
|
|
IDEAL_SHARD_SIZE = 4096
|
|
@@ -1117,7 +1120,7 @@ web_server = CORSWebServer()
|
|
|
def write_embeddings(name, output, run_server, index, embeddings):
|
|
|
print("Writing Tensorflow Projector files...")
|
|
|
if not output:
|
|
|
- output = "couples_" + name
|
|
|
+ output = "couples"
|
|
|
if output.endswith(".json"):
|
|
|
output = os.path.join(output[:-5], "couples")
|
|
|
run_server = False
|
|
@@ -1232,30 +1235,12 @@ def show_sentiment_stats(args, name, resample, start_date, data):
|
|
|
overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
|
|
|
title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
|
|
|
name, overall_pos, overall_neg, overall_pos - overall_neg)
|
|
|
- deploy_plot(title, args.output, args.style)
|
|
|
+ deploy_plot(title, args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_devs(args, name, start_date, end_date, data):
|
|
|
- try:
|
|
|
- from fastdtw import fastdtw
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
|
|
|
- sys.exit(1)
|
|
|
- try:
|
|
|
- from ortools.constraint_solver import pywrapcp, routing_enums_pb2
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import ortools: %s\nInstall it from "
|
|
|
- "https://developers.google.com/optimization/install/python/" % e)
|
|
|
- sys.exit(1)
|
|
|
- try:
|
|
|
- from hdbscan import HDBSCAN
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import ortools: %s\nInstall it from "
|
|
|
- "https://developers.google.com/optimization/install/python/" % e)
|
|
|
- sys.exit(1)
|
|
|
+def show_devs(args, name, start_date, end_date, people, days):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
max_people = 50
|
|
|
if len(people) > max_people:
|
|
|
print("Picking top 100 developers by commit count")
|
|
@@ -1268,77 +1253,12 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
chosen_people = {people[k] for _, k in commits[:max_people]}
|
|
|
else:
|
|
|
chosen_people = set(people)
|
|
|
- devseries = defaultdict(list)
|
|
|
- devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
|
|
|
- for day, devs in sorted(days.items()):
|
|
|
- for dev, stats in devs.items():
|
|
|
- if people[dev] in chosen_people:
|
|
|
- devseries[dev].append((day, stats.Commits))
|
|
|
- devstats[dev] = devstats[dev].add(stats)
|
|
|
- print("Calculating the distance matrix")
|
|
|
- # max-normalize the time series using a sliding window
|
|
|
- keys = list(devseries.keys())
|
|
|
- series = list(devseries.values())
|
|
|
- for i, s in enumerate(series):
|
|
|
- arr = numpy.array(s).transpose().astype(numpy.float32)
|
|
|
- commits = arr[1]
|
|
|
- if len(commits) < 7:
|
|
|
- commits /= commits.max()
|
|
|
- else:
|
|
|
- # 4 is sizeof(float32)
|
|
|
- windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
|
|
|
- commits = numpy.concatenate((
|
|
|
- [windows[0, 0] / windows[0].max(),
|
|
|
- windows[0, 1] / windows[0].max(),
|
|
|
- windows[0, 2] / windows[0].max()],
|
|
|
- windows[:, 3] / windows.max(axis=1),
|
|
|
- [windows[-1, 4] / windows[-1].max(),
|
|
|
- windows[-1, 5] / windows[-1].max(),
|
|
|
- windows[-1, 6] / windows[-1].max()]
|
|
|
- ))
|
|
|
- arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to window size
|
|
|
- series[i] = list(arr.transpose())
|
|
|
- # calculate the distance matrix using dynamic time warping metric
|
|
|
- dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
|
|
|
- for x in range(len(series)):
|
|
|
- dists[x, x] = 0
|
|
|
- for y in range(x + 1, len(series)):
|
|
|
- # L1 norm
|
|
|
- dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
|
|
|
- dists[x, y] = dists[y, x] = dist
|
|
|
- # preparation for seriation ordering
|
|
|
- dists[len(series), :] = 0
|
|
|
- dists[:, len(series)] = 0
|
|
|
- assert (dists >= 0).all()
|
|
|
- print("Ordering the series")
|
|
|
- # solve the TSP on the distance matrix
|
|
|
- routing = pywrapcp.RoutingModel(dists.shape[0], 1, len(series))
|
|
|
-
|
|
|
- def dist_callback(x, y):
|
|
|
- # ortools wants integers, so we approximate here
|
|
|
- return int(dists[x][y] * 1000)
|
|
|
-
|
|
|
- routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
|
|
|
- search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
|
|
|
- search_parameters.local_search_metaheuristic = (
|
|
|
- routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
|
|
|
- search_parameters.time_limit_ms = 2000
|
|
|
- assignment = routing.SolveWithParameters(search_parameters)
|
|
|
- index = routing.Start(0)
|
|
|
- route = []
|
|
|
- while not routing.IsEnd(index):
|
|
|
- node = routing.IndexToNode(index)
|
|
|
- if node < len(keys):
|
|
|
- route.append(node)
|
|
|
- index = assignment.Value(routing.NextVar(index))
|
|
|
+ dists, devseries, devstats, route = order_commits(chosen_people, days, people)
|
|
|
route_map = {v: i for i, v in enumerate(route)}
|
|
|
-
|
|
|
# determine clusters
|
|
|
- opt_dist_chain = numpy.cumsum(numpy.array(
|
|
|
- [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
|
|
|
- clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
|
|
|
+ clusters = hdbscan_cluster_routed_series(dists, route)
|
|
|
+ keys = list(devseries.keys())
|
|
|
route = [keys[node] for node in route]
|
|
|
-
|
|
|
print("Plotting")
|
|
|
# smooth time series
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
@@ -1363,12 +1283,17 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
colors = prop_cycle.by_key()["color"]
|
|
|
fig, axes = pyplot.subplots(final.shape[0], 1)
|
|
|
backgrounds = ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
|
|
|
+ max_cluster = numpy.max(clusters)
|
|
|
for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
|
|
|
if cluster >= 0:
|
|
|
color = colors[cluster % len(colors)]
|
|
|
+ i = 1
|
|
|
+ while color == "#777777":
|
|
|
+ color = colors[(max_cluster + i) % len(colors)]
|
|
|
+ i += 1
|
|
|
else:
|
|
|
# outlier
|
|
|
- color = "grey"
|
|
|
+ color = "#777777"
|
|
|
ax.fill_between(plot_x, series, color=color)
|
|
|
ax.set_axis_off()
|
|
|
author = people[dev_i]
|
|
@@ -1407,13 +1332,110 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
axes[-1].set_facecolor((1.0,) * 3 + (0.0,))
|
|
|
|
|
|
title = ("%s commits" % name) if not args.output else ""
|
|
|
- deploy_plot(title, args.output, args.style)
|
|
|
+ deploy_plot(title, args.output, args.background)
|
|
|
+
|
|
|
+
|
|
|
+def order_commits(chosen_people, days, people):
|
|
|
+ try:
|
|
|
+ from fastdtw import fastdtw
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ devseries = defaultdict(list)
|
|
|
+ devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
|
|
|
+ for day, devs in sorted(days.items()):
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ if people[dev] in chosen_people:
|
|
|
+ devseries[dev].append((day, stats.Commits))
|
|
|
+ devstats[dev] = devstats[dev].add(stats)
|
|
|
+ print("Calculating the distance matrix")
|
|
|
+ # max-normalize the time series using a sliding window
|
|
|
+ series = list(devseries.values())
|
|
|
+ for i, s in enumerate(series):
|
|
|
+ arr = numpy.array(s).transpose().astype(numpy.float32)
|
|
|
+ commits = arr[1]
|
|
|
+ if len(commits) < 7:
|
|
|
+ commits /= commits.max()
|
|
|
+ else:
|
|
|
+ # 4 is sizeof(float32)
|
|
|
+ windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
|
|
|
+ commits = numpy.concatenate((
|
|
|
+ [windows[0, 0] / windows[0].max(),
|
|
|
+ windows[0, 1] / windows[0].max(),
|
|
|
+ windows[0, 2] / windows[0].max()],
|
|
|
+ windows[:, 3] / windows.max(axis=1),
|
|
|
+ [windows[-1, 4] / windows[-1].max(),
|
|
|
+ windows[-1, 5] / windows[-1].max(),
|
|
|
+ windows[-1, 6] / windows[-1].max()]
|
|
|
+ ))
|
|
|
+ arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to window size
|
|
|
+ series[i] = list(arr.transpose())
|
|
|
+ # calculate the distance matrix using dynamic time warping metric
|
|
|
+ dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
|
|
|
+ for x in range(len(series)):
|
|
|
+ dists[x, x] = 0
|
|
|
+ for y in range(x + 1, len(series)):
|
|
|
+ # L1 norm
|
|
|
+ dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
|
|
|
+ dists[x, y] = dists[y, x] = dist
|
|
|
+ # preparation for seriation ordering
|
|
|
+ dists[len(series), :] = 0
|
|
|
+ dists[:, len(series)] = 0
|
|
|
+ assert (dists >= 0).all()
|
|
|
+ print("Ordering the series")
|
|
|
+ route = seriate(dists)
|
|
|
+ return dists, devseries, devstats, route
|
|
|
+
|
|
|
+
|
|
|
+def hdbscan_cluster_routed_series(dists, route):
|
|
|
+ try:
|
|
|
+ from hdbscan import HDBSCAN
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import hdbscan: %s\nInstall it from "
|
|
|
+ "https://github.com/scikit-learn-contrib/hdbscan" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ opt_dist_chain = numpy.cumsum(numpy.array(
|
|
|
+ [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
|
|
|
+ clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
|
|
|
+ return clusters
|
|
|
+
|
|
|
+
|
|
|
+def seriate(dists):
|
|
|
+ try:
|
|
|
+ from ortools.constraint_solver import pywrapcp, routing_enums_pb2
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import ortools: %s\nInstall it from "
|
|
|
+ "https://developers.google.com/optimization/install/python/" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ # solve the TSP on the distance matrix
|
|
|
+ routing = pywrapcp.RoutingModel(dists.shape[0], 1, dists.shape[0] - 1)
|
|
|
+
|
|
|
+ def dist_callback(x, y):
|
|
|
+ # ortools wants integers, so we approximate here
|
|
|
+ return int(dists[x][y] * 1000)
|
|
|
+
|
|
|
+ routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
|
|
|
+ search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
|
|
|
+ search_parameters.local_search_metaheuristic = (
|
|
|
+ routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
|
|
|
+ search_parameters.time_limit_ms = 2000
|
|
|
+ assignment = routing.SolveWithParameters(search_parameters)
|
|
|
+ index = routing.Start(0)
|
|
|
+ route = []
|
|
|
+ while not routing.IsEnd(index):
|
|
|
+ node = routing.IndexToNode(index)
|
|
|
+ if node < dists.shape[0] - 1:
|
|
|
+ route.append(node)
|
|
|
+ index = assignment.Value(routing.NextVar(index))
|
|
|
+ return route
|
|
|
|
|
|
|
|
|
-def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
+def show_devs_efforts(args, name, start_date, end_date, people, days, max_people):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
|
start_date = datetime(start_date.year, start_date.month, start_date.day)
|
|
|
end_date = datetime.fromtimestamp(end_date)
|
|
@@ -1434,13 +1456,18 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
|
|
|
efforts = numpy.zeros((len(chosen) + 1, (end_date - start_date).days + 1), dtype=numpy.float32)
|
|
|
for day, devs in days.items():
|
|
|
- for dev, stats in devs.items():
|
|
|
- dev = chosen_order.get(dev, len(chosen_order))
|
|
|
- efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ if day < efforts.shape[1]:
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ dev = chosen_order.get(dev, len(chosen_order))
|
|
|
+ efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ efforts_cum = numpy.cumsum(efforts, axis=1)
|
|
|
window = slepian(10, 0.5)
|
|
|
window /= window.sum()
|
|
|
- for i in range(efforts.shape[0]):
|
|
|
- efforts[i] = convolve(efforts[i], window, "same")
|
|
|
+ for e in (efforts, efforts_cum):
|
|
|
+ for i in range(e.shape[0]):
|
|
|
+ ending = e[i][-len(window) * 2:].copy()
|
|
|
+ e[i] = convolve(e[i], window, "same")
|
|
|
+ e[i][-len(ending):] = ending
|
|
|
matplotlib, pyplot = import_pyplot(args.backend, args.style)
|
|
|
plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]
|
|
|
|
|
@@ -1449,19 +1476,26 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
if len(name) > 40:
|
|
|
people[i] = name[:37] + "..."
|
|
|
|
|
|
- polys = pyplot.stackplot(plot_x, efforts, labels=people)
|
|
|
+ polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
|
|
|
+ if len(polys) == max_people + 1:
|
|
|
+ polys[-1].set_hatch("/")
|
|
|
+ polys = pyplot.stackplot(plot_x, -efforts * efforts_cum.max() / efforts.max())
|
|
|
if len(polys) == max_people + 1:
|
|
|
polys[-1].set_hatch("/")
|
|
|
- legend = pyplot.legend(loc=2, fontsize=args.font_size)
|
|
|
+ yticks = []
|
|
|
+ for tick in pyplot.gca().yaxis.iter_ticks():
|
|
|
+ if tick[1] >= 0:
|
|
|
+ yticks.append(tick[1])
|
|
|
+ pyplot.gca().yaxis.set_ticks(yticks)
|
|
|
+ legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
|
|
|
apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
|
|
|
- args.font_size, args.size)
|
|
|
- deploy_plot("Efforts through time (changed lines of code)", args.output, args.style)
|
|
|
+ args.font_size, args.size or "16,10")
|
|
|
+ deploy_plot("Efforts through time (changed lines of code)", args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_old_vs_new(args, name, start_date, end_date, data):
|
|
|
+def show_old_vs_new(args, name, start_date, end_date, people, days):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
|
start_date = datetime(start_date.year, start_date.month, start_date.day)
|
|
|
end_date = datetime.fromtimestamp(end_date)
|
|
@@ -1483,11 +1517,10 @@ def show_old_vs_new(args, name, start_date, end_date, data):
|
|
|
pyplot.legend(loc=2, fontsize=args.font_size)
|
|
|
for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
|
|
|
tick.label.set_fontsize(args.font_size)
|
|
|
- deploy_plot("Additions vs changes", args.output, args.style)
|
|
|
+ deploy_plot("Additions vs changes", args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_languages(args, name, start_date, end_date, data):
|
|
|
- days, people = data
|
|
|
+def show_languages(args, name, start_date, end_date, people, days):
|
|
|
devlangs = defaultdict(lambda: defaultdict(lambda: numpy.zeros(3, dtype=int)))
|
|
|
for day, devs in days.items():
|
|
|
for dev, stats in devs.items():
|
|
@@ -1503,6 +1536,134 @@ def show_languages(args, name, start_date, end_date, data):
|
|
|
print("%s: %d" % (lang, vals))
|
|
|
|
|
|
|
|
|
+class ParallelDevData:
|
|
|
+ def __init__(self):
|
|
|
+ self.commits_rank = -1
|
|
|
+ self.commits = -1
|
|
|
+ self.lines_rank = -1
|
|
|
+ self.lines = -1
|
|
|
+ self.ownership_rank = -1
|
|
|
+ self.ownership = -1
|
|
|
+ self.couples_index = -1
|
|
|
+ self.couples_cluster = -1
|
|
|
+ self.commit_coocc_index = -1
|
|
|
+ self.commit_coocc_cluster = -1
|
|
|
+
|
|
|
+ def __str__(self):
|
|
|
+ return str(self.__dict__)
|
|
|
+
|
|
|
+ def __repr__(self):
|
|
|
+ return str(self)
|
|
|
+
|
|
|
+
|
|
|
+def load_devs_parallel(ownership, couples, devs, max_people):
|
|
|
+ try:
|
|
|
+ from hdbscan import HDBSCAN
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import hdbscan: %s\nInstall it from "
|
|
|
+ "https://github.com/scikit-learn-contrib/hdbscan" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ people, owned = ownership
|
|
|
+ _, cmatrix = couples
|
|
|
+ _, days = devs
|
|
|
+
|
|
|
+ print("calculating - commits")
|
|
|
+ commits = defaultdict(int)
|
|
|
+ for day, devs in days.items():
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ commits[people[dev]] += stats.Commits
|
|
|
+ chosen = [k for v, k in sorted(((v, k) for k, v in commits.items()),
|
|
|
+ reverse=True)[:max_people]]
|
|
|
+ result = {k: ParallelDevData() for k in chosen}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.commits_rank = chosen.index(k)
|
|
|
+ v.commits = commits[k]
|
|
|
+
|
|
|
+ print("calculating - lines")
|
|
|
+ lines = defaultdict(int)
|
|
|
+ for day, devs in days.items():
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ lines[people[dev]] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ lines_index = {k: i for i, (_, k) in enumerate(sorted(
|
|
|
+ ((v, k) for k, v in lines.items() if k in chosen), reverse=True))}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.lines_rank = lines_index[k]
|
|
|
+ v.lines = lines[k]
|
|
|
+
|
|
|
+ print("calculating - ownership")
|
|
|
+ owned_index = {k: i for i, (_, k) in enumerate(sorted(
|
|
|
+ ((owned[k][-1].sum(), k) for k in chosen), reverse=True))}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.ownership_rank = owned_index[k]
|
|
|
+ v.ownership = owned[k][-1].sum()
|
|
|
+
|
|
|
+ print("calculating - couples")
|
|
|
+ embeddings = numpy.genfromtxt(fname="couples_people_data.tsv", delimiter="\t")[
|
|
|
+ [people.index(k) for k in chosen]]
|
|
|
+ embeddings /= numpy.linalg.norm(embeddings, axis=1)[:, None]
|
|
|
+ cos = embeddings.dot(embeddings.T)
|
|
|
+ cos[cos > 1] = 1 # tiny precision faults
|
|
|
+ dists = numpy.zeros((len(chosen) + 1,) * 2)
|
|
|
+ dists[:len(chosen), :len(chosen)] = numpy.arccos(cos)
|
|
|
+ clusters = HDBSCAN(min_cluster_size=2, metric="precomputed").fit_predict(
|
|
|
+ dists[:len(chosen), :len(chosen)])
|
|
|
+ for k, v in result.items():
|
|
|
+ v.couples_cluster = clusters[chosen.index(k)]
|
|
|
+
|
|
|
+ couples_order = seriate(dists)
|
|
|
+ roll_options = []
|
|
|
+ for i in range(len(couples_order)):
|
|
|
+ loss = 0
|
|
|
+ for k, v in result.items():
|
|
|
+ loss += abs(
|
|
|
+ v.ownership_rank - (couples_order.index(chosen.index(k)) + i) % len(chosen))
|
|
|
+ roll_options.append(loss)
|
|
|
+ best_roll = numpy.argmin(roll_options)
|
|
|
+ couples_order = list(numpy.roll(couples_order, best_roll))
|
|
|
+ for k, v in result.items():
|
|
|
+ v.couples_index = couples_order.index(chosen.index(k))
|
|
|
+
|
|
|
+ print("calculating - commit series")
|
|
|
+ dists, devseries, _, orig_route = order_commits(chosen, days, people)
|
|
|
+ keys = list(devseries.keys())
|
|
|
+ route = [keys[node] for node in orig_route]
|
|
|
+ for roll in range(len(route)):
|
|
|
+ loss = 0
|
|
|
+ for k, v in result.items():
|
|
|
+ i = route.index(people.index(k))
|
|
|
+ loss += abs(v.couples_index - ((i + roll) % len(route)))
|
|
|
+ roll_options[roll] = loss
|
|
|
+ best_roll = numpy.argmin(roll_options)
|
|
|
+ route = list(numpy.roll(route, best_roll))
|
|
|
+ orig_route = list(numpy.roll(orig_route, best_roll))
|
|
|
+ clusters = hdbscan_cluster_routed_series(dists, orig_route)
|
|
|
+ for k, v in result.items():
|
|
|
+ v.commit_coocc_index = route.index(people.index(k))
|
|
|
+ v.commit_coocc_cluster = clusters[v.commit_coocc_index]
|
|
|
+
|
|
|
+ print(result)
|
|
|
+ return result, chosen
|
|
|
+
|
|
|
+
|
|
|
+def show_devs_parallel(args, name, start_date, end_date, data):
|
|
|
+ matplotlib, pyplot = import_pyplot(args.backend, args.style)
|
|
|
+ pyplot.xlim((0, 6))
|
|
|
+ pyplot.ylim((0, 1))
|
|
|
+
|
|
|
+ x = numpy.linspace(0.1, 0.9, 1000)
|
|
|
+ y = numpy.linspace(0.1, 0.9, 1000)
|
|
|
+ points = numpy.array([x, y]).T.reshape(-1, 1, 2)
|
|
|
+ segments = numpy.concatenate([points[:-1], points[1:]], axis=1)
|
|
|
+
|
|
|
+ from matplotlib.collections import LineCollection
|
|
|
+ lc = LineCollection(segments)
|
|
|
+ lc.set_array(numpy.linspace(0, 1, segments.shape[0]))
|
|
|
+ pyplot.gca().add_collection(lc)
|
|
|
+
|
|
|
+ deploy_plot("Developers", args.output, args.background)
|
|
|
+
|
|
|
+
|
|
|
def _format_number(n):
|
|
|
if n == 0:
|
|
|
return "0"
|
|
@@ -1655,7 +1816,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_devs(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_devs(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
|
|
|
def devs_efforts():
|
|
|
try:
|
|
@@ -1663,7 +1824,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_devs_efforts(args, reader.get_name(), *reader.get_header(), data,
|
|
|
+ show_devs_efforts(args, reader.get_name(), *reader.get_header(), *data,
|
|
|
max_people=args.max_people)
|
|
|
|
|
|
def old_vs_new():
|
|
@@ -1672,7 +1833,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_old_vs_new(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
|
|
|
def languages():
|
|
|
try:
|
|
@@ -1680,7 +1841,26 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_languages(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_languages(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
+
|
|
|
+ def devs_parallel():
|
|
|
+ try:
|
|
|
+ ownership = reader.get_ownership_burndown()
|
|
|
+ except KeyError:
|
|
|
+ print(burndown_people_warning)
|
|
|
+ return
|
|
|
+ try:
|
|
|
+ couples = reader.get_people_coocc()
|
|
|
+ except KeyError:
|
|
|
+ print(couples_warning)
|
|
|
+ return
|
|
|
+ try:
|
|
|
+ devs = reader.get_devs()
|
|
|
+ except KeyError:
|
|
|
+ print(devs_warning)
|
|
|
+ return
|
|
|
+ show_devs_parallel(args, reader.get_name(), *reader.get_header(),
|
|
|
+ load_devs_parallel(ownership, couples, devs, args.max_people))
|
|
|
|
|
|
modes = {
|
|
|
"run-times": run_times,
|
|
@@ -1698,6 +1878,7 @@ def main():
|
|
|
"devs-efforts": devs_efforts,
|
|
|
"old-vs-new": old_vs_new,
|
|
|
"languages": languages,
|
|
|
+ "devs-parallel": devs_parallel,
|
|
|
}
|
|
|
try:
|
|
|
modes[args.mode]()
|
|
@@ -1715,6 +1896,7 @@ def main():
|
|
|
sentiment()
|
|
|
devs()
|
|
|
devs_efforts()
|
|
|
+ # devs_parallel()
|
|
|
|
|
|
if web_server.running:
|
|
|
secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
|