瀏覽代碼

Merge pull request #227 from vmarkovtsev/master

Efforts plot
Vadim Markovtsev 6 年之前
父節點
當前提交
a989179fca
共有 3 個文件被更改,包括 320 次插入和 122 次刪除
  1. 16 0
      README.md
  2. 二進制
      doc/k8s_efforts.png
  3. 304 122
      labours.py

+ 16 - 0
README.md

@@ -313,6 +313,22 @@ python3 labours.py -m old-vs-new -o <name>
 `--devs` from the previous section allows to plot how many lines were added and how many existing changed
 `--devs` from the previous section allows to plot how many lines were added and how many existing changed
 (deleted or replaced) through time. This plot is smoothed.
 (deleted or replaced) through time. This plot is smoothed.
 
 
+#### Efforts through time
+
+![kubernetes/kubernetes](doc/k8s_efforts.png)
+<p align="center">kubernetes/kubernetes efforts through time.</p>
+
+```
+hercules --devs [--people-dict=/path/to/identities]
+python3 labours.py -m devs-efforts -o <name>
+```
+
+Besides, `--devs` allows plotting how many lines have been changed (added or removed) by each developer.
+The upper part of the plot is the accumulated (integrated) lower part. It is impossible to use the same scale
+for both parts, so the lower values are scaled, and hence there are no lower Y axis ticks.
+There is a difference between the efforts plot and the ownership plot, although the number of changed lines
+correlates with the number of owned lines.
+
 #### Sentiment (positive and negative code)
 #### Sentiment (positive and negative code)
 
 
 ![Django sentiment](doc/sentiment.png)
 ![Django sentiment](doc/sentiment.png)

二進制
doc/k8s_efforts.png


+ 304 - 122
labours.py

@@ -63,7 +63,8 @@ def parse_args():
                         choices=["burndown-project", "burndown-file", "burndown-person",
                         choices=["burndown-project", "burndown-file", "burndown-person",
                                  "churn-matrix", "ownership", "couples-files", "couples-people",
                                  "churn-matrix", "ownership", "couples-files", "couples-people",
                                  "couples-shotness", "shotness", "sentiment", "devs",
                                  "couples-shotness", "shotness", "sentiment", "devs",
-                                 "devs-efforts", "old-vs-new", "all", "run-times", "languages"],
+                                 "devs-efforts", "old-vs-new", "all", "run-times", "languages",
+                                 "devs-parallel"],
                         help="What to plot.")
                         help="What to plot.")
     parser.add_argument(
     parser.add_argument(
         "--resample", default="year",
         "--resample", default="year",
@@ -240,7 +241,7 @@ class YamlReader(Reader):
         days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
         days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
                          for dev, day in devs.items()}
                          for dev, day in devs.items()}
                 for d, devs in self.data["Devs"]["days"].items()}
                 for d, devs in self.data["Devs"]["days"].items()}
-        return days, people
+        return people, days
 
 
     def _parse_burndown_matrix(self, matrix):
     def _parse_burndown_matrix(self, matrix):
         return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
         return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
@@ -364,7 +365,7 @@ class ProtobufReader(Reader):
                                                       for k, v in stats.languages.items()})
                                                       for k, v in stats.languages.items()})
                     for dev, stats in day.devs.items()}
                     for dev, stats in day.devs.items()}
                 for d, day in self.contents["Devs"].days.items()}
                 for d, day in self.contents["Devs"].days.items()}
-        return days, people
+        return people, days
 
 
     def _parse_burndown_matrix(self, matrix):
     def _parse_burndown_matrix(self, matrix):
         dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
         dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
@@ -698,6 +699,7 @@ def import_pyplot(backend, style):
         matplotlib.use(backend)
         matplotlib.use(backend)
     from matplotlib import pyplot
     from matplotlib import pyplot
     pyplot.style.use(style)
     pyplot.style.use(style)
+    print("matplotlib: backend is", matplotlib.get_backend())
     return matplotlib, pyplot
     return matplotlib, pyplot
 
 
 
 
@@ -738,7 +740,7 @@ def get_plot_path(base, name):
     return output
     return output
 
 
 
 
-def deploy_plot(title, output, background):
+def deploy_plot(title, output, background, tight=True):
     import matplotlib.pyplot as pyplot
     import matplotlib.pyplot as pyplot
 
 
     if not output:
     if not output:
@@ -747,10 +749,11 @@ def deploy_plot(title, output, background):
     else:
     else:
         if title:
         if title:
             pyplot.title(title, color="black" if background == "white" else "white")
             pyplot.title(title, color="black" if background == "white" else "white")
-        try:
-            pyplot.tight_layout()
-        except:  # noqa: E722
-            print("Warning: failed to set the tight layout")
+        if tight:
+            try:
+                pyplot.tight_layout()
+            except:  # noqa: E722
+                print("Warning: failed to set the tight layout")
         pyplot.savefig(output, transparent=True)
         pyplot.savefig(output, transparent=True)
     pyplot.clf()
     pyplot.clf()
 
 
@@ -846,7 +849,7 @@ def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granu
             if target == "project":
             if target == "project":
                 name = "project"
                 name = "project"
             output = get_plot_path(args.output, name)
             output = get_plot_path(args.output, name)
-    deploy_plot(title, output, args.style)
+    deploy_plot(title, output, args.background)
 
 
 
 
 def plot_many_burndown(args, target, header, parts):
 def plot_many_burndown(args, target, header, parts):
@@ -905,7 +908,7 @@ def plot_churn_matrix(args, repo, people, matrix):
     if args.output:
     if args.output:
         # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
         # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
         title = ""
         title = ""
-    deploy_plot(title, output, args.style)
+    deploy_plot(title, output, args.background)
 
 
 
 
 def plot_ownership(args, repo, names, people, date_range, last):
 def plot_ownership(args, repo, names, people, date_range, last):
@@ -942,7 +945,7 @@ def plot_ownership(args, repo, names, people, date_range, last):
         output = get_plot_path(args.output, "people")
         output = get_plot_path(args.output, "people")
     else:
     else:
         output = args.output
         output = args.output
-    deploy_plot("%s code ownership through time" % repo, output, args.style)
+    deploy_plot("%s code ownership through time" % repo, output, args.background)
 
 
 
 
 IDEAL_SHARD_SIZE = 4096
 IDEAL_SHARD_SIZE = 4096
@@ -1117,7 +1120,7 @@ web_server = CORSWebServer()
 def write_embeddings(name, output, run_server, index, embeddings):
 def write_embeddings(name, output, run_server, index, embeddings):
     print("Writing Tensorflow Projector files...")
     print("Writing Tensorflow Projector files...")
     if not output:
     if not output:
-        output = "couples_" + name
+        output = "couples"
     if output.endswith(".json"):
     if output.endswith(".json"):
         output = os.path.join(output[:-5], "couples")
         output = os.path.join(output[:-5], "couples")
         run_server = False
         run_server = False
@@ -1232,30 +1235,12 @@ def show_sentiment_stats(args, name, resample, start_date, data):
     overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
     overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
     title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
     title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
         name, overall_pos, overall_neg, overall_pos - overall_neg)
         name, overall_pos, overall_neg, overall_pos - overall_neg)
-    deploy_plot(title, args.output, args.style)
+    deploy_plot(title, args.output, args.background)
 
 
 
 
-def show_devs(args, name, start_date, end_date, data):
-    try:
-        from fastdtw import fastdtw
-    except ImportError as e:
-        print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
-        sys.exit(1)
-    try:
-        from ortools.constraint_solver import pywrapcp, routing_enums_pb2
-    except ImportError as e:
-        print("Cannot import ortools: %s\nInstall it from "
-              "https://developers.google.com/optimization/install/python/" % e)
-        sys.exit(1)
-    try:
-        from hdbscan import HDBSCAN
-    except ImportError as e:
-        print("Cannot import ortools: %s\nInstall it from "
-              "https://developers.google.com/optimization/install/python/" % e)
-        sys.exit(1)
+def show_devs(args, name, start_date, end_date, people, days):
     from scipy.signal import convolve, slepian
     from scipy.signal import convolve, slepian
 
 
-    days, people = data
     max_people = 50
     max_people = 50
     if len(people) > max_people:
     if len(people) > max_people:
         print("Picking top 100 developers by commit count")
         print("Picking top 100 developers by commit count")
@@ -1268,77 +1253,12 @@ def show_devs(args, name, start_date, end_date, data):
         chosen_people = {people[k] for _, k in commits[:max_people]}
         chosen_people = {people[k] for _, k in commits[:max_people]}
     else:
     else:
         chosen_people = set(people)
         chosen_people = set(people)
-    devseries = defaultdict(list)
-    devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
-    for day, devs in sorted(days.items()):
-        for dev, stats in devs.items():
-            if people[dev] in chosen_people:
-                devseries[dev].append((day, stats.Commits))
-                devstats[dev] = devstats[dev].add(stats)
-    print("Calculating the distance matrix")
-    # max-normalize the time series using a sliding window
-    keys = list(devseries.keys())
-    series = list(devseries.values())
-    for i, s in enumerate(series):
-        arr = numpy.array(s).transpose().astype(numpy.float32)
-        commits = arr[1]
-        if len(commits) < 7:
-            commits /= commits.max()
-        else:
-            # 4 is sizeof(float32)
-            windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
-            commits = numpy.concatenate((
-                [windows[0, 0] / windows[0].max(),
-                 windows[0, 1] / windows[0].max(),
-                 windows[0, 2] / windows[0].max()],
-                windows[:, 3] / windows.max(axis=1),
-                [windows[-1, 4] / windows[-1].max(),
-                 windows[-1, 5] / windows[-1].max(),
-                 windows[-1, 6] / windows[-1].max()]
-            ))
-        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to window size
-        series[i] = list(arr.transpose())
-    # calculate the distance matrix using dynamic time warping metric
-    dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
-    for x in range(len(series)):
-        dists[x, x] = 0
-        for y in range(x + 1, len(series)):
-            # L1 norm
-            dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
-            dists[x, y] = dists[y, x] = dist
-    # preparation for seriation ordering
-    dists[len(series), :] = 0
-    dists[:, len(series)] = 0
-    assert (dists >= 0).all()
-    print("Ordering the series")
-    # solve the TSP on the distance matrix
-    routing = pywrapcp.RoutingModel(dists.shape[0], 1, len(series))
-
-    def dist_callback(x, y):
-        # ortools wants integers, so we approximate here
-        return int(dists[x][y] * 1000)
-
-    routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
-    search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
-    search_parameters.local_search_metaheuristic = (
-        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
-    search_parameters.time_limit_ms = 2000
-    assignment = routing.SolveWithParameters(search_parameters)
-    index = routing.Start(0)
-    route = []
-    while not routing.IsEnd(index):
-        node = routing.IndexToNode(index)
-        if node < len(keys):
-            route.append(node)
-        index = assignment.Value(routing.NextVar(index))
+    dists, devseries, devstats, route = order_commits(chosen_people, days, people)
     route_map = {v: i for i, v in enumerate(route)}
     route_map = {v: i for i, v in enumerate(route)}
-
     # determine clusters
     # determine clusters
-    opt_dist_chain = numpy.cumsum(numpy.array(
-        [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
-    clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
+    clusters = hdbscan_cluster_routed_series(dists, route)
+    keys = list(devseries.keys())
     route = [keys[node] for node in route]
     route = [keys[node] for node in route]
-
     print("Plotting")
     print("Plotting")
     # smooth time series
     # smooth time series
     start_date = datetime.fromtimestamp(start_date)
     start_date = datetime.fromtimestamp(start_date)
@@ -1363,12 +1283,17 @@ def show_devs(args, name, start_date, end_date, data):
     colors = prop_cycle.by_key()["color"]
     colors = prop_cycle.by_key()["color"]
     fig, axes = pyplot.subplots(final.shape[0], 1)
     fig, axes = pyplot.subplots(final.shape[0], 1)
     backgrounds = ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
     backgrounds = ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
+    max_cluster = numpy.max(clusters)
     for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
     for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
         if cluster >= 0:
         if cluster >= 0:
             color = colors[cluster % len(colors)]
             color = colors[cluster % len(colors)]
+            i = 1
+            while color == "#777777":
+                color = colors[(max_cluster + i) % len(colors)]
+                i += 1
         else:
         else:
             # outlier
             # outlier
-            color = "grey"
+            color = "#777777"
         ax.fill_between(plot_x, series, color=color)
         ax.fill_between(plot_x, series, color=color)
         ax.set_axis_off()
         ax.set_axis_off()
         author = people[dev_i]
         author = people[dev_i]
@@ -1407,13 +1332,110 @@ def show_devs(args, name, start_date, end_date, data):
     axes[-1].set_facecolor((1.0,) * 3 + (0.0,))
     axes[-1].set_facecolor((1.0,) * 3 + (0.0,))
 
 
     title = ("%s commits" % name) if not args.output else ""
     title = ("%s commits" % name) if not args.output else ""
-    deploy_plot(title, args.output, args.style)
+    deploy_plot(title, args.output, args.background)
+
+
+def order_commits(chosen_people, days, people):
+    try:
+        from fastdtw import fastdtw
+    except ImportError as e:
+        print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
+        sys.exit(1)
+
+    devseries = defaultdict(list)
+    devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
+    for day, devs in sorted(days.items()):
+        for dev, stats in devs.items():
+            if people[dev] in chosen_people:
+                devseries[dev].append((day, stats.Commits))
+                devstats[dev] = devstats[dev].add(stats)
+    print("Calculating the distance matrix")
+    # max-normalize the time series using a sliding window
+    series = list(devseries.values())
+    for i, s in enumerate(series):
+        arr = numpy.array(s).transpose().astype(numpy.float32)
+        commits = arr[1]
+        if len(commits) < 7:
+            commits /= commits.max()
+        else:
+            # 4 is sizeof(float32)
+            windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
+            commits = numpy.concatenate((
+                [windows[0, 0] / windows[0].max(),
+                 windows[0, 1] / windows[0].max(),
+                 windows[0, 2] / windows[0].max()],
+                windows[:, 3] / windows.max(axis=1),
+                [windows[-1, 4] / windows[-1].max(),
+                 windows[-1, 5] / windows[-1].max(),
+                 windows[-1, 6] / windows[-1].max()]
+            ))
+        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to window size
+        series[i] = list(arr.transpose())
+    # calculate the distance matrix using dynamic time warping metric
+    dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
+    for x in range(len(series)):
+        dists[x, x] = 0
+        for y in range(x + 1, len(series)):
+            # L1 norm
+            dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
+            dists[x, y] = dists[y, x] = dist
+    # preparation for seriation ordering
+    dists[len(series), :] = 0
+    dists[:, len(series)] = 0
+    assert (dists >= 0).all()
+    print("Ordering the series")
+    route = seriate(dists)
+    return dists, devseries, devstats, route
+
+
def hdbscan_cluster_routed_series(dists, route):
    """Cluster the seriated series by HDBSCAN over the cumulative route distance.

    :param dists: pairwise distance matrix.
    :param route: seriated order of the series (indices into ``dists``).
    :return: array of cluster labels, one per route element (-1 for outliers).
    """
    try:
        from hdbscan import HDBSCAN
    except ImportError as e:
        # BUG FIX: the message used to mention ortools / the Google
        # optimization install page instead of hdbscan.
        print("Cannot import hdbscan: %s\nInstall it with: pip install hdbscan" % e)
        sys.exit(1)

    # Project the route onto a 1-D chain of accumulated hop distances and
    # cluster the chain positions.
    opt_dist_chain = numpy.cumsum(numpy.array(
        [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
    clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
    return clusters
+
+
def seriate(dists):
    """Order items by solving a travelling salesman problem on ``dists``.

    The last row/column of ``dists`` acts as the depot node and is excluded
    from the returned route.

    :param dists: square distance matrix; shape (n + 1, n + 1) for n items.
    :return: list of item indices in visiting order.
    """
    try:
        from ortools.constraint_solver import pywrapcp, routing_enums_pb2
    except ImportError as e:
        print("Cannot import ortools: %s\nInstall it from "
              "https://developers.google.com/optimization/install/python/" % e)
        sys.exit(1)

    size = dists.shape[0]
    # Solve the TSP on the distance matrix: one vehicle, depot at the last
    # (virtual) node.
    routing = pywrapcp.RoutingModel(size, 1, size - 1)

    def dist_callback(x, y):
        # ortools wants integers, so we approximate here
        return int(dists[x][y] * 1000)

    routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
    params = pywrapcp.RoutingModel.DefaultSearchParameters()
    params.local_search_metaheuristic = (
        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    params.time_limit_ms = 2000
    assignment = routing.SolveWithParameters(params)

    route = []
    index = routing.Start(0)
    while not routing.IsEnd(index):
        node = routing.IndexToNode(index)
        if node < size - 1:  # drop the virtual depot from the result
            route.append(node)
        index = assignment.Value(routing.NextVar(index))
    return route
 
 
 
 
-def show_devs_efforts(args, name, start_date, end_date, data, max_people):
+def show_devs_efforts(args, name, start_date, end_date, people, days, max_people):
     from scipy.signal import convolve, slepian
     from scipy.signal import convolve, slepian
 
 
-    days, people = data
     start_date = datetime.fromtimestamp(start_date)
     start_date = datetime.fromtimestamp(start_date)
     start_date = datetime(start_date.year, start_date.month, start_date.day)
     start_date = datetime(start_date.year, start_date.month, start_date.day)
     end_date = datetime.fromtimestamp(end_date)
     end_date = datetime.fromtimestamp(end_date)
@@ -1434,13 +1456,18 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
 
 
     efforts = numpy.zeros((len(chosen) + 1, (end_date - start_date).days + 1), dtype=numpy.float32)
     efforts = numpy.zeros((len(chosen) + 1, (end_date - start_date).days + 1), dtype=numpy.float32)
     for day, devs in days.items():
     for day, devs in days.items():
-        for dev, stats in devs.items():
-            dev = chosen_order.get(dev, len(chosen_order))
-            efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
+        if day < efforts.shape[1]:
+            for dev, stats in devs.items():
+                dev = chosen_order.get(dev, len(chosen_order))
+                efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
+    efforts_cum = numpy.cumsum(efforts, axis=1)
     window = slepian(10, 0.5)
     window = slepian(10, 0.5)
     window /= window.sum()
     window /= window.sum()
-    for i in range(efforts.shape[0]):
-        efforts[i] = convolve(efforts[i], window, "same")
+    for e in (efforts, efforts_cum):
+        for i in range(e.shape[0]):
+            ending = e[i][-len(window) * 2:].copy()
+            e[i] = convolve(e[i], window, "same")
+            e[i][-len(ending):] = ending
     matplotlib, pyplot = import_pyplot(args.backend, args.style)
     matplotlib, pyplot = import_pyplot(args.backend, args.style)
     plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]
     plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]
 
 
@@ -1449,19 +1476,26 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
         if len(name) > 40:
         if len(name) > 40:
             people[i] = name[:37] + "..."
             people[i] = name[:37] + "..."
 
 
-    polys = pyplot.stackplot(plot_x, efforts, labels=people)
+    polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
+    if len(polys) == max_people + 1:
+        polys[-1].set_hatch("/")
+    polys = pyplot.stackplot(plot_x, -efforts * efforts_cum.max() / efforts.max())
     if len(polys) == max_people + 1:
     if len(polys) == max_people + 1:
         polys[-1].set_hatch("/")
         polys[-1].set_hatch("/")
-    legend = pyplot.legend(loc=2, fontsize=args.font_size)
+    yticks = []
+    for tick in pyplot.gca().yaxis.iter_ticks():
+        if tick[1] >= 0:
+            yticks.append(tick[1])
+    pyplot.gca().yaxis.set_ticks(yticks)
+    legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
     apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
     apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
-                     args.font_size, args.size)
-    deploy_plot("Efforts through time (changed lines of code)", args.output, args.style)
+                     args.font_size, args.size or "16,10")
+    deploy_plot("Efforts through time (changed lines of code)", args.output, args.background)
 
 
 
 
-def show_old_vs_new(args, name, start_date, end_date, data):
+def show_old_vs_new(args, name, start_date, end_date, people, days):
     from scipy.signal import convolve, slepian
     from scipy.signal import convolve, slepian
 
 
-    days, people = data
     start_date = datetime.fromtimestamp(start_date)
     start_date = datetime.fromtimestamp(start_date)
     start_date = datetime(start_date.year, start_date.month, start_date.day)
     start_date = datetime(start_date.year, start_date.month, start_date.day)
     end_date = datetime.fromtimestamp(end_date)
     end_date = datetime.fromtimestamp(end_date)
@@ -1483,11 +1517,10 @@ def show_old_vs_new(args, name, start_date, end_date, data):
     pyplot.legend(loc=2, fontsize=args.font_size)
     pyplot.legend(loc=2, fontsize=args.font_size)
     for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
     for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
         tick.label.set_fontsize(args.font_size)
         tick.label.set_fontsize(args.font_size)
-    deploy_plot("Additions vs changes", args.output, args.style)
+    deploy_plot("Additions vs changes", args.output, args.background)
 
 
 
 
-def show_languages(args, name, start_date, end_date, data):
-    days, people = data
+def show_languages(args, name, start_date, end_date, people, days):
     devlangs = defaultdict(lambda: defaultdict(lambda: numpy.zeros(3, dtype=int)))
     devlangs = defaultdict(lambda: defaultdict(lambda: numpy.zeros(3, dtype=int)))
     for day, devs in days.items():
     for day, devs in days.items():
         for dev, stats in devs.items():
         for dev, stats in devs.items():
@@ -1503,6 +1536,134 @@ def show_languages(args, name, start_date, end_date, data):
                 print("%s: %d" % (lang, vals))
                 print("%s: %d" % (lang, vals))
 
 
 
 
class ParallelDevData:
    """Per-developer metrics gathered for the parallel coordinates plot.

    Every attribute starts at -1, meaning "not calculated yet":
    rank/value pairs for commits, changed lines, and code ownership, plus
    positions and cluster labels from the couples and commit co-occurrence
    seriations.
    """

    _FIELDS = ("commits_rank", "commits", "lines_rank", "lines",
               "ownership_rank", "ownership", "couples_index",
               "couples_cluster", "commit_coocc_index", "commit_coocc_cluster")

    def __init__(self):
        for field in self._FIELDS:
            setattr(self, field, -1)

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return str(self)
+
+
def load_devs_parallel(ownership, couples, devs, max_people):
    """Aggregate the data needed by the parallel coordinates developers plot.

    :param ownership: (people, owned) pair from the ownership burndown reader.
    :param couples: (index, matrix) pair from the people co-occurrence reader
                    (the matrix itself is currently unused).
    :param devs: (people, days) pair from the devs reader.
    :param max_people: cap on the number of developers, chosen by commit count.
    :return: tuple of ({developer name -> ParallelDevData}, chosen names).
    """
    try:
        from hdbscan import HDBSCAN
    except ImportError as e:
        # BUG FIX: the message used to mention ortools instead of hdbscan.
        print("Cannot import hdbscan: %s\nInstall it with: pip install hdbscan" % e)
        sys.exit(1)

    people, owned = ownership
    _, cmatrix = couples
    _, days = devs

    print("calculating - commits")
    commits = defaultdict(int)
    # Renamed the loop variable: it used to shadow the `devs` parameter.
    for day, day_devs in days.items():
        for dev, stats in day_devs.items():
            commits[people[dev]] += stats.Commits
    chosen = [k for v, k in sorted(((v, k) for k, v in commits.items()),
                                   reverse=True)[:max_people]]
    result = {k: ParallelDevData() for k in chosen}
    for k, v in result.items():
        v.commits_rank = chosen.index(k)
        v.commits = commits[k]

    print("calculating - lines")
    lines = defaultdict(int)
    for day, day_devs in days.items():
        for dev, stats in day_devs.items():
            lines[people[dev]] += stats.Added + stats.Removed + stats.Changed
    lines_index = {k: i for i, (_, k) in enumerate(sorted(
        ((v, k) for k, v in lines.items() if k in chosen), reverse=True))}
    for k, v in result.items():
        v.lines_rank = lines_index[k]
        v.lines = lines[k]

    print("calculating - ownership")
    owned_index = {k: i for i, (_, k) in enumerate(sorted(
        ((owned[k][-1].sum(), k) for k in chosen), reverse=True))}
    for k, v in result.items():
        v.ownership_rank = owned_index[k]
        v.ownership = owned[k][-1].sum()

    print("calculating - couples")
    # NOTE(review): assumes "couples_people_data.tsv" (written by the couples
    # mode) exists in the current directory - TODO confirm and report a clear
    # error otherwise.
    embeddings = numpy.genfromtxt(fname="couples_people_data.tsv", delimiter="\t")[
        [people.index(k) for k in chosen]]
    embeddings /= numpy.linalg.norm(embeddings, axis=1)[:, None]
    cos = embeddings.dot(embeddings.T)
    cos[cos > 1] = 1  # tiny precision faults
    # Angular distance matrix with a zeroed virtual depot row/column for seriate().
    dists = numpy.zeros((len(chosen) + 1,) * 2)
    dists[:len(chosen), :len(chosen)] = numpy.arccos(cos)
    clusters = HDBSCAN(min_cluster_size=2, metric="precomputed").fit_predict(
        dists[:len(chosen), :len(chosen)])
    for k, v in result.items():
        v.couples_cluster = clusters[chosen.index(k)]

    couples_order = seriate(dists)
    # Pick the cyclic shift of the order which best matches the ownership ranks.
    roll_options = []
    for i in range(len(couples_order)):
        loss = 0
        for k, v in result.items():
            loss += abs(
                v.ownership_rank - (couples_order.index(chosen.index(k)) + i) % len(chosen))
        roll_options.append(loss)
    best_roll = numpy.argmin(roll_options)
    couples_order = list(numpy.roll(couples_order, best_roll))
    for k, v in result.items():
        v.couples_index = couples_order.index(chosen.index(k))

    print("calculating - commit series")
    dists, devseries, _, orig_route = order_commits(chosen, days, people)
    keys = list(devseries.keys())
    route = [keys[node] for node in orig_route]
    # BUG FIX: start from a fresh list instead of writing into the leftover
    # couples roll_options, whose length may differ from len(route).
    roll_options = []
    for roll in range(len(route)):
        loss = 0
        for k, v in result.items():
            i = route.index(people.index(k))
            loss += abs(v.couples_index - ((i + roll) % len(route)))
        roll_options.append(loss)
    best_roll = numpy.argmin(roll_options)
    route = list(numpy.roll(route, best_roll))
    orig_route = list(numpy.roll(orig_route, best_roll))
    clusters = hdbscan_cluster_routed_series(dists, orig_route)
    for k, v in result.items():
        pos = route.index(people.index(k))
        v.commit_coocc_index = pos
        # BUG FIX: the cluster label used to overwrite commit_coocc_index,
        # leaving commit_coocc_cluster never assigned.
        v.commit_coocc_cluster = clusters[pos]

    print(result)
    return result, chosen
+
+
def show_devs_parallel(args, name, start_date, end_date, data):
    """Render the developers parallel coordinates plot.

    NOTE(review): this currently draws only a placeholder color-mapped
    diagonal line; ``name``, ``start_date``, ``end_date`` and ``data`` are
    not used yet.
    """
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    pyplot.xlim((0, 6))
    pyplot.ylim((0, 1))

    # Build a diagonal polyline as consecutive two-point segments so that
    # each segment can carry its own colormap value.
    xs = numpy.linspace(0.1, 0.9, 1000)
    ys = numpy.linspace(0.1, 0.9, 1000)
    points = numpy.stack((xs, ys), axis=1).reshape(-1, 1, 2)
    segments = numpy.concatenate([points[:-1], points[1:]], axis=1)

    from matplotlib.collections import LineCollection
    collection = LineCollection(segments)
    collection.set_array(numpy.linspace(0, 1, segments.shape[0]))
    pyplot.gca().add_collection(collection)

    deploy_plot("Developers", args.output, args.background)
+
+
 def _format_number(n):
 def _format_number(n):
     if n == 0:
     if n == 0:
         return "0"
         return "0"
@@ -1655,7 +1816,7 @@ def main():
         except KeyError:
         except KeyError:
             print(devs_warning)
             print(devs_warning)
             return
             return
-        show_devs(args, reader.get_name(), *reader.get_header(), data)
+        show_devs(args, reader.get_name(), *reader.get_header(), *data)
 
 
     def devs_efforts():
     def devs_efforts():
         try:
         try:
@@ -1663,7 +1824,7 @@ def main():
         except KeyError:
         except KeyError:
             print(devs_warning)
             print(devs_warning)
             return
             return
-        show_devs_efforts(args, reader.get_name(), *reader.get_header(), data,
+        show_devs_efforts(args, reader.get_name(), *reader.get_header(), *data,
                           max_people=args.max_people)
                           max_people=args.max_people)
 
 
     def old_vs_new():
     def old_vs_new():
@@ -1672,7 +1833,7 @@ def main():
         except KeyError:
         except KeyError:
             print(devs_warning)
             print(devs_warning)
             return
             return
-        show_old_vs_new(args, reader.get_name(), *reader.get_header(), data)
+        show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)
 
 
     def languages():
     def languages():
         try:
         try:
@@ -1680,7 +1841,26 @@ def main():
         except KeyError:
         except KeyError:
             print(devs_warning)
             print(devs_warning)
             return
             return
-        show_languages(args, reader.get_name(), *reader.get_header(), data)
+        show_languages(args, reader.get_name(), *reader.get_header(), *data)
+
+    def devs_parallel():
+        try:
+            ownership = reader.get_ownership_burndown()
+        except KeyError:
+            print(burndown_people_warning)
+            return
+        try:
+            couples = reader.get_people_coocc()
+        except KeyError:
+            print(couples_warning)
+            return
+        try:
+            devs = reader.get_devs()
+        except KeyError:
+            print(devs_warning)
+            return
+        show_devs_parallel(args, reader.get_name(), *reader.get_header(),
+                           load_devs_parallel(ownership, couples, devs, args.max_people))
 
 
     modes = {
     modes = {
         "run-times": run_times,
         "run-times": run_times,
@@ -1698,6 +1878,7 @@ def main():
         "devs-efforts": devs_efforts,
         "devs-efforts": devs_efforts,
         "old-vs-new": old_vs_new,
         "old-vs-new": old_vs_new,
         "languages": languages,
         "languages": languages,
+        "devs-parallel": devs_parallel,
     }
     }
     try:
     try:
         modes[args.mode]()
         modes[args.mode]()
@@ -1715,6 +1896,7 @@ def main():
         sentiment()
         sentiment()
         devs()
         devs()
         devs_efforts()
         devs_efforts()
+        # devs_parallel()
 
 
     if web_server.running:
     if web_server.running:
         secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
         secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))