|
@@ -63,7 +63,8 @@ def parse_args():
|
|
|
choices=["burndown-project", "burndown-file", "burndown-person",
|
|
|
"churn-matrix", "ownership", "couples-files", "couples-people",
|
|
|
"couples-shotness", "shotness", "sentiment", "devs",
|
|
|
- "devs-efforts", "old-vs-new", "all", "run-times", "languages"],
|
|
|
+ "devs-efforts", "old-vs-new", "all", "run-times", "languages",
|
|
|
+ "devs-parallel"],
|
|
|
help="What to plot.")
|
|
|
parser.add_argument(
|
|
|
"--resample", default="year",
|
|
@@ -240,7 +241,7 @@ class YamlReader(Reader):
|
|
|
days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
|
|
|
for dev, day in devs.items()}
|
|
|
for d, devs in self.data["Devs"]["days"].items()}
|
|
|
- return days, people
|
|
|
+ return people, days
|
|
|
|
|
|
def _parse_burndown_matrix(self, matrix):
|
|
|
return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
|
|
@@ -364,7 +365,7 @@ class ProtobufReader(Reader):
|
|
|
for k, v in stats.languages.items()})
|
|
|
for dev, stats in day.devs.items()}
|
|
|
for d, day in self.contents["Devs"].days.items()}
|
|
|
- return days, people
|
|
|
+ return people, days
|
|
|
|
|
|
def _parse_burndown_matrix(self, matrix):
|
|
|
dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
|
|
@@ -698,6 +699,7 @@ def import_pyplot(backend, style):
|
|
|
matplotlib.use(backend)
|
|
|
from matplotlib import pyplot
|
|
|
pyplot.style.use(style)
|
|
|
+ print("matplotlib: backend is", matplotlib.get_backend())
|
|
|
return matplotlib, pyplot
|
|
|
|
|
|
|
|
@@ -738,7 +740,7 @@ def get_plot_path(base, name):
|
|
|
return output
|
|
|
|
|
|
|
|
|
-def deploy_plot(title, output, background):
|
|
|
+def deploy_plot(title, output, background, tight=True):
|
|
|
import matplotlib.pyplot as pyplot
|
|
|
|
|
|
if not output:
|
|
@@ -747,10 +749,11 @@ def deploy_plot(title, output, background):
|
|
|
else:
|
|
|
if title:
|
|
|
pyplot.title(title, color="black" if background == "white" else "white")
|
|
|
- try:
|
|
|
- pyplot.tight_layout()
|
|
|
- except: # noqa: E722
|
|
|
- print("Warning: failed to set the tight layout")
|
|
|
+ if tight:
|
|
|
+ try:
|
|
|
+ pyplot.tight_layout()
|
|
|
+ except: # noqa: E722
|
|
|
+ print("Warning: failed to set the tight layout")
|
|
|
pyplot.savefig(output, transparent=True)
|
|
|
pyplot.clf()
|
|
|
|
|
@@ -846,7 +849,7 @@ def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granu
|
|
|
if target == "project":
|
|
|
name = "project"
|
|
|
output = get_plot_path(args.output, name)
|
|
|
- deploy_plot(title, output, args.style)
|
|
|
+ deploy_plot(title, output, args.background)
|
|
|
|
|
|
|
|
|
def plot_many_burndown(args, target, header, parts):
|
|
@@ -905,7 +908,7 @@ def plot_churn_matrix(args, repo, people, matrix):
|
|
|
if args.output:
|
|
|
# FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
|
|
|
title = ""
|
|
|
- deploy_plot(title, output, args.style)
|
|
|
+ deploy_plot(title, output, args.background)
|
|
|
|
|
|
|
|
|
def plot_ownership(args, repo, names, people, date_range, last):
|
|
@@ -942,7 +945,7 @@ def plot_ownership(args, repo, names, people, date_range, last):
|
|
|
output = get_plot_path(args.output, "people")
|
|
|
else:
|
|
|
output = args.output
|
|
|
- deploy_plot("%s code ownership through time" % repo, output, args.style)
|
|
|
+ deploy_plot("%s code ownership through time" % repo, output, args.background)
|
|
|
|
|
|
|
|
|
IDEAL_SHARD_SIZE = 4096
|
|
@@ -1117,7 +1120,7 @@ web_server = CORSWebServer()
|
|
|
def write_embeddings(name, output, run_server, index, embeddings):
|
|
|
print("Writing Tensorflow Projector files...")
|
|
|
if not output:
|
|
|
- output = "couples_" + name
|
|
|
+ output = "couples"
|
|
|
if output.endswith(".json"):
|
|
|
output = os.path.join(output[:-5], "couples")
|
|
|
run_server = False
|
|
@@ -1232,30 +1235,12 @@ def show_sentiment_stats(args, name, resample, start_date, data):
|
|
|
overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
|
|
|
title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
|
|
|
name, overall_pos, overall_neg, overall_pos - overall_neg)
|
|
|
- deploy_plot(title, args.output, args.style)
|
|
|
+ deploy_plot(title, args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_devs(args, name, start_date, end_date, data):
|
|
|
- try:
|
|
|
- from fastdtw import fastdtw
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
|
|
|
- sys.exit(1)
|
|
|
- try:
|
|
|
- from ortools.constraint_solver import pywrapcp, routing_enums_pb2
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import ortools: %s\nInstall it from "
|
|
|
- "https://developers.google.com/optimization/install/python/" % e)
|
|
|
- sys.exit(1)
|
|
|
- try:
|
|
|
- from hdbscan import HDBSCAN
|
|
|
- except ImportError as e:
|
|
|
- print("Cannot import ortools: %s\nInstall it from "
|
|
|
- "https://developers.google.com/optimization/install/python/" % e)
|
|
|
- sys.exit(1)
|
|
|
+def show_devs(args, name, start_date, end_date, people, days):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
max_people = 50
|
|
|
if len(people) > max_people:
|
|
|
print("Picking top 100 developers by commit count")
|
|
@@ -1268,77 +1253,12 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
chosen_people = {people[k] for _, k in commits[:max_people]}
|
|
|
else:
|
|
|
chosen_people = set(people)
|
|
|
- devseries = defaultdict(list)
|
|
|
- devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
|
|
|
- for day, devs in sorted(days.items()):
|
|
|
- for dev, stats in devs.items():
|
|
|
- if people[dev] in chosen_people:
|
|
|
- devseries[dev].append((day, stats.Commits))
|
|
|
- devstats[dev] = devstats[dev].add(stats)
|
|
|
- print("Calculating the distance matrix")
|
|
|
- # max-normalize the time series using a sliding window
|
|
|
- keys = list(devseries.keys())
|
|
|
- series = list(devseries.values())
|
|
|
- for i, s in enumerate(series):
|
|
|
- arr = numpy.array(s).transpose().astype(numpy.float32)
|
|
|
- commits = arr[1]
|
|
|
- if len(commits) < 7:
|
|
|
- commits /= commits.max()
|
|
|
- else:
|
|
|
- # 4 is sizeof(float32)
|
|
|
- windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
|
|
|
- commits = numpy.concatenate((
|
|
|
- [windows[0, 0] / windows[0].max(),
|
|
|
- windows[0, 1] / windows[0].max(),
|
|
|
- windows[0, 2] / windows[0].max()],
|
|
|
- windows[:, 3] / windows.max(axis=1),
|
|
|
- [windows[-1, 4] / windows[-1].max(),
|
|
|
- windows[-1, 5] / windows[-1].max(),
|
|
|
- windows[-1, 6] / windows[-1].max()]
|
|
|
- ))
|
|
|
- arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to window size
|
|
|
- series[i] = list(arr.transpose())
|
|
|
- # calculate the distance matrix using dynamic time warping metric
|
|
|
- dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
|
|
|
- for x in range(len(series)):
|
|
|
- dists[x, x] = 0
|
|
|
- for y in range(x + 1, len(series)):
|
|
|
- # L1 norm
|
|
|
- dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
|
|
|
- dists[x, y] = dists[y, x] = dist
|
|
|
- # preparation for seriation ordering
|
|
|
- dists[len(series), :] = 0
|
|
|
- dists[:, len(series)] = 0
|
|
|
- assert (dists >= 0).all()
|
|
|
- print("Ordering the series")
|
|
|
- # solve the TSP on the distance matrix
|
|
|
- routing = pywrapcp.RoutingModel(dists.shape[0], 1, len(series))
|
|
|
-
|
|
|
- def dist_callback(x, y):
|
|
|
- # ortools wants integers, so we approximate here
|
|
|
- return int(dists[x][y] * 1000)
|
|
|
-
|
|
|
- routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
|
|
|
- search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
|
|
|
- search_parameters.local_search_metaheuristic = (
|
|
|
- routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
|
|
|
- search_parameters.time_limit_ms = 2000
|
|
|
- assignment = routing.SolveWithParameters(search_parameters)
|
|
|
- index = routing.Start(0)
|
|
|
- route = []
|
|
|
- while not routing.IsEnd(index):
|
|
|
- node = routing.IndexToNode(index)
|
|
|
- if node < len(keys):
|
|
|
- route.append(node)
|
|
|
- index = assignment.Value(routing.NextVar(index))
|
|
|
+ dists, devseries, devstats, route = order_commits(chosen_people, days, people)
|
|
|
route_map = {v: i for i, v in enumerate(route)}
|
|
|
-
|
|
|
# determine clusters
|
|
|
- opt_dist_chain = numpy.cumsum(numpy.array(
|
|
|
- [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
|
|
|
- clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
|
|
|
+ clusters = hdbscan_cluster_routed_series(dists, route)
|
|
|
+ keys = list(devseries.keys())
|
|
|
route = [keys[node] for node in route]
|
|
|
-
|
|
|
print("Plotting")
|
|
|
# smooth time series
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
@@ -1363,12 +1283,17 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
colors = prop_cycle.by_key()["color"]
|
|
|
fig, axes = pyplot.subplots(final.shape[0], 1)
|
|
|
backgrounds = ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
|
|
|
+ max_cluster = numpy.max(clusters)
|
|
|
for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
|
|
|
if cluster >= 0:
|
|
|
color = colors[cluster % len(colors)]
|
|
|
+ i = 1
|
|
|
+ while color == "#777777":
|
|
|
+ color = colors[(max_cluster + i) % len(colors)]
|
|
|
+ i += 1
|
|
|
else:
|
|
|
# outlier
|
|
|
- color = "grey"
|
|
|
+ color = "#777777"
|
|
|
ax.fill_between(plot_x, series, color=color)
|
|
|
ax.set_axis_off()
|
|
|
author = people[dev_i]
|
|
@@ -1407,13 +1332,110 @@ def show_devs(args, name, start_date, end_date, data):
|
|
|
axes[-1].set_facecolor((1.0,) * 3 + (0.0,))
|
|
|
|
|
|
title = ("%s commits" % name) if not args.output else ""
|
|
|
- deploy_plot(title, args.output, args.style)
|
|
|
+ deploy_plot(title, args.output, args.background)
|
|
|
+
|
|
|
+
|
|
|
+def order_commits(chosen_people, days, people):
|
|
|
+ try:
|
|
|
+ from fastdtw import fastdtw
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ devseries = defaultdict(list)
|
|
|
+ devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
|
|
|
+ for day, devs in sorted(days.items()):
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ if people[dev] in chosen_people:
|
|
|
+ devseries[dev].append((day, stats.Commits))
|
|
|
+ devstats[dev] = devstats[dev].add(stats)
|
|
|
+ print("Calculating the distance matrix")
|
|
|
+ # max-normalize the time series using a sliding window
|
|
|
+ series = list(devseries.values())
|
|
|
+ for i, s in enumerate(series):
|
|
|
+ arr = numpy.array(s).transpose().astype(numpy.float32)
|
|
|
+ commits = arr[1]
|
|
|
+ if len(commits) < 7:
|
|
|
+ commits /= commits.max()
|
|
|
+ else:
|
|
|
+ # 4 is sizeof(float32)
|
|
|
+ windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
|
|
|
+ commits = numpy.concatenate((
|
|
|
+ [windows[0, 0] / windows[0].max(),
|
|
|
+ windows[0, 1] / windows[0].max(),
|
|
|
+ windows[0, 2] / windows[0].max()],
|
|
|
+ windows[:, 3] / windows.max(axis=1),
|
|
|
+ [windows[-1, 4] / windows[-1].max(),
|
|
|
+ windows[-1, 5] / windows[-1].max(),
|
|
|
+ windows[-1, 6] / windows[-1].max()]
|
|
|
+ ))
|
|
|
+ arr[1] = commits * 7 # 7 is a pure heuristic here and is not related to window size
|
|
|
+ series[i] = list(arr.transpose())
|
|
|
+ # calculate the distance matrix using dynamic time warping metric
|
|
|
+ dists = numpy.full((len(series) + 1, len(series) + 1), -100500, dtype=numpy.float32)
|
|
|
+ for x in range(len(series)):
|
|
|
+ dists[x, x] = 0
|
|
|
+ for y in range(x + 1, len(series)):
|
|
|
+ # L1 norm
|
|
|
+ dist, _ = fastdtw(series[x], series[y], radius=5, dist=1)
|
|
|
+ dists[x, y] = dists[y, x] = dist
|
|
|
+ # preparation for seriation ordering
|
|
|
+ dists[len(series), :] = 0
|
|
|
+ dists[:, len(series)] = 0
|
|
|
+ assert (dists >= 0).all()
|
|
|
+ print("Ordering the series")
|
|
|
+ route = seriate(dists)
|
|
|
+ return dists, devseries, devstats, route
|
|
|
+
|
|
|
+
|
|
|
+def hdbscan_cluster_routed_series(dists, route):
|
|
|
+ try:
|
|
|
+ from hdbscan import HDBSCAN
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import hdbscan: %s\nInstall it from "
|
|
|
+ "https://github.com/scikit-learn-contrib/hdbscan" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ opt_dist_chain = numpy.cumsum(numpy.array(
|
|
|
+ [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
|
|
|
+ clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
|
|
|
+ return clusters
|
|
|
+
|
|
|
+
|
|
|
+def seriate(dists):
|
|
|
+ try:
|
|
|
+ from ortools.constraint_solver import pywrapcp, routing_enums_pb2
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import ortools: %s\nInstall it from "
|
|
|
+ "https://developers.google.com/optimization/install/python/" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ # solve the TSP on the distance matrix
|
|
|
+ routing = pywrapcp.RoutingModel(dists.shape[0], 1, dists.shape[0] - 1)
|
|
|
+
|
|
|
+ def dist_callback(x, y):
|
|
|
+ # ortools wants integers, so we approximate here
|
|
|
+ return int(dists[x][y] * 1000)
|
|
|
+
|
|
|
+ routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
|
|
|
+ search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
|
|
|
+ search_parameters.local_search_metaheuristic = (
|
|
|
+ routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
|
|
|
+ search_parameters.time_limit_ms = 2000
|
|
|
+ assignment = routing.SolveWithParameters(search_parameters)
|
|
|
+ index = routing.Start(0)
|
|
|
+ route = []
|
|
|
+ while not routing.IsEnd(index):
|
|
|
+ node = routing.IndexToNode(index)
|
|
|
+ if node < dists.shape[0] - 1:
|
|
|
+ route.append(node)
|
|
|
+ index = assignment.Value(routing.NextVar(index))
|
|
|
+ return route
|
|
|
|
|
|
|
|
|
-def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
+def show_devs_efforts(args, name, start_date, end_date, people, days, max_people):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
|
start_date = datetime(start_date.year, start_date.month, start_date.day)
|
|
|
end_date = datetime.fromtimestamp(end_date)
|
|
@@ -1434,13 +1456,18 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
|
|
|
efforts = numpy.zeros((len(chosen) + 1, (end_date - start_date).days + 1), dtype=numpy.float32)
|
|
|
for day, devs in days.items():
|
|
|
- for dev, stats in devs.items():
|
|
|
- dev = chosen_order.get(dev, len(chosen_order))
|
|
|
- efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ if day < efforts.shape[1]:
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ dev = chosen_order.get(dev, len(chosen_order))
|
|
|
+ efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ efforts_cum = numpy.cumsum(efforts, axis=1)
|
|
|
window = slepian(10, 0.5)
|
|
|
window /= window.sum()
|
|
|
- for i in range(efforts.shape[0]):
|
|
|
- efforts[i] = convolve(efforts[i], window, "same")
|
|
|
+ for e in (efforts, efforts_cum):
|
|
|
+ for i in range(e.shape[0]):
|
|
|
+ ending = e[i][-len(window) * 2:].copy()
|
|
|
+ e[i] = convolve(e[i], window, "same")
|
|
|
+ e[i][-len(ending):] = ending
|
|
|
matplotlib, pyplot = import_pyplot(args.backend, args.style)
|
|
|
plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]
|
|
|
|
|
@@ -1449,19 +1476,26 @@ def show_devs_efforts(args, name, start_date, end_date, data, max_people):
|
|
|
if len(name) > 40:
|
|
|
people[i] = name[:37] + "..."
|
|
|
|
|
|
- polys = pyplot.stackplot(plot_x, efforts, labels=people)
|
|
|
+ polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
|
|
|
+ if len(polys) == max_people + 1:
|
|
|
+ polys[-1].set_hatch("/")
|
|
|
+ polys = pyplot.stackplot(plot_x, -efforts * efforts_cum.max() / efforts.max())
|
|
|
if len(polys) == max_people + 1:
|
|
|
polys[-1].set_hatch("/")
|
|
|
- legend = pyplot.legend(loc=2, fontsize=args.font_size)
|
|
|
+ yticks = []
|
|
|
+ for tick in pyplot.gca().yaxis.iter_ticks():
|
|
|
+ if tick[1] >= 0:
|
|
|
+ yticks.append(tick[1])
|
|
|
+ pyplot.gca().yaxis.set_ticks(yticks)
|
|
|
+ legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
|
|
|
apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
|
|
|
- args.font_size, args.size)
|
|
|
- deploy_plot("Efforts through time (changed lines of code)", args.output, args.style)
|
|
|
+ args.font_size, args.size or "16,10")
|
|
|
+ deploy_plot("Efforts through time (changed lines of code)", args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_old_vs_new(args, name, start_date, end_date, data):
|
|
|
+def show_old_vs_new(args, name, start_date, end_date, people, days):
|
|
|
from scipy.signal import convolve, slepian
|
|
|
|
|
|
- days, people = data
|
|
|
start_date = datetime.fromtimestamp(start_date)
|
|
|
start_date = datetime(start_date.year, start_date.month, start_date.day)
|
|
|
end_date = datetime.fromtimestamp(end_date)
|
|
@@ -1483,11 +1517,10 @@ def show_old_vs_new(args, name, start_date, end_date, data):
|
|
|
pyplot.legend(loc=2, fontsize=args.font_size)
|
|
|
for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
|
|
|
tick.label.set_fontsize(args.font_size)
|
|
|
- deploy_plot("Additions vs changes", args.output, args.style)
|
|
|
+ deploy_plot("Additions vs changes", args.output, args.background)
|
|
|
|
|
|
|
|
|
-def show_languages(args, name, start_date, end_date, data):
|
|
|
- days, people = data
|
|
|
+def show_languages(args, name, start_date, end_date, people, days):
|
|
|
devlangs = defaultdict(lambda: defaultdict(lambda: numpy.zeros(3, dtype=int)))
|
|
|
for day, devs in days.items():
|
|
|
for dev, stats in devs.items():
|
|
@@ -1503,6 +1536,134 @@ def show_languages(args, name, start_date, end_date, data):
|
|
|
print("%s: %d" % (lang, vals))
|
|
|
|
|
|
|
|
|
+class ParallelDevData:
|
|
|
+ def __init__(self):
|
|
|
+ self.commits_rank = -1
|
|
|
+ self.commits = -1
|
|
|
+ self.lines_rank = -1
|
|
|
+ self.lines = -1
|
|
|
+ self.ownership_rank = -1
|
|
|
+ self.ownership = -1
|
|
|
+ self.couples_index = -1
|
|
|
+ self.couples_cluster = -1
|
|
|
+ self.commit_coocc_index = -1
|
|
|
+ self.commit_coocc_cluster = -1
|
|
|
+
|
|
|
+ def __str__(self):
|
|
|
+ return str(self.__dict__)
|
|
|
+
|
|
|
+ def __repr__(self):
|
|
|
+ return str(self)
|
|
|
+
|
|
|
+
|
|
|
+def load_devs_parallel(ownership, couples, devs, max_people):
|
|
|
+ try:
|
|
|
+ from hdbscan import HDBSCAN
|
|
|
+ except ImportError as e:
|
|
|
+ print("Cannot import hdbscan: %s\nInstall it from "
|
|
|
+ "https://github.com/scikit-learn-contrib/hdbscan" % e)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ people, owned = ownership
|
|
|
+ _, cmatrix = couples
|
|
|
+ _, days = devs
|
|
|
+
|
|
|
+ print("calculating - commits")
|
|
|
+ commits = defaultdict(int)
|
|
|
+ for day, devs in days.items():
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ commits[people[dev]] += stats.Commits
|
|
|
+ chosen = [k for v, k in sorted(((v, k) for k, v in commits.items()),
|
|
|
+ reverse=True)[:max_people]]
|
|
|
+ result = {k: ParallelDevData() for k in chosen}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.commits_rank = chosen.index(k)
|
|
|
+ v.commits = commits[k]
|
|
|
+
|
|
|
+ print("calculating - lines")
|
|
|
+ lines = defaultdict(int)
|
|
|
+ for day, devs in days.items():
|
|
|
+ for dev, stats in devs.items():
|
|
|
+ lines[people[dev]] += stats.Added + stats.Removed + stats.Changed
|
|
|
+ lines_index = {k: i for i, (_, k) in enumerate(sorted(
|
|
|
+ ((v, k) for k, v in lines.items() if k in chosen), reverse=True))}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.lines_rank = lines_index[k]
|
|
|
+ v.lines = lines[k]
|
|
|
+
|
|
|
+ print("calculating - ownership")
|
|
|
+ owned_index = {k: i for i, (_, k) in enumerate(sorted(
|
|
|
+ ((owned[k][-1].sum(), k) for k in chosen), reverse=True))}
|
|
|
+ for k, v in result.items():
|
|
|
+ v.ownership_rank = owned_index[k]
|
|
|
+ v.ownership = owned[k][-1].sum()
|
|
|
+
|
|
|
+ print("calculating - couples")
|
|
|
+ embeddings = numpy.genfromtxt(fname="couples_people_data.tsv", delimiter="\t")[
|
|
|
+ [people.index(k) for k in chosen]]
|
|
|
+ embeddings /= numpy.linalg.norm(embeddings, axis=1)[:, None]
|
|
|
+ cos = embeddings.dot(embeddings.T)
|
|
|
+ cos[cos > 1] = 1 # tiny precision faults
|
|
|
+ dists = numpy.zeros((len(chosen) + 1,) * 2)
|
|
|
+ dists[:len(chosen), :len(chosen)] = numpy.arccos(cos)
|
|
|
+ clusters = HDBSCAN(min_cluster_size=2, metric="precomputed").fit_predict(
|
|
|
+ dists[:len(chosen), :len(chosen)])
|
|
|
+ for k, v in result.items():
|
|
|
+ v.couples_cluster = clusters[chosen.index(k)]
|
|
|
+
|
|
|
+ couples_order = seriate(dists)
|
|
|
+ roll_options = []
|
|
|
+ for i in range(len(couples_order)):
|
|
|
+ loss = 0
|
|
|
+ for k, v in result.items():
|
|
|
+ loss += abs(
|
|
|
+ v.ownership_rank - (couples_order.index(chosen.index(k)) + i) % len(chosen))
|
|
|
+ roll_options.append(loss)
|
|
|
+ best_roll = numpy.argmin(roll_options)
|
|
|
+ couples_order = list(numpy.roll(couples_order, best_roll))
|
|
|
+ for k, v in result.items():
|
|
|
+ v.couples_index = couples_order.index(chosen.index(k))
|
|
|
+
|
|
|
+ print("calculating - commit series")
|
|
|
+ dists, devseries, _, orig_route = order_commits(chosen, days, people)
|
|
|
+ keys = list(devseries.keys())
|
|
|
+ route = [keys[node] for node in orig_route]
|
|
|
+ for roll in range(len(route)):
|
|
|
+ loss = 0
|
|
|
+ for k, v in result.items():
|
|
|
+ i = route.index(people.index(k))
|
|
|
+ loss += abs(v.couples_index - ((i + roll) % len(route)))
|
|
|
+ roll_options[roll] = loss
|
|
|
+ best_roll = numpy.argmin(roll_options)
|
|
|
+ route = list(numpy.roll(route, best_roll))
|
|
|
+ orig_route = list(numpy.roll(orig_route, best_roll))
|
|
|
+ clusters = hdbscan_cluster_routed_series(dists, orig_route)
|
|
|
+ for k, v in result.items():
|
|
|
+ v.commit_coocc_index = route.index(people.index(k))
|
|
|
+ v.commit_coocc_cluster = clusters[v.commit_coocc_index]
|
|
|
+
|
|
|
+ print(result)
|
|
|
+ return result, chosen
|
|
|
+
|
|
|
+
|
|
|
+def show_devs_parallel(args, name, start_date, end_date, data):
|
|
|
+ matplotlib, pyplot = import_pyplot(args.backend, args.style)
|
|
|
+ pyplot.xlim((0, 6))
|
|
|
+ pyplot.ylim((0, 1))
|
|
|
+
|
|
|
+ x = numpy.linspace(0.1, 0.9, 1000)
|
|
|
+ y = numpy.linspace(0.1, 0.9, 1000)
|
|
|
+ points = numpy.array([x, y]).T.reshape(-1, 1, 2)
|
|
|
+ segments = numpy.concatenate([points[:-1], points[1:]], axis=1)
|
|
|
+
|
|
|
+ from matplotlib.collections import LineCollection
|
|
|
+ lc = LineCollection(segments)
|
|
|
+ lc.set_array(numpy.linspace(0, 1, segments.shape[0]))
|
|
|
+ pyplot.gca().add_collection(lc)
|
|
|
+
|
|
|
+ deploy_plot("Developers", args.output, args.background)
|
|
|
+
|
|
|
+
|
|
|
def _format_number(n):
|
|
|
if n == 0:
|
|
|
return "0"
|
|
@@ -1655,7 +1816,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_devs(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_devs(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
|
|
|
def devs_efforts():
|
|
|
try:
|
|
@@ -1663,7 +1824,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_devs_efforts(args, reader.get_name(), *reader.get_header(), data,
|
|
|
+ show_devs_efforts(args, reader.get_name(), *reader.get_header(), *data,
|
|
|
max_people=args.max_people)
|
|
|
|
|
|
def old_vs_new():
|
|
@@ -1672,7 +1833,7 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_old_vs_new(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
|
|
|
def languages():
|
|
|
try:
|
|
@@ -1680,7 +1841,26 @@ def main():
|
|
|
except KeyError:
|
|
|
print(devs_warning)
|
|
|
return
|
|
|
- show_languages(args, reader.get_name(), *reader.get_header(), data)
|
|
|
+ show_languages(args, reader.get_name(), *reader.get_header(), *data)
|
|
|
+
|
|
|
+ def devs_parallel():
|
|
|
+ try:
|
|
|
+ ownership = reader.get_ownership_burndown()
|
|
|
+ except KeyError:
|
|
|
+ print(burndown_people_warning)
|
|
|
+ return
|
|
|
+ try:
|
|
|
+ couples = reader.get_people_coocc()
|
|
|
+ except KeyError:
|
|
|
+ print(couples_warning)
|
|
|
+ return
|
|
|
+ try:
|
|
|
+ devs = reader.get_devs()
|
|
|
+ except KeyError:
|
|
|
+ print(devs_warning)
|
|
|
+ return
|
|
|
+ show_devs_parallel(args, reader.get_name(), *reader.get_header(),
|
|
|
+ load_devs_parallel(ownership, couples, devs, args.max_people))
|
|
|
|
|
|
modes = {
|
|
|
"run-times": run_times,
|
|
@@ -1698,6 +1878,7 @@ def main():
|
|
|
"devs-efforts": devs_efforts,
|
|
|
"old-vs-new": old_vs_new,
|
|
|
"languages": languages,
|
|
|
+ "devs-parallel": devs_parallel,
|
|
|
}
|
|
|
try:
|
|
|
modes[args.mode]()
|
|
@@ -1715,6 +1896,7 @@ def main():
|
|
|
sentiment()
|
|
|
devs()
|
|
|
devs_efforts()
|
|
|
+ # devs_parallel()
|
|
|
|
|
|
if web_server.running:
|
|
|
secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
|