Parcourir la source

Add "old-vs-new" plot

Fixes https://github.com/src-d/hercules/issues/166

Signed-off-by: Vadim Markovtsev <vadim@sourced.tech>
Vadim Markovtsev il y a 6 ans
Parent
commit
edaa332eb0
3 fichiers modifiés avec 54 ajouts et 3 suppressions
  1. 13 0
      README.md
  2. BIN
      doc/add_vs_changed.png
  3. 41 3
      labours.py

+ 13 - 0
README.md

@@ -300,6 +300,19 @@ insights from the `tensorflow/tensorflow` plot above:
 2. The "blue" group of developers covers the global maintainers and a few people who left (at the top).
 3. The "red" group shows how core developers join the project or become less active.
 
+#### Added vs changed lines through time
+
+![tensorflow/tensorflow](doc/add_vs_changed.png)
+<p align="center">tensorflow/tensorflow added and changed lines through time.</p>
+
+```
+hercules --devs [--people-dict=/path/to/identities]
+python3 labours.py -m old-vs-new -o <name>
+```
+
+`--devs` from the previous section also allows plotting how many lines were added and how many
+existing lines were changed (deleted or replaced) through time. This plot is smoothed.
+
 #### Sentiment (positive and negative code)
 
 ![Django sentiment](doc/sentiment.png)

BIN
doc/add_vs_changed.png


+ 41 - 3
labours.py

@@ -3,6 +3,7 @@ import argparse
 from collections import defaultdict, namedtuple
 from datetime import datetime, timedelta
 from importlib import import_module
+from itertools import chain
 import io
 import json
 import os
@@ -61,7 +62,7 @@ def parse_args():
     parser.add_argument("-m", "--mode",
                         choices=["burndown-project", "burndown-file", "burndown-person",
                                  "churn-matrix", "ownership", "couples", "shotness", "sentiment",
-                                 "devs", "all", "run-times"],
+                                 "devs", "old-vs-new", "all", "run-times"],
                         help="What to plot.")
     parser.add_argument(
         "--resample", default="year",
@@ -1314,7 +1315,7 @@ def show_devs(args, name, start_date, end_date, data):
         else:
             # outlier
             color = "grey"
-        ax.plot(plot_x, series, color=color)
+        ax.fill_between(plot_x, series, color=color)
         ax.set_axis_off()
         author = people[dev_i]
         ax.text(0.03, 0.5, author[:36] + (author[36:] and "..."),
@@ -1344,7 +1345,7 @@ def show_devs(args, name, start_date, end_date, data):
         axes[-1].xaxis.set_major_locator(matplotlib.dates.MonthLocator(interval=interval))
         axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m"))
     for tick in axes[-1].xaxis.get_major_ticks():
-        tick.label.set_fontsize(16)
+        tick.label.set_fontsize(args.font_size)
     axes[-1].spines["left"].set_visible(False)
     axes[-1].spines["right"].set_visible(False)
     axes[-1].spines["top"].set_visible(False)
@@ -1355,6 +1356,34 @@ def show_devs(args, name, start_date, end_date, data):
     deploy_plot(title, args.output, args.style)
 
 
+def show_old_vs_new(args, name, start_date, end_date, data):
+    """
+    Plot the smoothed daily number of added lines against changed existing lines.
+
+    :param args: Parsed command line arguments; `backend`, `style`, `font_size` and `output`
+                 are read here.
+    :param name: Repository name; not used by this plot.
+    :param start_date: UNIX timestamp of the beginning of the plotted period.
+    :param end_date: UNIX timestamp of the end of the plotted period.
+    :param data: Pair `(days, people)`. `days` maps a day index, counted from `start_date`,
+                 to a mapping whose values expose the `Added`, `Removed` and `Changed`
+                 counters. `people` is ignored.
+    :return: None. The figure is handed over to `deploy_plot`.
+    """
+    from scipy.signal import convolve, slepian
+
+    days, _ = data
+    # Truncate both timestamps to midnight so that the difference is a whole number of days.
+    start_date = datetime.fromtimestamp(start_date)
+    start_date = datetime(start_date.year, start_date.month, start_date.day)
+    end_date = datetime.fromtimestamp(end_date)
+    end_date = datetime(end_date.year, end_date.month, end_date.day)
+    # One slot per calendar day of the period, both ends included.
+    new_lines = numpy.zeros((end_date - start_date).days + 1)
+    old_lines = numpy.zeros_like(new_lines)
+    for day, devs in days.items():
+        for stats in devs.values():
+            new_lines[day] += stats.Added
+            # Removed and replaced lines both count as touching existing code.
+            old_lines[day] += stats.Removed + stats.Changed
+    resolution = 32
+    # The smoothing window must hold at least one sample: a period shorter than `resolution`
+    # days would otherwise give an empty window, which cannot be convolved.
+    window = slepian(max(len(new_lines) // resolution, 1), 0.5)
+    new_lines = convolve(new_lines, window, "same")
+    old_lines = convolve(old_lines, window, "same")
+    matplotlib, pyplot = import_pyplot(args.backend, args.style)
+    plot_x = [start_date + timedelta(days=i) for i in range(len(new_lines))]
+    pyplot.fill_between(plot_x, new_lines, color="#8DB843", label="Changed new lines")
+    pyplot.fill_between(plot_x, old_lines, color="#E14C35", label="Changed existing lines")
+    pyplot.legend(loc=2, fontsize=args.font_size)
+    for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
+        tick.label.set_fontsize(args.font_size)
+    deploy_plot("Additions vs changes", args.output, args.style)
+
+
 def _format_number(n):
     if n == 0:
         return "0"
@@ -1502,6 +1531,14 @@ def main():
             return
         show_devs(args, reader.get_name(), *reader.get_header(), data)
 
+    def old_vs_new():
+        """Plot added vs changed lines from the devs statistics, or warn if they are missing."""
+        try:
+            devs_data = reader.get_devs()
+        except KeyError:
+            # The reader could not provide the devs statistics: warn and skip the plot.
+            print(devs_warning)
+        else:
+            show_old_vs_new(args, reader.get_name(), *reader.get_header(), devs_data)
+
     modes = {
         "run-times": run_times,
         "burndown-project": project_burndown,
@@ -1513,6 +1550,7 @@ def main():
         "shotness": shotness,
         "sentiment": sentiment,
         "devs": devs,
+        "old-vs-new": old_vs_new,
     }
     try:
         modes[args.mode]()