6 lat temu · b56651f296
--- a/python/.flake8
+++ b/python/.flake8
@@ -1,6 +0,0 @@
 
				-[flake8]
			
 
				-ignore=D,B007
			
 
				-max-line-length=99
			
 
				-inline-quotes="
			
 
				-import-order-style=appnexus
			
 
				-exclude=labours/pb_pb2.py
			
--- a/python/labours/labours.py
+++ b/python/labours/labours.py
@@ -1,6 +1,7 @@
 
				 #!/usr/bin/env python3
			
 
				 import argparse
			
 
				 from collections import defaultdict, namedtuple
			
 
				+import contextlib
			
 
				 from datetime import datetime, timedelta
			
 
				 from importlib import import_module
			
 
				 import io
			
@@ -16,21 +17,11 @@ import threading
 
				 import time
			
 
				 import warnings
			
 
				 
			
 
				-
			
 
				-try:
			
 
				-    from clint.textui import progress
			
 
				-except ImportError:
			
 
				-    print("Warning: clint is not installed, no fancy progressbars in the terminal for you.")
			
 
				-    progress = None
			
 
				 import numpy
			
 
				+import tqdm
			
 
				 import yaml
			
 
				 
			
 
				 
			
 
				-if sys.version_info[0] < 3:
			
 
				-    # OK, ancients, I will support Python 2, but you owe me a beer
			
 
				-    input = raw_input  # noqa: F821
			
 
				-
			
 
				-
			
 
				 def list_matplotlib_styles():
			
 
				     script = "import sys; from matplotlib import pyplot; " \
			
 
				              "sys.stdout.write(repr(pyplot.style.available))"
			
@@ -59,13 +50,14 @@ def parse_args():
 
				     parser.add_argument("--relative", action="store_true",
			
 
				                         help="Occupy 100%% height for every measurement.")
			
 
				     parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
			
 
				-    parser.add_argument("-m", "--mode",
			
 
				+    parser.add_argument("-m", "--mode", dest="modes", default=[], action="append",
			
 
				                         choices=["burndown-project", "burndown-file", "burndown-person",
			
 
				                                  "overwrites-matrix", "ownership", "couples-files",
			
 
				                                  "couples-people", "couples-shotness", "shotness", "sentiment",
			
 
				-                                 "devs", "devs-efforts", "old-vs-new", "all", "run-times",
			
 
				-                                 "languages", "devs-parallel"],
			
 
				-                        help="What to plot.")
			
 
				+                                 "devs", "devs-efforts", "old-vs-new", "run-times",
			
 
				+                                 "languages", "devs-parallel", "all"],
			
 
				+                        help="What to plot. Can be repeated, e.g. "
			
 
				+                             "-m burndown-project -m run-times")
			
 
				     parser.add_argument(
			
 
				         "--resample", default="year",
			
 
				         help="The way to resample the time series. Possible values are: "
			
@@ -475,7 +467,7 @@ def print_survival_function(kmf, sampling):
 
				         pass
			
 
				 
			
 
				 
			
 
				-def interpolate_burndown_matrix(matrix, granularity, sampling):
			
 
				+def interpolate_burndown_matrix(matrix, granularity, sampling, progress=False):
			
 
				     daily = numpy.zeros(
			
 
				         (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
			
 
				         dtype=numpy.float32)
			
@@ -487,7 +479,7 @@ def interpolate_burndown_matrix(matrix, granularity, sampling):
 
				     ⌄
			
 
				     bands, y
			
 
				     """
			
 
				-    for y in range(matrix.shape[0]):
			
 
				+    for y in tqdm.tqdm(range(matrix.shape[0]), disable=(not progress)):
			
 
				         for x in range(matrix.shape[1]):
			
 
				             if y * granularity > (x + 1) * sampling:
			
 
				                 # the future is zeros
			
@@ -613,7 +605,14 @@ def floor_datetime(dt, duration):
 
				     return datetime.fromtimestamp(dt.timestamp() - dt.timestamp() % duration)
			
 
				 
			
 
				 
			
 
				-def load_burndown(header, name, matrix, resample, report_survival=True):
			
 
				+def load_burndown(
			
 
				+    header,
			
 
				+    name,
			
 
				+    matrix,
			
 
				+    resample,
			
 
				+    report_survival=True,
			
 
				+    interpolation_progress=False
			
 
				+):
			
 
				     pandas = import_pandas()
			
 
				 
			
 
				     start, last, sampling, granularity, tick = header
			
@@ -631,7 +630,12 @@ def load_burndown(header, name, matrix, resample, report_survival=True):
 
				         # Interpolate the day x day matrix.
			
 
				         # Each day brings equal weight in the granularity.
			
 
				         # Sampling's interpolation is linear.
			
 
				-        daily = interpolate_burndown_matrix(matrix, granularity, sampling)
			
 
				+        daily = interpolate_burndown_matrix(
			
 
				+            matrix=matrix,
			
 
				+            granularity=granularity,
			
 
				+            sampling=sampling,
			
 
				+            progress=interpolation_progress,
			
 
				+        )
			
 
				         daily[(last - start).days:] = 0
			
 
				         # Resample the bands
			
 
				         aliases = {
			
@@ -812,6 +816,7 @@ def deploy_plot(title, output, background, tight=True):
 
				                 pyplot.tight_layout()
			
 
				             except:  # noqa: E722
			
 
				                 print("Warning: failed to set the tight layout")
			
 
				+        print("Writing plot to %s" % output)
			
 
				         pyplot.savefig(output, transparent=True)
			
 
				     pyplot.clf()
			
 
				 
			
@@ -913,14 +918,10 @@ def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granu
 
				 def plot_many_burndown(args, target, header, parts):
			
 
				     if not args.output:
			
 
				         print("Warning: output not set, showing %d plots." % len(parts))
			
 
				-    itercnt = progress.bar(parts, expected_size=len(parts)) \
			
 
				-        if progress is not None else parts
			
 
				     stdout = io.StringIO()
			
 
				-    for name, matrix in itercnt:
			
 
				-        backup = sys.stdout
			
 
				-        sys.stdout = stdout
			
 
				-        plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample))
			
 
				-        sys.stdout = backup
			
 
				+    for name, matrix in tqdm.tqdm(parts):
			
 
				+        with contextlib.redirect_stdout(stdout):
			
 
				+            plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample))
			
 
				     sys.stdout.write(stdout.getvalue())
			
 
				 
			
 
				 
			
@@ -1011,11 +1012,11 @@ IDEAL_SHARD_SIZE = 4096
 
				 
			
 
				 
			
 
				 def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
			
 
				+    import tensorflow as tf
			
 
				     try:
			
 
				         from . import swivel
			
 
				     except (SystemError, ImportError):
			
 
				         import swivel
			
 
				-    import tensorflow as tf
			
 
				 
			
 
				     assert matrix.shape[0] == matrix.shape[1]
			
 
				     assert len(index) <= matrix.shape[0]
			
@@ -1142,11 +1143,7 @@ class CORSWebServer(object):
 
				     def serve(self):
			
 
				         outer = self
			
 
				 
			
 
				-        try:
			
 
				-            from http.server import HTTPServer, SimpleHTTPRequestHandler, test
			
 
				-        except ImportError:  # Python 2
			
 
				-            from BaseHTTPServer import HTTPServer, test
			
 
				-            from SimpleHTTPServer import SimpleHTTPRequestHandler
			
 
				+        from http.server import HTTPServer, SimpleHTTPRequestHandler, test
			
 
				 
			
 
				         class ClojureServer(HTTPServer):
			
 
				             def __init__(self, *args, **kwargs):
			
@@ -1439,18 +1436,21 @@ def order_commits(chosen_people, days, people):
 
				         series[i] = arr.transpose()
			
 
				     # calculate the distance matrix using dynamic time warping
			
 
				     dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
			
 
				-    for x, serx in enumerate(series):
			
 
				-        dists[x, x] = 0
			
 
				-        for y, sery in enumerate(series[x + 1:], start=x + 1):
			
 
				-            min_day = int(min(serx[0][0], sery[0][0]))
			
 
				-            max_day = int(max(serx[-1][0], sery[-1][0]))
			
 
				-            arrx = numpy.zeros(max_day - min_day + 1, dtype=numpy.float32)
			
 
				-            arry = numpy.zeros_like(arrx)
			
 
				-            arrx[serx[:, 0].astype(int) - min_day] = serx[:, 1]
			
 
				-            arry[sery[:, 0].astype(int) - min_day] = sery[:, 1]
			
 
				-            # L1 norm
			
 
				-            dist, _ = fastdtw(arrx, arry, radius=5, dist=1)
			
 
				-            dists[x, y] = dists[y, x] = dist
			
 
				+    # TODO: pass total=len(series) * (len(series) - 1) // 2 to tqdm (one update per x < y pair)
			
 
				+    with tqdm.tqdm() as pb:
			
 
				+        for x, serx in enumerate(series):
			
 
				+            dists[x, x] = 0
			
 
				+            for y, sery in enumerate(series[x + 1:], start=x + 1):
			
 
				+                min_day = int(min(serx[0][0], sery[0][0]))
			
 
				+                max_day = int(max(serx[-1][0], sery[-1][0]))
			
 
				+                arrx = numpy.zeros(max_day - min_day + 1, dtype=numpy.float32)
			
 
				+                arry = numpy.zeros_like(arrx)
			
 
				+                arrx[serx[:, 0].astype(int) - min_day] = serx[:, 1]
			
 
				+                arry[sery[:, 0].astype(int) - min_day] = sery[:, 1]
			
 
				+                # L1 norm
			
 
				+                dist, _ = fastdtw(arrx, arry, radius=5, dist=1)
			
 
				+                dists[x, y] = dists[y, x] = dist
			
 
				+                pb.update()
			
 
				     print("Ordering the series")
			
 
				     route = seriate(dists)
			
 
				     return dists, devseries, devstats, route
			
@@ -1792,7 +1792,7 @@ def main():
 
				             return
			
 
				         plot_burndown(args, "project",
			
 
				                       *load_burndown(full_header, *reader.get_project_burndown(),
			
 
				-                                     resample=args.resample))
			
 
				+                                     resample=args.resample, interpolation_progress=True))
			
 
				 
			
 
				     def files_burndown():
			
 
				         try:
			
@@ -1960,23 +1960,37 @@ def main():
 
				         "languages": languages,
			
 
				         "devs-parallel": devs_parallel,
			
 
				     }
			
 
				-    try:
			
 
				-        modes[args.mode]()
			
 
				-    except KeyError:
			
 
				-        assert args.mode == "all"
			
 
				-        project_burndown()
			
 
				-        # files_burndown()
			
 
				-        # people_burndown()
			
 
				-        overwrites_matrix()
			
 
				-        ownership_burndown()
			
 
				-        couples_files()
			
 
				-        couples_people()
			
 
				-        couples_shotness()
			
 
				-        shotness()
			
 
				-        # sentiment()
			
 
				-        devs()
			
 
				-        devs_efforts()
			
 
				-        # devs_parallel()
			
 
				+
			
 
				+    if "all" in args.modes:
			
 
				+        all_mode = True
			
 
				+        args.modes = [
			
 
				+            "burndown-project",
			
 
				+            "overwrites-matrix",
			
 
				+            "ownership",
			
 
				+            "couples-files",
			
 
				+            "couples-people",
			
 
				+            "couples-shotness",
			
 
				+            "shotness",
			
 
				+            "devs",
			
 
				+            "devs-efforts",
			
 
				+        ]
			
 
				+    else:
			
 
				+        all_mode = False
			
 
				+
			
 
				+    for mode in args.modes:
			
 
				+        if mode not in modes:
			
 
				+            print("Unknown mode: %s" % mode)
			
 
				+            continue
			
 
				+
			
 
				+        print("Running: %s" % mode)
			
 
				+        # `args.mode` is required for path determination in the mode functions
			
 
				+        args.mode = ("all" if all_mode else mode)
			
 
				+        try:
			
 
				+            modes[mode]()
			
 
				+        except ImportError as ie:
			
 
				+            print("A module required by the %s mode was not found: %s" % (mode, ie))
			
 
				+            if not all_mode:
			
 
				+                raise
			
 
				 
			
 
				     if web_server.running:
			
 
				         secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
			
--- a/python/requirements.in
+++ b/python/requirements.in
@@ -0,0 +1,13 @@
 
				+matplotlib>=2.0,<4.0
			
 
				+numpy>=1.12.0,<2.0
			
 
				+pandas>=0.20.0,<1.0
			
 
				+PyYAML>=3.0,<5.0
			
 
				+scipy>=0.19.0,<1.2.2
			
 
				+protobuf>=3.5.0,<4.0
			
 
				+munch>=2.0,<3.0
			
 
				+hdbscan>=0.8.0,<2.0
			
 
				+seriate>=1.0,<2.0
			
 
				+fastdtw>=0.3.2,<2.0
			
 
				+python-dateutil>=2.6.0,<3.0
			
 
				+lifelines>=0.20.0,<2.0
			
 
				+tqdm>=4.3,<5.0
			
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,13 +1,35 @@
 
				-clint>=0.5.1,<1.0
			
 
				-matplotlib>=2.0,<4.0
			
 
				-numpy==1.16.0
			
 
				-pandas>=0.20.0,<1.0
			
 
				-PyYAML==4.2b1
			
 
				-scipy==1.2.1
			
 
				-protobuf>=3.5.0,<4.0
			
 
				-munch>=2.0
			
 
				-hdbscan==0.8.18
			
 
				-seriate==1.0.0
			
 
				+#
			
 
				+# This file is autogenerated by pip-compile
			
 
				+# To update, run:
			
 
				+#
			
 
				+#    pip-compile requirements.in
			
 
				+#
			
 
				+autograd-gamma==0.4.1     # via lifelines
			
 
				+autograd==1.3             # via autograd-gamma, lifelines
			
 
				+cycler==0.10.0            # via matplotlib
			
 
				+cython==0.29.13           # via hdbscan
			
 
				 fastdtw==0.3.2
			
 
				+future==0.17.1            # via autograd
			
 
				+hdbscan==0.8.22
			
 
				+joblib==0.13.2            # via hdbscan, scikit-learn
			
 
				+kiwisolver==1.1.0         # via matplotlib
			
 
				+lifelines==0.22.7
			
 
				+matplotlib==3.1.1
			
 
				+munch==2.3.2
			
 
				+numpy==1.17.2
			
 
				+ortools==7.3.7083         # via seriate
			
 
				+packaging==19.2           # via seriate
			
 
				+pandas==0.25.1
			
 
				+protobuf==3.9.2
			
 
				+pyparsing==2.4.2          # via matplotlib, packaging
			
 
				 python-dateutil==2.8.0
			
 
				-lifelines==0.21.3
			
 
				+pytz==2019.2              # via pandas
			
 
				+pyyaml==3.13
			
 
				+scikit-learn==0.21.3      # via hdbscan
			
 
				+scipy==1.2.1
			
 
				+seriate==1.1.0
			
 
				+six==1.12.0               # via cycler, munch, ortools, packaging, protobuf, python-dateutil
			
 
				+tqdm==4.36.1
			
 
				+
			
 
				+# The following packages are considered to be unsafe in a requirements file:
			
 
				+# setuptools==41.2.0        # via kiwisolver, protobuf
			
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -0,0 +1,12 @@
 
				+[flake8]
			
 
				+exclude = labours/pb_pb2.py
			
 
				+ignore = D,B007
			
 
				+import-order-style = appnexus
			
 
				+inline-quotes = "
			
 
				+max-line-length = 99
			
 
				+
			
 
				+[isort]
			
 
				+force_sort_within_sections = true
			
 
				+line_length = 99
			
 
				+lines_between_types = 0
			
 
				+multi_line_output = 0
			
--- a/python/setup.py
+++ b/python/setup.py
@@ -9,6 +9,9 @@ try:
 
				 except FileNotFoundError:
			
 
				     long_description = ""
			
 
				 
			
 
				+with open(os.path.join(os.path.dirname(__file__), "requirements.in"), encoding="utf-8") as f:
			
 
				+    requirements = f.readlines()
			
 
				+
			
 
				 
			
 
				 setup(
			
 
				     name="labours",
			
@@ -23,21 +26,7 @@ setup(
 
				     download_url="https://github.com/src-d/hercules",
			
 
				     packages=["labours"],
			
 
				     keywords=["git", "mloncode", "mining software repositories", "hercules"],
			
 
				-    install_requires=[
			
 
				-        "clint>=0.5.1,<1.0",
			
 
				-        "matplotlib>=2.0,<4.0",
			
 
				-        "numpy>=1.12.0,<2.0",
			
 
				-        "pandas>=0.20.0,<1.0",
			
 
				-        "PyYAML>=3.0,<5.0",
			
 
				-        "scipy>=0.19.0,<1.2.2",
			
 
				-        "protobuf>=3.5.0,<4.0",
			
 
				-        "munch>=2.0,<3.0",
			
 
				-        "hdbscan>=0.8.0,<2.0",
			
 
				-        "seriate>=1.0,<2.0",
			
 
				-        "fastdtw>=0.3.2,<2.0",
			
 
				-        "python-dateutil>=2.6.0,<3.0",
			
 
				-        "lifelines>=0.20.0,<2.0",
			
 
				-    ],
			
 
				+    install_requires=requirements,
			
 
				     package_data={"labours": ["../LICENSE.md", "../README.md", "../requirements.txt"]},
			
 
				     entry_points={
			
 
				         "console_scripts": ["labours=labours.__main__:main"],