#!/usr/bin/env python3
import argparse
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from importlib import import_module
import io
from itertools import chain
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import time
import warnings

try:
    from clint.textui import progress
except ImportError:
    print("Warning: clint is not installed, no fancy progressbars in the terminal for you.")
    progress = None
import numpy
import yaml

if sys.version_info[0] < 3:
    # OK, ancients, I will support Python 2, but you owe me a beer
    input = raw_input  # noqa: F821
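

# matplotlib is probed in a subprocess on purpose here, presumably so that importing
# pyplot in this process does not lock in a backend before --backend has been parsed
# (see import_pyplot() below, which calls matplotlib.use() first).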
def list_matplotlib_styles():
    script = "import sys; from matplotlib import pyplot; " \
             "sys.stdout.write(repr(pyplot.style.available))"
    styles = eval(subprocess.check_output([sys.executable, "-c", script]))
    styles.remove("classic")
    return ["default", "classic"] + styles


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", default="",
                        help="Path to the output file/directory (empty for display). "
                             "If the extension is JSON, the data is saved instead of "
                             "the real image.")
    parser.add_argument("-i", "--input", default="-",
                        help="Path to the input file (- for stdin).")
    parser.add_argument("-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"])
    parser.add_argument("--font-size", default=12, type=int,
                        help="Size of the labels and legend.")
    parser.add_argument("--style", default="ggplot", choices=list_matplotlib_styles(),
                        help="Plot style to use.")
    parser.add_argument("--backend", help="Matplotlib backend to use.")
    parser.add_argument("--background", choices=["black", "white"], default="white",
                        help="Plot's general color scheme.")
    parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
    parser.add_argument("--relative", action="store_true",
                        help="Occupy 100%% height for every measurement.")
    parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
    parser.add_argument("-m", "--mode",
                        choices=["burndown-project", "burndown-file", "burndown-person",
                                 "churn-matrix", "ownership", "couples-files", "couples-people",
                                 "couples-shotness", "shotness", "sentiment", "devs",
                                 "devs-efforts", "old-vs-new", "all", "run-times", "languages",
                                 "devs-parallel"],
                        help="What to plot.")
    parser.add_argument(
        "--resample", default="year",
        help="The way to resample the time series. Possible values are: "
             "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
             "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
             "#offset-aliases).")
    dateutil_url = "https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.parse"
    parser.add_argument("--start-date",
                        help="Start date of time-based plots. Any format is accepted which is "
                             "supported by %s" % dateutil_url)
    parser.add_argument("--end-date",
                        help="End date of time-based plots. Any format is accepted which is "
                             "supported by %s" % dateutil_url)
    parser.add_argument("--disable-projector", action="store_true",
                        help="Do not run Tensorflow Projector on couples.")
    parser.add_argument("--max-people", default=20, type=int,
                        help="Maximum number of developers in churn matrix and people plots.")
    args = parser.parse_args()
    return args


class Reader(object):
    def read(self, file):
        raise NotImplementedError

    def get_name(self):
        raise NotImplementedError

    def get_header(self):
        raise NotImplementedError

    def get_burndown_parameters(self):
        raise NotImplementedError

    def get_project_burndown(self):
        raise NotImplementedError

    def get_files_burndown(self):
        raise NotImplementedError

    def get_people_burndown(self):
        raise NotImplementedError

    def get_ownership_burndown(self):
        raise NotImplementedError

    def get_people_interaction(self):
        raise NotImplementedError

    def get_files_coocc(self):
        raise NotImplementedError

    def get_people_coocc(self):
        raise NotImplementedError

    def get_shotness_coocc(self):
        raise NotImplementedError

    def get_shotness(self):
        raise NotImplementedError

    def get_sentiment(self):
        raise NotImplementedError

    def get_devs(self):
        raise NotImplementedError


class YamlReader(Reader):
    def read(self, file):
        yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
        try:
            loader = yaml.CLoader
        except AttributeError:
            print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
            loader = yaml.Loader
        try:
            if file != "-":
                with open(file) as fin:
                    data = yaml.load(fin, Loader=loader)
            else:
                data = yaml.load(sys.stdin, Loader=loader)
        except (UnicodeEncodeError, yaml.reader.ReaderError) as e:
            print("\nInvalid unicode in the input: %s\nPlease filter it through "
                  "fix_yaml_unicode.py" % e)
            sys.exit(1)
        if data is None:
            print("\nNo data has been read - has Hercules crashed?")
            sys.exit(1)
        self.data = data

    def get_run_times(self):
        return {}

    def get_name(self):
        return self.data["hercules"]["repository"]

    def get_header(self):
        header = self.data["hercules"]
        return header["begin_unix_time"], header["end_unix_time"]

    def get_burndown_parameters(self):
        header = self.data["Burndown"]
        return header["sampling"], header["granularity"], header["tick_size"]

    def get_project_burndown(self):
        return self.data["hercules"]["repository"], \
            self._parse_burndown_matrix(self.data["Burndown"]["project"]).T

    def get_files_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["files"].items()]

    def get_people_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["people"].items()]

    def get_ownership_burndown(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            {p[0]: self._parse_burndown_matrix(p[1])
             for p in self.data["Burndown"]["people"].items()}

    def get_people_interaction(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            self._parse_burndown_matrix(self.data["Burndown"]["people_interaction"])

    def get_files_coocc(self):
        coocc = self.data["Couples"]["files_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_people_coocc(self):
        coocc = self.data["Couples"]["people_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_shotness_coocc(self):
        shotness = self.data["Shotness"]
        index = ["%s:%s" % (i["file"], i["name"]) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int64)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = [(int(k), v) for k, v in record["counters"].items()]
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        from munch import munchify
        obj = munchify(self.data["Shotness"])
        # turn strings into ints
        for item in obj:
            item.counters = {int(k): v for k, v in item.counters.items()}
        if len(obj) == 0:
            raise KeyError
        return obj

    def get_sentiment(self):
        from munch import munchify
        return munchify({int(key): {
            "Comments": vals[2].split("|"),
            "Commits": vals[1],
            "Value": float(vals[0])
        } for key, vals in self.data["Sentiment"].items()})

    def get_devs(self):
        people = self.data["Devs"]["people"]
        days = {int(d): {int(dev): DevDay(*(int(x) for x in day[:-1]), day[-1])
                         for dev, day in devs.items()}
                for d, devs in self.data["Devs"]["ticks"].items()}
        return people, days

    def _parse_burndown_matrix(self, matrix):
        return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
                            for line in matrix.split("\n")])
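
    # The YAML matrix is a list of {column: value} dicts, one per row, packed into
    # scipy's CSR layout. CSR refresher on a hypothetical 2x2 example:
    # [[0, 5], [7, 0]] becomes data=[5, 7], indices=[1, 0], indptr=[0, 1, 2],
    # where data[indptr[i]:indptr[i + 1]] holds row i's non-zero values.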
    def _parse_coocc_matrix(self, matrix):
        from scipy.sparse import csr_matrix
        data = []
        indices = []
        indptr = [0]
        for row in matrix:
            for k, v in sorted(row.items()):
                data.append(v)
                indices.append(k)
            indptr.append(indptr[-1] + len(row))
        return csr_matrix((data, indices, indptr), shape=(len(matrix),) * 2)


class ProtobufReader(Reader):
    def read(self, file):
        try:
            from labours.pb_pb2 import AnalysisResults
        except ImportError as e:
            print("\n\n>>> You need to generate python/hercules/pb/pb_pb2.py - run \"make\"\n",
                  file=sys.stderr)
            raise e from None
        self.data = AnalysisResults()
        if file != "-":
            with open(file, "rb") as fin:
                bytes = fin.read()
        else:
            bytes = sys.stdin.buffer.read()
        if not bytes:
            raise ValueError("empty input")
        self.data.ParseFromString(bytes)
        self.contents = {}
        for key, val in self.data.contents.items():
            try:
                mod, name = PB_MESSAGES[key].rsplit(".", 1)
            except KeyError:
                sys.stderr.write("Warning: there is no registered PB decoder for %s\n" % key)
                continue
            cls = getattr(import_module(mod), name)
            self.contents[key] = msg = cls()
            msg.ParseFromString(val)

    def get_run_times(self):
        return {key: val for key, val in self.data.header.run_time_per_item.items()}

    def get_name(self):
        return self.data.header.repository

    def get_header(self):
        header = self.data.header
        return header.begin_unix_time, header.end_unix_time

    def get_burndown_parameters(self):
        burndown = self.contents["Burndown"]
        # tick_size arrives in nanoseconds in the protobuf; convert it to seconds
        return burndown.sampling, burndown.granularity, burndown.tick_size / 1000000000

    def get_project_burndown(self):
        return self._parse_burndown_matrix(self.contents["Burndown"].project)

    def get_files_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].files]

    def get_people_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].people]

    def get_ownership_burndown(self):
        people = self.get_people_burndown()
        return [p[0] for p in people], {p[0]: p[1].T for p in people}

    def get_people_interaction(self):
        burndown = self.contents["Burndown"]
        return [i.name for i in burndown.people], \
            self._parse_sparse_matrix(burndown.people_interaction).toarray()

    def get_files_coocc(self):
        node = self.contents["Couples"].file_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_people_coocc(self):
        node = self.contents["Couples"].people_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_shotness_coocc(self):
        shotness = self.get_shotness()
        index = ["%s:%s" % (i.file, i.name) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int32)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = list(record.counters.items())
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        records = self.contents["Shotness"].records
        if len(records) == 0:
            raise KeyError
        return records

    def get_sentiment(self):
        byday = self.contents["Sentiment"].SentimentByDay
        if len(byday) == 0:
            raise KeyError
        return byday

    def get_devs(self):
        people = list(self.contents["Devs"].dev_index)
        days = {d: {dev: DevDay(stats.commits, stats.stats.added, stats.stats.removed,
                                stats.stats.changed, {k: [v.added, v.removed, v.changed]
                                                      for k, v in stats.languages.items()})
                    for dev, stats in day.devs.items()}
                for d, day in self.contents["Devs"].ticks.items()}
        return people, days

    def _parse_burndown_matrix(self, matrix):
        dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
        for y, row in enumerate(matrix.rows):
            for x, col in enumerate(row.columns):
                dense[y, x] = col
        return matrix.name, dense.T

    def _parse_sparse_matrix(self, matrix):
        from scipy.sparse import csr_matrix
        return csr_matrix((list(matrix.data), list(matrix.indices), list(matrix.indptr)),
                          shape=(matrix.number_of_rows, matrix.number_of_columns))


READERS = {"yaml": YamlReader, "yml": YamlReader, "pb": ProtobufReader}
PB_MESSAGES = {
    "Burndown": "labours.pb_pb2.BurndownAnalysisResults",
    "Couples": "labours.pb_pb2.CouplesAnalysisResults",
    "Shotness": "labours.pb_pb2.ShotnessAnalysisResults",
    "Devs": "labours.pb_pb2.DevsAnalysisResults",
}
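

# The input format is sniffed from the file extension first; if there is none,
# read_input() probes the file instead: protobuf output is binary and fails to
# decode as text (UnicodeDecodeError), while YAML reads fine.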
def read_input(args):
    sys.stdout.write("Reading the input... ")
    sys.stdout.flush()
    if args.input != "-":
        if args.input_format == "auto":
            try:
                args.input_format = args.input.rsplit(".", 1)[1]
            except IndexError:
                try:
                    with open(args.input) as f:
                        f.read(1 << 16)
                    args.input_format = "yaml"
                except UnicodeDecodeError:
                    args.input_format = "pb"
    elif args.input_format == "auto":
        args.input_format = "yaml"
    reader = READERS[args.input_format]()
    reader.read(args.input)
    print("done")
    return reader
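

# DevDay aggregates one developer's stats for one tick: commit count, line counts,
# and a per-language {name: [added, removed, changed]} mapping. A hypothetical
# example of merging two days with add():
#   >>> a = DevDay(1, 10, 2, 3, {"Go": [10, 2, 3]})
#   >>> b = DevDay(2, 5, 0, 1, {"Go": [5, 0, 1]})
#   >>> a.add(b)
#   DevDay(Commits=3, Added=15, Removed=2, Changed=4, Languages={'Go': [15, 2, 4]})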
class DevDay(namedtuple("DevDay", ("Commits", "Added", "Removed", "Changed", "Languages"))):
    def add(self, dd):
        langs = defaultdict(lambda: [0] * 3)
        for key, val in self.Languages.items():
            for i in range(3):
                langs[key][i] += val[i]
        for key, val in dd.Languages.items():
            for i in range(3):
                langs[key][i] += val[i]
        return DevDay(Commits=self.Commits + dd.Commits,
                      Added=self.Added + dd.Added,
                      Removed=self.Removed + dd.Removed,
                      Changed=self.Changed + dd.Changed,
                      Languages=dict(langs))
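

# Kaplan-Meier survival analysis of code lines: each burndown band is a cohort;
# T holds lifetimes in ticks, W the number of lines sharing that lifetime, and E
# marks whether the removal was observed or the lines were still alive at the end
# (censored). lifelines' KaplanMeierFitter.fit(durations, event_observed, weights=...)
# consumes exactly these three arrays.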
def fit_kaplan_meier(matrix):
    from lifelines import KaplanMeierFitter

    T = []
    W = []
    indexes = numpy.arange(matrix.shape[0], dtype=int)
    entries = numpy.zeros(matrix.shape[0], int)
    dead = set()
    for i in range(1, matrix.shape[1]):
        diff = matrix[:, i - 1] - matrix[:, i]
        entries[diff < 0] = i
        mask = diff > 0
        deaths = diff[mask]
        T.append(numpy.full(len(deaths), i) - entries[indexes[mask]])
        W.append(deaths)
        entered = entries > 0
        entered[0] = True
        dead = dead.union(set(numpy.where((matrix[:, i] == 0) & entered)[0]))
    # add the survivors as censored
    nnzind = entries != 0
    nnzind[0] = True
    nnzind[sorted(dead)] = False
    T.append(numpy.full(nnzind.sum(), matrix.shape[1]) - entries[nnzind])
    W.append(matrix[nnzind, -1])
    T = numpy.concatenate(T)
    E = numpy.ones(len(T), bool)
    E[-nnzind.sum():] = 0
    W = numpy.concatenate(W)
    if T.size == 0:
        return None
    kmf = KaplanMeierFitter().fit(T, E, weights=W)
    return kmf


def print_survival_function(kmf, sampling):
    sf = kmf.survival_function_
    sf.index = [timedelta(days=d) for d in sf.index * sampling]
    sf.columns = ["Ratio of survived lines"]
    try:
        print(sf[len(sf) // 6::len(sf) // 6].append(sf.tail(1)))
    except ValueError:
        pass
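

# interpolate_burndown_matrix() expands the (bands x samples) burndown matrix into a
# day x day one. For example, with granularity=30 and sampling=30, one cell covering
# a 30-day band and a 30-day sample is spread over a 30x30 block of days: values grow
# linearly while the band is being written and decay linearly afterwards.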
def interpolate_burndown_matrix(matrix, granularity, sampling):
    daily = numpy.zeros(
        (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
        dtype=numpy.float32)
    """
    ----------> samples, x
    |
    |
    |
    ⌄
    bands, y
    """
    for y in range(matrix.shape[0]):
        for x in range(matrix.shape[1]):
            if y * granularity > (x + 1) * sampling:
                # the future is zeros
                continue

            def decay(start_index: int, start_val: float):
                if start_val == 0:
                    return
                k = matrix[y][x] / start_val  # <= 1
                scale = (x + 1) * sampling - start_index
                for i in range(y * granularity, (y + 1) * granularity):
                    initial = daily[i][start_index - 1]
                    for j in range(start_index, (x + 1) * sampling):
                        daily[i][j] = initial * (
                            1 + (k - 1) * (j - start_index + 1) / scale)

            def grow(finish_index: int, finish_val: float):
                initial = matrix[y][x - 1] if x > 0 else 0
                start_index = x * sampling
                if start_index < y * granularity:
                    start_index = y * granularity
                if finish_index == start_index:
                    return
                avg = (finish_val - initial) / (finish_index - start_index)
                for j in range(x * sampling, finish_index):
                    for i in range(start_index, j + 1):
                        daily[i][j] = avg
                # copy [x*g..y*s)
                for j in range(x * sampling, finish_index):
                    for i in range(y * granularity, x * sampling):
                        daily[i][j] = daily[i][j - 1]

            if (y + 1) * granularity >= (x + 1) * sampling:
                # x*granularity <= (y+1)*sampling
                # 1. x*granularity <= y*sampling
                #    y*sampling..(y+1)sampling
                #
                #              x+1
                #              /
                #             /
                #            / y+1  -|
                #           /        |
                #          / y      -|
                #         /
                #        / x
                #
                # 2. x*granularity > y*sampling
                #    x*granularity..(y+1)sampling
                #
                #              x+1
                #              /
                #             /
                #            / y+1  -|
                #           /        |
                #          / x      -|
                #         /
                #        / y
                if y * granularity <= x * sampling:
                    grow((x + 1) * sampling, matrix[y][x])
                elif (x + 1) * sampling > y * granularity:
                    grow((x + 1) * sampling, matrix[y][x])
                    avg = matrix[y][x] / ((x + 1) * sampling - y * granularity)
                    for j in range(y * granularity, (x + 1) * sampling):
                        for i in range(y * granularity, j + 1):
                            daily[i][j] = avg
            elif (y + 1) * granularity >= x * sampling:
                # y*sampling <= (x+1)*granularity < (y+1)sampling
                # y*sampling..(x+1)*granularity
                # (x+1)*granularity..(y+1)sampling
                #
                #            x+1
                #            /\
                #           /  \
                #          /    \
                #         /    y+1
                #        /
                #       y
                v1 = matrix[y][x - 1]
                v2 = matrix[y][x]
                delta = (y + 1) * granularity - x * sampling
                previous = 0
                if x > 0 and (x - 1) * sampling >= y * granularity:
                    # x*g <= (y-1)*s <= y*s <= (x+1)*g <= (y+1)*s
                    #           |________|.......^
                    if x > 1:
                        previous = matrix[y][x - 2]
                    scale = sampling
                else:
                    # (y-1)*s < x*g <= y*s <= (x+1)*g <= (y+1)*s
                    #            |______|.......^
                    scale = sampling if x == 0 else x * sampling - y * granularity
                peak = v1 + (v1 - previous) / scale * delta
                if v2 > peak:
                    # we need to adjust the peak, it may not be less than the decayed value
                    if x < matrix.shape[1] - 1:
                        # y*s <= (x+1)*g <= (y+1)*s < (y+2)*s
                        #           ^.........|_________|
                        k = (v2 - matrix[y][x + 1]) / sampling  # > 0
                        peak = matrix[y][x] + k * ((x + 1) * sampling - (y + 1) * granularity)
                        # peak > v2 > v1
                    else:
                        peak = v2
                        # not enough data to interpolate; this is at least not restricted
                grow((y + 1) * granularity, peak)
                decay((y + 1) * granularity, peak)
            else:
                # (x+1)*granularity < y*sampling
                # y*sampling..(y+1)sampling
                decay(x * sampling, matrix[y][x - 1])
    return daily


def import_pandas():
    import pandas
    try:
        from pandas.plotting import register_matplotlib_converters
        register_matplotlib_converters()
    except ImportError:
        pass
    return pandas
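

# floor_datetime() snaps a datetime down to the start of the latest `duration`-second
# interval in epoch time, e.g. floor_datetime(dt, 86400) truncates to the most recent
# epoch-aligned day boundary.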
def floor_datetime(dt, duration):
    return datetime.fromtimestamp(dt.timestamp() - dt.timestamp() % duration)


def load_burndown(header, name, matrix, resample, report_survival=True):
    pandas = import_pandas()

    start, last, sampling, granularity, tick = header
    assert sampling > 0
    assert granularity > 0
    start = floor_datetime(datetime.fromtimestamp(start), tick)
    last = datetime.fromtimestamp(last)
    if report_survival:
        kmf = fit_kaplan_meier(matrix)
        if kmf is not None:
            print_survival_function(kmf, sampling)
    finish = start + timedelta(seconds=matrix.shape[1] * sampling * tick)
    if resample not in ("no", "raw"):
        print("resampling to %s, please wait..." % resample)
        # Interpolate the day x day matrix.
        # Each day brings equal weight in the granularity.
        # Sampling's interpolation is linear.
        daily = interpolate_burndown_matrix(matrix, granularity, sampling)
        daily[(last - start).days:] = 0
        # Resample the bands
        aliases = {
            "year": "A",
            "month": "M"
        }
        resample = aliases.get(resample, resample)
        periods = 0
        date_granularity_sampling = [start]
        while date_granularity_sampling[-1] < finish:
            periods += 1
            date_granularity_sampling = pandas.date_range(
                start, periods=periods, freq=resample)
        if date_granularity_sampling[0] > finish:
            if resample == "A":
                print("too loose resampling - by year, trying by month")
                return load_burndown(header, name, matrix, "month", report_survival=False)
            else:
                raise ValueError("Too loose resampling: %s. Try finer." % resample)
        date_range_sampling = pandas.date_range(
            date_granularity_sampling[0],
            periods=(finish - date_granularity_sampling[0]).days,
            freq="1D")
        # Fill the new square matrix
        matrix = numpy.zeros(
            (len(date_granularity_sampling), len(date_range_sampling)),
            dtype=numpy.float32)
        for i, gdt in enumerate(date_granularity_sampling):
            istart = (date_granularity_sampling[i - 1] - start).days \
                if i > 0 else 0
            ifinish = (gdt - start).days

            for j, sdt in enumerate(date_range_sampling):
                if (sdt - start).days >= istart:
                    break

            matrix[i, j:] = \
                daily[istart:ifinish, (sdt - start).days:].sum(axis=0)
        # Hardcode some cases to improve labels' readability
        if resample in ("year", "A"):
            labels = [dt.year for dt in date_granularity_sampling]
        elif resample in ("month", "M"):
            labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
        else:
            labels = [dt.date() for dt in date_granularity_sampling]
    else:
        labels = [
            "%s - %s" % ((start + timedelta(seconds=i * granularity * tick)).date(),
                         (
                             start + timedelta(seconds=(i + 1) * granularity * tick)).date())
            for i in range(matrix.shape[0])]
        if len(labels) > 18:
            warnings.warn("Too many labels - consider resampling.")
        resample = "M"  # fake resampling type is checked while plotting
        date_range_sampling = pandas.date_range(
            start + timedelta(seconds=sampling * tick), periods=matrix.shape[1],
            freq="%dD" % sampling)
    return name, matrix, date_range_sampling, labels, granularity, sampling, resample


def load_ownership(header, sequence, contents, max_people):
    pandas = import_pandas()

    start, last, sampling, _, tick = header
    start = datetime.fromtimestamp(start)
    start = floor_datetime(start, tick)
    last = datetime.fromtimestamp(last)
    people = []
    for name in sequence:
        people.append(contents[name].sum(axis=1))
    people = numpy.array(people)
    date_range_sampling = pandas.date_range(
        start + timedelta(seconds=sampling * tick), periods=people[0].shape[0],
        freq="%dD" % sampling)
    if people.shape[0] > max_people:
        order = numpy.argsort(-people.sum(axis=1))
        chosen_people = people[order[:max_people + 1]]
        chosen_people[max_people] = people[order[max_people:]].sum(axis=0)
        people = chosen_people
        sequence = [sequence[i] for i in order[:max_people]] + ["others"]
        print("Warning: truncated people to the most owning %d" % max_people)
    for i, name in enumerate(sequence):
        if len(name) > 40:
            sequence[i] = name[:37] + "..."
    return sequence, people, date_range_sampling, last
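

# load_churn_matrix() turns the raw overwrite counts into shares: column 0 holds
# each developer's own line total, so every row is normalized by it; the remaining
# columns (sign-flipped, since overwrites are stored negative) then show which share
# of a developer's lines was overwritten by whom, the first column being
# "Unidentified".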
def load_churn_matrix(people, matrix, max_people):
    matrix = matrix.astype(float)
    if matrix.shape[0] > max_people:
        order = numpy.argsort(-matrix[:, 0])
        matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
        people = [people[i] for i in order[:max_people]]
        print("Warning: truncated people to most productive %d" % max_people)
    zeros = matrix[:, 0] == 0
    matrix[zeros, :] = 1
    matrix /= matrix[:, 0][:, None]
    matrix = -matrix[:, 1:]
    matrix[zeros, :] = 0
    for i, name in enumerate(people):
        if len(name) > 40:
            people[i] = name[:37] + "..."
    return people, matrix


def import_pyplot(backend, style):
    import matplotlib
    if backend:
        matplotlib.use(backend)
    from matplotlib import pyplot
    pyplot.style.use(style)
    print("matplotlib: backend is", matplotlib.get_backend())
    return matplotlib, pyplot


def apply_plot_style(figure, axes, legend, background, font_size, axes_size):
    foreground = "black" if background == "white" else "white"
    if axes_size is None:
        axes_size = (16, 12)
    else:
        axes_size = tuple(float(p) for p in axes_size.split(","))
    figure.set_size_inches(*axes_size)
    for side in ("bottom", "top", "left", "right"):
        axes.spines[side].set_color(foreground)
    for axis in (axes.xaxis, axes.yaxis):
        axis.label.update(dict(fontsize=font_size, color=foreground))
    for axis in ("x", "y"):
        getattr(axes, axis + "axis").get_offset_text().set_size(font_size)
        axes.tick_params(axis=axis, colors=foreground, labelsize=font_size)
    try:
        axes.ticklabel_format(axis="y", style="sci", scilimits=(0, 3))
    except AttributeError:
        pass
    figure.patch.set_facecolor(background)
    axes.set_facecolor(background)
    if legend is not None:
        frame = legend.get_frame()
        for setter in (frame.set_facecolor, frame.set_edgecolor):
            setter(background)
        for text in legend.get_texts():
            text.set_color(foreground)
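

# Example: get_plot_path("plots/out.svg", "project") -> "plots/out/project.svg";
# with no extension, ".png" is assumed. Intermediate directories are created on
# demand.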
def get_plot_path(base, name):
    root, ext = os.path.splitext(base)
    if not ext:
        ext = ".png"
    output = os.path.join(root, name + ext)
    os.makedirs(os.path.dirname(output), exist_ok=True)
    return output


def deploy_plot(title, output, background, tight=True):
    import matplotlib.pyplot as pyplot

    if not output:
        pyplot.gcf().canvas.set_window_title(title)
        pyplot.show()
    else:
        if title:
            pyplot.title(title, color="black" if background == "white" else "white")
        if tight:
            try:
                pyplot.tight_layout()
            except:  # noqa: E722
                print("Warning: failed to set the tight layout")
        pyplot.savefig(output, transparent=True)
    pyplot.clf()
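

# default_json() is passed to json.dump() as the fallback serializer: numpy arrays
# go through tolist() and datetimes through isoformat(). A hypothetical call:
#   json.dumps({"t": datetime(2020, 1, 1)}, default=default_json)
#   -> '{"t": "2020-01-01T00:00:00"}'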
def default_json(x):
    if hasattr(x, "tolist"):
        return x.tolist()
    if hasattr(x, "isoformat"):
        return x.isoformat()
    return x


def parse_date(text, default):
    if not text:
        return default
    from dateutil.parser import parse
    return parse(text)


def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granularity,
                  sampling, resample):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "burndown"
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    # normalize before plotting: stackplot copies its input, so dividing the matrix
    # afterwards would not affect the rendered polygons
    if args.relative:
        for i in range(matrix.shape[1]):
            matrix[:, i] /= matrix[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    pyplot.xlim(parse_date(args.start_date, date_range_sampling[0]),
                parse_date(args.end_date, date_range_sampling[-1]))
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(date_range_sampling[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(date_range_sampling[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    title = "%s %d x %d (granularity %d, sampling %d)" % \
        ((name,) + matrix.shape + (granularity, sampling))
    output = args.output
    if output:
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
    deploy_plot(title, output, args.background)


def plot_many_burndown(args, target, header, parts):
    if not args.output:
        print("Warning: output not set, showing %d plots." % len(parts))
    itercnt = progress.bar(parts, expected_size=len(parts)) \
        if progress is not None else parts
    stdout = io.StringIO()
    for name, matrix in itercnt:
        # capture load_burndown()'s chatter so that the progress bar is not
        # interrupted; replay it after the loop
        backup = sys.stdout
        sys.stdout = stdout
        plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample))
        sys.stdout = backup
    sys.stdout.write(stdout.getvalue())


def plot_churn_matrix(args, repo, people, matrix):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "churn_matrix"
        if args.mode == "all":
            output = get_plot_path(args.output, "matrix")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    s = 4 + matrix.shape[1] * 0.3
    fig = pyplot.figure(figsize=(s, s))
    ax = fig.add_subplot(111)
    ax.xaxis.set_label_position("top")
    ax.matshow(matrix, cmap=pyplot.cm.OrRd)
    ax.set_xticks(numpy.arange(0, matrix.shape[1]))
    ax.set_yticks(numpy.arange(0, matrix.shape[0]))
    ax.set_yticklabels(people, va="center")
    ax.set_xticks(numpy.arange(0.5, matrix.shape[1] + 0.5), minor=True)
    ax.set_xticklabels(["Unidentified"] + people, rotation=45, ha="left",
                       va="bottom", rotation_mode="anchor")
    ax.set_yticks(numpy.arange(0.5, matrix.shape[0] + 0.5), minor=True)
    ax.grid(False)
    ax.grid(which="minor")
    apply_plot_style(fig, ax, None, args.background, args.font_size, args.size)
    if not args.output:
        pos1 = ax.get_position()
        pos2 = (pos1.x0 + 0.15, pos1.y0 - 0.1, pos1.width * 0.9, pos1.height * 0.9)
        ax.set_position(pos2)
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "matrix")
    else:
        output = args.output
    title = "%s %d developers overwrite" % (repo, matrix.shape[0])
    if args.output:
        # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
        title = ""
    deploy_plot(title, output, args.background)


def plot_ownership(args, repo, names, people, date_range, last):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "ownership"
        if args.mode == "all" and args.output:
            output = get_plot_path(args.output, "people")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    # normalize before plotting: stackplot copies its input, so dividing afterwards
    # would not affect the rendered polygons
    if args.relative:
        for i in range(people.shape[1]):
            people[:, i] /= people[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    polys = pyplot.stackplot(date_range, people, labels=names)
    if names[-1] == "others":
        polys[-1].set_hatch("/")
    pyplot.xlim(parse_date(args.start_date, date_range[0]), parse_date(args.end_date, last))
    ncol = 1 if len(names) < 15 else 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size, ncol=ncol)
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "people")
    else:
        output = args.output
    deploy_plot("%s code ownership through time" % repo, output, args.background)


IDEAL_SHARD_SIZE = 4096
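

# train_embeddings() feeds the co-occurrence matrix to Swivel. The matrix is split
# into nshards x nshards submatrices by assigning rows/columns round-robin
# (reorder[row::nshards]), after dropping the least connected items so that
# len(index) divides evenly into shards.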
def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
    try:
        from . import swivel
    except (SystemError, ImportError):
        import swivel
    import tensorflow as tf

    assert matrix.shape[0] == matrix.shape[1]
    assert len(index) <= matrix.shape[0]
    outlier_threshold = numpy.percentile(matrix.data, 99)
    matrix.data[matrix.data > outlier_threshold] = outlier_threshold
    nshards = len(index) // shard_size
    if nshards * shard_size < len(index):
        nshards += 1
        shard_size = len(index) // nshards
        nshards = len(index) // shard_size
    remainder = len(index) - nshards * shard_size
    if remainder > 0:
        lengths = matrix.indptr[1:] - matrix.indptr[:-1]
        filtered = sorted(numpy.argsort(lengths)[remainder:])
    else:
        filtered = list(range(len(index)))
    if len(filtered) < matrix.shape[0]:
        print("Truncating the sparse matrix...")
        matrix = matrix[filtered, :][:, filtered]
    meta_index = []
    for i, j in enumerate(filtered):
        meta_index.append((index[j], matrix[i, i]))
    index = [mi[0] for mi in meta_index]
    with tempfile.TemporaryDirectory(prefix="hercules_labours_", dir=tmpdir or None) as tmproot:
        print("Writing Swivel metadata...")
        vocabulary = "\n".join(index)
        with open(os.path.join(tmproot, "row_vocab.txt"), "w") as out:
            out.write(vocabulary)
        with open(os.path.join(tmproot, "col_vocab.txt"), "w") as out:
            out.write(vocabulary)
        del vocabulary
        bool_sums = matrix.indptr[1:] - matrix.indptr[:-1]
        bool_sums_str = "\n".join(map(str, bool_sums.tolist()))
        with open(os.path.join(tmproot, "row_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        with open(os.path.join(tmproot, "col_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        del bool_sums_str
        reorder = numpy.argsort(-bool_sums)

        print("Writing Swivel shards...")
        for row in range(nshards):
            for col in range(nshards):
                def _int64s(xs):
                    return tf.train.Feature(
                        int64_list=tf.train.Int64List(value=list(xs)))

                def _floats(xs):
                    return tf.train.Feature(
                        float_list=tf.train.FloatList(value=list(xs)))

                indices_row = reorder[row::nshards]
                indices_col = reorder[col::nshards]
                shard = matrix[indices_row][:, indices_col].tocoo()

                example = tf.train.Example(features=tf.train.Features(feature={
                    "global_row": _int64s(indices_row),
                    "global_col": _int64s(indices_col),
                    "sparse_local_row": _int64s(shard.row),
                    "sparse_local_col": _int64s(shard.col),
                    "sparse_value": _floats(shard.data)}))

                with open(os.path.join(tmproot, "shard-%03d-%03d.pb" % (row, col)), "wb") as out:
                    out.write(example.SerializeToString())
        print("Training Swivel model...")
        swivel.FLAGS.submatrix_rows = shard_size
        swivel.FLAGS.submatrix_cols = shard_size
        if len(meta_index) <= IDEAL_SHARD_SIZE / 16:
            embedding_size = 50
            num_epochs = 100000
        elif len(meta_index) <= IDEAL_SHARD_SIZE:
            embedding_size = 50
            num_epochs = 50000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 2:
            embedding_size = 60
            num_epochs = 10000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 4:
            embedding_size = 70
            num_epochs = 8000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 10:
            embedding_size = 80
            num_epochs = 5000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 25:
            embedding_size = 100
            num_epochs = 1000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 100:
            embedding_size = 200
            num_epochs = 600
        else:
            embedding_size = 300
            num_epochs = 300
        if os.getenv("CI"):
            # Travis, AppVeyor etc. during the integration tests
            num_epochs /= 10
        swivel.FLAGS.embedding_size = embedding_size
        swivel.FLAGS.input_base_path = tmproot
        swivel.FLAGS.output_base_path = tmproot
        swivel.FLAGS.loss_multiplier = 1.0 / shard_size
        swivel.FLAGS.num_epochs = num_epochs
        # Tensorflow 1.5 parses sys.argv unconditionally *applause*
        argv_backup = sys.argv[1:]
        del sys.argv[1:]
        swivel.main(None)
        sys.argv.extend(argv_backup)
        print("Reading Swivel embeddings...")
        embeddings = []
        with open(os.path.join(tmproot, "row_embedding.tsv")) as frow:
            with open(os.path.join(tmproot, "col_embedding.tsv")) as fcol:
                for i, (lrow, lcol) in enumerate(zip(frow, fcol)):
                    prow, pcol = (l.split("\t", 1) for l in (lrow, lcol))
                    assert prow[0] == pcol[0]
                    erow, ecol = \
                        (numpy.fromstring(p[1], dtype=numpy.float32, sep="\t")
                         for p in (prow, pcol))
                    embeddings.append((erow + ecol) / 2)
    return meta_index, embeddings


class CORSWebServer(object):
    def __init__(self):
        self.thread = threading.Thread(target=self.serve)
        self.server = None

    def serve(self):
        outer = self

        try:
            from http.server import HTTPServer, SimpleHTTPRequestHandler, test
        except ImportError:  # Python 2
            from BaseHTTPServer import HTTPServer, test
            from SimpleHTTPServer import SimpleHTTPRequestHandler

        class ClojureServer(HTTPServer):
            def __init__(self, *args, **kwargs):
                HTTPServer.__init__(self, *args, **kwargs)
                outer.server = self

        class CORSRequestHandler(SimpleHTTPRequestHandler):
            def end_headers(self):
                self.send_header("Access-Control-Allow-Origin", "*")
                SimpleHTTPRequestHandler.end_headers(self)

        test(CORSRequestHandler, ClojureServer)

    def start(self):
        self.thread.start()

    def stop(self):
        if self.running:
            self.server.shutdown()
            self.thread.join()

    @property
    def running(self):
        return self.server is not None


web_server = CORSWebServer()


def write_embeddings(name, output, run_server, index, embeddings):
    print("Writing Tensorflow Projector files...")
    if not output:
        output = "couples"
    if output.endswith(".json"):
        output = os.path.join(output[:-5], "couples")
        run_server = False
    metaf = "%s_%s_meta.tsv" % (output, name)
    with open(metaf, "w") as fout:
        fout.write("name\tcommits\n")
        for pair in index:
            fout.write("%s\t%s\n" % pair)
    print("Wrote", metaf)
    dataf = "%s_%s_data.tsv" % (output, name)
    with open(dataf, "w") as fout:
        for vec in embeddings:
            fout.write("\t".join(str(v) for v in vec))
            fout.write("\n")
    print("Wrote", dataf)
    jsonf = "%s_%s.json" % (output, name)
    with open(jsonf, "w") as fout:
        fout.write("""{
  "embeddings": [
    {
      "tensorName": "%s %s coupling",
      "tensorShape": [%s, %s],
      "tensorPath": "http://0.0.0.0:8000/%s",
      "metadataPath": "http://0.0.0.0:8000/%s"
    }
  ]
}
""" % (output, name, len(embeddings), len(embeddings[0]), dataf, metaf))
    print("Wrote %s" % jsonf)
    if run_server and not web_server.running:
        web_server.start()
    url = "http://projector.tensorflow.org/?config=http://0.0.0.0:8000/" + jsonf
    print(url)
    if run_server:
        if shutil.which("xdg-open") is not None:
            os.system("xdg-open " + url)
        else:
            browser = os.getenv("BROWSER", "")
            if browser:
                os.system(browser + " " + url)
            else:
                print("\t" + url)


def show_shotness_stats(data):
    top = sorted(((r.counters[i], i) for i, r in enumerate(data)), reverse=True)
    for count, i in top:
        r = data[i]
        print("%8d %s:%s [%s]" % (count, r.file, r.name, r.internal_role))


def show_sentiment_stats(args, name, resample, start_date, data):
    from scipy.signal import convolve, slepian

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    start_date = datetime.fromtimestamp(start_date)
    data = sorted(data.items())
    mood = numpy.zeros(data[-1][0] + 1, dtype=numpy.float32)
    timeline = numpy.array([start_date + timedelta(days=i) for i in range(mood.shape[0])])
    for d, val in data:
        mood[d] = (0.5 - val.Value) * 2
    resolution = 32
    window = slepian(len(timeline) // resolution, 0.5)
    window /= window.sum()
    mood_smooth = convolve(mood, window, "same")
    pos = mood_smooth.copy()
    pos[pos < 0] = 0
    neg = mood_smooth.copy()
    neg[neg >= 0] = 0
    resolution = 4
    window = numpy.ones(len(timeline) // resolution)
    window /= window.sum()
    avg = convolve(mood, window, "same")
    pyplot.fill_between(timeline, pos, color="#8DB843", label="Positive")
    pyplot.fill_between(timeline, neg, color="#E14C35", label="Negative")
    pyplot.plot(timeline, avg, color="grey", label="Average", linewidth=5)
    legend = pyplot.legend(loc=1, fontsize=args.font_size)
    pyplot.ylabel("Comment sentiment")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    pyplot.xlim(parse_date(args.start_date, timeline[0]), parse_date(args.end_date, timeline[-1]))
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(timeline[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(timeline[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
    overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
    title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
        name, overall_pos, overall_neg, overall_pos - overall_neg)
    deploy_plot(title, args.output, args.background)


def show_devs(args, name, start_date, end_date, people, days):
    from scipy.signal import convolve, slepian

    max_people = 50
    if len(people) > max_people:
        print("Picking top %d developers by commit count" % max_people)
        # pick top N developers by commit count
        commits = defaultdict(int)
        for devs in days.values():
            for dev, stats in devs.items():
                commits[dev] += stats.Commits
        commits = sorted(((v, k) for k, v in commits.items()), reverse=True)
        chosen_people = {people[k] for _, k in commits[:max_people]}
    else:
        chosen_people = set(people)
    dists, devseries, devstats, route = order_commits(chosen_people, days, people)
    route_map = {v: i for i, v in enumerate(route)}
    # determine clusters
    clusters = hdbscan_cluster_routed_series(dists, route)
    keys = list(devseries.keys())
    route = [keys[node] for node in route]
    print("Plotting")
    # smooth time series
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    size = (end_date - start_date).days + 1
    plot_x = [start_date + timedelta(days=i) for i in range(size)]
    resolution = 64
    window = slepian(size // resolution, 0.5)
    final = numpy.zeros((len(devseries), size), dtype=numpy.float32)
    for i, s in enumerate(devseries.values()):
        arr = numpy.array(s).transpose()
        full_history = numpy.zeros(size, dtype=numpy.float32)
        mask = arr[0] < size
        full_history[arr[0][mask]] = arr[1][mask]
        final[route_map[i]] = convolve(full_history, window, "same")

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    pyplot.rcParams["figure.figsize"] = (32, 16)
    prop_cycle = pyplot.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    fig, axes = pyplot.subplots(final.shape[0], 1)
    backgrounds = ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
    max_cluster = numpy.max(clusters)
    for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
        if cluster >= 0:
            color = colors[cluster % len(colors)]
            i = 1
            while color == "#777777":
                color = colors[(max_cluster + i) % len(colors)]
                i += 1
        else:
            # outlier
            color = "#777777"
        ax.fill_between(plot_x, series, color=color)
        ax.set_axis_off()
        author = people[dev_i]
        ax.text(0.03, 0.5, author[:36] + (author[36:] and "..."),
                horizontalalignment="right", verticalalignment="center",
                transform=ax.transAxes, fontsize=14,
                color="black" if args.background == "white" else "white")
        ds = devstats[dev_i]
        stats = "%5d %8s %8s" % (ds[0], _format_number(ds[1] - ds[2]), _format_number(ds[3]))
        ax.text(0.97, 0.5, stats,
                horizontalalignment="left", verticalalignment="center",
                transform=ax.transAxes, fontsize=14, family="monospace",
                backgroundcolor=backgrounds[ds[1] <= ds[2]],
                color="black" if args.background == "white" else "white")
    axes[0].text(0.97, 1.75, " cmts    delta  changed",
                 horizontalalignment="left", verticalalignment="center",
                 transform=axes[0].transAxes, fontsize=14, family="monospace",
                 color="black" if args.background == "white" else "white")
    axes[-1].set_axis_on()
    target_num_labels = 12
    num_months = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month
    interval = int(numpy.ceil(num_months / target_num_labels))
    if interval >= 8:
        interval = int(numpy.ceil(num_months / (12 * target_num_labels)))
        axes[-1].xaxis.set_major_locator(matplotlib.dates.YearLocator(base=max(1, interval // 12)))
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
    else:
        axes[-1].xaxis.set_major_locator(matplotlib.dates.MonthLocator(interval=interval))
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m"))
    for tick in axes[-1].xaxis.get_major_ticks():
        tick.label.set_fontsize(args.font_size)
    axes[-1].spines["left"].set_visible(False)
    axes[-1].spines["right"].set_visible(False)
    axes[-1].spines["top"].set_visible(False)
    axes[-1].get_yaxis().set_visible(False)
    axes[-1].set_facecolor((1.0,) * 3 + (0.0,))

    title = ("%s commits" % name) if not args.output else ""
    deploy_plot(title, args.output, args.background)
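

# order_commits() max-normalizes each developer's commit series with a 7-day sliding
# window, computes pairwise distances with dynamic time warping (fastdtw), and
# seriates the distance matrix so that similar developers end up adjacent in the plot.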
def order_commits(chosen_people, days, people):
    from seriate import seriate
    try:
        from fastdtw import fastdtw
    except ImportError as e:
        print("Cannot import fastdtw: %s\nInstall it from https://github.com/slaypni/fastdtw" % e)
        sys.exit(1)
    # FIXME(vmarkovtsev): remove once https://github.com/slaypni/fastdtw/pull/28 is merged&released
    try:
        sys.modules["fastdtw.fastdtw"].__norm = lambda p: lambda a, b: numpy.linalg.norm(
            numpy.atleast_1d(a) - numpy.atleast_1d(b), p)
    except KeyError:
        # the native extension does not have this bug
        pass
    devseries = defaultdict(list)
    devstats = defaultdict(lambda: DevDay(0, 0, 0, 0, {}))
    for day, devs in sorted(days.items()):
        for dev, stats in devs.items():
            if people[dev] in chosen_people:
                devseries[dev].append((day, stats.Commits))
                devstats[dev] = devstats[dev].add(stats)
    print("Calculating the distance matrix")
    # max-normalize the time series using a sliding window
    series = list(devseries.values())
    for i, s in enumerate(series):
        arr = numpy.array(s).transpose().astype(numpy.float32)
        commits = arr[1]
        if len(commits) < 7:
            commits /= commits.max()
        else:
            # 4 is sizeof(float32)
            windows = numpy.lib.stride_tricks.as_strided(commits, [len(commits) - 6, 7], [4, 4])
            commits = numpy.concatenate((
                [windows[0, 0] / windows[0].max(),
                 windows[0, 1] / windows[0].max(),
                 windows[0, 2] / windows[0].max()],
                windows[:, 3] / windows.max(axis=1),
                [windows[-1, 4] / windows[-1].max(),
                 windows[-1, 5] / windows[-1].max(),
                 windows[-1, 6] / windows[-1].max()]
            ))
        arr[1] = commits * 7  # 7 is a pure heuristic here and is not related to the window size
        series[i] = arr.transpose()
    # calculate the distance matrix using dynamic time warping metric
    dists = numpy.full((len(series),) * 2, -100500, dtype=numpy.float32)
    for x, serx in enumerate(series):
        dists[x, x] = 0
        for y, sery in enumerate(series[x + 1:], start=x + 1):
            min_day = int(min(serx[0][0], sery[0][0]))
            max_day = int(max(serx[-1][0], sery[-1][0]))
            arrx = numpy.zeros(max_day - min_day + 1, dtype=numpy.float32)
            arry = numpy.zeros_like(arrx)
            arrx[serx[:, 0].astype(int) - min_day] = serx[:, 1]
            arry[sery[:, 0].astype(int) - min_day] = sery[:, 1]
            # L1 norm
            dist, _ = fastdtw(arrx, arry, radius=5, dist=1)
            dists[x, y] = dists[y, x] = dist
    print("Ordering the series")
    route = seriate(dists)
    return dists, devseries, devstats, route


def hdbscan_cluster_routed_series(dists, route):
    try:
        from hdbscan import HDBSCAN
    except ImportError as e:
        print("Cannot import hdbscan: %s\nInstall it with \"pip install hdbscan\"" % e)
        sys.exit(1)
    opt_dist_chain = numpy.cumsum(numpy.array(
        [0] + [dists[route[i], route[i + 1]] for i in range(len(route) - 1)]))
    clusters = HDBSCAN(min_cluster_size=2).fit_predict(opt_dist_chain[:, numpy.newaxis])
    return clusters


def show_devs_efforts(args, name, start_date, end_date, people, days, max_people):
    from scipy.signal import convolve, slepian

    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)

    efforts_by_dev = defaultdict(int)
    for day, devs in days.items():
        for dev, stats in devs.items():
            efforts_by_dev[dev] += stats.Added + stats.Removed + stats.Changed
    if len(efforts_by_dev) > max_people:
        chosen = {v for k, v in sorted(
            ((v, k) for k, v in efforts_by_dev.items()), reverse=True)[:max_people]}
        print("Warning: truncated people to the most active %d" % max_people)
    else:
        chosen = set(efforts_by_dev)
    chosen_efforts = sorted(((efforts_by_dev[k], k) for k in chosen), reverse=True)
    chosen_order = {k: i for i, (_, k) in enumerate(chosen_efforts)}

    efforts = numpy.zeros((len(chosen) + 1, (end_date - start_date).days + 1),
                          dtype=numpy.float32)
    for day, devs in days.items():
        if day < efforts.shape[1]:
            for dev, stats in devs.items():
                dev = chosen_order.get(dev, len(chosen_order))
                efforts[dev][day] += stats.Added + stats.Removed + stats.Changed
    efforts_cum = numpy.cumsum(efforts, axis=1)
    window = slepian(10, 0.5)
    window /= window.sum()
    for e in (efforts, efforts_cum):
        for i in range(e.shape[0]):
            ending = e[i][-len(window) * 2:].copy()
            e[i] = convolve(e[i], window, "same")
            e[i][-len(ending):] = ending

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]

    people = [people[k] for _, k in chosen_efforts] + ["others"]
    for i, name in enumerate(people):
        if len(name) > 40:
            people[i] = name[:37] + "..."

    polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")
    polys = pyplot.stackplot(plot_x, -efforts * efforts_cum.max() / efforts.max())
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")
    yticks = []
    for tick in pyplot.gca().yaxis.iter_ticks():
        if tick[1] >= 0:
            yticks.append(tick[1])
    pyplot.gca().yaxis.set_ticks(yticks)
    legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size or "16,10")
    deploy_plot("Efforts through time (changed lines of code)", args.output, args.background)

def show_old_vs_new(args, name, start_date, end_date, people, days):
    from scipy.signal import convolve, slepian  # note: slepian was removed in SciPy 1.8
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    new_lines = numpy.zeros((end_date - start_date).days + 1)
    old_lines = numpy.zeros_like(new_lines)
    for day, devs in days.items():
        for stats in devs.values():
            new_lines[day] += stats.Added
            old_lines[day] += stats.Removed + stats.Changed
    resolution = 32
    window = slepian(len(new_lines) // resolution, 0.5)
    new_lines = convolve(new_lines, window, "same")
    old_lines = convolve(old_lines, window, "same")
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(len(new_lines))]
    pyplot.fill_between(plot_x, new_lines, color="#8DB843", label="Changed new lines")
    pyplot.fill_between(plot_x, old_lines, color="#E14C35", label="Changed existing lines")
    pyplot.legend(loc=2, fontsize=args.font_size)
    for tick in chain(pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()):
        tick.label.set_fontsize(args.font_size)
    deploy_plot("Additions vs changes", args.output, args.background)

def show_languages(args, name, start_date, end_date, people, days):
    devlangs = defaultdict(lambda: defaultdict(lambda: numpy.zeros(3, dtype=int)))
    for day, devs in days.items():
        for dev, stats in devs.items():
            for lang, vals in stats.Languages.items():
                devlangs[dev][lang] += vals
    devlangs = sorted(devlangs.items(), key=lambda p: -sum(x.sum() for x in p[1].values()))
    for dev, ls in devlangs:
        print()
        print("#", people[dev])
        ls = sorted(((vals.sum(), lang) for lang, vals in ls.items()), reverse=True)
        for vals, lang in ls:
            if lang:
                print("%s: %d" % (lang, vals))

class ParallelDevData:
    """Per-developer positions on the parallel coordinates plot: the *_rank and
    *_index fields are 0-based positions on the corresponding axis, and the
    *_cluster fields are HDBSCAN cluster labels (-1 means noise)."""
    def __init__(self):
        self.commits_rank = -1
        self.commits = -1
        self.lines_rank = -1
        self.lines = -1
        self.ownership_rank = -1
        self.ownership = -1
        self.couples_index = -1
        self.couples_cluster = -1
        self.commit_coocc_index = -1
        self.commit_coocc_cluster = -1

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return str(self)

def load_devs_parallel(ownership, couples, devs, max_people):
    from seriate import seriate
    try:
        from hdbscan import HDBSCAN
    except ImportError as e:
        print("Cannot import hdbscan: %s\nInstall it with: pip install hdbscan" % e)
        sys.exit(1)
    people, owned = ownership
    _, cmatrix = couples
    _, days = devs
    print("calculating - commits")
    commits = defaultdict(int)
    for day, devs in days.items():
        for dev, stats in devs.items():
            commits[people[dev]] += stats.Commits
    chosen = [k for v, k in sorted(((v, k) for k, v in commits.items()),
                                   reverse=True)[:max_people]]
    result = {k: ParallelDevData() for k in chosen}
    for k, v in result.items():
        v.commits_rank = chosen.index(k)
        v.commits = commits[k]
    print("calculating - lines")
    lines = defaultdict(int)
    for day, devs in days.items():
        for dev, stats in devs.items():
            lines[people[dev]] += stats.Added + stats.Removed + stats.Changed
    lines_index = {k: i for i, (_, k) in enumerate(sorted(
        ((v, k) for k, v in lines.items() if k in chosen), reverse=True))}
    for k, v in result.items():
        v.lines_rank = lines_index[k]
        v.lines = lines[k]
    print("calculating - ownership")
    owned_index = {k: i for i, (_, k) in enumerate(sorted(
        ((owned[k][-1].sum(), k) for k in chosen), reverse=True))}
    for k, v in result.items():
        v.ownership_rank = owned_index[k]
        v.ownership = owned[k][-1].sum()
    print("calculating - couples")
    embeddings = numpy.genfromtxt(fname="couples_people_data.tsv", delimiter="\t")[
        [people.index(k) for k in chosen]]
    embeddings /= numpy.linalg.norm(embeddings, axis=1)[:, None]
    cos = embeddings.dot(embeddings.T)
    cos[cos > 1] = 1  # clip tiny floating point excesses above 1
    dists = numpy.arccos(cos)  # angular distance between the normalized embeddings
    clusters = HDBSCAN(min_cluster_size=2, metric="precomputed").fit_predict(dists)
    for k, v in result.items():
        v.couples_cluster = clusters[chosen.index(k)]
    couples_order = seriate(dists)
    roll_options = []
    for i in range(len(couples_order)):
        loss = 0
        for k, v in result.items():
            loss += abs(
                v.ownership_rank - (couples_order.index(chosen.index(k)) + i) % len(chosen))
        roll_options.append(loss)
    best_roll = numpy.argmin(roll_options)
    couples_order = list(numpy.roll(couples_order, best_roll))
    for k, v in result.items():
        v.couples_index = couples_order.index(chosen.index(k))
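    # A seriated order is only defined up to rotation, so the loop above tries
    # every cyclic shift ("roll") of couples_order and keeps the one whose
    # positions deviate least, in the L1 sense, from the ownership ranks on the
    # neighboring axis; presumably this minimizes line crossings between the two
    # axes of the parallel coordinates plot.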
- print("calculating - commit series")
- dists, devseries, _, orig_route = order_commits(chosen, days, people)
- keys = list(devseries.keys())
- route = [keys[node] for node in orig_route]
- for roll in range(len(route)):
- loss = 0
- for k, v in result.items():
- i = route.index(people.index(k))
- loss += abs(v.couples_index - ((i + roll) % len(route)))
- roll_options[roll] = loss
- best_roll = numpy.argmin(roll_options)
- route = list(numpy.roll(route, best_roll))
- orig_route = list(numpy.roll(orig_route, best_roll))
- clusters = hdbscan_cluster_routed_series(dists, orig_route)
- for k, v in result.items():
- v.commit_coocc_index = route.index(people.index(k))
- v.commit_coocc_cluster = clusters[v.commit_coocc_index]
- return result

def show_devs_parallel(args, name, start_date, end_date, devs):
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    from matplotlib.collections import LineCollection

    def solve_equations(x1, y1, x2, y2):
        xcube = (x1 - x2) ** 3
        a = 2 * (y2 - y1) / xcube
        b = 3 * (y1 - y2) * (x1 + x2) / xcube
        c = 6 * (y2 - y1) * x1 * x2 / xcube
        d = y1 - a * x1 ** 3 - b * x1 ** 2 - c * x1
        return a, b, c, d
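    # solve_equations() fits the unique cubic y = a*x**3 + b*x**2 + c*x + d that
    # passes through (x1, y1) and (x2, y2) with zero slope at both endpoints:
    # y'(x) = 6*(y2 - y1)*(x - x1)*(x - x2) / (x1 - x2)**3 vanishes at x1 and x2,
    # so every segment below leaves and enters its axes horizontally.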
    # biggest = {k: max(getattr(d, k) for d in devs.values())
    #            for k in ("commits", "lines", "ownership")}
    for k, dev in devs.items():
        points = numpy.array([
            (1, dev.commits_rank),
            (2, dev.lines_rank),
            (3, dev.ownership_rank),
            (4, dev.couples_index),
            (5, dev.commit_coocc_index)],
            dtype=float)
        points[:, 1] = points[:, 1] / len(devs)
        splines = []
        for i in range(len(points) - 1):
            a, b, c, d = solve_equations(*points[i], *points[i + 1])
            x = numpy.linspace(i + 1, i + 2, 100)
            smooth_points = numpy.array(
                [x, a * x ** 3 + b * x ** 2 + c * x + d]).T.reshape(-1, 1, 2)
            splines.append(smooth_points)
        points = numpy.concatenate(splines)
        segments = numpy.concatenate([points[:-1], points[1:]], axis=1)
        lc = LineCollection(segments)
        # color each segment by its position along the curve
        lc.set_array(numpy.linspace(0, 0.1, segments.shape[0]))
        pyplot.gca().add_collection(lc)
    pyplot.xlim(0, 6)
    pyplot.ylim(-0.1, 1.1)
    deploy_plot("Developers", args.output, args.background)

def _format_number(n):
    if n == 0:
        return "0"
    power = int(numpy.log10(abs(n)))
    if power >= 6:
        n = n / 1000000
        if n >= 10:
            n = str(int(n))
        else:
            n = "%.1f" % n
            if n.endswith("0"):
                n = n[:-2]  # strip the trailing ".0"
        suffix = "M"
    elif power >= 3:
        n = n / 1000
        if n >= 10:
            n = str(int(n))
        else:
            n = "%.1f" % n
            if n.endswith("0"):
                n = n[:-2]  # strip the trailing ".0"
        suffix = "K"
    else:
        n = str(n)
        suffix = ""
    return n + suffix
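
# Illustrative behavior of the abbreviation above (worked by hand):
#   _format_number(0)       -> "0"
#   _format_number(950)     -> "950"
#   _format_number(1500)    -> "1.5K"
#   _format_number(10000)   -> "10K"
#   _format_number(2000000) -> "2M"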

def main():
    args = parse_args()
    reader = read_input(args)
    header = reader.get_header()
    name = reader.get_name()

    burndown_warning = "Burndown stats were not collected. Re-run hercules with --burndown."
    burndown_files_warning = \
        "Burndown stats for files were not collected. Re-run hercules with " \
        "--burndown --burndown-files."
    burndown_people_warning = \
        "Burndown stats for people were not collected. Re-run hercules with " \
        "--burndown --burndown-people."
    couples_warning = "Coupling stats were not collected. Re-run hercules with --couples."
    shotness_warning = "Structural hotness stats were not collected. Re-run hercules with " \
                       "--shotness. Also check --languages - the output may be empty."
    sentiment_warning = "Sentiment stats were not collected. Re-run hercules with --sentiment."
    devs_warning = "Devs stats were not collected. Re-run hercules with --devs."

    def run_times():
        rt = reader.get_run_times()
        pandas = import_pandas()
        series = pandas.to_timedelta(pandas.Series(rt).sort_values(ascending=False), unit="s")
        df = pandas.concat([series, series / series.sum()], axis=1)
        df.columns = ["time", "ratio"]
        print(df)

    def project_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print("project: " + burndown_warning)
            return
        plot_burndown(args, "project",
                      *load_burndown(full_header, *reader.get_project_burndown(),
                                     resample=args.resample))

    def files_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
        except KeyError:
            print("files: " + burndown_files_warning)

    def people_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "person", full_header, reader.get_people_burndown())
        except KeyError:
            print("people: " + burndown_people_warning)

    def churn_matrix():
        try:
            plot_churn_matrix(args, name, *load_churn_matrix(
                *reader.get_people_interaction(), max_people=args.max_people))
        except KeyError:
            print("churn_matrix: " + burndown_people_warning)

    def ownership_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_ownership(args, name, *load_ownership(
                full_header, *reader.get_ownership_burndown(), max_people=args.max_people))
        except KeyError:
            print("ownership: " + burndown_people_warning)

    def couples_files():
        try:
            write_embeddings("files", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_files_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(couples_warning)

    def couples_people():
        try:
            write_embeddings("people", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_people_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(couples_warning)

    def couples_shotness():
        try:
            write_embeddings("shotness", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_shotness_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(shotness_warning)

    def shotness():
        try:
            data = reader.get_shotness()
        except KeyError:
            print(shotness_warning)
            return
        show_shotness_stats(data)

    def sentiment():
        try:
            data = reader.get_sentiment()
        except KeyError:
            print(sentiment_warning)
            return
        show_sentiment_stats(args, reader.get_name(), args.resample, reader.get_header()[0], data)

    def devs():
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs(args, reader.get_name(), *reader.get_header(), *data)

    def devs_efforts():
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs_efforts(args, reader.get_name(), *reader.get_header(), *data,
                          max_people=args.max_people)

    def old_vs_new():
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)

    def languages():
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_languages(args, reader.get_name(), *reader.get_header(), *data)

    def devs_parallel():
        try:
            ownership = reader.get_ownership_burndown()
        except KeyError:
            print(burndown_people_warning)
            return
        try:
            couples = reader.get_people_coocc()
        except KeyError:
            print(couples_warning)
            return
        try:
            devs = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs_parallel(args, reader.get_name(), *reader.get_header(),
                           load_devs_parallel(ownership, couples, devs, args.max_people))

    modes = {
        "run-times": run_times,
        "burndown-project": project_burndown,
        "burndown-file": files_burndown,
        "burndown-person": people_burndown,
        "churn-matrix": churn_matrix,
        "ownership": ownership_burndown,
        "couples-files": couples_files,
        "couples-people": couples_people,
        "couples-shotness": couples_shotness,
        "shotness": shotness,
        "sentiment": sentiment,
        "devs": devs,
        "devs-efforts": devs_efforts,
        "old-vs-new": old_vs_new,
        "languages": languages,
        "devs-parallel": devs_parallel,
    }
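    # Any mode string that is not a key of the dict above must be "all", which
    # the assert below enforces. "all" runs the batch of reports listed here;
    # run-times, old-vs-new, languages, and devs-parallel are left out of the
    # batch (the devs-parallel call below is commented out).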
    try:
        modes[args.mode]()
    except KeyError:
        assert args.mode == "all"
        project_burndown()
        files_burndown()
        people_burndown()
        churn_matrix()
        ownership_burndown()
        couples_files()
        couples_people()
        couples_shotness()
        shotness()
        sentiment()
        devs()
        devs_efforts()
        # devs_parallel()
    if web_server.running:
        secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
        print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
        sys.stdout.flush()
        try:
            time.sleep(secs)
        except KeyboardInterrupt:
            pass
        web_server.stop()


if __name__ == "__main__":
    sys.exit(main())