123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- import argparse
- import os
- import subprocess
- import sys
- import time
- import numpy
- from labours.burndown import import_pandas, load_burndown
- from labours.cors_web_server import web_server
- from labours.embeddings import train_embeddings, write_embeddings
- from labours.modes.burndown import plot_burndown, plot_many_burndown
- from labours.modes.devs import show_devs, show_devs_efforts
- from labours.modes.devs_parallel import load_devs_parallel, show_devs_parallel
- from labours.modes.languages import show_languages
- from labours.modes.old_vs_new import show_old_vs_new
- from labours.modes.overwrites import load_overwrites_matrix, plot_overwrites_matrix
- from labours.modes.ownership import load_ownership, plot_ownership
- from labours.modes.sentiment import show_sentiment_stats
- from labours.modes.shotness import show_shotness_stats
- from labours.readers import read_input
def list_matplotlib_styles():
    """Return the matplotlib style names offered as ``--style`` choices.

    The available styles are queried in a child Python interpreter
    (``sys.executable -c ...``) which prints ``repr(pyplot.style.available)``
    to its stdout.
    NOTE(review): presumably a subprocess is used so that ``matplotlib.pyplot``
    is not imported into this process before a backend is selected - confirm.

    Returns:
        A list starting with ``"default"`` and ``"classic"``, followed by the
        remaining styles reported by matplotlib in the order it reported them.

    Raises:
        subprocess.CalledProcessError: if the child interpreter exits with a
            non-zero status (for example when matplotlib cannot be imported).
    """
    # Local import: only this function needs `ast`.
    import ast

    script = "import sys; from matplotlib import pyplot; " \
             "sys.stdout.write(repr(pyplot.style.available))"
    # universal_newlines=True makes check_output return text instead of bytes,
    # which is what ast.literal_eval requires.
    output = subprocess.check_output([sys.executable, "-c", script], universal_newlines=True)
    # The child prints the repr() of a list of strings; literal_eval parses it
    # back without executing arbitrary expressions the way eval() would.
    styles = ast.literal_eval(output)
    pinned = ["default", "classic"]
    # Filter instead of list.remove(): no ValueError when "classic" is absent,
    # and no duplicated entries when a pinned name is also reported.
    return pinned + [style for style in styles if style not in pinned]
def parse_args():
    """Declare the command line interface and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` returned by ``ArgumentParser.parse_args()``.
        The repeated ``-m/--mode`` values are collected in its ``modes`` list.
    """
    dateutil_url = "https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.parse"
    # Shared tail of the --start-date / --end-date help messages.
    date_format_note = "Any format is accepted which is supported by %s" % dateutil_url
    available_modes = [
        "burndown-project", "burndown-file", "burndown-person",
        "overwrites-matrix", "ownership", "couples-files",
        "couples-people", "couples-shotness", "shotness", "sentiment",
        "devs", "devs-efforts", "old-vs-new", "run-times",
        "languages", "devs-parallel", "all",
    ]

    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # Input / output.
    add("-o", "--output", default="",
        help="Path to the output file/directory (empty for display). "
             "If the extension is JSON, the data is saved instead of the real image.")
    add("-i", "--input", default="-", help="Path to the input file (- for stdin).")
    add("-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"])

    # Plot appearance.
    add("--font-size", type=int, default=12, help="Size of the labels and legend.")
    add("--style", default="ggplot", choices=list_matplotlib_styles(),
        help="Plot style to use.")
    add("--backend", help="Matplotlib backend to use.")
    add("--background", default="white", choices=["black", "white"],
        help="Plot's general color scheme.")
    add("--size", help="Axes' size in inches, for example \"12,9\"")
    add("--relative", action="store_true",
        help="Occupy 100%% height for every measurement.")
    add("--tmpdir", help="Temporary directory for intermediate files.")

    # What to plot and over which time range.
    add("-m", "--mode", dest="modes", action="append", default=[],
        choices=available_modes,
        help="What to plot. Can be repeated, e.g. -m burndown-project -m run-times")
    add("--resample", default="year",
        help="The way to resample the time series. Possible values are: "
             '"month", "year", "no", "raw" and pandas offset aliases ('
             "http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases).")
    add("--start-date", help="Start date of time-based plots. " + date_format_note)
    add("--end-date", help="End date of time-based plots. " + date_format_note)

    # Mode-specific switches.
    add("--disable-projector", action="store_true",
        help="Do not run Tensorflow Projector on couples.")
    add("--max-people", type=int, default=20,
        help="Maximum number of developers in overwrites matrix and people plots.")
    add("--order-ownership-by-time", action="store_true",
        help="Sort developers in the ownership plot according to their first "
             "appearance in the history. The default is sorting by the number of commits.")

    return cli.parse_args()
def main():
    """Parse the command line, read the input and run every requested mode.

    Each mode is implemented as a nested closure over ``args`` and ``reader``.
    The closures catch ``KeyError`` raised while fetching data from the reader
    and print a hint about which hercules flag is needed instead of failing.
    After all modes have run, if the CORS web server is running, the process
    sleeps for ``COUPLES_SERVER_TIME`` seconds (environment variable, default
    60) and then stops the server.

    Raises:
        ImportError: re-raised from a mode function unless the "all" mode
            was requested, in which case it is only reported.
    """
    args = parse_args()
    reader = read_input(args)
    header = reader.get_header()
    name = reader.get_name()

    # Hints printed when the reader raises KeyError for a requested section.
    burndown_warning = "Burndown stats were not collected. Re-run hercules with --burndown."
    burndown_files_warning = \
        "Burndown stats for files were not collected. Re-run hercules with " \
        "--burndown --burndown-files."
    burndown_people_warning = \
        "Burndown stats for people were not collected. Re-run hercules with " \
        "--burndown --burndown-people."
    couples_warning = "Coupling stats were not collected. Re-run hercules with --couples."
    shotness_warning = "Structural hotness stats were not collected. Re-run hercules with " \
                       "--shotness. Also check --languages - the output may be empty."
    sentiment_warning = "Sentiment stats were not collected. Re-run hercules with --sentiment."
    devs_warning = "Devs stats were not collected. Re-run hercules with --devs."

    def run_times():
        """Print a table of run times, largest first, with each one's share of the total."""
        rt = reader.get_run_times()
        pandas = import_pandas()
        series = pandas.to_timedelta(pandas.Series(rt).sort_values(ascending=False), unit="s")
        df = pandas.concat([series, series / series.sum()], axis=1)
        df.columns = ["time", "ratio"]
        print(df)

    def project_burndown():
        """Plot the project-level burndown."""
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print("project: " + burndown_warning)
            return
        plot_burndown(args, "project",
                      *load_burndown(full_header, *reader.get_project_burndown(),
                                     resample=args.resample, interpolation_progress=True))

    def files_burndown():
        """Plot the burndown of the files returned by the reader."""
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
        except KeyError:
            print("files: " + burndown_files_warning)

    def people_burndown():
        """Plot the burndown of the people returned by the reader."""
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "person", full_header, reader.get_people_burndown())
        except KeyError:
            print("people: " + burndown_people_warning)

    def overwrites_matrix():
        """Plot the overwrites matrix and train embeddings on the interactions."""
        try:
            plot_overwrites_matrix(args, name, *load_overwrites_matrix(
                *reader.get_people_interaction(), max_people=args.max_people))
            # Load the matrix again, this time not normalized and with a very
            # high people limit, as the input for the embeddings.
            people, matrix = load_overwrites_matrix(
                *reader.get_people_interaction(), max_people=1000000, normalize=False)
            from scipy.sparse import csr_matrix
            # Drop the first column.
            # NOTE(review): presumably it does not correspond to a developer -
            # confirm against load_overwrites_matrix.
            matrix = matrix[:, 1:]
            # Fold the lower triangle onto the upper one, then mirror the
            # result so that the matrix becomes symmetric.
            matrix = numpy.triu(matrix) + numpy.tril(matrix).T
            matrix = matrix + matrix.T
            matrix = csr_matrix(matrix)
            try:
                write_embeddings("overwrites", args.output, not args.disable_projector,
                                 *train_embeddings(people, matrix, tmpdir=args.tmpdir))
            except AttributeError as e:
                # print() does not interpolate extra arguments the way logging
                # calls do, so the message is formatted explicitly with %.
                print("Training the embeddings is not possible: %s: %s"
                      % (type(e).__name__, e))
        except KeyError:
            print("overwrites_matrix: " + burndown_people_warning)

    def ownership_burndown():
        """Plot the code ownership."""
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_ownership(args, name, *load_ownership(
                full_header, *reader.get_ownership_burndown(), max_people=args.max_people,
                order_by_time=args.order_ownership_by_time))
        except KeyError:
            print("ownership: " + burndown_people_warning)

    def couples_files():
        """Train and write the embeddings of the files co-occurrence."""
        try:
            write_embeddings("files", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_files_coocc(),
                                               tmpdir=args.tmpdir))
        except KeyError:
            print(couples_warning)

    def couples_people():
        """Train and write the embeddings of the people co-occurrence."""
        try:
            write_embeddings("people", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_people_coocc(),
                                               tmpdir=args.tmpdir))
        except KeyError:
            print(couples_warning)

    def couples_shotness():
        """Train and write the embeddings of the shotness co-occurrence."""
        try:
            write_embeddings("shotness", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_shotness_coocc(),
                                               tmpdir=args.tmpdir))
        except KeyError:
            print(shotness_warning)

    def shotness():
        """Show the shotness stats."""
        try:
            data = reader.get_shotness()
        except KeyError:
            print(shotness_warning)
            return
        show_shotness_stats(data)

    def sentiment():
        """Show the sentiment stats."""
        try:
            data = reader.get_sentiment()
        except KeyError:
            print(sentiment_warning)
            return
        show_sentiment_stats(args, reader.get_name(), args.resample, reader.get_header()[0], data)

    def devs():
        """Show the devs stats, limited to ``args.max_people``."""
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs(args, reader.get_name(), *reader.get_header(), *data,
                  max_people=args.max_people)

    def devs_efforts():
        """Show the devs efforts, limited to ``args.max_people``."""
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs_efforts(args, reader.get_name(), *reader.get_header(), *data,
                          max_people=args.max_people)

    def old_vs_new():
        """Show the old-vs-new plot built from the devs stats."""
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)

    def languages():
        """Show the languages output built from the devs stats."""
        try:
            data = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_languages(args, reader.get_name(), *reader.get_header(), *data)

    def devs_parallel():
        """Show the devs-parallel plot; needs ownership, couples and devs data."""
        try:
            ownership = reader.get_ownership_burndown()
        except KeyError:
            print(burndown_people_warning)
            return
        try:
            couples = reader.get_people_coocc()
        except KeyError:
            print(couples_warning)
            return
        try:
            devs = reader.get_devs()
        except KeyError:
            print(devs_warning)
            return
        show_devs_parallel(args, reader.get_name(), *reader.get_header(),
                           load_devs_parallel(ownership, couples, devs, args.max_people))

    # Dispatch table: command line mode name -> implementation.
    modes = {
        "run-times": run_times,
        "burndown-project": project_burndown,
        "burndown-file": files_burndown,
        "burndown-person": people_burndown,
        "overwrites-matrix": overwrites_matrix,
        "ownership": ownership_burndown,
        "couples-files": couples_files,
        "couples-people": couples_people,
        "couples-shotness": couples_shotness,
        "shotness": shotness,
        "sentiment": sentiment,
        "devs": devs,
        "devs-efforts": devs_efforts,
        "old-vs-new": old_vs_new,
        "languages": languages,
        "devs-parallel": devs_parallel,
    }

    if "all" in args.modes:
        # "all" replaces whatever else was requested with this fixed list.
        all_mode = True
        args.modes = [
            "burndown-project",
            "overwrites-matrix",
            "ownership",
            "couples-files",
            "couples-people",
            "couples-shotness",
            "shotness",
            "devs",
            "devs-efforts",
        ]
    else:
        all_mode = False

    for mode in args.modes:
        if mode not in modes:
            print("Unknown mode: %s" % mode)
            continue

        print("Running: %s" % mode)
        # `args.mode` is required for path determination in the mode functions
        args.mode = ("all" if all_mode else mode)
        try:
            modes[mode]()
        except ImportError as ie:
            print("A module required by the %s mode was not found: %s" % (mode, ie))
            # In "all" mode a missing optional module only skips that mode.
            if not all_mode:
                raise

    if web_server.running:
        secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
        print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
        sys.stdout.flush()
        try:
            time.sleep(secs)
        except KeyboardInterrupt:
            # Ctrl-C only cuts the wait short; the server is still stopped below.
            pass
        web_server.stop()
|