1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- import argparse
- from datetime import datetime, timedelta
- import os
- import re
- import sys
- import yaml
- from matplotlib import pyplot
- import matplotlib.dates as mdates
- import numpy
- import pandas
def parse_args():
    """Define the command-line interface and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` holding ``input``, ``output``, ``format``
        and ``tick_days``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "input",
        help="Path to the input YAML file. \"-\" means stdin.",
    )
    cli.add_argument(
        "-o", "--output",
        help="Output directory. If empty, display the plots.",
    )
    cli.add_argument(
        "-f", "--format",
        choices=("png", "svg"),
        default="png",
        help="Output format",
    )
    cli.add_argument(
        "--tick-days",
        type=int,
        default=7,
        help="Ticks interval in days.",
    )
    return cli.parse_args()
def parse_input(file):
    """Load the analysis results from a YAML file or from standard input.

    Args:
        file: Path to the YAML document. The literal "-" reads from
            ``sys.stdin`` instead of opening a file.

    Returns:
        Whatever ``yaml.load`` deserializes from the document.

    Exits the process with status 1, after printing a hint, when the input
    cannot be decoded or is rejected by the YAML reader.
    """
    # Swap the reader's "non-printable characters" pattern for a regex that
    # can never match, so such characters do not abort the load.
    yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
    try:
        loader = yaml.CLoader
    except AttributeError:
        print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
        loader = yaml.Loader
    # SECURITY NOTE(review): yaml.Loader / yaml.CLoader can instantiate
    # arbitrary Python objects from tagged nodes. Only feed this trusted
    # input, or switch to the Safe loaders if the documents never rely on
    # Python-specific tags.
    try:
        if file == "-":
            return yaml.load(sys.stdin, Loader=loader)
        # YAML documents are UTF-8; do not depend on the locale's default.
        with open(file, encoding="utf-8") as fin:
            return yaml.load(fin, Loader=loader)
    except (UnicodeError, yaml.reader.ReaderError) as e:
        # UnicodeError also covers UnicodeDecodeError, which is what reading
        # a file with undecodable bytes actually raises.
        print("\nInvalid unicode in the input: %s\nPlease filter it through "
              "fix_yaml_unicode.py" % e)
        sys.exit(1)
def plot_churn(name, data, url, beginTime, endTime, output, fmt, tick_interval):
    """Plot daily additions/removals as bars plus the scaled cumulative net change.

    Args:
        name: Series label; used in the title and, with "/" replaced by "_",
            as the output file stem.
        data: Mapping with "days", "additions" and "removals" sequences.
            Each day is an offset in days from ``beginTime``.
        url: Text appended to the plot title after the name.
        beginTime: ``datetime`` of the first day on the x axis.
        endTime: ``datetime`` of the last day on the x axis.
        output: Directory to save the figure into. When falsy, the figure is
            displayed instead.
        fmt: File extension of the saved figure.
        tick_interval: Distance in days between major x-axis ticks.
    """
    days, adds, dels = data["days"], data["additions"], data["removals"]
    dates = [beginTime + timedelta(days=d) for d in days]
    df = pandas.DataFrame(data=list(zip(adds, dels)),
                          index=dates,
                          columns=("additions", "removals"))
    # Removals are drawn below the x axis.
    df["removals"] = -df["removals"]
    # One row per calendar day; days without data become NaN.
    df = df.reindex(pandas.date_range(beginTime, endTime, freq="D"))

    # Cumulative net change, carried forward across days without data.
    # Series.ffill() replaces the deprecated fillna(method="ffill").
    effective = (df["additions"] + df["removals"]).cumsum().ffill()
    # Rescale the line so that its peak matches the tallest bar.
    # NOTE(review): a zero peak makes this a division by zero (NaN/inf).
    scale = numpy.maximum(df.max(), -df.min()).max()
    effective = effective / effective.max() * scale

    fig = pyplot.figure(figsize=(16, 9))
    axes = pyplot.gca()
    for spine in axes.spines.values():
        spine.set_visible(False)
    axes.xaxis.set_major_locator(mdates.DayLocator(interval=tick_interval))
    axes.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
    # Booleans are required here: the strings "off"/"on" are both truthy, so
    # current matplotlib would leave every tick and label switched on.
    pyplot.tick_params(top=False, bottom=False, left=False, right=False,
                       labelleft=False, labelbottom=True)
    pyplot.bar(df.index, df["additions"], label="additions")
    pyplot.bar(df.index, df["removals"], label="removals")
    pyplot.plot(df.index, effective, "black", label="effective")
    pyplot.xticks(rotation="vertical")
    pyplot.legend(loc=2, fontsize=18)
    pyplot.title("%s churn plot, %s" % (name, url), fontsize=24)

    if not output:
        pyplot.show()
        return
    os.makedirs(output, exist_ok=True)
    try:
        pyplot.savefig(os.path.join(output, name.replace("/", "_") + "." + fmt),
                       bbox_inches="tight", transparent=True)
    finally:
        # Release the figure; otherwise every call keeps one alive in pyplot.
        pyplot.close(fig)
def main():
    """Parse the arguments, load the input and plot every churn series.

    The time range comes from the "begin_unix_time" and "end_unix_time"
    entries of the "hercules" section; one plot is produced per entry of
    "ChurnAnalysis".
    """
    args = parse_args()
    report = parse_input(args.input)
    begin = datetime.fromtimestamp(report["hercules"]["begin_unix_time"])
    end = datetime.fromtimestamp(report["hercules"]["end_unix_time"])
    for series_name, series in report["ChurnAnalysis"].items():
        plot_churn(
            series_name,
            series,
            report["hercules"]["repository"],
            begin,
            end,
            args.output,
            args.format,
            args.tick_days,
        )
# Run only when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|