plot_churn.py 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import argparse
  2. from datetime import datetime, timedelta
  3. import os
  4. import re
  5. import sys
  6. import yaml
  7. from matplotlib import pyplot
  8. import matplotlib.dates as mdates
  9. import pandas
  def parse_args(argv=None):
      """Parse the command line options of the churn plotting script.

      :param argv: Optional list of argument strings to parse. When None \
                   (the default), argparse reads ``sys.argv[1:]``.
      :return: argparse.Namespace with the attributes ``input``, ``output``, \
               ``format`` and ``tick_days``.
      :raises SystemExit: raised by argparse when the arguments are invalid \
                          or when ``--help`` is requested.
      """
      def positive_int(text):
          # The tick interval is handed to a day-based tick locator later on;
          # rejecting zero and negative values here yields a clear usage
          # error instead of a failure deep inside the plotting code.
          value = int(text)
          if value < 1:
              raise argparse.ArgumentTypeError(
                  "must be a positive integer, got %r" % text)
          return value

      parser = argparse.ArgumentParser()
      parser.add_argument("input", help="Path to the input YAML file. \"-\" means stdin.")
      parser.add_argument("-o", "--output", help="Output directory. If empty, display the plots.")
      parser.add_argument("-f", "--format", choices=("png", "svg"), default="png",
                          help="Output format")
      parser.add_argument("--tick-days", type=positive_int, default=7,
                          help="Ticks interval in days.")
      return parser.parse_args(argv)
  def parse_input(file):
      """Load the YAML document from a file path or from standard input.

      :param file: Path to the YAML file; the string "-" means read from \
                   ``sys.stdin``.
      :return: The object produced by ``yaml.load`` for the document.

      If the input cannot be decoded or is rejected by the YAML reader, an
      explanatory message is printed and the process exits with status 1.
      """
      # "(?!x)x" can never match, so assigning it to the reader's
      # NON_PRINTABLE pattern switches off PyYAML's non-printable character
      # check. This modifies the class attribute, i.e. it applies to every
      # subsequent yaml load in this process.
      yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
      try:
          loader = yaml.CLoader
      except AttributeError:
          print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
          loader = yaml.Loader
      # NOTE(security): Loader/CLoader are the full PyYAML loaders and can
      # construct arbitrary Python objects. Only feed this function input
      # from a trusted source.
      try:
          if file != "-":
              with open(file) as fin:
                  return yaml.load(fin, Loader=loader)
          else:
              return yaml.load(sys.stdin, Loader=loader)
      except (UnicodeError, yaml.reader.ReaderError) as e:
          # UnicodeError is the parent of both UnicodeEncodeError and
          # UnicodeDecodeError: undecodable bytes read from a text-mode
          # stream raise the latter, which must end up here as well.
          print("\nInvalid unicode in the input: %s\nPlease filter it through "
                "fix_yaml_unicode.py" % e)
          sys.exit(1)
  def plot_churn(name, data, url, beginTime, endTime, output, fmt, tick_interval):
      """Draw one bar chart of added and removed amounts per day.

      :param name: Label used in the plot title and, with "/" replaced by \
                   "_", as the output file name.
      :param data: Mapping with the keys "days", "additions" and "removals"; \
                   "days" holds day offsets counted from beginTime.
      :param url: Text appended to the plot title.
      :param beginTime: datetime of the first day on the time axis.
      :param endTime: datetime bounding the end of the time axis.
      :param output: Output directory; when empty or None the plot is shown \
                     on screen instead of being saved.
      :param fmt: File extension appended to the saved file name.
      :param tick_interval: Distance between major x ticks, in days.
      """
      days, adds, dels = data["days"], data["additions"], data["removals"]
      dates = [beginTime + timedelta(days=d) for d in days]
      df = pandas.DataFrame(data=list(zip(adds, dels)),
                            index=dates,
                            columns=("additions", "removals"))
      # Removals are drawn below the zero line.
      df["removals"] = -df["removals"]
      # Put the samples on a continuous daily index; days absent from the
      # input become NaN rows.
      df = df.reindex(pandas.date_range(beginTime, endTime, freq="D"))
      fig = pyplot.figure(figsize=(16, 9))
      ax = pyplot.gca()
      for spine in ax.spines.values():
          spine.set_visible(False)
      ax.xaxis.set_major_locator(mdates.DayLocator(interval=tick_interval))
      ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
      # tick_params expects booleans. The strings "off"/"on" are not
      # converted by current Matplotlib releases, and a non-empty string
      # such as "off" is truthy, i.e. it would turn the ticks on.
      ax.tick_params(top=False, bottom=False, left=False, right=False,
                     labelleft=False, labelbottom=True)
      pyplot.bar(df.index, df["additions"], label="additions")
      pyplot.bar(df.index, df["removals"], label="removals")
      pyplot.xticks(rotation="vertical")
      pyplot.legend(loc=1)
      pyplot.title("%s churn plot, %s" % (name, url), fontsize=24)
      if not output:
          pyplot.show()
      else:
          os.makedirs(output, exist_ok=True)
          pyplot.savefig(os.path.join(output, name.replace("/", "_") + "." + fmt),
                         bbox_inches="tight", transparent=True)
          # This function is called once per plotted item; without closing,
          # every saved figure would stay registered with pyplot and keep
          # its memory until the process ends.
          pyplot.close(fig)
  def main():
      """Script entry point: parse the options, load the input, plot each item.

      Reads the "begin_unix_time" and "end_unix_time" entries of the
      "hercules" section to get the plotted time range, then calls
      plot_churn once for every entry of the "ChurnAnalysis" section.
      """
      options = parse_args()
      document = parse_input(args_input := options.input) if False else parse_input(options.input)
      # Both timestamps are converted with datetime.fromtimestamp, begin first.
      beginTime = datetime.fromtimestamp(document["hercules"]["begin_unix_time"])
      endTime = datetime.fromtimestamp(document["hercules"]["end_unix_time"])
      analysis = document["ChurnAnalysis"]
      for item_name, item_data in analysis.items():
          plot_churn(
              item_name,
              item_data,
              document["hercules"]["repository"],
              beginTime,
              endTime,
              options.output,
              options.format,
              options.tick_days,
          )
  if __name__ == "__main__":
      # Run the script only when executed directly, not when imported.
      # Whatever main() returns is handed to sys.exit as the exit status.
      exit_status = main()
      sys.exit(exit_status)