labours.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import argparse
  2. from datetime import datetime, timedelta
  3. import sys
  4. import warnings
  5. import numpy
  6. if sys.version_info[0] < 3:
  7. # OK, ancients, I will support Python 2, but you owe me a beer
  8. input = raw_input
  9. def parse_args():
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument("--output", default="",
  12. help="Path to the output file (empty for display).")
  13. parser.add_argument("--text-size", default=12,
  14. help="Size of the labels and legend.")
  15. parser.add_argument("--backend", help="Matplotlib backend to use.")
  16. parser.add_argument(
  17. "--resample", default="year",
  18. help="The way to resample the time series. Possible values are: "
  19. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  20. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  21. "#offset-aliases).")
  22. args = parser.parse_args()
  23. return args
  24. def main():
  25. args = parse_args()
  26. import matplotlib
  27. if args.backend:
  28. matplotlib.use(args.backend)
  29. import matplotlib.pyplot as pyplot
  30. import pandas
  31. start, granularity, sampling = input().split()
  32. start = datetime.fromtimestamp(int(start))
  33. granularity = int(granularity)
  34. sampling = int(sampling)
  35. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  36. for line in sys.stdin.read().split("\n")[:-1]]).T
  37. date_range_granularity = pandas.date_range(
  38. start, periods=matrix.shape[0], freq="%dD" % granularity)
  39. date_range_sampling = pandas.date_range(
  40. start, periods=matrix.shape[1], freq="%dD" % sampling)
  41. df = pandas.DataFrame({
  42. dr: pandas.Series(row, index=date_range_sampling)
  43. for dr, row in zip(date_range_granularity, matrix)
  44. }).T
  45. if args.resample not in ("no", "raw"):
  46. aliases = {
  47. "year": "A",
  48. "month": "M"
  49. }
  50. df = df.resample(aliases.get(args.resample, args.resample)).mean()
  51. matrix = df.as_matrix()
  52. if args.resample in ("year", "A"):
  53. labels = [dt.year for dt in df.index]
  54. elif args.resample in ("month", "M"):
  55. labels = [dt.strftime("%Y %B") for dt in df.index]
  56. else:
  57. labels = [dt.date() for dt in df.index]
  58. else:
  59. labels = [
  60. "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
  61. (start + timedelta(days=(i + 1) * granularity)).date())
  62. for i in range(matrix.shape[0])]
  63. if len(labels) > 18:
  64. warnings.warn("Too many labels - consider resampling.")
  65. pyplot.stackplot(date_range_sampling, matrix, labels=labels)
  66. pyplot.legend(loc=2, fontsize=args.text_size)
  67. pyplot.ylabel("Lines of code", fontsize=args.text_size)
  68. pyplot.ylabel("Time", fontsize=args.text_size)
  69. pyplot.tick_params(labelsize=args.text_size)
  70. pyplot.gcf().set_size_inches(12, 9)
  71. if not args.output:
  72. pyplot.gcf().canvas.set_window_title(
  73. "Hercules %d x %d (granularity %d, sampling %d)" %
  74. (matrix.shape + (granularity, sampling)))
  75. pyplot.show()
  76. else:
  77. pyplot.tight_layout()
  78. pyplot.savefig(args.output, transparent=True)
  79. if __name__ == "__main__":
  80. sys.exit(main())