labours.py 3.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import argparse
  2. from datetime import datetime, timedelta
  3. import sys
  4. import warnings
  5. import numpy
  6. if sys.version_info[0] < 3:
  7. # OK, ancients, I will support Python 2, but you owe me a beer
  8. input = raw_input
  9. def parse_args():
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument("--output", default="",
  12. help="Path to the output file (empty for display).")
  13. parser.add_argument("--text-size", default=12,
  14. help="Size of the labels and legend.")
  15. parser.add_argument("--backend", help="Matplotlib backend to use.")
  16. parser.add_argument(
  17. "--resample", default="year",
  18. help="The way to resample the time series. Possible values are: "
  19. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  20. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  21. "#offset-aliases).")
  22. args = parser.parse_args()
  23. return args
  24. def main():
  25. args = parse_args()
  26. import matplotlib
  27. if args.backend:
  28. matplotlib.use(args.backend)
  29. import matplotlib.pyplot as pyplot
  30. import pandas
  31. start, granularity, sampling = input().split()
  32. start = datetime.fromtimestamp(int(start))
  33. granularity = int(granularity)
  34. sampling = int(sampling)
  35. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  36. for line in sys.stdin.read().split("\n")[:-1]]).T
  37. date_range_sampling = pandas.date_range(
  38. start, periods=matrix.shape[1], freq="%dD" % sampling)
  39. if args.resample not in ("no", "raw"):
  40. aliases = {
  41. "year": "A",
  42. "month": "M"
  43. }
  44. daily_matrix = numpy.zeros(
  45. (matrix.shape[0] * granularity, matrix.shape[1]),
  46. dtype=numpy.float32)
  47. for i in range(matrix.shape[0]):
  48. daily_matrix[i * granularity:(i + 1) * granularity] = \
  49. matrix[i] / granularity
  50. date_range_granularity = pandas.date_range(
  51. start, periods=daily_matrix.shape[0], freq="1D")
  52. df = pandas.DataFrame({
  53. dr: pandas.Series(row, index=date_range_sampling)
  54. for dr, row in zip(date_range_granularity, daily_matrix)
  55. }).T
  56. df = df.resample(aliases.get(args.resample, args.resample)).sum()
  57. matrix = df.as_matrix()
  58. if args.resample in ("year", "A"):
  59. labels = [dt.year for dt in df.index]
  60. elif args.resample in ("month", "M"):
  61. labels = [dt.strftime("%Y %B") for dt in df.index]
  62. else:
  63. labels = [dt.date() for dt in df.index]
  64. else:
  65. labels = [
  66. "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
  67. (start + timedelta(days=(i + 1) * granularity)).date())
  68. for i in range(matrix.shape[0])]
  69. if len(labels) > 18:
  70. warnings.warn("Too many labels - consider resampling.")
  71. pyplot.stackplot(date_range_sampling, matrix, labels=labels)
  72. pyplot.legend(loc=2, fontsize=args.text_size)
  73. pyplot.ylabel("Lines of code", fontsize=args.text_size)
  74. pyplot.xlabel("Time", fontsize=args.text_size)
  75. pyplot.tick_params(labelsize=args.text_size)
  76. pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
  77. pyplot.gcf().set_size_inches(12, 9)
  78. if not args.output:
  79. pyplot.gcf().canvas.set_window_title(
  80. "Hercules %d x %d (granularity %d, sampling %d)" %
  81. (matrix.shape + (granularity, sampling)))
  82. pyplot.show()
  83. else:
  84. pyplot.tight_layout()
  85. pyplot.savefig(args.output, transparent=True)
  86. if __name__ == "__main__":
  87. sys.exit(main())