labours.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. import argparse
  2. from datetime import datetime, timedelta
  3. import sys
  4. import warnings
  5. import numpy
  6. if sys.version_info[0] < 3:
  7. # OK, ancients, I will support Python 2, but you owe me a beer
  8. input = raw_input
  9. def parse_args():
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument("--output", default="",
  12. help="Path to the output file (empty for display).")
  13. parser.add_argument("--text-size", default=12,
  14. help="Size of the labels and legend.")
  15. parser.add_argument("--backend", help="Matplotlib backend to use.")
  16. parser.add_argument("--style", choices=["black", "white"], default="black",
  17. help="Plot's general color scheme.")
  18. parser.add_argument("--relative", action="store_true",
  19. help="Occupy 100% height for every measurement.")
  20. parser.add_argument(
  21. "--resample", default="year",
  22. help="The way to resample the time series. Possible values are: "
  23. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  24. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  25. "#offset-aliases).")
  26. args = parser.parse_args()
  27. return args
  28. def main():
  29. args = parse_args()
  30. import matplotlib
  31. if args.backend:
  32. matplotlib.use(args.backend)
  33. import matplotlib.pyplot as pyplot
  34. import pandas
  35. start, granularity, sampling = input().split()
  36. start = datetime.fromtimestamp(int(start))
  37. granularity = int(granularity)
  38. sampling = int(sampling)
  39. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  40. for line in sys.stdin.read().split("\n")[:-1]]).T
  41. date_range_sampling = pandas.date_range(
  42. start + timedelta(days=sampling), periods=matrix.shape[1],
  43. freq="%dD" % sampling)
  44. if args.resample not in ("no", "raw"):
  45. aliases = {
  46. "year": "A",
  47. "month": "M"
  48. }
  49. args.resample = aliases.get(args.resample, args.resample)
  50. daily_matrix = numpy.zeros(
  51. (matrix.shape[0] * granularity, matrix.shape[1]),
  52. dtype=numpy.float32)
  53. daily_start = 1 if "M" in args.resample else 0
  54. for i in range(daily_start, matrix.shape[0]):
  55. daily_matrix[i * granularity:(i + 1) * granularity] = \
  56. matrix[i] / granularity
  57. date_range_granularity = pandas.date_range(
  58. start, periods=daily_matrix.shape[0], freq="1D")
  59. df = pandas.DataFrame({
  60. dr: pandas.Series(row, index=date_range_sampling)
  61. for dr, row in zip(date_range_granularity, daily_matrix)
  62. }).T
  63. df = df.resample(args.resample).sum()
  64. if "M" in args.resample:
  65. row0 = matrix[0]
  66. matrix = df.as_matrix()
  67. if "M" in args.resample:
  68. matrix[0] = row0
  69. for i in range(1, min(*matrix.shape)):
  70. matrix[i, i] += matrix[i, :i].sum()
  71. matrix[i, :i] = 0
  72. if args.resample in ("year", "A"):
  73. labels = [dt.year for dt in df.index]
  74. elif args.resample in ("month", "M"):
  75. labels = [dt.strftime("%Y %B") for dt in df.index]
  76. else:
  77. labels = [dt.date() for dt in df.index]
  78. else:
  79. labels = [
  80. "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
  81. (start + timedelta(days=(i + 1) * granularity)).date())
  82. for i in range(matrix.shape[0])]
  83. if len(labels) > 18:
  84. warnings.warn("Too many labels - consider resampling.")
  85. args.resample = "M"
  86. if args.style == "white":
  87. pyplot.gca().spines["bottom"].set_color("white")
  88. pyplot.gca().spines["top"].set_color("white")
  89. pyplot.gca().spines["left"].set_color("white")
  90. pyplot.gca().spines["right"].set_color("white")
  91. pyplot.gca().xaxis.label.set_color("white")
  92. pyplot.gca().yaxis.label.set_color("white")
  93. pyplot.gca().tick_params(axis="x", colors="white")
  94. pyplot.gca().tick_params(axis="y", colors="white")
  95. if args.relative:
  96. for i in range(matrix.shape[1]):
  97. matrix[:, i] /= matrix[:, i].sum()
  98. pyplot.ylim(0, 1)
  99. legend_loc = 3
  100. else:
  101. legend_loc = 2
  102. pyplot.stackplot(date_range_sampling, matrix, labels=labels)
  103. legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
  104. frame = legend.get_frame()
  105. frame.set_facecolor("black" if args.style == "white" else "white")
  106. frame.set_edgecolor("black" if args.style == "white" else "white")
  107. for text in legend.get_texts():
  108. text.set_color(args.style)
  109. pyplot.ylabel("Lines of code", fontsize=args.text_size)
  110. pyplot.xlabel("Time", fontsize=args.text_size)
  111. pyplot.tick_params(labelsize=args.text_size)
  112. pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
  113. pyplot.gcf().set_size_inches(12, 9)
  114. locator = pyplot.gca().xaxis.get_major_locator()
  115. # set the optimal xticks locator
  116. if "M" not in args.resample:
  117. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  118. locs = pyplot.gca().get_xticks().tolist()
  119. if len(locs) >= 16:
  120. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  121. locs = pyplot.gca().get_xticks().tolist()
  122. if len(locs) >= 16:
  123. pyplot.gca().xaxis.set_major_locator(locator)
  124. if locs[0] < pyplot.xlim()[0]:
  125. del locs[0]
  126. endindex = -1
  127. if len(locs) >= 2 and \
  128. pyplot.xlim()[1] - locs[-1] >= (locs[-1] - locs[-2]) / 2:
  129. locs.append(pyplot.xlim()[1])
  130. endindex = len(locs) - 1
  131. startindex = -1
  132. if len(locs) >= 2 and \
  133. locs[0] - pyplot.xlim()[0] >= (locs[1] - locs[0]) / 2:
  134. locs.append(pyplot.xlim()[0])
  135. startindex = len(locs) - 1
  136. pyplot.gca().set_xticks(locs)
  137. # hacking time!
  138. labels = pyplot.gca().get_xticklabels()
  139. if startindex >= 0:
  140. if "M" in args.resample:
  141. labels[startindex].set_text(date_range_sampling[0].date())
  142. labels[startindex].set_text = lambda _: None
  143. labels[startindex].set_rotation(30)
  144. labels[startindex].set_ha("right")
  145. if endindex >= 0:
  146. if "M" in args.resample:
  147. labels[endindex].set_text(date_range_sampling[-1].date())
  148. labels[endindex].set_text = lambda _: None
  149. labels[endindex].set_rotation(30)
  150. labels[endindex].set_ha("right")
  151. if not args.output:
  152. pyplot.gcf().canvas.set_window_title(
  153. "Hercules %d x %d (granularity %d, sampling %d)" %
  154. (matrix.shape + (granularity, sampling)))
  155. pyplot.show()
  156. else:
  157. pyplot.tight_layout()
  158. pyplot.savefig(args.output, transparent=True)
  159. if __name__ == "__main__":
  160. sys.exit(main())