labours.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. import argparse
  2. from datetime import datetime, timedelta
  3. import sys
  4. import warnings
  5. import numpy
  6. if sys.version_info[0] < 3:
  7. # OK, ancients, I will support Python 2, but you owe me a beer
  8. input = raw_input
  9. def parse_args():
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument("--output", default="",
  12. help="Path to the output file (empty for display).")
  13. parser.add_argument("--input", default="-",
  14. help="Path to the input file (- for stdin).")
  15. parser.add_argument("--text-size", default=12, type=int,
  16. help="Size of the labels and legend.")
  17. parser.add_argument("--backend", help="Matplotlib backend to use.")
  18. parser.add_argument("--style", choices=["black", "white"], default="black",
  19. help="Plot's general color scheme.")
  20. parser.add_argument("--relative", action="store_true",
  21. help="Occupy 100%% height for every measurement.")
  22. parser.add_argument(
  23. "--resample", default="year",
  24. help="The way to resample the time series. Possible values are: "
  25. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  26. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  27. "#offset-aliases).")
  28. args = parser.parse_args()
  29. return args
  30. def calculate_average_lifetime(matrix):
  31. lifetimes = numpy.zeros(matrix.shape[1] - 1)
  32. for band in matrix:
  33. start = 0
  34. for i, line in enumerate(band):
  35. if i == 0 or band[i - 1] == 0:
  36. start += 1
  37. continue
  38. lifetimes[i - start] = band[i - 1] - line
  39. lifetimes[i - start] = band[i - 1]
  40. return (lifetimes.dot(numpy.arange(1, matrix.shape[1], 1))
  41. / (lifetimes.sum() * matrix.shape[1]))
  42. def load_matrix(args):
  43. import pandas
  44. if args.input != "-":
  45. with open(args.input) as fin:
  46. header = fin.readline()[:-1]
  47. contents = fin.read()
  48. else:
  49. header = input()
  50. contents = sys.stdin.read()
  51. start, last, granularity, sampling = header.split()
  52. start = datetime.fromtimestamp(int(start))
  53. last = datetime.fromtimestamp(int(last))
  54. granularity = int(granularity)
  55. sampling = int(sampling)
  56. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  57. for line in contents.split("\n")[:-1]]).T
  58. print("Lifetime index:", calculate_average_lifetime(matrix))
  59. finish = start + timedelta(days=matrix.shape[1] * sampling)
  60. if args.resample not in ("no", "raw"):
  61. # Interpolate the day x day matrix.
  62. # Each day brings equal weight in the granularity.
  63. # Sampling's interpolation is linear.
  64. daily_matrix = numpy.zeros(
  65. (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
  66. dtype=numpy.float32)
  67. epsrange = numpy.arange(0, 1, 1.0 / sampling)
  68. for y in range(matrix.shape[0]):
  69. for x in range(matrix.shape[1]):
  70. previous = matrix[y, x - 1] if x > 0 else 0
  71. value = ((previous + (matrix[y, x] - previous) * epsrange)
  72. / granularity)[numpy.newaxis, :]
  73. if (y + 1) * granularity <= x * sampling:
  74. daily_matrix[y * granularity:(y + 1) * granularity,
  75. x * sampling:(x + 1) * sampling] = value
  76. elif y * granularity <= (x + 1) * sampling:
  77. for suby in range(y * granularity, (y + 1) * granularity):
  78. for subx in range(suby, (x + 1) * sampling):
  79. daily_matrix[suby, subx] = matrix[
  80. y, x] / granularity
  81. daily_matrix[(last - start).days:] = 0
  82. # Resample the bands
  83. aliases = {
  84. "year": "A",
  85. "month": "M"
  86. }
  87. args.resample = aliases.get(args.resample, args.resample)
  88. periods = 0
  89. date_granularity_sampling = [start]
  90. while date_granularity_sampling[-1] < finish:
  91. periods += 1
  92. date_granularity_sampling = pandas.date_range(
  93. start, periods=periods, freq=args.resample)
  94. date_range_sampling = pandas.date_range(
  95. date_granularity_sampling[0],
  96. periods=(finish - date_granularity_sampling[0]).days,
  97. freq="1D")
  98. # Fill the new square matrix
  99. matrix = numpy.zeros(
  100. (len(date_granularity_sampling), len(date_range_sampling)),
  101. dtype=numpy.float32)
  102. for i, gdt in enumerate(date_granularity_sampling):
  103. istart = (date_granularity_sampling[i - 1] - start).days \
  104. if i > 0 else 0
  105. ifinish = (gdt - start).days
  106. for j, sdt in enumerate(date_range_sampling):
  107. if (sdt - start).days >= istart:
  108. break
  109. matrix[i, j:] = \
  110. daily_matrix[istart:ifinish, (sdt - start).days:].sum(axis=0)
  111. # Hardcode some cases to improve labels' readability
  112. if args.resample in ("year", "A"):
  113. labels = [dt.year for dt in date_granularity_sampling]
  114. elif args.resample in ("month", "M"):
  115. labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
  116. else:
  117. labels = [dt.date() for dt in date_granularity_sampling]
  118. else:
  119. labels = [
  120. "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
  121. (
  122. start + timedelta(days=(i + 1) * granularity)).date())
  123. for i in range(matrix.shape[0])]
  124. if len(labels) > 18:
  125. warnings.warn("Too many labels - consider resampling.")
  126. args.resample = "M" # fake resampling type is checked while plotting
  127. date_range_sampling = pandas.date_range(
  128. start + timedelta(days=sampling), periods=matrix.shape[1],
  129. freq="%dD" % sampling)
  130. return matrix, date_range_sampling, labels, granularity, sampling
  131. def plot_matrix(args, matrix, date_range_sampling, labels, granularity,
  132. sampling):
  133. import matplotlib
  134. if args.backend:
  135. matplotlib.use(args.backend)
  136. import matplotlib.pyplot as pyplot
  137. if args.style == "white":
  138. pyplot.gca().spines["bottom"].set_color("white")
  139. pyplot.gca().spines["top"].set_color("white")
  140. pyplot.gca().spines["left"].set_color("white")
  141. pyplot.gca().spines["right"].set_color("white")
  142. pyplot.gca().xaxis.label.set_color("white")
  143. pyplot.gca().yaxis.label.set_color("white")
  144. pyplot.gca().tick_params(axis="x", colors="white")
  145. pyplot.gca().tick_params(axis="y", colors="white")
  146. if args.relative:
  147. for i in range(matrix.shape[1]):
  148. matrix[:, i] /= matrix[:, i].sum()
  149. pyplot.ylim(0, 1)
  150. legend_loc = 3
  151. else:
  152. legend_loc = 2
  153. pyplot.stackplot(date_range_sampling, matrix, labels=labels)
  154. legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
  155. frame = legend.get_frame()
  156. frame.set_facecolor("black" if args.style == "white" else "white")
  157. frame.set_edgecolor("black" if args.style == "white" else "white")
  158. for text in legend.get_texts():
  159. text.set_color(args.style)
  160. pyplot.ylabel("Lines of code", fontsize=args.text_size)
  161. pyplot.xlabel("Time", fontsize=args.text_size)
  162. pyplot.tick_params(labelsize=args.text_size)
  163. pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
  164. pyplot.gcf().set_size_inches(12, 9)
  165. locator = pyplot.gca().xaxis.get_major_locator()
  166. # set the optimal xticks locator
  167. if "M" not in args.resample:
  168. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  169. locs = pyplot.gca().get_xticks().tolist()
  170. if len(locs) >= 16:
  171. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  172. locs = pyplot.gca().get_xticks().tolist()
  173. if len(locs) >= 16:
  174. pyplot.gca().xaxis.set_major_locator(locator)
  175. if locs[0] < pyplot.xlim()[0]:
  176. del locs[0]
  177. endindex = -1
  178. if len(locs) >= 2 and \
  179. pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
  180. locs.append(pyplot.xlim()[1])
  181. endindex = len(locs) - 1
  182. startindex = -1
  183. if len(locs) >= 2 and \
  184. locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
  185. locs.append(pyplot.xlim()[0])
  186. startindex = len(locs) - 1
  187. pyplot.gca().set_xticks(locs)
  188. # hacking time!
  189. labels = pyplot.gca().get_xticklabels()
  190. if startindex >= 0:
  191. labels[startindex].set_text(date_range_sampling[0].date())
  192. labels[startindex].set_text = lambda _: None
  193. labels[startindex].set_rotation(30)
  194. labels[startindex].set_ha("right")
  195. if endindex >= 0:
  196. labels[endindex].set_text(date_range_sampling[-1].date())
  197. labels[endindex].set_text = lambda _: None
  198. labels[endindex].set_rotation(30)
  199. labels[endindex].set_ha("right")
  200. if not args.output:
  201. pyplot.gcf().canvas.set_window_title(
  202. "Hercules %d x %d (granularity %d, sampling %d)" %
  203. (matrix.shape + (granularity, sampling)))
  204. pyplot.show()
  205. else:
  206. pyplot.tight_layout()
  207. pyplot.savefig(args.output, transparent=True)
  208. def main():
  209. args = parse_args()
  210. plot_matrix(args, *load_matrix(args))
  211. if __name__ == "__main__":
  212. sys.exit(main())