# labours.py
import argparse
from datetime import datetime, timedelta
import io
import os
import re
import subprocess
import sys
import tempfile
import threading
import time
import warnings

try:
    from clint.textui import progress
except ImportError:
    print("Warning: clint is not installed, no fancy progressbars in the terminal for you.")
    progress = None
import numpy
import yaml

if sys.version_info[0] < 3:
    # OK, ancients, I will support Python 2, but you owe me a beer
    input = raw_input
  21. def parse_args():
  22. parser = argparse.ArgumentParser()
  23. parser.add_argument("-o", "--output", default="",
  24. help="Path to the output file/directory (empty for display).")
  25. parser.add_argument("-i", "--input", default="-",
  26. help="Path to the input file (- for stdin).")
  27. parser.add_argument("--text-size", default=12, type=int,
  28. help="Size of the labels and legend.")
  29. parser.add_argument("--backend", help="Matplotlib backend to use.")
  30. parser.add_argument("--style", choices=["black", "white"], default="black",
  31. help="Plot's general color scheme.")
  32. parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
  33. parser.add_argument("--relative", action="store_true",
  34. help="Occupy 100%% height for every measurement.")
  35. parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
  36. parser.add_argument("-m", "--mode",
  37. choices=["project", "file", "person", "churn_matrix", "people", "couples",
  38. "all"],
  39. default="project", help="What to plot.")
  40. parser.add_argument(
  41. "--resample", default="year",
  42. help="The way to resample the time series. Possible values are: "
  43. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  44. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  45. "#offset-aliases).")
  46. parser.add_argument("--disable-projector", action="store_true",
  47. help="Do not run Tensorflow Projector on couples.")
  48. parser.add_argument("--max-people", default=20, type=int,
  49. help="Maximum number of developers in churn matrix and people plots.")
  50. args = parser.parse_args()
  51. return args
  52. def read_input(args):
  53. sys.stdout.write("Reading the input... ")
  54. sys.stdout.flush()
  55. yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
  56. try:
  57. loader = yaml.CLoader
  58. except AttributeError:
  59. print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
  60. loader = yaml.Loader
  61. try:
  62. if args.input != "-":
  63. with open(args.input) as fin:
  64. data = yaml.load(fin, Loader=loader)
  65. else:
  66. data = yaml.load(sys.stdin, Loader=loader)
  67. except (UnicodeEncodeError, yaml.reader.ReaderError) as e:
  68. print("\nInvalid unicode in the input: %s\nPlease filter it through fix_yaml_unicode.py" %
  69. e)
  70. sys.exit(1)
  71. print("done")
  72. return data["burndown"], data["project"], data.get("files"), data.get("people_sequence"), \
  73. data.get("people"), data.get("people_interaction"), data.get("files_coocc"), \
  74. data.get("people_coocc")
  75. def calculate_average_lifetime(matrix):
  76. lifetimes = numpy.zeros(matrix.shape[1] - 1)
  77. for band in matrix:
  78. start = 0
  79. for i, line in enumerate(band):
  80. if i == 0 or band[i - 1] == 0:
  81. start += 1
  82. continue
  83. lifetimes[i - start] = band[i - 1] - line
  84. lifetimes[i - start] = band[i - 1]
  85. return (lifetimes.dot(numpy.arange(1, matrix.shape[1], 1))
  86. / (lifetimes.sum() * matrix.shape[1]))
  87. def load_main(header, name, matrix, resample):
  88. import pandas
  89. start = header["begin"]
  90. last = header["end"]
  91. granularity = header["granularity"]
  92. sampling = header["sampling"]
  93. start = datetime.fromtimestamp(int(start))
  94. last = datetime.fromtimestamp(int(last))
  95. granularity = int(granularity)
  96. sampling = int(sampling)
  97. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  98. for line in matrix.split("\n")]).T
  99. print(name, "lifetime index:", calculate_average_lifetime(matrix))
  100. finish = start + timedelta(days=matrix.shape[1] * sampling)
  101. if resample not in ("no", "raw"):
  102. # Interpolate the day x day matrix.
  103. # Each day brings equal weight in the granularity.
  104. # Sampling's interpolation is linear.
  105. daily_matrix = numpy.zeros(
  106. (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
  107. dtype=numpy.float32)
  108. epsrange = numpy.arange(0, 1, 1.0 / sampling)
  109. for y in range(matrix.shape[0]):
  110. for x in range(matrix.shape[1]):
  111. previous = matrix[y, x - 1] if x > 0 else 0
  112. value = ((previous + (matrix[y, x] - previous) * epsrange)
  113. / granularity)[numpy.newaxis, :]
  114. if (y + 1) * granularity <= x * sampling:
  115. daily_matrix[y * granularity:(y + 1) * granularity,
  116. x * sampling:(x + 1) * sampling] = value
  117. elif y * granularity <= (x + 1) * sampling:
  118. for suby in range(y * granularity, (y + 1) * granularity):
  119. for subx in range(suby, (x + 1) * sampling):
  120. daily_matrix[suby, subx] = matrix[
  121. y, x] / granularity
  122. daily_matrix[(last - start).days:] = 0
  123. # Resample the bands
  124. aliases = {
  125. "year": "A",
  126. "month": "M"
  127. }
  128. resample = aliases.get(resample, resample)
  129. periods = 0
  130. date_granularity_sampling = [start]
  131. while date_granularity_sampling[-1] < finish:
  132. periods += 1
  133. date_granularity_sampling = pandas.date_range(
  134. start, periods=periods, freq=resample)
  135. date_range_sampling = pandas.date_range(
  136. date_granularity_sampling[0],
  137. periods=(finish - date_granularity_sampling[0]).days,
  138. freq="1D")
  139. # Fill the new square matrix
  140. matrix = numpy.zeros(
  141. (len(date_granularity_sampling), len(date_range_sampling)),
  142. dtype=numpy.float32)
  143. for i, gdt in enumerate(date_granularity_sampling):
  144. istart = (date_granularity_sampling[i - 1] - start).days \
  145. if i > 0 else 0
  146. ifinish = (gdt - start).days
  147. for j, sdt in enumerate(date_range_sampling):
  148. if (sdt - start).days >= istart:
  149. break
  150. matrix[i, j:] = \
  151. daily_matrix[istart:ifinish, (sdt - start).days:].sum(axis=0)
  152. # Hardcode some cases to improve labels" readability
  153. if resample in ("year", "A"):
  154. labels = [dt.year for dt in date_granularity_sampling]
  155. elif resample in ("month", "M"):
  156. labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
  157. else:
  158. labels = [dt.date() for dt in date_granularity_sampling]
  159. else:
  160. labels = [
  161. "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
  162. (
  163. start + timedelta(days=(i + 1) * granularity)).date())
  164. for i in range(matrix.shape[0])]
  165. if len(labels) > 18:
  166. warnings.warn("Too many labels - consider resampling.")
  167. resample = "M" # fake resampling type is checked while plotting
  168. date_range_sampling = pandas.date_range(
  169. start + timedelta(days=sampling), periods=matrix.shape[1],
  170. freq="%dD" % sampling)
  171. return name, matrix, date_range_sampling, labels, granularity, sampling, resample
  172. def load_churn_matrix(contents):
  173. matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  174. for line in contents.split("\n")])
  175. return matrix
  176. def load_people(header, sequence, contents):
  177. import pandas
  178. start = header["begin"]
  179. last = header["end"]
  180. sampling = header["sampling"]
  181. start = datetime.fromtimestamp(int(start))
  182. last = datetime.fromtimestamp(int(last))
  183. sampling = int(sampling)
  184. people = []
  185. for name in sequence:
  186. people.append(numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
  187. for line in contents[name].split("\n")]).sum(axis=1))
  188. people = numpy.array(people)
  189. date_range_sampling = pandas.date_range(
  190. start + timedelta(days=sampling), periods=people[0].shape[0],
  191. freq="%dD" % sampling)
  192. return sequence, people, date_range_sampling, last
  193. def apply_plot_style(figure, axes, legend, style, text_size, axes_size):
  194. if axes_size is None:
  195. axes_size = (12, 9)
  196. else:
  197. axes_size = tuple(float(p) for p in axes_size.split(","))
  198. figure.set_size_inches(*axes_size)
  199. for side in ("bottom", "top", "left", "right"):
  200. axes.spines[side].set_color(style)
  201. for axis in (axes.xaxis, axes.yaxis):
  202. axis.label.update(dict(fontsize=text_size, color=style))
  203. for axis in ("x", "y"):
  204. axes.tick_params(axis=axis, colors=style, labelsize=text_size)
  205. if legend is not None:
  206. frame = legend.get_frame()
  207. for setter in (frame.set_facecolor, frame.set_edgecolor):
  208. setter("black" if style == "white" else "white")
  209. for text in legend.get_texts():
  210. text.set_color(style)
  211. def get_plot_path(base, name):
  212. root, ext = os.path.splitext(base)
  213. if not ext:
  214. ext = ".png"
  215. output = os.path.join(root, name + ext)
  216. os.makedirs(os.path.dirname(output), exist_ok=True)
  217. return output
  218. def deploy_plot(title, output, style):
  219. import matplotlib.pyplot as pyplot
  220. if not output:
  221. pyplot.gcf().canvas.set_window_title(title)
  222. pyplot.show()
  223. else:
  224. if title:
  225. pyplot.title(title, color=style)
  226. try:
  227. pyplot.tight_layout()
  228. except:
  229. print("Warning: failed to set the tight layout")
  230. pyplot.savefig(output, transparent=True)
  231. pyplot.clf()
  232. def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granularity,
  233. sampling, resample):
  234. import matplotlib
  235. if args.backend:
  236. matplotlib.use(args.backend)
  237. import matplotlib.pyplot as pyplot
  238. pyplot.stackplot(date_range_sampling, matrix, labels=labels)
  239. if args.relative:
  240. for i in range(matrix.shape[1]):
  241. matrix[:, i] /= matrix[:, i].sum()
  242. pyplot.ylim(0, 1)
  243. legend_loc = 3
  244. else:
  245. legend_loc = 2
  246. legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
  247. pyplot.ylabel("Lines of code")
  248. pyplot.xlabel("Time")
  249. apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.style, args.text_size, args.size)
  250. pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
  251. locator = pyplot.gca().xaxis.get_major_locator()
  252. # set the optimal xticks locator
  253. if "M" not in resample:
  254. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  255. locs = pyplot.gca().get_xticks().tolist()
  256. if len(locs) >= 16:
  257. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  258. locs = pyplot.gca().get_xticks().tolist()
  259. if len(locs) >= 16:
  260. pyplot.gca().xaxis.set_major_locator(locator)
  261. if locs[0] < pyplot.xlim()[0]:
  262. del locs[0]
  263. endindex = -1
  264. if len(locs) >= 2 and \
  265. pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
  266. locs.append(pyplot.xlim()[1])
  267. endindex = len(locs) - 1
  268. startindex = -1
  269. if len(locs) >= 2 and \
  270. locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
  271. locs.append(pyplot.xlim()[0])
  272. startindex = len(locs) - 1
  273. pyplot.gca().set_xticks(locs)
  274. # hacking time!
  275. labels = pyplot.gca().get_xticklabels()
  276. if startindex >= 0:
  277. labels[startindex].set_text(date_range_sampling[0].date())
  278. labels[startindex].set_text = lambda _: None
  279. labels[startindex].set_rotation(30)
  280. labels[startindex].set_ha("right")
  281. if endindex >= 0:
  282. labels[endindex].set_text(date_range_sampling[-1].date())
  283. labels[endindex].set_text = lambda _: None
  284. labels[endindex].set_rotation(30)
  285. labels[endindex].set_ha("right")
  286. title = "%s %d x %d (granularity %d, sampling %d)" % \
  287. ((name,) + matrix.shape + (granularity, sampling))
  288. output = args.output
  289. if output:
  290. if args.mode == "project" and target == "project":
  291. output = args.output
  292. else:
  293. if target == "project":
  294. name = "project"
  295. output = get_plot_path(args.output, name)
  296. deploy_plot(title, output, args.style)
  297. def plot_many(args, target, header, parts):
  298. if not args.output:
  299. print("Warning: output not set, showing %d plots." % len(parts))
  300. itercnt = progress.bar(parts.items(), expected_size=len(parts)) \
  301. if progress is not None else parts.items()
  302. stdout = io.StringIO()
  303. for name, matrix in itercnt:
  304. backup = sys.stdout
  305. sys.stdout = stdout
  306. plot_burndown(args, target, *load_main(header, name, matrix, args.resample))
  307. sys.stdout = backup
  308. sys.stdout.write(stdout.getvalue())
  309. def plot_churn_matrix(args, repo, people, matrix):
  310. matrix = matrix.astype(float)
  311. if matrix.shape[0] > args.max_people:
  312. order = numpy.argsort(-matrix[:, 0])
  313. matrix = matrix[order[:args.max_people]][:, [0, 1] + list(2 + order[:args.max_people])]
  314. people = [people[i] for i in order[:args.max_people]]
  315. print("Warning: truncated people to most productive %d" % args.max_people)
  316. zeros = matrix[:, 0] == 0
  317. matrix[zeros, :] = 1
  318. matrix /= matrix[:, 0][:, None]
  319. matrix = -matrix[:, 1:]
  320. matrix[zeros, :] = 0
  321. for i, name in enumerate(people):
  322. if len(name) > 40:
  323. people[i] = name[:37] + "..."
  324. import matplotlib
  325. if args.backend:
  326. matplotlib.use(args.backend)
  327. import matplotlib.pyplot as pyplot
  328. s = 4 + matrix.shape[1] * 0.3
  329. fig = pyplot.figure(figsize=(s, s))
  330. ax = fig.add_subplot(111)
  331. ax.xaxis.set_label_position("top")
  332. ax.matshow(matrix, cmap=pyplot.cm.OrRd)
  333. ax.set_xticks(numpy.arange(0, matrix.shape[1]))
  334. ax.set_yticks(numpy.arange(0, matrix.shape[0]))
  335. ax.set_xticklabels(["Unidentified"] + people, rotation=90, ha="center")
  336. ax.set_yticklabels(people, va="center")
  337. ax.set_xticks(numpy.arange(0.5, matrix.shape[1] + 0.5), minor=True)
  338. ax.set_yticks(numpy.arange(0.5, matrix.shape[0] + 0.5), minor=True)
  339. ax.grid(which="minor")
  340. apply_plot_style(fig, ax, None, args.style, args.text_size, args.size)
  341. if not args.output:
  342. pos1 = ax.get_position()
  343. pos2 = (pos1.x0 + 0.245, pos1.y0 - 0.1, pos1.width * 0.9, pos1.height * 0.9)
  344. ax.set_position(pos2)
  345. if args.mode == "all":
  346. output = get_plot_path(args.output, "matrix")
  347. else:
  348. output = args.output
  349. title = "%s %d developers overwrite" % (repo, matrix.shape[0])
  350. if args.output:
  351. # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
  352. title = ""
  353. deploy_plot(title, output, args.style)
  354. def plot_people(args, repo, names, people, date_range, last):
  355. import matplotlib
  356. if args.backend:
  357. matplotlib.use(args.backend)
  358. import matplotlib.pyplot as pyplot
  359. if people.shape[0] > args.max_people:
  360. order = numpy.argsort(-people.sum(axis=1))
  361. people = people[order[:args.max_people]]
  362. names = [names[i] for i in order[:args.max_people]]
  363. print("Warning: truncated people to most owning %d" % args.max_people)
  364. for i, name in enumerate(names):
  365. if len(name) > 40:
  366. names[i] = name[:37] + "..."
  367. pyplot.stackplot(date_range, people, labels=names)
  368. pyplot.xlim(date_range[0], last)
  369. if args.relative:
  370. for i in range(people.shape[1]):
  371. people[:, i] /= people[:, i].sum()
  372. pyplot.ylim(0, 1)
  373. legend_loc = 3
  374. else:
  375. legend_loc = 2
  376. legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
  377. apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.style, args.text_size, args.size)
  378. if args.mode == "all":
  379. output = get_plot_path(args.output, "people")
  380. else:
  381. output = args.output
  382. deploy_plot("%s code ownership through time" % repo, output, args.style)
  383. def train_embeddings(coocc_tree, tmpdir, shard_size=4096):
  384. from scipy.sparse import csr_matrix
  385. try:
  386. from . import swivel
  387. except (SystemError, ImportError):
  388. import swivel
  389. import tensorflow as tf
  390. index = coocc_tree["index"]
  391. nshards = len(index) // shard_size
  392. if nshards * shard_size < len(index):
  393. nshards += 1
  394. shard_size = len(index) // nshards
  395. nshards = len(index) // shard_size
  396. remainder = len(index) - nshards * shard_size
  397. if remainder > 0:
  398. lengths = numpy.array([len(cd) for cd in coocc_tree["matrix"]])
  399. filtered = sorted(numpy.argsort(lengths)[remainder:])
  400. else:
  401. filtered = list(range(len(index)))
  402. print("Reading the sparse matrix...")
  403. data = []
  404. indices = []
  405. indptr = [0]
  406. for row, cd in enumerate(coocc_tree["matrix"]):
  407. if row >= len(index):
  408. break
  409. for col, val in sorted(cd.items()):
  410. data.append(val)
  411. indices.append(col)
  412. indptr.append(indptr[-1] + len(cd))
  413. matrix = csr_matrix((data, indices, indptr), shape=(len(index), len(index)))
  414. if len(filtered) < len(index):
  415. matrix = matrix[filtered, :][:, filtered]
  416. meta_index = []
  417. for i, j in enumerate(filtered):
  418. meta_index.append((index[j], matrix[i, i]))
  419. index = [mi[0] for mi in meta_index]
  420. with tempfile.TemporaryDirectory(prefix="hercules_labours_", dir=tmpdir or None) as tmproot:
  421. print("Writing Swivel metadata...")
  422. vocabulary = "\n".join(index)
  423. with open(os.path.join(tmproot, "row_vocab.txt"), "w") as out:
  424. out.write(vocabulary)
  425. with open(os.path.join(tmproot, "col_vocab.txt"), "w") as out:
  426. out.write(vocabulary)
  427. del vocabulary
  428. bool_sums = matrix.indptr[1:] - matrix.indptr[:-1]
  429. bool_sums_str = "\n".join(map(str, bool_sums.tolist()))
  430. with open(os.path.join(tmproot, "row_sums.txt"), "w") as out:
  431. out.write(bool_sums_str)
  432. with open(os.path.join(tmproot, "col_sums.txt"), "w") as out:
  433. out.write(bool_sums_str)
  434. del bool_sums_str
  435. reorder = numpy.argsort(-bool_sums)
  436. print("Writing Swivel shards...")
  437. for row in range(nshards):
  438. for col in range(nshards):
  439. def _int64s(xs):
  440. return tf.train.Feature(
  441. int64_list=tf.train.Int64List(value=list(xs)))
  442. def _floats(xs):
  443. return tf.train.Feature(
  444. float_list=tf.train.FloatList(value=list(xs)))
  445. indices_row = reorder[row::nshards]
  446. indices_col = reorder[col::nshards]
  447. shard = matrix[indices_row][:, indices_col].tocoo()
  448. example = tf.train.Example(features=tf.train.Features(feature={
  449. "global_row": _int64s(indices_row),
  450. "global_col": _int64s(indices_col),
  451. "sparse_local_row": _int64s(shard.row),
  452. "sparse_local_col": _int64s(shard.col),
  453. "sparse_value": _floats(shard.data)}))
  454. with open(os.path.join(tmproot, "shard-%03d-%03d.pb" % (row, col)), "wb") as out:
  455. out.write(example.SerializeToString())
  456. print("Training Swivel model...")
  457. swivel.FLAGS.submatrix_rows = shard_size
  458. swivel.FLAGS.submatrix_cols = shard_size
  459. if len(meta_index) < 10000:
  460. embedding_size = 50
  461. num_epochs = 200
  462. elif len(meta_index) < 100000:
  463. embedding_size = 100
  464. num_epochs = 250
  465. elif len(meta_index) < 500000:
  466. embedding_size = 200
  467. num_epochs = 300
  468. else:
  469. embedding_size = 300
  470. num_epochs = 200
  471. swivel.FLAGS.embedding_size = embedding_size
  472. swivel.FLAGS.input_base_path = tmproot
  473. swivel.FLAGS.output_base_path = tmproot
  474. swivel.FLAGS.loss_multiplier = 1.0 / shard_size
  475. swivel.FLAGS.num_epochs = num_epochs
  476. swivel.main(None)
  477. print("Reading Swivel embeddings...")
  478. embeddings = []
  479. with open(os.path.join(tmproot, "row_embedding.tsv")) as frow:
  480. with open(os.path.join(tmproot, "col_embedding.tsv")) as fcol:
  481. for i, (lrow, lcol) in enumerate(zip(frow, fcol)):
  482. prow, pcol = (l.split("\t", 1) for l in (lrow, lcol))
  483. assert prow[0] == pcol[0]
  484. erow, ecol = \
  485. (numpy.fromstring(p[1], dtype=numpy.float32, sep="\t")
  486. for p in (prow, pcol))
  487. embeddings.append((erow + ecol) / 2)
  488. return meta_index, embeddings
  489. class CORSWebServer(object):
  490. def __init__(self):
  491. self.thread = threading.Thread(target=self.serve)
  492. self.server = None
  493. def serve(self):
  494. outer = self
  495. try:
  496. from http.server import HTTPServer, SimpleHTTPRequestHandler, test
  497. except ImportError: # Python 2
  498. from BaseHTTPServer import HTTPServer, test
  499. from SimpleHTTPServer import SimpleHTTPRequestHandler
  500. class ClojureServer(HTTPServer):
  501. def __init__(self, *args, **kwargs):
  502. HTTPServer.__init__(self, *args, **kwargs)
  503. outer.server = self
  504. class CORSRequestHandler(SimpleHTTPRequestHandler):
  505. def end_headers (self):
  506. self.send_header("Access-Control-Allow-Origin", "*")
  507. SimpleHTTPRequestHandler.end_headers(self)
  508. test(CORSRequestHandler, ClojureServer)
  509. def start(self):
  510. self.thread.start()
  511. def stop(self):
  512. if self.running:
  513. self.server.shutdown()
  514. self.thread.join()
  515. @property
  516. def running(self):
  517. return self.server is not None
  518. web_server = CORSWebServer()
  519. def write_embeddings(name, output, run_server, index, embeddings):
  520. print("Writing Tensorflow Projector files...")
  521. if not output:
  522. output = "couples_" + name
  523. metaf = "%s_%s_meta.tsv" % (output, name)
  524. with open(metaf, "w") as fout:
  525. fout.write("name\tcommits\n")
  526. for pair in index:
  527. fout.write("%s\t%s\n" % pair)
  528. print("Wrote", metaf)
  529. dataf = "%s_%s_data.tsv" % (output, name)
  530. with open(dataf, "w") as fout:
  531. for vec in embeddings:
  532. fout.write("\t".join(str(v) for v in vec))
  533. fout.write("\n")
  534. print("Wrote", dataf)
  535. jsonf = "%s_%s.json" % (output, name)
  536. with open(jsonf, "w") as fout:
  537. fout.write("""{
  538. "embeddings": [
  539. {
  540. "tensorName": "%s %s coupling",
  541. "tensorShape": [%s, %s],
  542. "tensorPath": "http://0.0.0.0:8000/%s",
  543. "metadataPath": "http://0.0.0.0:8000/%s"
  544. }
  545. ]
  546. }
  547. """ % (output, name, len(embeddings), len(embeddings[0]), dataf, metaf))
  548. print("Wrote %s", jsonf)
  549. if run_server and not web_server.running:
  550. web_server.start()
  551. url = "http://projector.tensorflow.org/?config=http://0.0.0.0:8000/" + jsonf
  552. print(url)
  553. if run_server:
  554. os.system("xdg-open " + url)
  555. def main():
  556. args = parse_args()
  557. header, main_contents, files_contents, people_sequence, people_contents, people_matrix, \
  558. files_coocc, people_coocc = read_input(args)
  559. name = next(iter(main_contents))
  560. files_warning = "Files stats were not collected. Re-run hercules with -files."
  561. people_warning = "People stats were not collected. Re-run hercules with -people."
  562. couples_warning = "Coupling stats were not collected. Re-run hercules with -couples."
  563. if args.mode == "project":
  564. plot_burndown(args, "project",
  565. *load_main(header, name, main_contents[name], args.resample))
  566. elif args.mode == "file":
  567. if not files_contents:
  568. print(files_warning)
  569. return
  570. plot_many(args, "file", header, files_contents)
  571. elif args.mode == "person":
  572. if not people_contents:
  573. print(people_warning)
  574. return
  575. plot_many(args, "person", header, people_contents)
  576. elif args.mode == "churn_matrix":
  577. if not people_contents:
  578. print(people_warning)
  579. return
  580. plot_churn_matrix(args, name, people_sequence, load_churn_matrix(people_matrix))
  581. elif args.mode == "people":
  582. if not people_contents:
  583. print(people_warning)
  584. return
  585. plot_people(args, name, *load_people(header, people_sequence, people_contents))
  586. elif args.mode == "couples":
  587. if not files_coocc or not people_coocc:
  588. print(couples_warning)
  589. return
  590. write_embeddings("files", args.output, not args.disable_projector,
  591. *train_embeddings(files_coocc, args.couples_tmp_dir))
  592. write_embeddings("people", args.output, not args.disable_projector,
  593. *train_embeddings(people_coocc, args.couples_tmp_dir))
  594. elif args.mode == "all":
  595. plot_burndown(args, "project",
  596. *load_main(header, name, main_contents[name], args.resample))
  597. if files_contents:
  598. plot_many(args, "file", header, files_contents)
  599. if people_contents:
  600. plot_many(args, "person", header, people_contents)
  601. plot_churn_matrix(args, name, people_sequence, load_churn_matrix(people_matrix))
  602. plot_people(args, name, *load_people(header, people_sequence, people_contents))
  603. if people_coocc:
  604. if not files_coocc or not people_coocc:
  605. print(couples_warning)
  606. return
  607. write_embeddings("files", args.output, not args.disable_projector,
  608. *train_embeddings(files_coocc, args.couples_tmp_dir))
  609. write_embeddings("people", args.output, not args.disable_projector,
  610. *train_embeddings(people_coocc, args.couples_tmp_dir))
  611. if web_server.running:
  612. print("Sleeping for 60 seconds, safe to Ctrl-C")
  613. try:
  614. time.sleep(60)
  615. except KeyboardInterrupt:
  616. pass
  617. web_server.stop()
  618. if __name__ == "__main__":
  619. sys.exit(main())