#!/usr/bin/env python3
import argparse
import io
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import time
import warnings
from datetime import datetime, timedelta
from importlib import import_module

try:
    from clint.textui import progress
except ImportError:
    print("Warning: clint is not installed, no fancy progressbars in the terminal for you.")
    progress = None
import numpy
import yaml

if sys.version_info[0] < 3:
    # OK, ancients, I will support Python 2, but you owe me a beer
    input = raw_input  # noqa: F821

PB_MESSAGES = {
    "Burndown": "internal.pb.pb_pb2.BurndownAnalysisResults",
    "Couples": "internal.pb.pb_pb2.CouplesAnalysisResults",
    "Shotness": "internal.pb.pb_pb2.ShotnessAnalysisResults",
}
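
# PB_MESSAGES above maps an analysis name found in the protobuf container to
# the dotted path of the generated message class that decodes its payload;
# ProtobufReader.read() resolves the class via importlib at runtime.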


def list_matplotlib_styles():
    script = "import sys; from matplotlib import pyplot; " \
             "sys.stdout.write(repr(pyplot.style.available))"
    styles = eval(subprocess.check_output([sys.executable, "-c", script]))
    styles.remove("classic")
    return ["default", "classic"] + styles


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", default="",
                        help="Path to the output file/directory (empty for display). "
                             "If the extension is JSON, the data is saved instead of "
                             "the real image.")
    parser.add_argument("-i", "--input", default="-",
                        help="Path to the input file (- for stdin).")
    parser.add_argument("-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"])
    parser.add_argument("--font-size", default=12, type=int,
                        help="Size of the labels and legend.")
    parser.add_argument("--style", default="ggplot", choices=list_matplotlib_styles(),
                        help="Plot style to use.")
    parser.add_argument("--backend", help="Matplotlib backend to use.")
    parser.add_argument("--background", choices=["black", "white"], default="white",
                        help="Plot's general color scheme.")
    parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
    parser.add_argument("--relative", action="store_true",
                        help="Occupy 100%% height for every measurement.")
    parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
    parser.add_argument("-m", "--mode",
                        choices=["project", "file", "person", "churn_matrix", "ownership",
                                 "couples", "shotness", "sentiment", "all", "run_times"],
                        help="What to plot.")
    parser.add_argument(
        "--resample", default="year",
        help="The way to resample the time series. Possible values are: "
             "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
             "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
             "#offset-aliases).")
    parser.add_argument("--disable-projector", action="store_true",
                        help="Do not run Tensorflow Projector on couples.")
    parser.add_argument("--max-people", default=20, type=int,
                        help="Maximum number of developers in churn matrix and people plots.")
    args = parser.parse_args()
    return args
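
# Typical pipeline (illustrative invocations - paths and hercules flags
# depend on your setup, only the labours.py flags are defined above):
#   hercules --burndown /path/to/repo | python3 labours.py -m project --resample month
#   python3 labours.py -i results.pb -f pb -m churn_matrix -o matrix.png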


class Reader(object):
    def read(self, file):
        raise NotImplementedError

    def get_name(self):
        raise NotImplementedError

    def get_header(self):
        raise NotImplementedError

    def get_burndown_parameters(self):
        raise NotImplementedError

    def get_project_burndown(self):
        raise NotImplementedError

    def get_files_burndown(self):
        raise NotImplementedError

    def get_people_burndown(self):
        raise NotImplementedError

    def get_ownership_burndown(self):
        raise NotImplementedError

    def get_people_interaction(self):
        raise NotImplementedError

    def get_files_coocc(self):
        raise NotImplementedError

    def get_people_coocc(self):
        raise NotImplementedError

    def get_shotness_coocc(self):
        raise NotImplementedError

    def get_shotness(self):
        raise NotImplementedError


class YamlReader(Reader):
    def read(self, file):
        yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
        try:
            loader = yaml.CLoader
        except AttributeError:
            print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
            loader = yaml.Loader
        try:
            if file != "-":
                with open(file) as fin:
                    data = yaml.load(fin, Loader=loader)
            else:
                data = yaml.load(sys.stdin, Loader=loader)
        except (UnicodeEncodeError, yaml.reader.ReaderError) as e:
            print("\nInvalid unicode in the input: %s\nPlease filter it through "
                  "fix_yaml_unicode.py" % e)
            sys.exit(1)
        if data is None:
            print("\nNo data has been read - has Hercules crashed?")
            sys.exit(1)
        self.data = data

    def get_run_times(self):
        return {}

    def get_name(self):
        return self.data["hercules"]["repository"]

    def get_header(self):
        header = self.data["hercules"]
        return header["begin_unix_time"], header["end_unix_time"]

    def get_burndown_parameters(self):
        header = self.data["Burndown"]
        return header["sampling"], header["granularity"]

    def get_project_burndown(self):
        return self.data["hercules"]["repository"], \
            self._parse_burndown_matrix(self.data["Burndown"]["project"]).T

    def get_files_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["files"].items()]

    def get_people_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["people"].items()]

    def get_ownership_burndown(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            {p[0]: self._parse_burndown_matrix(p[1])
             for p in self.data["Burndown"]["people"].items()}

    def get_people_interaction(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            self._parse_burndown_matrix(self.data["Burndown"]["people_interaction"])

    def get_files_coocc(self):
        coocc = self.data["Couples"]["files_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_people_coocc(self):
        coocc = self.data["Couples"]["people_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_shotness_coocc(self):
        shotness = self.data["Shotness"]
        index = ["%s:%s" % (i["file"], i["name"]) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int64)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = [(int(k), v) for k, v in record["counters"].items()]
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        from munch import munchify
        obj = munchify(self.data["Shotness"])
        # turn strings into ints
        for item in obj:
            item.counters = {int(k): v for k, v in item.counters.items()}
        if len(obj) == 0:
            raise KeyError
        return obj

    def get_sentiment(self):
        from munch import munchify
        return munchify({int(key): {
            "Comments": vals[2].split("|"),
            "Commits": vals[1],
            "Value": float(vals[0])
        } for key, vals in self.data["Sentiment"].items()})

    def _parse_burndown_matrix(self, matrix):
        return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
                            for line in matrix.split("\n")])

    def _parse_coocc_matrix(self, matrix):
        from scipy.sparse import csr_matrix
        data = []
        indices = []
        indptr = [0]
        for row in matrix:
            for k, v in sorted(row.items()):
                data.append(v)
                indices.append(k)
            indptr.append(indptr[-1] + len(row))
        return csr_matrix((data, indices, indptr), shape=(len(matrix),) * 2)
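
# CSR refresher for the two parsers above (synthetic numbers, not real
# hercules output): the rows [{0: 2, 2: 1}, {1: 3}] become data=[2, 1, 3],
# indices=[0, 2, 1], indptr=[0, 2, 3]; data[indptr[i]:indptr[i + 1]] and the
# matching indices slice reconstruct row i.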


class ProtobufReader(Reader):
    def read(self, file):
        try:
            from internal.pb.pb_pb2 import AnalysisResults
        except ImportError as e:
            print("\n\n>>> You need to generate internal/pb/pb_pb2.py - run \"make\"\n",
                  file=sys.stderr)
            raise e from None
        self.data = AnalysisResults()
        if file != "-":
            with open(file, "rb") as fin:
                self.data.ParseFromString(fin.read())
        else:
            self.data.ParseFromString(sys.stdin.buffer.read())
        self.contents = {}
        for key, val in self.data.contents.items():
            try:
                mod, name = PB_MESSAGES[key].rsplit(".", 1)
            except KeyError:
                sys.stderr.write("Warning: there is no registered PB decoder for %s\n" % key)
                continue
            cls = getattr(import_module(mod), name)
            self.contents[key] = msg = cls()
            msg.ParseFromString(val)

    def get_run_times(self):
        return {key: val for key, val in self.data.header.run_time_per_item.items()}

    def get_name(self):
        return self.data.header.repository

    def get_header(self):
        header = self.data.header
        return header.begin_unix_time, header.end_unix_time

    def get_burndown_parameters(self):
        burndown = self.contents["Burndown"]
        return burndown.sampling, burndown.granularity

    def get_project_burndown(self):
        return self._parse_burndown_matrix(self.contents["Burndown"].project)

    def get_files_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].files]

    def get_people_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].people]

    def get_ownership_burndown(self):
        people = self.get_people_burndown()
        return [p[0] for p in people], {p[0]: p[1].T for p in people}

    def get_people_interaction(self):
        burndown = self.contents["Burndown"]
        return [i.name for i in burndown.people], \
            self._parse_sparse_matrix(burndown.people_interaction).toarray()

    def get_files_coocc(self):
        node = self.contents["Couples"].file_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_people_coocc(self):
        node = self.contents["Couples"].people_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_shotness_coocc(self):
        shotness = self.get_shotness()
        index = ["%s:%s" % (i.file, i.name) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int32)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = list(record.counters.items())
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        records = self.contents["Shotness"].records
        if len(records) == 0:
            raise KeyError
        return records

    def get_sentiment(self):
        byday = self.contents["Sentiment"].SentimentByDay
        if len(byday) == 0:
            raise KeyError
        return byday

    def _parse_burndown_matrix(self, matrix):
        dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
        for y, row in enumerate(matrix.rows):
            for x, col in enumerate(row.columns):
                dense[y, x] = col
        return matrix.name, dense.T

    def _parse_sparse_matrix(self, matrix):
        from scipy.sparse import csr_matrix
        return csr_matrix((list(matrix.data), list(matrix.indices), list(matrix.indptr)),
                          shape=(matrix.number_of_rows, matrix.number_of_columns))


READERS = {"yaml": YamlReader, "yml": YamlReader, "pb": ProtobufReader}


def read_input(args):
    sys.stdout.write("Reading the input... ")
    sys.stdout.flush()
    if args.input != "-":
        if args.input_format == "auto":
            args.input_format = args.input.rsplit(".", 1)[1]
    elif args.input_format == "auto":
        args.input_format = "yaml"
    reader = READERS[args.input_format]()
    reader.read(args.input)
    print("done")
    return reader
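
# A rough reading of the function below: for every band (row of the burndown
# matrix), lifetimes[d] accumulates the amount of code that disappeared d
# samples after the band started shrinking, and the returned "lifetime index"
# is the weighted mean of those ages, normalized by the matrix width (NaN if
# nothing was ever removed).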


def calculate_average_lifetime(matrix):
    lifetimes = numpy.zeros(matrix.shape[1] - 1)
    for band in matrix:
        start = 0
        for i, line in enumerate(band):
            if i == 0 or band[i - 1] == 0:
                start += 1
                continue
            lifetimes[i - start] = band[i - 1] - line
        lifetimes[i - start] = band[i - 1]
    lsum = lifetimes.sum()
    if lsum != 0:
        return (lifetimes.dot(numpy.arange(1, matrix.shape[1], 1))
                / (lsum * matrix.shape[1]))
    return numpy.nan
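
# The interpolation strategy below, informally: each (band, sample) cell is
# expanded to day resolution; within a band, code "grows" linearly from the
# previous sample's level to a peak and then "decays" linearly towards the
# next measured value, so every daily slice stays consistent with the coarse
# matrix.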


def interpolate_burndown_matrix(matrix, granularity, sampling):
    daily = numpy.zeros(
        (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
        dtype=numpy.float32)
    """
    ----------> samples, x
    |
    |
    |
    bands, y
    """
    for y in range(matrix.shape[0]):
        for x in range(matrix.shape[1]):
            if y * granularity > (x + 1) * sampling:
                # the future is zeros
                continue

            def decay(start_index: int, start_val: float):
                if start_val == 0:
                    return
                k = matrix[y][x] / start_val  # <= 1
                scale = (x + 1) * sampling - start_index
                for i in range(y * granularity, (y + 1) * granularity):
                    initial = daily[i][start_index - 1]
                    for j in range(start_index, (x + 1) * sampling):
                        daily[i][j] = initial * (
                            1 + (k - 1) * (j - start_index + 1) / scale)

            def grow(finish_index: int, finish_val: float):
                initial = matrix[y][x - 1] if x > 0 else 0
                start_index = x * sampling
                if start_index < y * granularity:
                    start_index = y * granularity
                if finish_index == start_index:
                    return
                avg = (finish_val - initial) / (finish_index - start_index)
                for j in range(x * sampling, finish_index):
                    for i in range(start_index, j + 1):
                        daily[i][j] = avg
                # copy [x*g..y*s)
                for j in range(x * sampling, finish_index):
                    for i in range(y * granularity, x * sampling):
                        daily[i][j] = daily[i][j - 1]

            if (y + 1) * granularity >= (x + 1) * sampling:
                # x*granularity <= (y+1)*sampling
                # 1. x*granularity <= y*sampling
                #    y*sampling..(y+1)sampling
                #
                #       x+1
                #        /
                #       /
                #      / y+1  -|
                #     /        |
                #    / y      -|
                #   /
                #  / x
                #
                # 2. x*granularity > y*sampling
                #    x*granularity..(y+1)sampling
                #
                #       x+1
                #        /
                #       /
                #      / y+1  -|
                #     /        |
                #    / x      -|
                #   /
                #  / y
                if y * granularity <= x * sampling:
                    grow((x + 1) * sampling, matrix[y][x])
                elif (x + 1) * sampling > y * granularity:
                    grow((x + 1) * sampling, matrix[y][x])
                    avg = matrix[y][x] / ((x + 1) * sampling - y * granularity)
                    for j in range(y * granularity, (x + 1) * sampling):
                        for i in range(y * granularity, j + 1):
                            daily[i][j] = avg
            elif (y + 1) * granularity >= x * sampling:
                # y*sampling <= (x+1)*granularity < (y+1)sampling
                # y*sampling..(x+1)*granularity
                # (x+1)*granularity..(y+1)sampling
                #        x+1
                #         /\
                #        /  \
                #       /    \
                #      /    y+1
                #     /
                #    y
                v1 = matrix[y][x - 1]
                v2 = matrix[y][x]
                delta = (y + 1) * granularity - x * sampling
                previous = 0
                if x > 0 and (x - 1) * sampling >= y * granularity:
                    # x*g <= (y-1)*s <= y*s <= (x+1)*g <= (y+1)*s
                    #           |________|.......^
                    if x > 1:
                        previous = matrix[y][x - 2]
                    scale = sampling
                else:
                    # (y-1)*s < x*g <= y*s <= (x+1)*g <= (y+1)*s
                    #            |______|.......^
                    scale = sampling if x == 0 else x * sampling - y * granularity
                peak = v1 + (v1 - previous) / scale * delta
                if v2 > peak:
                    # we need to adjust the peak, it may not be less than the decayed value
                    if x < matrix.shape[1] - 1:
                        # y*s <= (x+1)*g <= (y+1)*s < (y+2)*s
                        #           ^.........|_________|
                        k = (v2 - matrix[y][x + 1]) / sampling  # > 0
                        peak = matrix[y][x] + k * ((x + 1) * sampling - (y + 1) * granularity)
                        # peak > v2 > v1
                    else:
                        peak = v2
                        # not enough data to interpolate; this is at least not restricted
                grow((y + 1) * granularity, peak)
                decay((y + 1) * granularity, peak)
            else:
                # (x+1)*granularity < y*sampling
                # y*sampling..(y+1)sampling
                decay(x * sampling, matrix[y][x - 1])
    return daily
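
# Cheap smoke test for the function above (synthetic values, not real
# hercules output) - only the output shape is asserted here, the
# interpolation itself is eyeballed on the plots:
#   >>> m = numpy.array([[4, 4], [0, 4]])
#   >>> interpolate_burndown_matrix(m, granularity=2, sampling=2).shape
#   (4, 4)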


def load_burndown(header, name, matrix, resample):
    import pandas

    start, last, sampling, granularity = header
    assert sampling > 0
    assert granularity >= sampling
    start = datetime.fromtimestamp(start)
    last = datetime.fromtimestamp(last)
    print(name, "lifetime index:", calculate_average_lifetime(matrix))
    finish = start + timedelta(days=matrix.shape[1] * sampling)
    if resample not in ("no", "raw"):
        print("resampling to %s, please wait..." % resample)
        # Interpolate the day x day matrix.
        # Each day brings equal weight in the granularity.
        # Sampling's interpolation is linear.
        daily = interpolate_burndown_matrix(matrix, granularity, sampling)
        daily[(last - start).days:] = 0
        # Resample the bands
        aliases = {
            "year": "A",
            "month": "M"
        }
        resample = aliases.get(resample, resample)
        periods = 0
        date_granularity_sampling = [start]
        while date_granularity_sampling[-1] < finish:
            periods += 1
            date_granularity_sampling = pandas.date_range(
                start, periods=periods, freq=resample)
        date_range_sampling = pandas.date_range(
            date_granularity_sampling[0],
            periods=(finish - date_granularity_sampling[0]).days,
            freq="1D")
        # Fill the new square matrix
        matrix = numpy.zeros(
            (len(date_granularity_sampling), len(date_range_sampling)),
            dtype=numpy.float32)
        for i, gdt in enumerate(date_granularity_sampling):
            istart = (date_granularity_sampling[i - 1] - start).days \
                if i > 0 else 0
            ifinish = (gdt - start).days

            for j, sdt in enumerate(date_range_sampling):
                if (sdt - start).days >= istart:
                    break

            matrix[i, j:] = \
                daily[istart:ifinish, (sdt - start).days:].sum(axis=0)
        # Hardcode some cases to improve labels' readability
        if resample in ("year", "A"):
            labels = [dt.year for dt in date_granularity_sampling]
        elif resample in ("month", "M"):
            labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
        else:
            labels = [dt.date() for dt in date_granularity_sampling]
    else:
        labels = [
            "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
                         (
                         start + timedelta(days=(i + 1) * granularity)).date())
            for i in range(matrix.shape[0])]
        if len(labels) > 18:
            warnings.warn("Too many labels - consider resampling.")
        resample = "M"  # fake resampling type is checked while plotting
        date_range_sampling = pandas.date_range(
            start + timedelta(days=sampling), periods=matrix.shape[1],
            freq="%dD" % sampling)
    return name, matrix, date_range_sampling, labels, granularity, sampling, resample
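
# Both branches of load_burndown() return the same tuple:
# (name, matrix, date_range_sampling, labels, granularity, sampling, resample).
# The fake resample = "M" set in the raw branch makes plot_burndown() skip its
# YearLocator tick tweaks, which only run when "M" is absent from `resample`.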


def load_ownership(header, sequence, contents, max_people):
    import pandas

    start, last, sampling, _ = header
    start = datetime.fromtimestamp(start)
    last = datetime.fromtimestamp(last)
    people = []
    for name in sequence:
        people.append(contents[name].sum(axis=1))
    people = numpy.array(people)
    date_range_sampling = pandas.date_range(
        start + timedelta(days=sampling), periods=people[0].shape[0],
        freq="%dD" % sampling)
    if people.shape[0] > max_people:
        order = numpy.argsort(-people.sum(axis=1))
        people = people[order[:max_people]]
        sequence = [sequence[i] for i in order[:max_people]]
        print("Warning: truncated people to most owning %d" % max_people)
    for i, name in enumerate(sequence):
        if len(name) > 40:
            sequence[i] = name[:37] + "..."
    return sequence, people, date_range_sampling, last


def load_churn_matrix(people, matrix, max_people):
    matrix = matrix.astype(float)
    if matrix.shape[0] > max_people:
        order = numpy.argsort(-matrix[:, 0])
        matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
        people = [people[i] for i in order[:max_people]]
        print("Warning: truncated people to most productive %d" % max_people)
    zeros = matrix[:, 0] == 0
    matrix[zeros, :] = 1
    matrix /= matrix[:, 0][:, None]
    matrix = -matrix[:, 1:]
    matrix[zeros, :] = 0
    for i, name in enumerate(people):
        if len(name) > 40:
            people[i] = name[:37] + "..."
    return people, matrix
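
# Worked example of the normalization above (synthetic numbers): a row
# [10, 1, 3] - 10 lines written, 1 overwritten by "Unidentified", 3 by
# developer #0 - becomes [-0.1, -0.3]; rows that wrote nothing are zeroed
# instead of dividing by zero.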


def import_pyplot(backend, style):
    import matplotlib
    if backend:
        matplotlib.use(backend)
    from matplotlib import pyplot
    pyplot.style.use(style)
    return matplotlib, pyplot


def apply_plot_style(figure, axes, legend, background, font_size, axes_size):
    foreground = "black" if background == "white" else "white"
    if axes_size is None:
        axes_size = (12, 9)
    else:
        axes_size = tuple(float(p) for p in axes_size.split(","))
    figure.set_size_inches(*axes_size)
    for side in ("bottom", "top", "left", "right"):
        axes.spines[side].set_color(foreground)
    for axis in (axes.xaxis, axes.yaxis):
        axis.label.update(dict(fontsize=font_size, color=foreground))
    for axis in ("x", "y"):
        getattr(axes, axis + "axis").get_offset_text().set_size(font_size)
        axes.tick_params(axis=axis, colors=foreground, labelsize=font_size)
    try:
        axes.ticklabel_format(axis="y", style="sci", scilimits=(0, 3))
    except AttributeError:
        pass
    figure.patch.set_facecolor(background)
    axes.set_facecolor(background)
    if legend is not None:
        frame = legend.get_frame()
        for setter in (frame.set_facecolor, frame.set_edgecolor):
            setter(background)
        for text in legend.get_texts():
            text.set_color(foreground)


def get_plot_path(base, name):
    root, ext = os.path.splitext(base)
    if not ext:
        ext = ".png"
    output = os.path.join(root, name + ext)
    os.makedirs(os.path.dirname(output), exist_ok=True)
    return output
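
# For instance, get_plot_path("out/plots.svg", "project") yields
# "out/plots/project.svg" (creating out/plots/ if needed), and an
# extension-less base defaults to ".png".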


def deploy_plot(title, output, background):
    import matplotlib.pyplot as pyplot

    if not output:
        pyplot.gcf().canvas.set_window_title(title)
        pyplot.show()
    else:
        if title:
            pyplot.title(title, color="black" if background == "white" else "white")
        try:
            pyplot.tight_layout()
        except:  # noqa: E722
            print("Warning: failed to set the tight layout")
        pyplot.savefig(output, transparent=True)
    pyplot.clf()


def default_json(x):
    if hasattr(x, "tolist"):
        return x.tolist()
    if hasattr(x, "isoformat"):
        return x.isoformat()
    return x
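
# default_json is the `default=` hook for the json.dump calls below: numpy
# arrays go through .tolist(), datetime-like values through .isoformat(),
# e.g. json.dumps({"t": datetime(2018, 1, 1)}, default=default_json).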


def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granularity,
                  sampling, resample):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "burndown"
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
    if args.relative:
        for i in range(matrix.shape[1]):
            matrix[:, i] /= matrix[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(date_range_sampling[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(date_range_sampling[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    title = "%s %d x %d (granularity %d, sampling %d)" % \
        ((name,) + matrix.shape + (granularity, sampling))
    output = args.output
    if output:
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
    deploy_plot(title, output, args.background)


def plot_many_burndown(args, target, header, parts):
    if not args.output:
        print("Warning: output not set, showing %d plots." % len(parts))
    itercnt = progress.bar(parts, expected_size=len(parts)) \
        if progress is not None else parts
    stdout = io.StringIO()
    for name, matrix in itercnt:
        backup = sys.stdout
        sys.stdout = stdout
        plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample))
        sys.stdout = backup
    sys.stdout.write(stdout.getvalue())


def plot_churn_matrix(args, repo, people, matrix):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "churn_matrix"
        if args.mode == "all":
            output = get_plot_path(args.output, "matrix")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    s = 4 + matrix.shape[1] * 0.3
    fig = pyplot.figure(figsize=(s, s))
    ax = fig.add_subplot(111)
    ax.xaxis.set_label_position("top")
    ax.matshow(matrix, cmap=pyplot.cm.OrRd)
    ax.set_xticks(numpy.arange(0, matrix.shape[1]))
    ax.set_yticks(numpy.arange(0, matrix.shape[0]))
    ax.set_yticklabels(people, va="center")
    ax.set_xticks(numpy.arange(0.5, matrix.shape[1] + 0.5), minor=True)
    ax.set_xticklabels(["Unidentified"] + people, rotation=45, ha="left",
                       va="bottom", rotation_mode="anchor")
    ax.set_yticks(numpy.arange(0.5, matrix.shape[0] + 0.5), minor=True)
    ax.grid(which="minor")
    apply_plot_style(fig, ax, None, args.background, args.font_size, args.size)
    if not args.output:
        pos1 = ax.get_position()
        pos2 = (pos1.x0 + 0.15, pos1.y0 - 0.1, pos1.width * 0.9, pos1.height * 0.9)
        ax.set_position(pos2)
    if args.mode == "all":
        output = get_plot_path(args.output, "matrix")
    else:
        output = args.output
    title = "%s %d developers overwrite" % (repo, matrix.shape[0])
    if args.output:
        # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
        title = ""
    deploy_plot(title, output, args.background)


def plot_ownership(args, repo, names, people, date_range, last):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "ownership"
        if args.mode == "all":
            output = get_plot_path(args.output, "people")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    pyplot.stackplot(date_range, people, labels=names)
    pyplot.xlim(date_range[0], last)
    if args.relative:
        for i in range(people.shape[1]):
            people[:, i] /= people[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size)
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    if args.mode == "all":
        output = get_plot_path(args.output, "people")
    else:
        output = args.output
    deploy_plot("%s code ownership through time" % repo, output, args.background)


IDEAL_SHARD_SIZE = 4096


def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
    try:
        from . import swivel
    except (SystemError, ImportError):
        import swivel
    import tensorflow as tf

    assert matrix.shape[0] == matrix.shape[1]
    assert len(index) <= matrix.shape[0]
    outlier_threshold = numpy.percentile(matrix.data, 99)
    matrix.data[matrix.data > outlier_threshold] = outlier_threshold
    nshards = len(index) // shard_size
    if nshards * shard_size < len(index):
        nshards += 1
        shard_size = len(index) // nshards
        nshards = len(index) // shard_size
    remainder = len(index) - nshards * shard_size
    if remainder > 0:
        lengths = matrix.indptr[1:] - matrix.indptr[:-1]
        filtered = sorted(numpy.argsort(lengths)[remainder:])
    else:
        filtered = list(range(len(index)))
    if len(filtered) < matrix.shape[0]:
        print("Truncating the sparse matrix...")
        matrix = matrix[filtered, :][:, filtered]
    meta_index = []
    for i, j in enumerate(filtered):
        meta_index.append((index[j], matrix[i, i]))
    index = [mi[0] for mi in meta_index]
    with tempfile.TemporaryDirectory(prefix="hercules_labours_", dir=tmpdir or None) as tmproot:
        print("Writing Swivel metadata...")
        vocabulary = "\n".join(index)
        with open(os.path.join(tmproot, "row_vocab.txt"), "w") as out:
            out.write(vocabulary)
        with open(os.path.join(tmproot, "col_vocab.txt"), "w") as out:
            out.write(vocabulary)
        del vocabulary
        bool_sums = matrix.indptr[1:] - matrix.indptr[:-1]
        bool_sums_str = "\n".join(map(str, bool_sums.tolist()))
        with open(os.path.join(tmproot, "row_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        with open(os.path.join(tmproot, "col_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        del bool_sums_str
        reorder = numpy.argsort(-bool_sums)
        print("Writing Swivel shards...")
        for row in range(nshards):
            for col in range(nshards):
                def _int64s(xs):
                    return tf.train.Feature(
                        int64_list=tf.train.Int64List(value=list(xs)))

                def _floats(xs):
                    return tf.train.Feature(
                        float_list=tf.train.FloatList(value=list(xs)))

                indices_row = reorder[row::nshards]
                indices_col = reorder[col::nshards]
                shard = matrix[indices_row][:, indices_col].tocoo()

                example = tf.train.Example(features=tf.train.Features(feature={
                    "global_row": _int64s(indices_row),
                    "global_col": _int64s(indices_col),
                    "sparse_local_row": _int64s(shard.row),
                    "sparse_local_col": _int64s(shard.col),
                    "sparse_value": _floats(shard.data)}))

                with open(os.path.join(tmproot, "shard-%03d-%03d.pb" % (row, col)), "wb") as out:
                    out.write(example.SerializeToString())
        print("Training Swivel model...")
        swivel.FLAGS.submatrix_rows = shard_size
        swivel.FLAGS.submatrix_cols = shard_size
        if len(meta_index) <= IDEAL_SHARD_SIZE / 16:
            embedding_size = 50
            num_epochs = 100000
        elif len(meta_index) <= IDEAL_SHARD_SIZE:
            embedding_size = 50
            num_epochs = 50000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 2:
            embedding_size = 60
            num_epochs = 10000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 4:
            embedding_size = 70
            num_epochs = 8000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 10:
            embedding_size = 80
            num_epochs = 5000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 25:
            embedding_size = 100
            num_epochs = 1000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 100:
            embedding_size = 200
            num_epochs = 600
        else:
            embedding_size = 300
            num_epochs = 300
        if os.getenv("CI"):
            # Travis, AppVeyor etc. during the integration tests
            num_epochs /= 10
        swivel.FLAGS.embedding_size = embedding_size
        swivel.FLAGS.input_base_path = tmproot
        swivel.FLAGS.output_base_path = tmproot
        swivel.FLAGS.loss_multiplier = 1.0 / shard_size
        swivel.FLAGS.num_epochs = num_epochs
        # Tensorflow 1.5 parses sys.argv unconditionally *applause*
        argv_backup = sys.argv[1:]
        del sys.argv[1:]
        swivel.main(None)
        sys.argv.extend(argv_backup)
        print("Reading Swivel embeddings...")
        embeddings = []
        with open(os.path.join(tmproot, "row_embedding.tsv")) as frow:
            with open(os.path.join(tmproot, "col_embedding.tsv")) as fcol:
                for i, (lrow, lcol) in enumerate(zip(frow, fcol)):
                    prow, pcol = (l.split("\t", 1) for l in (lrow, lcol))
                    assert prow[0] == pcol[0]
                    erow, ecol = \
                        (numpy.fromstring(p[1], dtype=numpy.float32, sep="\t")
                         for p in (prow, pcol))
                    embeddings.append((erow + ecol) / 2)
    return meta_index, embeddings


class CORSWebServer(object):
    def __init__(self):
        self.thread = threading.Thread(target=self.serve)
        self.server = None

    def serve(self):
        outer = self

        try:
            from http.server import HTTPServer, SimpleHTTPRequestHandler, test
        except ImportError:  # Python 2
            from BaseHTTPServer import HTTPServer, test
            from SimpleHTTPServer import SimpleHTTPRequestHandler

        class ClojureServer(HTTPServer):
            def __init__(self, *args, **kwargs):
                HTTPServer.__init__(self, *args, **kwargs)
                outer.server = self

        class CORSRequestHandler(SimpleHTTPRequestHandler):
            def end_headers(self):
                self.send_header("Access-Control-Allow-Origin", "*")
                SimpleHTTPRequestHandler.end_headers(self)

        test(CORSRequestHandler, ClojureServer)

    def start(self):
        self.thread.start()

    def stop(self):
        if self.running:
            self.server.shutdown()
            self.thread.join()

    @property
    def running(self):
        return self.server is not None


web_server = CORSWebServer()
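
# Why the CORS server: projector.tensorflow.org fetches the JSON config and
# the TSV files written below via cross-origin requests, so a plain
# SimpleHTTPRequestHandler would be rejected without the
# "Access-Control-Allow-Origin: *" header added above.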


def write_embeddings(name, output, run_server, index, embeddings):
    print("Writing Tensorflow Projector files...")
    if not output:
        output = "couples_" + name
    if output.endswith(".json"):
        output = os.path.join(output[:-5], "couples")
        run_server = False
    metaf = "%s_%s_meta.tsv" % (output, name)
    with open(metaf, "w") as fout:
        fout.write("name\tcommits\n")
        for pair in index:
            fout.write("%s\t%s\n" % pair)
    print("Wrote", metaf)
    dataf = "%s_%s_data.tsv" % (output, name)
    with open(dataf, "w") as fout:
        for vec in embeddings:
            fout.write("\t".join(str(v) for v in vec))
            fout.write("\n")
    print("Wrote", dataf)
    jsonf = "%s_%s.json" % (output, name)
    with open(jsonf, "w") as fout:
        fout.write("""{
  "embeddings": [
    {
      "tensorName": "%s %s coupling",
      "tensorShape": [%s, %s],
      "tensorPath": "http://0.0.0.0:8000/%s",
      "metadataPath": "http://0.0.0.0:8000/%s"
    }
  ]
}
""" % (output, name, len(embeddings), len(embeddings[0]), dataf, metaf))
    print("Wrote %s" % jsonf)
    if run_server and not web_server.running:
        web_server.start()
    url = "http://projector.tensorflow.org/?config=http://0.0.0.0:8000/" + jsonf
    print(url)
    if run_server:
        if shutil.which("xdg-open") is not None:
            os.system("xdg-open " + url)
        else:
            browser = os.getenv("BROWSER", "")
            if browser:
                os.system(browser + " " + url)
            else:
                print("\t" + url)


def show_shotness_stats(data):
    top = sorted(((r.counters[i], i) for i, r in enumerate(data)), reverse=True)
    for count, i in top:
        r = data[i]
        print("%8d %s:%s [%s]" % (count, r.file, r.name, r.internal_role))
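
# Sample output line (made-up values): "      42 core/parser.py:Parse [Function]".
# The count is the node's own modification counter, i.e. the diagonal of the
# shotness co-occurrence matrix built in get_shotness_coocc().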


def show_sentiment_stats(args, name, resample, start, data):
    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    start = datetime.fromtimestamp(start)
    data = sorted(data.items())
    xdates = [start + timedelta(days=d[0]) for d in data]
    xpos = []
    ypos = []
    xneg = []
    yneg = []
    for x, (_, y) in zip(xdates, data):
        y = 0.5 - y.Value
        if y > 0:
            xpos.append(x)
            ypos.append(y)
        else:
            xneg.append(x)
            yneg.append(y)
    pyplot.bar(xpos, ypos, color="g", label="Positive")
    pyplot.bar(xneg, yneg, color="r", label="Negative")
    legend = pyplot.legend(loc=1, fontsize=args.font_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    pyplot.xlim(xdates[0], xdates[-1])
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(xdates[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(xdates[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
    overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
    title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
        name, overall_pos, overall_neg, overall_pos - overall_neg)
    deploy_plot(title, args.output, args.background)


def main():
    args = parse_args()
    reader = read_input(args)
    header = reader.get_header()
    name = reader.get_name()

    burndown_warning = "Burndown stats were not collected. Re-run hercules with --burndown."
    burndown_files_warning = \
        "Burndown stats for files were not collected. Re-run hercules with " \
        "--burndown --burndown-files."
    burndown_people_warning = \
        "Burndown stats for people were not collected. Re-run hercules with " \
        "--burndown --burndown-people."
    couples_warning = "Coupling stats were not collected. Re-run hercules with --couples."
    shotness_warning = "Structural hotness stats were not collected. Re-run hercules with " \
        "--shotness. Also check --languages - the output may be empty."
    sentiment_warning = "Sentiment stats were not collected. Re-run hercules with --sentiment."

    def run_times():
        rt = reader.get_run_times()
        import pandas
        series = pandas.to_timedelta(pandas.Series(rt).sort_values(ascending=False), unit="s")
        df = pandas.concat([series, series / series.sum()], axis=1)
        df.columns = ["time", "ratio"]
        print(df)

    def project_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print("project: " + burndown_warning)
            return
        plot_burndown(args, "project",
                      *load_burndown(full_header, *reader.get_project_burndown(),
                                     resample=args.resample))

    def files_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
        except KeyError:
            print("files: " + burndown_files_warning)

    def people_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "person", full_header, reader.get_people_burndown())
        except KeyError:
            print("people: " + burndown_people_warning)

    def churn_matrix():
        try:
            plot_churn_matrix(args, name, *load_churn_matrix(
                *reader.get_people_interaction(), max_people=args.max_people))
        except KeyError:
            print("churn_matrix: " + burndown_people_warning)

    def ownership_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_ownership(args, name, *load_ownership(
                full_header, *reader.get_ownership_burndown(), max_people=args.max_people))
        except KeyError:
            print("ownership: " + burndown_people_warning)

    def couples():
        try:
            write_embeddings("files", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_files_coocc(),
                                               tmpdir=args.couples_tmp_dir))
            write_embeddings("people", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_people_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(couples_warning)
        try:
            write_embeddings("shotness", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_shotness_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(shotness_warning)

    def shotness():
        try:
            data = reader.get_shotness()
        except KeyError:
            print(shotness_warning)
            return
        show_shotness_stats(data)

    def sentiment():
        try:
            data = reader.get_sentiment()
        except KeyError:
            print(sentiment_warning)
            return
        show_sentiment_stats(args, reader.get_name(), args.resample, reader.get_header()[0], data)

    if args.mode == "run_times":
        run_times()
    elif args.mode == "project":
        project_burndown()
    elif args.mode == "file":
        files_burndown()
    elif args.mode == "person":
        people_burndown()
    elif args.mode == "churn_matrix":
        churn_matrix()
    elif args.mode == "ownership":
        ownership_burndown()
    elif args.mode == "couples":
        couples()
    elif args.mode == "shotness":
        shotness()
    elif args.mode == "sentiment":
        sentiment()
    elif args.mode == "all":
        project_burndown()
        files_burndown()
        people_burndown()
        churn_matrix()
        ownership_burndown()
        couples()
        shotness()
        sentiment()

    if web_server.running:
        secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
        print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
        sys.stdout.flush()
        try:
            time.sleep(secs)
        except KeyboardInterrupt:
            pass
        web_server.stop()


if __name__ == "__main__":
    sys.exit(main())