#!/usr/bin/env python3
import argparse
import io
import json
import os
import re
import shutil
import sys
import tempfile
import threading
import time
import warnings
from datetime import datetime, timedelta
from importlib import import_module

try:
    from clint.textui import progress
except ImportError:
    print("Warning: clint is not installed, no fancy progressbars in the terminal for you.")
    progress = None
import numpy
import yaml

if sys.version_info[0] < 3:
    # OK, ancients, I will support Python 2, but you owe me a beer
    input = raw_input  # noqa: F821
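
# Maps the top-level analysis names found in a protobuf report to the fully
# qualified message classes used to decode them; ProtobufReader.read() resolves
# these lazily with importlib.import_module.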
PB_MESSAGES = {
    "Burndown": "internal.pb.pb_pb2.BurndownAnalysisResults",
    "Couples": "internal.pb.pb_pb2.CouplesAnalysisResults",
    "Shotness": "internal.pb.pb_pb2.ShotnessAnalysisResults",
}


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", default="",
                        help="Path to the output file/directory (empty for display). "
                             "If the extension is JSON, the data is saved instead of "
                             "the real image.")
    parser.add_argument("-i", "--input", default="-",
                        help="Path to the input file (- for stdin).")
    parser.add_argument("-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"])
    parser.add_argument("--text-size", default=12, type=int,
                        help="Size of the labels and legend.")
    parser.add_argument("--backend", help="Matplotlib backend to use.")
    parser.add_argument("--style", choices=["black", "white"], default="black",
                        help="Plot's general color scheme.")
    parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
    parser.add_argument("--relative", action="store_true",
                        help="Occupy 100%% height for every measurement.")
    parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
    parser.add_argument("-m", "--mode",
                        choices=["project", "file", "person", "churn_matrix", "ownership",
                                 "couples", "shotness", "sentiment", "all", "run_times"],
                        help="What to plot.")
    parser.add_argument(
        "--resample", default="year",
        help="The way to resample the time series. Possible values are: "
             "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
             "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
             "#offset-aliases).")
    parser.add_argument("--disable-projector", action="store_true",
                        help="Do not run Tensorflow Projector on couples.")
    parser.add_argument("--max-people", default=20, type=int,
                        help="Maximum number of developers in churn matrix and people plots.")
    args = parser.parse_args()
    return args
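

# Example invocation (the file names are hypothetical, for illustration only):
#   python3 labours.py -i hercules_result.yaml -m project -o project.png
# reads a YAML report produced by hercules and renders the project-level
# burndown chart into project.png.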


class Reader(object):
    def read(self, file):
        raise NotImplementedError

    def get_name(self):
        raise NotImplementedError

    def get_header(self):
        raise NotImplementedError

    def get_burndown_parameters(self):
        raise NotImplementedError

    def get_project_burndown(self):
        raise NotImplementedError

    def get_files_burndown(self):
        raise NotImplementedError

    def get_people_burndown(self):
        raise NotImplementedError

    def get_ownership_burndown(self):
        raise NotImplementedError

    def get_people_interaction(self):
        raise NotImplementedError

    def get_files_coocc(self):
        raise NotImplementedError

    def get_people_coocc(self):
        raise NotImplementedError

    def get_shotness_coocc(self):
        raise NotImplementedError

    def get_shotness(self):
        raise NotImplementedError


class YamlReader(Reader):
    def read(self, file):
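        # Defuse the yaml parser's non-printable character check: the pattern
        # (?!x)x can never match, so any input byte sequence is accepted.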
        yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
        try:
            loader = yaml.CLoader
        except AttributeError:
            print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
            loader = yaml.Loader
        try:
            if file != "-":
                with open(file) as fin:
                    data = yaml.load(fin, Loader=loader)
            else:
                data = yaml.load(sys.stdin, Loader=loader)
        except (UnicodeEncodeError, yaml.reader.ReaderError) as e:
            print("\nInvalid unicode in the input: %s\nPlease filter it through "
                  "fix_yaml_unicode.py" % e)
            sys.exit(1)
        if data is None:
            print("\nNo data has been read - has Hercules crashed?")
            sys.exit(1)
        self.data = data

    def get_run_times(self):
        return {}

    def get_name(self):
        return self.data["hercules"]["repository"]

    def get_header(self):
        header = self.data["hercules"]
        return header["begin_unix_time"], header["end_unix_time"]

    def get_burndown_parameters(self):
        header = self.data["Burndown"]
        return header["sampling"], header["granularity"]

    def get_project_burndown(self):
        return self.data["hercules"]["repository"], \
            self._parse_burndown_matrix(self.data["Burndown"]["project"]).T

    def get_files_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["files"].items()]

    def get_people_burndown(self):
        return [(p[0], self._parse_burndown_matrix(p[1]).T)
                for p in self.data["Burndown"]["people"].items()]

    def get_ownership_burndown(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            {p[0]: self._parse_burndown_matrix(p[1])
             for p in self.data["Burndown"]["people"].items()}

    def get_people_interaction(self):
        return self.data["Burndown"]["people_sequence"].copy(), \
            self._parse_burndown_matrix(self.data["Burndown"]["people_interaction"])

    def get_files_coocc(self):
        coocc = self.data["Couples"]["files_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_people_coocc(self):
        coocc = self.data["Couples"]["people_coocc"]
        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])

    def get_shotness_coocc(self):
        shotness = self.data["Shotness"]
        index = ["%s:%s" % (i["file"], i["name"]) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int64)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = [(int(k), v) for k, v in record["counters"].items()]
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        from munch import munchify
        obj = munchify(self.data["Shotness"])
        # turn strings into ints
        for item in obj:
            item.counters = {int(k): v for k, v in item.counters.items()}
        if len(obj) == 0:
            raise KeyError
        return obj

    def get_sentiment(self):
        from munch import munchify
        return munchify({int(key): {
            "Comments": vals[2].split("|"),
            "Commits": vals[1],
            "Value": float(vals[0])
        } for key, vals in self.data["Sentiment"].items()})

    def _parse_burndown_matrix(self, matrix):
        return numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
                            for line in matrix.split("\n")])

    def _parse_coocc_matrix(self, matrix):
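        # Build the CSR triplet directly: each YAML row is a {column: count}
        # mapping; indptr[i] is the running total of stored entries before row i.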
        from scipy.sparse import csr_matrix
        data = []
        indices = []
        indptr = [0]
        for row in matrix:
            for k, v in sorted(row.items()):
                data.append(v)
                indices.append(k)
            indptr.append(indptr[-1] + len(row))
        return csr_matrix((data, indices, indptr), shape=(len(matrix),) * 2)


class ProtobufReader(Reader):
    def read(self, file):
        try:
            from internal.pb.pb_pb2 import AnalysisResults
        except ImportError as e:
            print("\n\n>>> You need to generate internal/pb/pb_pb2.py - run \"make\"\n",
                  file=sys.stderr)
            raise e from None
        self.data = AnalysisResults()
        if file != "-":
            with open(file, "rb") as fin:
                self.data.ParseFromString(fin.read())
        else:
            self.data.ParseFromString(sys.stdin.buffer.read())
        self.contents = {}
        for key, val in self.data.contents.items():
            try:
                mod, name = PB_MESSAGES[key].rsplit(".", 1)
            except KeyError:
                sys.stderr.write("Warning: there is no registered PB decoder for %s\n" % key)
                continue
            cls = getattr(import_module(mod), name)
            self.contents[key] = msg = cls()
            msg.ParseFromString(val)

    def get_run_times(self):
        return {key: val for key, val in self.data.header.run_time_per_item.items()}

    def get_name(self):
        return self.data.header.repository

    def get_header(self):
        header = self.data.header
        return header.begin_unix_time, header.end_unix_time

    def get_burndown_parameters(self):
        burndown = self.contents["Burndown"]
        return burndown.sampling, burndown.granularity

    def get_project_burndown(self):
        return self._parse_burndown_matrix(self.contents["Burndown"].project)

    def get_files_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].files]

    def get_people_burndown(self):
        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].people]

    def get_ownership_burndown(self):
        people = self.get_people_burndown()
        return [p[0] for p in people], {p[0]: p[1].T for p in people}

    def get_people_interaction(self):
        burndown = self.contents["Burndown"]
        return [i.name for i in burndown.people], \
            self._parse_sparse_matrix(burndown.people_interaction).toarray()

    def get_files_coocc(self):
        node = self.contents["Couples"].file_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_people_coocc(self):
        node = self.contents["Couples"].people_couples
        return list(node.index), self._parse_sparse_matrix(node.matrix)

    def get_shotness_coocc(self):
        shotness = self.get_shotness()
        index = ["%s:%s" % (i.file, i.name) for i in shotness]
        indptr = numpy.zeros(len(shotness) + 1, dtype=numpy.int32)
        indices = []
        data = []
        for i, record in enumerate(shotness):
            pairs = list(record.counters.items())
            pairs.sort()
            indptr[i + 1] = indptr[i] + len(pairs)
            for k, v in pairs:
                indices.append(k)
                data.append(v)
        indices = numpy.array(indices, dtype=numpy.int32)
        data = numpy.array(data, dtype=numpy.int32)
        from scipy.sparse import csr_matrix
        return index, csr_matrix((data, indices, indptr), shape=(len(shotness),) * 2)

    def get_shotness(self):
        records = self.contents["Shotness"].records
        if len(records) == 0:
            raise KeyError
        return records

    def get_sentiment(self):
        byday = self.contents["Sentiment"].SentimentByDay
        if len(byday) == 0:
            raise KeyError
        return byday

    def _parse_burndown_matrix(self, matrix):
        dense = numpy.zeros((matrix.number_of_rows, matrix.number_of_columns), dtype=int)
        for y, row in enumerate(matrix.rows):
            for x, col in enumerate(row.columns):
                dense[y, x] = col
        return matrix.name, dense.T

    def _parse_sparse_matrix(self, matrix):
        from scipy.sparse import csr_matrix
        return csr_matrix((list(matrix.data), list(matrix.indices), list(matrix.indptr)),
                          shape=(matrix.number_of_rows, matrix.number_of_columns))


READERS = {"yaml": YamlReader, "yml": YamlReader, "pb": ProtobufReader}
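

# read_input() picks the reader from the file extension when --input-format is
# "auto"; reading from stdin ("-") defaults to YAML.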
def read_input(args):
    sys.stdout.write("Reading the input... ")
    sys.stdout.flush()
    if args.input != "-":
        if args.input_format == "auto":
            args.input_format = args.input.rsplit(".", 1)[1]
    elif args.input_format == "auto":
        args.input_format = "yaml"
    reader = READERS[args.input_format]()
    reader.read(args.input)
    print("done")
    return reader


def calculate_average_lifetime(matrix):
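    """Compute the "lifetime index" printed next to each burndown plot.

    The decreases between consecutive samples of every band are interpreted as
    lines dying at that age; the returned value is their average age, divided
    by the number of samples, or NaN when the matrix records no changes.
    """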
    lifetimes = numpy.zeros(matrix.shape[1] - 1)
    for band in matrix:
        start = 0
        for i, line in enumerate(band):
            if i == 0 or band[i - 1] == 0:
                start += 1
                continue
            lifetimes[i - start] = band[i - 1] - line
        lifetimes[i - start] = band[i - 1]
    lsum = lifetimes.sum()
    if lsum != 0:
        return (lifetimes.dot(numpy.arange(1, matrix.shape[1], 1))
                / (lsum * matrix.shape[1]))
    return numpy.nan


def interpolate_burndown_matrix(matrix, granularity, sampling):
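    """Expand the bands x samples burndown matrix to day x day resolution.

    The output has matrix.shape[0] * granularity rows and
    matrix.shape[1] * sampling columns; values inside each cell are grown or
    decayed linearly between the recorded samples.
    """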
    daily = numpy.zeros(
        (matrix.shape[0] * granularity, matrix.shape[1] * sampling),
        dtype=numpy.float32)
    """
    ----------> samples, x
    |
    |
    |
    bands, y
    """
    for y in range(matrix.shape[0]):
        for x in range(matrix.shape[1]):
            if y * granularity > (x + 1) * sampling:
                # the future is zeros
                continue

            def decay(start_index: int, start_val: float):
                if start_val == 0:
                    return
                k = matrix[y][x] / start_val  # <= 1
                scale = (x + 1) * sampling - start_index
                for i in range(y * granularity, (y + 1) * granularity):
                    initial = daily[i][start_index - 1]
                    for j in range(start_index, (x + 1) * sampling):
                        daily[i][j] = initial * (
                            1 + (k - 1) * (j - start_index + 1) / scale)

            def grow(finish_index: int, finish_val: float):
                initial = matrix[y][x - 1] if x > 0 else 0
                start_index = x * sampling
                if start_index < y * granularity:
                    start_index = y * granularity
                if finish_index == start_index:
                    return
                avg = (finish_val - initial) / (finish_index - start_index)
                for j in range(x * sampling, finish_index):
                    for i in range(start_index, j + 1):
                        daily[i][j] = avg
                # copy [x*g..y*s)
                for j in range(x * sampling, finish_index):
                    for i in range(y * granularity, x * sampling):
                        daily[i][j] = daily[i][j - 1]

            if (y + 1) * granularity >= (x + 1) * sampling:
                # x*granularity <= (y+1)*sampling
                # 1. x*granularity <= y*sampling
                #    y*sampling..(y+1)sampling
                #
                #       x+1
                #        /
                #       /
                #      / y+1  -|
                #     /        |
                #    / y      -|
                #   /
                #  / x
                #
                # 2. x*granularity > y*sampling
                #    x*granularity..(y+1)sampling
                #
                #       x+1
                #        /
                #       /
                #      / y+1  -|
                #     /        |
                #    / x      -|
                #   /
                #  / y
                if y * granularity <= x * sampling:
                    grow((x + 1) * sampling, matrix[y][x])
                elif (x + 1) * sampling > y * granularity:
                    grow((x + 1) * sampling, matrix[y][x])
                    avg = matrix[y][x] / ((x + 1) * sampling - y * granularity)
                    for j in range(y * granularity, (x + 1) * sampling):
                        for i in range(y * granularity, j + 1):
                            daily[i][j] = avg
            elif (y + 1) * granularity >= x * sampling:
                # y*sampling <= (x+1)*granularity < (y+1)sampling
                # y*sampling..(x+1)*granularity
                # (x+1)*granularity..(y+1)sampling
                #        x+1
                #         /\
                #        /  \
                #       /    \
                #      /    y+1
                #     /
                #    y
                v1 = matrix[y][x - 1]
                v2 = matrix[y][x]
                delta = (y + 1) * granularity - x * sampling
                previous = 0
                if x > 0 and (x - 1) * sampling >= y * granularity:
                    # x*g <= (y-1)*s <= y*s <= (x+1)*g <= (y+1)*s
                    #           |________|.......^
                    if x > 1:
                        previous = matrix[y][x - 2]
                    scale = sampling
                else:
                    # (y-1)*s < x*g <= y*s <= (x+1)*g <= (y+1)*s
                    #            |______|.......^
                    scale = sampling if x == 0 else x * sampling - y * granularity
                peak = v1 + (v1 - previous) / scale * delta
                if v2 > peak:
                    # we need to adjust the peak, it may not be less than the decayed value
                    if x < matrix.shape[1] - 1:
                        # y*s <= (x+1)*g <= (y+1)*s < (y+2)*s
                        #           ^.........|_________|
                        k = (v2 - matrix[y][x + 1]) / sampling  # > 0
                        peak = matrix[y][x] + k * ((x + 1) * sampling - (y + 1) * granularity)
                        # peak > v2 > v1
                    else:
                        peak = v2
                        # not enough data to interpolate; this is at least not restricted
                grow((y + 1) * granularity, peak)
                decay((y + 1) * granularity, peak)
            else:
                # (x+1)*granularity < y*sampling
                # y*sampling..(y+1)sampling
                decay(x * sampling, matrix[y][x - 1])
    return daily


def load_burndown(header, name, matrix, resample):
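    """Prepare a burndown matrix for plotting: resample the bands to the
    requested frequency (a pandas offset alias) and generate the axis labels."""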
    import pandas
    start, last, sampling, granularity = header
    assert sampling > 0
    assert granularity >= sampling
    start = datetime.fromtimestamp(start)
    last = datetime.fromtimestamp(last)
    print(name, "lifetime index:", calculate_average_lifetime(matrix))
    finish = start + timedelta(days=matrix.shape[1] * sampling)
    if resample not in ("no", "raw"):
        print("resampling to %s, please wait..." % resample)
        # Interpolate the day x day matrix.
        # Each day brings equal weight in the granularity.
        # Sampling's interpolation is linear.
        daily = interpolate_burndown_matrix(matrix, granularity, sampling)
        daily[(last - start).days:] = 0
        # Resample the bands
        aliases = {
            "year": "A",
            "month": "M"
        }
        resample = aliases.get(resample, resample)
        periods = 0
        date_granularity_sampling = [start]
        while date_granularity_sampling[-1] < finish:
            periods += 1
            date_granularity_sampling = pandas.date_range(
                start, periods=periods, freq=resample)
        date_range_sampling = pandas.date_range(
            date_granularity_sampling[0],
            periods=(finish - date_granularity_sampling[0]).days,
            freq="1D")
        # Fill the new square matrix
        matrix = numpy.zeros(
            (len(date_granularity_sampling), len(date_range_sampling)),
            dtype=numpy.float32)
        for i, gdt in enumerate(date_granularity_sampling):
            istart = (date_granularity_sampling[i - 1] - start).days \
                if i > 0 else 0
            ifinish = (gdt - start).days
            for j, sdt in enumerate(date_range_sampling):
                if (sdt - start).days >= istart:
                    break
            matrix[i, j:] = \
                daily[istart:ifinish, (sdt - start).days:].sum(axis=0)
        # Hardcode some cases to improve labels' readability
        if resample in ("year", "A"):
            labels = [dt.year for dt in date_granularity_sampling]
        elif resample in ("month", "M"):
            labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
        else:
            labels = [dt.date() for dt in date_granularity_sampling]
    else:
        labels = [
            "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
                         (start + timedelta(days=(i + 1) * granularity)).date())
            for i in range(matrix.shape[0])]
        if len(labels) > 18:
            warnings.warn("Too many labels - consider resampling.")
        resample = "M"  # fake resampling type is checked while plotting
        date_range_sampling = pandas.date_range(
            start + timedelta(days=sampling), periods=matrix.shape[1],
            freq="%dD" % sampling)
    return name, matrix, date_range_sampling, labels, granularity, sampling, resample


def load_ownership(header, sequence, contents, max_people):
    import pandas
    start, last, sampling, _ = header
    start = datetime.fromtimestamp(start)
    last = datetime.fromtimestamp(last)
    people = []
    for name in sequence:
        people.append(contents[name].sum(axis=1))
    people = numpy.array(people)
    date_range_sampling = pandas.date_range(
        start + timedelta(days=sampling), periods=people[0].shape[0],
        freq="%dD" % sampling)
    if people.shape[0] > max_people:
        order = numpy.argsort(-people.sum(axis=1))
        people = people[order[:max_people]]
        sequence = [sequence[i] for i in order[:max_people]]
        print("Warning: truncated people to most owning %d" % max_people)
    for i, name in enumerate(sequence):
        if len(name) > 40:
            sequence[i] = name[:37] + "..."
    return sequence, people, date_range_sampling, last


def load_churn_matrix(people, matrix, max_people):
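    """Select the max_people most productive developers and scale each row of
    the overwrite matrix by that developer's own line count (column 0)."""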
    matrix = matrix.astype(float)
    if matrix.shape[0] > max_people:
        order = numpy.argsort(-matrix[:, 0])
        matrix = matrix[order[:max_people]][:, [0, 1] + list(2 + order[:max_people])]
        people = [people[i] for i in order[:max_people]]
        print("Warning: truncated people to most productive %d" % max_people)
    zeros = matrix[:, 0] == 0
    matrix[zeros, :] = 1
    matrix /= matrix[:, 0][:, None]
    matrix = -matrix[:, 1:]
    matrix[zeros, :] = 0
    for i, name in enumerate(people):
        if len(name) > 40:
            people[i] = name[:37] + "..."
    return people, matrix


def apply_plot_style(figure, axes, legend, style, text_size, axes_size):
    if axes_size is None:
        axes_size = (12, 9)
    else:
        axes_size = tuple(float(p) for p in axes_size.split(","))
    figure.set_size_inches(*axes_size)
    for side in ("bottom", "top", "left", "right"):
        axes.spines[side].set_color(style)
    for axis in (axes.xaxis, axes.yaxis):
        axis.label.update(dict(fontsize=text_size, color=style))
    for axis in ("x", "y"):
        getattr(axes, axis + "axis").get_offset_text().set_size(text_size)
        axes.tick_params(axis=axis, colors=style, labelsize=text_size)
    try:
        axes.ticklabel_format(axis="y", style="sci", scilimits=(0, 3))
    except AttributeError:
        pass
    if legend is not None:
        frame = legend.get_frame()
        for setter in (frame.set_facecolor, frame.set_edgecolor):
            setter("black" if style == "white" else "white")
        for text in legend.get_texts():
            text.set_color(style)


def get_plot_path(base, name):
    root, ext = os.path.splitext(base)
    if not ext:
        ext = ".png"
    output = os.path.join(root, name + ext)
    os.makedirs(os.path.dirname(output), exist_ok=True)
    return output


def deploy_plot(title, output, style):
    import matplotlib.pyplot as pyplot
    if not output:
        pyplot.gcf().canvas.set_window_title(title)
        pyplot.show()
    else:
        if title:
            pyplot.title(title, color=style)
        try:
            pyplot.tight_layout()
        except:  # noqa: E722
            print("Warning: failed to set the tight layout")
        pyplot.savefig(output, transparent=True)
    pyplot.clf()


def default_json(x):
    if hasattr(x, "tolist"):
        return x.tolist()
    if hasattr(x, "isoformat"):
        return x.isoformat()
    return x


def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granularity,
                  sampling, resample):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "burndown"
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return
    import matplotlib
    if args.backend:
        matplotlib.use(args.backend)
    import matplotlib.pyplot as pyplot
    if args.relative:
        for i in range(matrix.shape[1]):
            matrix[:, i] /= matrix[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    # Normalize before plotting: stackplot() copies the data, so dividing
    # afterwards would leave the drawn stack in absolute units.
    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
    legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.style, args.text_size, args.size)
    pyplot.xlim(date_range_sampling[0], date_range_sampling[-1])
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(date_range_sampling[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(date_range_sampling[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    title = "%s %d x %d (granularity %d, sampling %d)" % \
        ((name,) + matrix.shape + (granularity, sampling))
    output = args.output
    if output:
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
    deploy_plot(title, output, args.style)


def plot_many_burndown(args, target, header, parts):
    if not args.output:
        print("Warning: output not set, showing %d plots." % len(parts))
    itercnt = progress.bar(parts, expected_size=len(parts)) \
        if progress is not None else parts
    stdout = io.StringIO()
    for name, matrix in itercnt:
        backup = sys.stdout
        sys.stdout = stdout
        plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample))
        sys.stdout = backup
    sys.stdout.write(stdout.getvalue())


def plot_churn_matrix(args, repo, people, matrix):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "churn_matrix"
        if args.mode == "all":
            output = get_plot_path(args.output, "matrix")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return
    import matplotlib
    if args.backend:
        matplotlib.use(args.backend)
    import matplotlib.pyplot as pyplot
    s = 4 + matrix.shape[1] * 0.3
    fig = pyplot.figure(figsize=(s, s))
    ax = fig.add_subplot(111)
    ax.xaxis.set_label_position("top")
    ax.matshow(matrix, cmap=pyplot.cm.OrRd)
    ax.set_xticks(numpy.arange(0, matrix.shape[1]))
    ax.set_yticks(numpy.arange(0, matrix.shape[0]))
    ax.set_yticklabels(people, va="center")
    ax.set_xticks(numpy.arange(0.5, matrix.shape[1] + 0.5), minor=True)
    ax.set_xticklabels(["Unidentified"] + people, rotation=45, ha="left",
                       va="bottom", rotation_mode="anchor")
    ax.set_yticks(numpy.arange(0.5, matrix.shape[0] + 0.5), minor=True)
    ax.grid(which="minor")
    apply_plot_style(fig, ax, None, args.style, args.text_size, args.size)
    if not args.output:
        pos1 = ax.get_position()
        pos2 = (pos1.x0 + 0.15, pos1.y0 - 0.1, pos1.width * 0.9, pos1.height * 0.9)
        ax.set_position(pos2)
    if args.mode == "all":
        output = get_plot_path(args.output, "matrix")
    else:
        output = args.output
    title = "%s %d developers overwrite" % (repo, matrix.shape[0])
    if args.output:
        # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
        title = ""
    deploy_plot(title, output, args.style)


def plot_ownership(args, repo, names, people, date_range, last):
    if args.output and args.output.endswith(".json"):
        data = locals().copy()
        del data["args"]
        data["type"] = "ownership"
        if args.mode == "all":
            output = get_plot_path(args.output, "people")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return
    import matplotlib
    if args.backend:
        matplotlib.use(args.backend)
    import matplotlib.pyplot as pyplot
    if args.relative:
        for i in range(people.shape[1]):
            people[:, i] /= people[:, i].sum()
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    # Normalize before plotting for the same reason as in plot_burndown().
    pyplot.stackplot(date_range, people, labels=names)
    pyplot.xlim(date_range[0], last)
    legend = pyplot.legend(loc=legend_loc, fontsize=args.text_size)
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.style, args.text_size, args.size)
    if args.mode == "all":
        output = get_plot_path(args.output, "people")
    else:
        output = args.output
    deploy_plot("%s code ownership through time" % repo, output, args.style)


IDEAL_SHARD_SIZE = 4096


def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
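    """Train Swivel embeddings on the co-occurrence matrix.

    The matrix is clipped at its 99th percentile, the sparsest rows are dropped
    so the shards divide evenly, each shard is serialized as a tf.train.Example
    proto for the bundled swivel trainer, and the averaged row/column embeddings
    are returned together with their names.
    """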
    try:
        from . import swivel
    except (SystemError, ImportError):
        import swivel
    import tensorflow as tf
    assert matrix.shape[0] == matrix.shape[1]
    assert len(index) <= matrix.shape[0]
    outlier_threshold = numpy.percentile(matrix.data, 99)
    matrix.data[matrix.data > outlier_threshold] = outlier_threshold
    nshards = len(index) // shard_size
    if nshards * shard_size < len(index):
        nshards += 1
        shard_size = len(index) // nshards
        nshards = len(index) // shard_size
    remainder = len(index) - nshards * shard_size
    if remainder > 0:
        lengths = matrix.indptr[1:] - matrix.indptr[:-1]
        filtered = sorted(numpy.argsort(lengths)[remainder:])
    else:
        filtered = list(range(len(index)))
    if len(filtered) < matrix.shape[0]:
        print("Truncating the sparse matrix...")
        matrix = matrix[filtered, :][:, filtered]
    meta_index = []
    for i, j in enumerate(filtered):
        meta_index.append((index[j], matrix[i, i]))
    index = [mi[0] for mi in meta_index]
    with tempfile.TemporaryDirectory(prefix="hercules_labours_", dir=tmpdir or None) as tmproot:
        print("Writing Swivel metadata...")
        vocabulary = "\n".join(index)
        with open(os.path.join(tmproot, "row_vocab.txt"), "w") as out:
            out.write(vocabulary)
        with open(os.path.join(tmproot, "col_vocab.txt"), "w") as out:
            out.write(vocabulary)
        del vocabulary
        bool_sums = matrix.indptr[1:] - matrix.indptr[:-1]
        bool_sums_str = "\n".join(map(str, bool_sums.tolist()))
        with open(os.path.join(tmproot, "row_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        with open(os.path.join(tmproot, "col_sums.txt"), "w") as out:
            out.write(bool_sums_str)
        del bool_sums_str
        reorder = numpy.argsort(-bool_sums)
        print("Writing Swivel shards...")
        for row in range(nshards):
            for col in range(nshards):
                def _int64s(xs):
                    return tf.train.Feature(
                        int64_list=tf.train.Int64List(value=list(xs)))

                def _floats(xs):
                    return tf.train.Feature(
                        float_list=tf.train.FloatList(value=list(xs)))

                indices_row = reorder[row::nshards]
                indices_col = reorder[col::nshards]
                shard = matrix[indices_row][:, indices_col].tocoo()
                example = tf.train.Example(features=tf.train.Features(feature={
                    "global_row": _int64s(indices_row),
                    "global_col": _int64s(indices_col),
                    "sparse_local_row": _int64s(shard.row),
                    "sparse_local_col": _int64s(shard.col),
                    "sparse_value": _floats(shard.data)}))
                with open(os.path.join(tmproot, "shard-%03d-%03d.pb" % (row, col)), "wb") as out:
                    out.write(example.SerializeToString())
        print("Training Swivel model...")
        swivel.FLAGS.submatrix_rows = shard_size
        swivel.FLAGS.submatrix_cols = shard_size
        if len(meta_index) <= IDEAL_SHARD_SIZE / 16:
            embedding_size = 50
            num_epochs = 100000
        elif len(meta_index) <= IDEAL_SHARD_SIZE:
            embedding_size = 50
            num_epochs = 50000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 2:
            embedding_size = 60
            num_epochs = 10000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 4:
            embedding_size = 70
            num_epochs = 8000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 10:
            embedding_size = 80
            num_epochs = 5000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 25:
            embedding_size = 100
            num_epochs = 1000
        elif len(meta_index) <= IDEAL_SHARD_SIZE * 100:
            embedding_size = 200
            num_epochs = 600
        else:
            embedding_size = 300
            num_epochs = 300
        if os.getenv("CI"):
            # Travis, AppVeyor etc. during the integration tests
            num_epochs //= 10  # integer division keeps the flag an int
        swivel.FLAGS.embedding_size = embedding_size
        swivel.FLAGS.input_base_path = tmproot
        swivel.FLAGS.output_base_path = tmproot
        swivel.FLAGS.loss_multiplier = 1.0 / shard_size
        swivel.FLAGS.num_epochs = num_epochs
        # Tensorflow 1.5 parses sys.argv unconditionally *applause*
        argv_backup = sys.argv[1:]
        del sys.argv[1:]
        swivel.main(None)
        sys.argv.extend(argv_backup)
        print("Reading Swivel embeddings...")
        embeddings = []
        with open(os.path.join(tmproot, "row_embedding.tsv")) as frow:
            with open(os.path.join(tmproot, "col_embedding.tsv")) as fcol:
                for i, (lrow, lcol) in enumerate(zip(frow, fcol)):
                    prow, pcol = (l.split("\t", 1) for l in (lrow, lcol))
                    assert prow[0] == pcol[0]
                    erow, ecol = \
                        (numpy.fromstring(p[1], dtype=numpy.float32, sep="\t")
                         for p in (prow, pcol))
                    embeddings.append((erow + ecol) / 2)
    return meta_index, embeddings


class CORSWebServer(object):
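    """Serves the current directory over HTTP on a background thread, adding
    Access-Control-Allow-Origin headers so that the hosted Tensorflow
    Projector can fetch the generated files."""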

    def __init__(self):
        self.thread = threading.Thread(target=self.serve)
        self.server = None

    def serve(self):
        outer = self
        try:
            from http.server import HTTPServer, SimpleHTTPRequestHandler, test
        except ImportError:  # Python 2
            from BaseHTTPServer import HTTPServer, test
            from SimpleHTTPServer import SimpleHTTPRequestHandler

        class ClojureServer(HTTPServer):
            def __init__(self, *args, **kwargs):
                HTTPServer.__init__(self, *args, **kwargs)
                outer.server = self

        class CORSRequestHandler(SimpleHTTPRequestHandler):
            def end_headers(self):
                self.send_header("Access-Control-Allow-Origin", "*")
                SimpleHTTPRequestHandler.end_headers(self)

        test(CORSRequestHandler, ClojureServer)

    def start(self):
        self.thread.start()

    def stop(self):
        if self.running:
            self.server.shutdown()
            self.thread.join()

    @property
    def running(self):
        return self.server is not None


web_server = CORSWebServer()


def write_embeddings(name, output, run_server, index, embeddings):
    print("Writing Tensorflow Projector files...")
    if not output:
        output = "couples_" + name
    if output.endswith(".json"):
        output = os.path.join(output[:-5], "couples")
        run_server = False
    metaf = "%s_%s_meta.tsv" % (output, name)
    with open(metaf, "w") as fout:
        fout.write("name\tcommits\n")
        for pair in index:
            fout.write("%s\t%s\n" % pair)
    print("Wrote", metaf)
    dataf = "%s_%s_data.tsv" % (output, name)
    with open(dataf, "w") as fout:
        for vec in embeddings:
            fout.write("\t".join(str(v) for v in vec))
            fout.write("\n")
    print("Wrote", dataf)
    jsonf = "%s_%s.json" % (output, name)
    with open(jsonf, "w") as fout:
        fout.write("""{
  "embeddings": [
    {
      "tensorName": "%s %s coupling",
      "tensorShape": [%s, %s],
      "tensorPath": "http://0.0.0.0:8000/%s",
      "metadataPath": "http://0.0.0.0:8000/%s"
    }
  ]
}
""" % (output, name, len(embeddings), len(embeddings[0]), dataf, metaf))
    print("Wrote %s" % jsonf)
    if run_server and not web_server.running:
        web_server.start()
    url = "http://projector.tensorflow.org/?config=http://0.0.0.0:8000/" + jsonf
    print(url)
    if run_server:
        if shutil.which("xdg-open") is not None:
            os.system("xdg-open " + url)
        else:
            browser = os.getenv("BROWSER", "")
            if browser:
                os.system(browser + " " + url)
            else:
                print("\t" + url)


def show_shotness_stats(data):
    top = sorted(((r.counters[i], i) for i, r in enumerate(data)), reverse=True)
    for count, i in top:
        r = data[i]
        print("%8d %s:%s [%s]" % (count, r.file, r.name, r.internal_role))


def show_sentiment_stats(args, name, resample, start, data):
    import matplotlib
    if args.backend:
        matplotlib.use(args.backend)
    import matplotlib.pyplot as pyplot
    start = datetime.fromtimestamp(start)
    data = sorted(data.items())
    xdates = [start + timedelta(days=d[0]) for d in data]
    xpos = []
    ypos = []
    xneg = []
    yneg = []
    for x, (_, y) in zip(xdates, data):
        y = 0.5 - y.Value
        if y > 0:
            xpos.append(x)
            ypos.append(y)
        else:
            xneg.append(x)
            yneg.append(y)
    pyplot.bar(xpos, ypos, color="g", label="Positive")
    pyplot.bar(xneg, yneg, color="r", label="Negative")
    legend = pyplot.legend(loc=1, fontsize=args.text_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.style, args.text_size, args.size)
    pyplot.xlim(xdates[0], xdates[-1])
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs[0] < pyplot.xlim()[0]:
        del locs[0]
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(xdates[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(xdates[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
    overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
    title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
        name, overall_pos, overall_neg, overall_pos - overall_neg)
    deploy_plot(title, args.output, args.style)


def main():
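    """Entry point: read the report, then dispatch on --mode to the matching
    plotting routine (or run all of them)."""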
    args = parse_args()
    reader = read_input(args)
    header = reader.get_header()
    name = reader.get_name()
    burndown_warning = "Burndown stats were not collected. Re-run hercules with --burndown."
    burndown_files_warning = \
        "Burndown stats for files were not collected. Re-run hercules with " \
        "--burndown --burndown-files."
    burndown_people_warning = \
        "Burndown stats for people were not collected. Re-run hercules with " \
        "--burndown --burndown-people."
    couples_warning = "Coupling stats were not collected. Re-run hercules with --couples."
    shotness_warning = "Structural hotness stats were not collected. Re-run hercules with " \
                       "--shotness. Also check --languages - the output may be empty."
    sentiment_warning = "Sentiment stats were not collected. Re-run hercules with --sentiment."

    def run_times():
        rt = reader.get_run_times()
        import pandas
        series = pandas.to_timedelta(pandas.Series(rt).sort_values(ascending=False), unit="s")
        df = pandas.concat([series, series / series.sum()], axis=1)
        df.columns = ["time", "ratio"]
        print(df)

    def project_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print("project: " + burndown_warning)
            return
        plot_burndown(args, "project",
                      *load_burndown(full_header, *reader.get_project_burndown(),
                                     resample=args.resample))

    def files_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
        except KeyError:
            print("files: " + burndown_files_warning)

    def people_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_many_burndown(args, "person", full_header, reader.get_people_burndown())
        except KeyError:
            print("people: " + burndown_people_warning)

    def churn_matrix():
        try:
            plot_churn_matrix(args, name, *load_churn_matrix(
                *reader.get_people_interaction(), max_people=args.max_people))
        except KeyError:
            print("churn_matrix: " + burndown_people_warning)

    def ownership_burndown():
        try:
            full_header = header + reader.get_burndown_parameters()
        except KeyError:
            print(burndown_warning)
            return
        try:
            plot_ownership(args, name, *load_ownership(
                full_header, *reader.get_ownership_burndown(), max_people=args.max_people))
        except KeyError:
            print("ownership: " + burndown_people_warning)

    def couples():
        try:
            write_embeddings("files", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_files_coocc(),
                                               tmpdir=args.couples_tmp_dir))
            write_embeddings("people", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_people_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(couples_warning)
        try:
            write_embeddings("shotness", args.output, not args.disable_projector,
                             *train_embeddings(*reader.get_shotness_coocc(),
                                               tmpdir=args.couples_tmp_dir))
        except KeyError:
            print(shotness_warning)

    def shotness():
        try:
            data = reader.get_shotness()
        except KeyError:
            print(shotness_warning)
            return
        show_shotness_stats(data)

    def sentiment():
        try:
            data = reader.get_sentiment()
        except KeyError:
            print(sentiment_warning)
            return
        show_sentiment_stats(args, reader.get_name(), args.resample, reader.get_header()[0], data)

    if args.mode == "run_times":
        run_times()
    elif args.mode == "project":
        project_burndown()
    elif args.mode == "file":
        files_burndown()
    elif args.mode == "person":
        people_burndown()
    elif args.mode == "churn_matrix":
        churn_matrix()
    elif args.mode == "ownership":
        ownership_burndown()
    elif args.mode == "couples":
        couples()
    elif args.mode == "shotness":
        shotness()
    elif args.mode == "sentiment":
        sentiment()
    elif args.mode == "all":
        project_burndown()
        files_burndown()
        people_burndown()
        churn_matrix()
        ownership_burndown()
        couples()
        shotness()
        sentiment()
    if web_server.running:
        secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
        print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
        sys.stdout.flush()
        try:
            time.sleep(secs)
        except KeyboardInterrupt:
            pass
        web_server.stop()


if __name__ == "__main__":
    sys.exit(main())