cli.py 14 KB


  1. import argparse
  2. from argparse import Namespace
  3. import os
  4. import subprocess
  5. import sys
  6. import time
  7. from typing import List
  8. import numpy
  9. from labours.cors_web_server import web_server
  10. from labours.embeddings import train_embeddings, write_embeddings
  11. from labours.modes.burndown import load_burndown, plot_burndown, plot_many_burndown
  12. from labours.modes.devs import show_devs, show_devs_efforts
  13. from labours.modes.devs_parallel import load_devs_parallel, show_devs_parallel
  14. from labours.modes.languages import show_languages
  15. from labours.modes.old_vs_new import show_old_vs_new
  16. from labours.modes.overwrites import load_overwrites_matrix, plot_overwrites_matrix
  17. from labours.modes.ownership import load_ownership, plot_ownership
  18. from labours.modes.sentiment import show_sentiment_stats
  19. from labours.modes.shotness import show_shotness_stats
  20. from labours.readers import read_input
  21. from labours.utils import import_pandas
# NB: this value is modified within the Dockerfile.
# It is used as the default of the `--backend` command line option.
# NOTE(review): the Dockerfile presumably rewrites this line textually, so
# keep the assignment on a single line - confirm against the Dockerfile.
DEFAULT_MATPLOTLIB_BACKEND = None
  24. def list_matplotlib_styles() -> List[str]:
  25. script = (
  26. "import sys; from matplotlib import pyplot; "
  27. "sys.stdout.write(repr(pyplot.style.available))"
  28. )
  29. styles = eval(subprocess.check_output([sys.executable, "-c", script]))
  30. styles.remove("classic")
  31. return ["default", "classic"] + styles
  32. def parse_args() -> Namespace:
  33. parser = argparse.ArgumentParser()
  34. parser.add_argument(
  35. "-o",
  36. "--output",
  37. default="",
  38. help="Path to the output file/directory (empty for display). "
  39. "If the extension is JSON, the data is saved instead of "
  40. "the real image.",
  41. )
  42. parser.add_argument(
  43. "-i", "--input", default="-", help="Path to the input file (- for stdin)."
  44. )
  45. parser.add_argument(
  46. "-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"]
  47. )
  48. parser.add_argument(
  49. "--font-size", default=12, type=int, help="Size of the labels and legend."
  50. )
  51. parser.add_argument(
  52. "--style",
  53. default="ggplot",
  54. choices=list_matplotlib_styles(),
  55. help="Plot style to use.",
  56. )
  57. parser.add_argument(
  58. "--backend",
  59. default=DEFAULT_MATPLOTLIB_BACKEND,
  60. help="Matplotlib backend to use.",
  61. )
  62. parser.add_argument(
  63. "--background",
  64. choices=["black", "white"],
  65. default="white",
  66. help="Plot's general color scheme.",
  67. )
  68. parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
  69. parser.add_argument(
  70. "--relative",
  71. action="store_true",
  72. help="Occupy 100%% height for every measurement.",
  73. )
  74. parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
  75. parser.add_argument(
  76. "-m",
  77. "--mode",
  78. dest="modes",
  79. default=[],
  80. action="append",
  81. choices=[
  82. "burndown-project",
  83. "burndown-file",
  84. "burndown-person",
  85. "overwrites-matrix",
  86. "ownership",
  87. "couples-files",
  88. "couples-people",
  89. "couples-shotness",
  90. "shotness",
  91. "sentiment",
  92. "devs",
  93. "devs-efforts",
  94. "old-vs-new",
  95. "run-times",
  96. "languages",
  97. "devs-parallel",
  98. "all",
  99. ],
  100. help="What to plot. Can be repeated, e.g. " "-m burndown-project -m run-times",
  101. )
  102. parser.add_argument(
  103. "--resample",
  104. default="year",
  105. help="The way to resample the time series. Possible values are: "
  106. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  107. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  108. "#offset-aliases).",
  109. )
  110. dateutil_url = (
  111. "https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.parse"
  112. )
  113. parser.add_argument(
  114. "--start-date",
  115. help="Start date of time-based plots. Any format is accepted which is "
  116. "supported by %s" % dateutil_url,
  117. )
  118. parser.add_argument(
  119. "--end-date",
  120. help="End date of time-based plots. Any format is accepted which is "
  121. "supported by %s" % dateutil_url,
  122. )
  123. parser.add_argument(
  124. "--disable-projector",
  125. action="store_true",
  126. help="Do not run Tensorflow Projector on couples.",
  127. )
  128. parser.add_argument(
  129. "--max-people",
  130. default=20,
  131. type=int,
  132. help="Maximum number of developers in overwrites matrix and people plots.",
  133. )
  134. parser.add_argument(
  135. "--order-ownership-by-time",
  136. action="store_true",
  137. help="Sort developers in the ownership plot according to their first "
  138. "appearance in the history. The default is sorting by the number of "
  139. "commits.",
  140. )
  141. args = parser.parse_args()
  142. return args
  143. def main() -> None:
  144. args = parse_args()
  145. reader = read_input(args)
  146. header = reader.get_header()
  147. name = reader.get_name()
  148. burndown_warning = (
  149. "Burndown stats were not collected. Re-run hercules with --burndown."
  150. )
  151. burndown_files_warning = (
  152. "Burndown stats for files were not collected. Re-run hercules with "
  153. "--burndown --burndown-files."
  154. )
  155. burndown_people_warning = (
  156. "Burndown stats for people were not collected. Re-run hercules with "
  157. "--burndown --burndown-people."
  158. )
  159. couples_warning = (
  160. "Coupling stats were not collected. Re-run hercules with --couples."
  161. )
  162. shotness_warning = (
  163. "Structural hotness stats were not collected. Re-run hercules with "
  164. "--shotness. Also check --languages - the output may be empty."
  165. )
  166. sentiment_warning = (
  167. "Sentiment stats were not collected. Re-run hercules with --sentiment."
  168. )
  169. devs_warning = "Devs stats were not collected. Re-run hercules with --devs."
  170. def run_times():
  171. rt = reader.get_run_times()
  172. pandas = import_pandas()
  173. series = pandas.to_timedelta(
  174. pandas.Series(rt).sort_values(ascending=False), unit="s"
  175. )
  176. df = pandas.concat([series, series / series.sum()], axis=1)
  177. df.columns = ["time", "ratio"]
  178. print(df)
  179. def project_burndown():
  180. try:
  181. full_header = header + reader.get_burndown_parameters()
  182. except KeyError:
  183. print("project: " + burndown_warning)
  184. return
  185. plot_burndown(
  186. args,
  187. "project",
  188. *load_burndown(
  189. full_header,
  190. *reader.get_project_burndown(),
  191. resample=args.resample,
  192. interpolation_progress=True,
  193. ),
  194. )
  195. def files_burndown():
  196. try:
  197. full_header = header + reader.get_burndown_parameters()
  198. except KeyError:
  199. print(burndown_warning)
  200. return
  201. try:
  202. plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
  203. except KeyError:
  204. print("files: " + burndown_files_warning)
  205. def people_burndown():
  206. try:
  207. full_header = header + reader.get_burndown_parameters()
  208. except KeyError:
  209. print(burndown_warning)
  210. return
  211. try:
  212. plot_many_burndown(
  213. args, "person", full_header, reader.get_people_burndown()
  214. )
  215. except KeyError:
  216. print("people: " + burndown_people_warning)
  217. def overwrites_matrix():
  218. try:
  219. plot_overwrites_matrix(
  220. args,
  221. name,
  222. *load_overwrites_matrix(
  223. *reader.get_people_interaction(), max_people=args.max_people
  224. ),
  225. )
  226. people, matrix = load_overwrites_matrix(
  227. *reader.get_people_interaction(), max_people=1000000, normalize=False
  228. )
  229. from scipy.sparse import csr_matrix
  230. matrix = matrix[:, 1:]
  231. matrix = numpy.triu(matrix) + numpy.tril(matrix).T
  232. matrix = matrix + matrix.T
  233. matrix = csr_matrix(matrix)
  234. try:
  235. write_embeddings(
  236. "overwrites",
  237. args.output,
  238. not args.disable_projector,
  239. *train_embeddings(people, matrix, tmpdir=args.tmpdir),
  240. )
  241. except AttributeError as e:
  242. print(
  243. "Training the embeddings is not possible: %s: %s",
  244. type(e).__name__,
  245. e,
  246. )
  247. except KeyError:
  248. print("overwrites_matrix: " + burndown_people_warning)
  249. def ownership_burndown():
  250. try:
  251. full_header = header + reader.get_burndown_parameters()
  252. except KeyError:
  253. print(burndown_warning)
  254. return
  255. try:
  256. plot_ownership(
  257. args,
  258. name,
  259. *load_ownership(
  260. full_header,
  261. *reader.get_ownership_burndown(),
  262. max_people=args.max_people,
  263. order_by_time=args.order_ownership_by_time,
  264. ),
  265. )
  266. except KeyError:
  267. print("ownership: " + burndown_people_warning)
  268. def couples_files():
  269. try:
  270. write_embeddings(
  271. "files",
  272. args.output,
  273. not args.disable_projector,
  274. *train_embeddings(*reader.get_files_coocc(), tmpdir=args.tmpdir),
  275. )
  276. except KeyError:
  277. print(couples_warning)
  278. def couples_people():
  279. try:
  280. write_embeddings(
  281. "people",
  282. args.output,
  283. not args.disable_projector,
  284. *train_embeddings(*reader.get_people_coocc(), tmpdir=args.tmpdir),
  285. )
  286. except KeyError:
  287. print(couples_warning)
  288. def couples_shotness():
  289. try:
  290. write_embeddings(
  291. "shotness",
  292. args.output,
  293. not args.disable_projector,
  294. *train_embeddings(*reader.get_shotness_coocc(), tmpdir=args.tmpdir),
  295. )
  296. except KeyError:
  297. print(shotness_warning)
  298. def shotness():
  299. try:
  300. data = reader.get_shotness()
  301. except KeyError:
  302. print(shotness_warning)
  303. return
  304. show_shotness_stats(data)
  305. def sentiment():
  306. try:
  307. data = reader.get_sentiment()
  308. except KeyError:
  309. print(sentiment_warning)
  310. return
  311. show_sentiment_stats(
  312. args, reader.get_name(), args.resample, reader.get_header()[0], data
  313. )
  314. def devs():
  315. try:
  316. data = reader.get_devs()
  317. except KeyError:
  318. print(devs_warning)
  319. return
  320. show_devs(
  321. args,
  322. reader.get_name(),
  323. *reader.get_header(),
  324. *data,
  325. max_people=args.max_people,
  326. )
  327. def devs_efforts():
  328. try:
  329. data = reader.get_devs()
  330. except KeyError:
  331. print(devs_warning)
  332. return
  333. show_devs_efforts(
  334. args,
  335. reader.get_name(),
  336. *reader.get_header(),
  337. *data,
  338. max_people=args.max_people,
  339. )
  340. def old_vs_new():
  341. try:
  342. data = reader.get_devs()
  343. except KeyError:
  344. print(devs_warning)
  345. return
  346. show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)
  347. def languages():
  348. try:
  349. data = reader.get_devs()
  350. except KeyError:
  351. print(devs_warning)
  352. return
  353. show_languages(args, reader.get_name(), *reader.get_header(), *data)
  354. def devs_parallel():
  355. try:
  356. ownership = reader.get_ownership_burndown()
  357. except KeyError:
  358. print(burndown_people_warning)
  359. return
  360. try:
  361. couples = reader.get_people_coocc()
  362. except KeyError:
  363. print(couples_warning)
  364. return
  365. try:
  366. devs = reader.get_devs()
  367. except KeyError:
  368. print(devs_warning)
  369. return
  370. show_devs_parallel(
  371. args,
  372. reader.get_name(),
  373. *reader.get_header(),
  374. load_devs_parallel(ownership, couples, devs, args.max_people),
  375. )
  376. modes = {
  377. "run-times": run_times,
  378. "burndown-project": project_burndown,
  379. "burndown-file": files_burndown,
  380. "burndown-person": people_burndown,
  381. "overwrites-matrix": overwrites_matrix,
  382. "ownership": ownership_burndown,
  383. "couples-files": couples_files,
  384. "couples-people": couples_people,
  385. "couples-shotness": couples_shotness,
  386. "shotness": shotness,
  387. "sentiment": sentiment,
  388. "devs": devs,
  389. "devs-efforts": devs_efforts,
  390. "old-vs-new": old_vs_new,
  391. "languages": languages,
  392. "devs-parallel": devs_parallel,
  393. }
  394. if "all" in args.modes:
  395. all_mode = True
  396. args.modes = [
  397. "burndown-project",
  398. "overwrites-matrix",
  399. "ownership",
  400. "couples-files",
  401. "couples-people",
  402. "couples-shotness",
  403. "shotness",
  404. "devs",
  405. "devs-efforts",
  406. ]
  407. else:
  408. all_mode = False
  409. for mode in args.modes:
  410. if mode not in modes:
  411. print("Unknown mode: %s" % mode)
  412. continue
  413. print("Running: %s" % mode)
  414. # `args.mode` is required for path determination in the mode functions
  415. args.mode = "all" if all_mode else mode
  416. try:
  417. modes[mode]()
  418. except ImportError as ie:
  419. print("A module required by the %s mode was not found: %s" % (mode, ie))
  420. if not all_mode:
  421. raise
  422. if web_server.running:
  423. secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
  424. print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
  425. sys.stdout.flush()
  426. try:
  427. time.sleep(secs)
  428. except KeyboardInterrupt:
  429. pass
  430. web_server.stop()