cli.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. import argparse
  2. from argparse import Namespace
  3. import os
  4. import subprocess
  5. import sys
  6. import time
  7. from typing import List
  8. import numpy
  9. from labours.cors_web_server import web_server
  10. from labours.embeddings import train_embeddings, write_embeddings
  11. from labours.modes.burndown import load_burndown, plot_burndown, plot_many_burndown
  12. from labours.modes.devs import show_devs, show_devs_efforts
  13. from labours.modes.devs_parallel import load_devs_parallel, show_devs_parallel
  14. from labours.modes.languages import show_languages
  15. from labours.modes.old_vs_new import show_old_vs_new
  16. from labours.modes.overwrites import load_overwrites_matrix, plot_overwrites_matrix
  17. from labours.modes.ownership import load_ownership, plot_ownership
  18. from labours.modes.sentiment import show_sentiment_stats
  19. from labours.modes.shotness import show_shotness_stats
  20. from labours.readers import read_input
  21. from labours.utils import import_pandas
  22. # NB: this value is modified within the Dockerfile.
  23. DEFAULT_MATPLOTLIB_BACKEND = None
  24. def list_matplotlib_styles() -> List[str]:
  25. script = (
  26. "import sys; from matplotlib import pyplot; "
  27. "sys.stdout.write(repr(pyplot.style.available))"
  28. )
  29. styles = eval(subprocess.check_output([sys.executable, "-c", script]))
  30. styles.remove("classic")
  31. return ["default", "classic"] + styles
  32. def parse_args() -> Namespace:
  33. parser = argparse.ArgumentParser()
  34. parser.add_argument(
  35. "-o",
  36. "--output",
  37. default="",
  38. help="Path to the output file/directory (empty for display). "
  39. "If the extension is JSON, the data is saved instead of "
  40. "the real image.",
  41. )
  42. parser.add_argument(
  43. "-i", "--input", default="-", help="Path to the input file (- for stdin)."
  44. )
  45. parser.add_argument(
  46. "-f", "--input-format", default="auto", choices=["yaml", "pb", "auto"]
  47. )
  48. parser.add_argument(
  49. "--font-size", default=12, type=int, help="Size of the labels and legend."
  50. )
  51. parser.add_argument(
  52. "--style",
  53. default="ggplot",
  54. choices=list_matplotlib_styles(),
  55. help="Plot style to use.",
  56. )
  57. parser.add_argument(
  58. "--backend",
  59. default=DEFAULT_MATPLOTLIB_BACKEND,
  60. help="Matplotlib backend to use.",
  61. )
  62. parser.add_argument(
  63. "--background",
  64. choices=["black", "white"],
  65. default="white",
  66. help="Plot's general color scheme.",
  67. )
  68. parser.add_argument("--size", help="Axes' size in inches, for example \"12,9\"")
  69. parser.add_argument(
  70. "--relative",
  71. action="store_true",
  72. help="Occupy 100%% height for every measurement.",
  73. )
  74. parser.add_argument("--tmpdir", help="Temporary directory for intermediate files.")
  75. parser.add_argument(
  76. "-m",
  77. "--mode",
  78. dest="modes",
  79. default=[],
  80. action="append",
  81. choices=[
  82. "burndown-project",
  83. "burndown-file",
  84. "burndown-person",
  85. "overwrites-matrix",
  86. "ownership",
  87. "couples-files",
  88. "couples-people",
  89. "couples-shotness",
  90. "shotness",
  91. "sentiment",
  92. "devs",
  93. "devs-efforts",
  94. "old-vs-new",
  95. "run-times",
  96. "languages",
  97. "devs-parallel",
  98. "all",
  99. ],
  100. help="What to plot. Can be repeated, e.g. " "-m burndown-project -m run-times",
  101. )
  102. parser.add_argument(
  103. "--resample",
  104. default="year",
  105. help="The way to resample the time series. Possible values are: "
  106. "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
  107. "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
  108. "#offset-aliases).",
  109. )
  110. dateutil_url = (
  111. "https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.parse"
  112. )
  113. parser.add_argument(
  114. "--start-date",
  115. help="Start date of time-based plots. Any format is accepted which is "
  116. "supported by %s" % dateutil_url,
  117. )
  118. parser.add_argument(
  119. "--end-date",
  120. help="End date of time-based plots. Any format is accepted which is "
  121. "supported by %s" % dateutil_url,
  122. )
  123. parser.add_argument(
  124. "--disable-projector",
  125. action="store_true",
  126. help="Do not run Tensorflow Projector on couples.",
  127. )
  128. parser.add_argument(
  129. "--max-people",
  130. default=20,
  131. type=int,
  132. help="Maximum number of developers in overwrites matrix and people plots.",
  133. )
  134. parser.add_argument(
  135. "--order-ownership-by-time",
  136. action="store_true",
  137. help="Sort developers in the ownership plot according to their first "
  138. "appearance in the history. The default is sorting by the number of "
  139. "commits.",
  140. )
  141. args = parser.parse_args()
  142. return args
  143. def main() -> None:
  144. args = parse_args()
  145. reader = read_input(args)
  146. header = reader.get_header()
  147. name = reader.get_name()
  148. burndown_warning = (
  149. "Burndown stats were not collected. Re-run hercules with --burndown."
  150. )
  151. burndown_files_warning = (
  152. "Burndown stats for files were not collected. Re-run hercules with "
  153. "--burndown --burndown-files."
  154. )
  155. burndown_people_warning = (
  156. "Burndown stats for people were not collected. Re-run hercules with "
  157. "--burndown --burndown-people."
  158. )
  159. couples_warning = (
  160. "Coupling stats were not collected. Re-run hercules with --couples."
  161. )
  162. shotness_warning = (
  163. "Structural hotness stats were not collected. Re-run hercules with "
  164. "--shotness. Also check --languages - the output may be empty."
  165. )
  166. sentiment_warning = (
  167. "Sentiment stats were not collected. Re-run hercules with --sentiment."
  168. )
  169. devs_warning = "Devs stats were not collected. Re-run hercules with --devs."
  170. def run_times():
  171. rt = reader.get_run_times()
  172. pandas = import_pandas()
  173. series = pandas.to_timedelta(
  174. pandas.Series(rt).sort_values(ascending=False), unit="s"
  175. )
  176. df = pandas.concat([series, series / series.sum()], axis=1)
  177. df.columns = ["time", "ratio"]
  178. print(df)
  179. def project_burndown():
  180. try:
  181. full_header = header + reader.get_burndown_parameters()
  182. except KeyError:
  183. print("project: " + burndown_warning)
  184. return
  185. plot_burndown(
  186. args,
  187. "project",
  188. *load_burndown(
  189. full_header,
  190. *reader.get_project_burndown(),
  191. resample=args.resample,
  192. interpolation_progress=True,
  193. ),
  194. )
  195. def files_burndown():
  196. try:
  197. full_header = header + reader.get_burndown_parameters()
  198. except KeyError:
  199. print(burndown_warning)
  200. return
  201. try:
  202. plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
  203. except KeyError:
  204. print("files: " + burndown_files_warning)
  205. def people_burndown():
  206. try:
  207. full_header = header + reader.get_burndown_parameters()
  208. except KeyError:
  209. print(burndown_warning)
  210. return
  211. try:
  212. plot_many_burndown(
  213. args, "person", full_header, reader.get_people_burndown()
  214. )
  215. except KeyError:
  216. print("people: " + burndown_people_warning)
  217. def overwrites_matrix():
  218. try:
  219. plot_overwrites_matrix(
  220. args,
  221. name,
  222. *load_overwrites_matrix(
  223. *reader.get_people_interaction(), max_people=args.max_people
  224. ),
  225. )
  226. people, matrix = load_overwrites_matrix(
  227. *reader.get_people_interaction(), max_people=1000000, normalize=False
  228. )
  229. from scipy.sparse import csr_matrix
  230. matrix = matrix[:, 1:]
  231. matrix = numpy.triu(matrix) + numpy.tril(matrix).T
  232. matrix = matrix + matrix.T
  233. matrix = csr_matrix(matrix)
  234. try:
  235. write_embeddings(
  236. "overwrites",
  237. args.output,
  238. not args.disable_projector,
  239. *train_embeddings(people, matrix, tmpdir=args.tmpdir),
  240. )
  241. except AttributeError as e:
  242. print(
  243. "Training the embeddings is not possible: %s: %s",
  244. type(e).__name__,
  245. e,
  246. )
  247. except KeyError:
  248. print("overwrites_matrix: " + burndown_people_warning)
  249. def ownership_burndown():
  250. try:
  251. full_header = header + reader.get_burndown_parameters()
  252. except KeyError:
  253. print(burndown_warning)
  254. return
  255. try:
  256. plot_ownership(
  257. args,
  258. name,
  259. *load_ownership(
  260. full_header,
  261. *reader.get_ownership_burndown(),
  262. max_people=args.max_people,
  263. order_by_time=args.order_ownership_by_time,
  264. ),
  265. )
  266. except KeyError:
  267. print("ownership: " + burndown_people_warning)
  268. def couples_files():
  269. try:
  270. write_embeddings(
  271. "files",
  272. args.output,
  273. not args.disable_projector,
  274. *train_embeddings(*reader.get_files_coocc(), tmpdir=args.tmpdir),
  275. )
  276. except KeyError:
  277. print(couples_warning)
  278. def couples_people():
  279. try:
  280. write_embeddings(
  281. "people",
  282. args.output,
  283. not args.disable_projector,
  284. *train_embeddings(*reader.get_people_coocc(), tmpdir=args.tmpdir),
  285. )
  286. except KeyError:
  287. print(couples_warning)
  288. def couples_shotness():
  289. try:
  290. write_embeddings(
  291. "shotness",
  292. args.output,
  293. not args.disable_projector,
  294. *train_embeddings(*reader.get_shotness_coocc(), tmpdir=args.tmpdir),
  295. )
  296. except KeyError:
  297. print(shotness_warning)
  298. def shotness():
  299. try:
  300. data = reader.get_shotness()
  301. except KeyError:
  302. print(shotness_warning)
  303. return
  304. show_shotness_stats(data)
  305. def sentiment():
  306. try:
  307. data = reader.get_sentiment()
  308. except KeyError:
  309. print(sentiment_warning)
  310. return
  311. show_sentiment_stats(
  312. args, reader.get_name(), args.resample, reader.get_header()[0], data
  313. )
  314. def devs():
  315. try:
  316. data = reader.get_devs()
  317. except KeyError:
  318. print(devs_warning)
  319. return
  320. show_devs(
  321. args,
  322. reader.get_name(),
  323. *reader.get_header(),
  324. *data,
  325. max_people=args.max_people,
  326. )
  327. def devs_efforts():
  328. try:
  329. data = reader.get_devs()
  330. except KeyError:
  331. print(devs_warning)
  332. return
  333. show_devs_efforts(
  334. args,
  335. reader.get_name(),
  336. *reader.get_header(),
  337. *data,
  338. max_people=args.max_people,
  339. )
  340. def old_vs_new():
  341. try:
  342. data = reader.get_devs()
  343. except KeyError:
  344. print(devs_warning)
  345. return
  346. show_old_vs_new(args, reader.get_name(), *reader.get_header(), *data)
  347. def languages():
  348. try:
  349. data = reader.get_devs()
  350. except KeyError:
  351. print(devs_warning)
  352. return
  353. show_languages(args, reader.get_name(), *reader.get_header(), *data)
  354. def devs_parallel():
  355. try:
  356. ownership = reader.get_ownership_burndown()
  357. except KeyError:
  358. print(burndown_people_warning)
  359. return
  360. try:
  361. couples = reader.get_people_coocc()
  362. except KeyError:
  363. print(couples_warning)
  364. return
  365. try:
  366. devs = reader.get_devs()
  367. except KeyError:
  368. print(devs_warning)
  369. return
  370. show_devs_parallel(
  371. args,
  372. reader.get_name(),
  373. *reader.get_header(),
  374. load_devs_parallel(ownership, couples, devs, args.max_people),
  375. )
  376. modes = {
  377. "run-times": run_times,
  378. "burndown-project": project_burndown,
  379. "burndown-file": files_burndown,
  380. "burndown-person": people_burndown,
  381. "overwrites-matrix": overwrites_matrix,
  382. "ownership": ownership_burndown,
  383. "couples-files": couples_files,
  384. "couples-people": couples_people,
  385. "couples-shotness": couples_shotness,
  386. "shotness": shotness,
  387. "sentiment": sentiment,
  388. "devs": devs,
  389. "devs-efforts": devs_efforts,
  390. "old-vs-new": old_vs_new,
  391. "languages": languages,
  392. "devs-parallel": devs_parallel,
  393. }
  394. if "all" in args.modes:
  395. all_mode = True
  396. args.modes = [
  397. "burndown-project",
  398. "overwrites-matrix",
  399. "ownership",
  400. "couples-files",
  401. "couples-people",
  402. "couples-shotness",
  403. "shotness",
  404. "devs",
  405. "devs-efforts",
  406. ]
  407. else:
  408. all_mode = False
  409. for mode in args.modes:
  410. if mode not in modes:
  411. print("Unknown mode: %s" % mode)
  412. continue
  413. print("Running: %s" % mode)
  414. # `args.mode` is required for path determination in the mode functions
  415. args.mode = "all" if all_mode else mode
  416. try:
  417. modes[mode]()
  418. except ImportError as ie:
  419. print("A module required by the %s mode was not found: %s" % (mode, ie))
  420. if not all_mode:
  421. raise
  422. if web_server.running:
  423. secs = int(os.getenv("COUPLES_SERVER_TIME", "60"))
  424. print("Sleeping for %d seconds, safe to Ctrl-C" % secs)
  425. sys.stdout.flush()
  426. try:
  427. time.sleep(secs)
  428. except KeyboardInterrupt:
  429. pass
  430. web_server.stop()