mkhtml.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2022 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import http
  18. import sys
  19. import os
  20. import string
  21. import re
  22. from datetime import datetime
  23. import locale
  24. import json
  25. import pathlib
  26. import shutil
  27. import subprocess
  28. import time
  29. try:
  30. # Python 2 import
  31. from HTMLParser import HTMLParser
  32. except:
  33. # Python 3 import
  34. from html.parser import HTMLParser
  35. from six.moves.urllib import request as urlrequest
  36. from six.moves.urllib.error import HTTPError, URLError
  37. try:
  38. import urlparse
  39. except:
  40. import urllib.parse as urlparse
  41. try:
  42. import grass.script as gs
  43. except ImportError:
  44. # During compilation GRASS GIS
  45. gs = None
# HTTP headers sent with every request; some servers (e.g. the GitHub API)
# reject requests that carry no User-Agent
HEADERS = {
    "User-Agent": "Mozilla/5.0",
}
# all standard HTTP status codes, used to map a response code to its description
HTTP_STATUS_CODES = list(http.HTTPStatus)

# flag for the few remaining Python 2/3 differences handled below
if sys.version_info[0] == 2:
    PY2 = True
else:
    PY2 = False

if not PY2:
    # alias so the isinstance() checks in decode() work on both major versions
    unicode = str

# GRASS version being built; VERSION_NUMBER is set by the build system
grass_version = os.getenv("VERSION_NUMBER", "unknown")
trunk_url = ""
addons_url = ""
if grass_version != "unknown":
    # NOTE: 'major' is also read by get_last_git_commit() below
    major, minor, patch = grass_version.split(".")
    trunk_url = "https://github.com/OSGeo/grass/tree/main/"
    addons_url = f"https://github.com/OSGeo/grass-addons/tree/grass{major}/"
  63. def _get_encoding():
  64. encoding = locale.getdefaultlocale()[1]
  65. if not encoding:
  66. encoding = 'UTF-8'
  67. return encoding
  68. def decode(bytes_):
  69. """Decode bytes with default locale and return (unicode) string
  70. No-op if parameter is not bytes (assumed unicode string).
  71. :param bytes bytes_: the bytes to decode
  72. """
  73. if isinstance(bytes_, unicode):
  74. return bytes_
  75. if isinstance(bytes_, bytes):
  76. enc = _get_encoding()
  77. return bytes_.decode(enc)
  78. return unicode(bytes_)
  79. def urlopen(url, *args, **kwargs):
  80. """Wrapper around urlopen. Same function as 'urlopen', but with the
  81. ability to define headers.
  82. """
  83. request = urlrequest.Request(url, headers=HEADERS)
  84. return urlrequest.urlopen(request, *args, **kwargs)
  85. def set_proxy():
  86. """Set proxy"""
  87. proxy = os.getenv("GRASS_PROXY")
  88. if proxy:
  89. proxies = {}
  90. for ptype, purl in (p.split("=") for p in proxy.split(",")):
  91. proxies[ptype] = purl
  92. urlrequest.install_opener(
  93. urlrequest.build_opener(urlrequest.ProxyHandler(proxies))
  94. )
  95. set_proxy()
def download_git_commit(url, response_format, *args, **kwargs):
    """Download module/addon last commit from GitHub API

    :param str url: url address
    :param str response_format: content type expected in the response
                                Content-Type header

    :return urllib.request.urlopen or None response: response object or
                                                     None on download failure
    """
    try:
        response = urlopen(url, *args, **kwargs)
        if not response.code == 200:
            # translate the numeric status code into its standard description
            index = HTTP_STATUS_CODES.index(response.code)
            desc = HTTP_STATUS_CODES[index].description
            gs.fatal(
                _(
                    "Download commit from <{url}>, return status code "
                    "{code}, {desc}".format(
                        url=url,
                        code=response.code,
                        desc=desc,
                    ),
                ),
            )
        if response_format not in response.getheader("Content-Type"):
            gs.fatal(
                _(
                    "Wrong downloaded commit file format. "
                    "Check url <{url}>. Allowed file format is "
                    "{response_format}.".format(
                        url=url,
                        response_format=response_format,
                    ),
                ),
            )
        return response
    except HTTPError as err:
        # non-fatal: the manual page is still generated, just without
        # commit info ('pgm' is the module-level program name)
        gs.warning(
            _(
                "The download of the commit from the GitHub API "
                "server wasn't successful, <{}>. Commit and commit "
                "date will not be included in the <{}> addon html manual "
                "page.".format(err.msg, pgm)
            ),
        )
    except URLError:
        # e.g. no network connection; also non-fatal
        gs.warning(
            _(
                "Download file from <{url}>, failed. Check internet "
                "connection. Commit and commit date will not be included "
                "in the <{pgm}> addon manual page.".format(url=url, pgm=pgm)
            ),
        )
  147. def get_last_git_commit(src_dir, is_addon, addon_path):
  148. """Get last module/addon git commit
  149. :param str src_dir: module/addon source dir
  150. :param bool is_addon: True if it is addon
  151. :param str addon_path: addon path
  152. :return dict git_log: dict with key commit and date, if not
  153. possible download commit from GitHub API server
  154. values of keys have "unknown" string
  155. """
  156. unknown = "unknown"
  157. git_log = {"commit": unknown, "date": unknown}
  158. cwd = os.getcwd()
  159. datetime_format = "%A %b %d %H:%M:%S %Y" # e.g. Sun Jan 16 23:09:35 2022
  160. grass_modules_url = (
  161. "https://api.github.com/repos/osgeo/grass/commits?path={path}"
  162. "&page=1&per_page=1&sha=main".format(path=src_dir)
  163. ) # sha=git_branch_name
  164. grass_addons_url = (
  165. "https://api.github.com/repos/osgeo/grass-addons/commits?path={path}"
  166. "&page=1&per_page=1&sha=grass{major}".format(
  167. path=addon_path,
  168. major=major,
  169. )
  170. ) # sha=git_branch_name
  171. if shutil.which("git"):
  172. if os.path.exists(src_dir):
  173. os.chdir(src_dir)
  174. git_log["date"] = time.ctime(os.path.getmtime(src_dir))
  175. stdout, stderr = subprocess.Popen(
  176. args=["git", "log", "-1"],
  177. stdout=subprocess.PIPE,
  178. stderr=subprocess.PIPE,
  179. ).communicate()
  180. stdout = decode(stdout)
  181. stderr = decode(stderr)
  182. os.chdir(cwd)
  183. if stderr and "fatal: not a git repository" in stderr:
  184. response = download_git_commit(
  185. url=grass_addons_url if is_addon else grass_modules_url,
  186. response_format="application/json",
  187. )
  188. if response:
  189. commit = json.loads(response.read())
  190. if commit:
  191. git_log["commit"] = commit[0]["sha"]
  192. git_log["date"] = datetime.strptime(
  193. commit[0]["commit"]["author"]["date"], "%Y-%m-%dT%H:%M:%SZ"
  194. ).strftime(datetime_format)
  195. else:
  196. if stdout:
  197. commit = stdout.splitlines()
  198. git_log["commit"] = commit[0].split(" ")[-1]
  199. commit_date = commit[2].lstrip("Date:").strip()
  200. git_log["date"] = commit_date.rsplit(" ", 1)[0]
  201. return git_log
  202. html_page_footer_pages_path = (
  203. os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  204. if os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  205. else ""
  206. )
  207. pgm = sys.argv[1]
  208. src_file = "%s.html" % pgm
  209. tmp_file = "%s.tmp.html" % pgm
# common page header; ${PGM} is substituted with the program name below
header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>GRASS GIS Manual: ${PGM}</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">
<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""

# used when the page provides a '<!-- meta page description -->' comment
header_nopgm = """<h2>${PGM}</h2>
"""

# module page header without a one-line description
header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""

# module page header with a one-line description (${PGM_DESC})
header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""

# SOURCE CODE section linking to the repository and the commit history
sourcecode = string.Template(
    """<h2>SOURCE CODE</h2>
<p>
  Available at:
  <a href="${URL_SOURCE}">${PGM} source code</a>
  (<a href="${URL_LOG}">history</a>)
</p>
<p>
  ${DATE_TAG}
</p>
"""
)
# page footer including a link to the module family index (e.g. raster)
footer_index = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
""")

# page footer without a module family index link
footer_noindex = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
""")
  279. def read_file(name):
  280. try:
  281. f = open(name, 'rb')
  282. s = f.read()
  283. f.close()
  284. if PY2:
  285. return s
  286. else:
  287. return decode(s)
  288. except IOError:
  289. return ""
  290. def create_toc(src_data):
  291. class MyHTMLParser(HTMLParser):
  292. def __init__(self):
  293. HTMLParser.__init__(self)
  294. self.reset()
  295. self.idx = 1
  296. self.tag_curr = ''
  297. self.tag_last = ''
  298. self.process_text = False
  299. self.data = []
  300. self.tags_allowed = ('h1', 'h2', 'h3')
  301. self.tags_ignored = ('img')
  302. self.text = ''
  303. def handle_starttag(self, tag, attrs):
  304. if tag in self.tags_allowed:
  305. self.process_text = True
  306. self.tag_last = self.tag_curr
  307. self.tag_curr = tag
  308. def handle_endtag(self, tag):
  309. if tag in self.tags_allowed:
  310. self.data.append((tag, '%s_%d' % (tag, self.idx),
  311. self.text))
  312. self.idx += 1
  313. self.process_text = False
  314. self.text = ''
  315. self.tag_curr = self.tag_last
  316. def handle_data(self, data):
  317. if not self.process_text:
  318. return
  319. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  320. self.text += data
  321. else:
  322. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  323. # instantiate the parser and fed it some HTML
  324. parser = MyHTMLParser()
  325. parser.feed(src_data)
  326. return parser.data
  327. def escape_href(label):
  328. # remove html tags
  329. label = re.sub('<[^<]+?>', '', label)
  330. # fix &nbsp;
  331. label = label.replace('&nbsp;', '')
  332. # fix "
  333. label = label.replace('"', '')
  334. # replace space with underscore + lower
  335. return label.replace(' ', '-').lower()
def write_toc(data):
    """Write a table of contents to stdout as a (possibly nested) HTML list.

    :param data: list of (tag, href, text) tuples as produced by
                 create_toc(); nothing is written when it is empty.
                 (The href element is unused -- anchors are regenerated
                 from the text via escape_href().)
    """
    if not data:
        return
    fd = sys.stdout
    fd.write('<div class="toc">\n')
    fd.write('<h4 class="toc">Table of contents</h4>\n')
    fd.write('<ul class="toc">\n')
    first = True
    has_h2 = False
    in_h3 = False
    indent = 4
    for tag, href, text in data:
        if tag == 'h3' and not in_h3 and has_h2:
            # open a nested sub-list for h3 entries under the current h2
            fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
            indent += 4
            in_h3 = True
        elif not first:
            fd.write('</li>\n')

        if tag == 'h2':
            has_h2 = True
            if in_h3:
                # close the h3 sub-list before starting the next h2 entry
                indent -= 4
                fd.write('%s</ul></li>\n' % (' ' * indent))
                in_h3 = False

        # normalize non-breaking spaces so labels render consistently
        text = text.replace(u'\xa0', u' ')
        fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
                 (' ' * indent, escape_href(text), text))
        first = False

    fd.write('</li>\n</ul>\n')
    fd.write('</div>\n')
  366. def update_toc(data):
  367. ret_data = []
  368. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  369. idx = 1
  370. for line in data.splitlines():
  371. if pat.search(line):
  372. xline = pat.split(line)
  373. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  374. idx += 1
  375. ret_data.append(line)
  376. return '\n'.join(ret_data)
  377. def get_addon_path():
  378. """Check if pgm is in the addons list and get addon path
  379. return: pgm path if pgm is addon else None
  380. """
  381. addon_base = os.getenv('GRASS_ADDON_BASE')
  382. if addon_base:
  383. # addons_paths.json is file created during install extension
  384. # check get_addons_paths() function in the g.extension.py file
  385. addons_file = "addons_paths.json"
  386. addons_paths = os.path.join(addon_base, addons_file)
  387. if not os.path.exists(addons_paths):
  388. # Compiled addon has own dir e.g. ~/.grass7/addons/db.join/
  389. # with bin/ docs/ etc/ scripts/ subdir, required for compilation
  390. # addons on osgeo lxd container server and generation of
  391. # modules.xml file (build-xml.py script), when addons_paths.json
  392. # file is stored one level dir up
  393. addons_paths = os.path.join(
  394. os.path.abspath(os.path.join(addon_base, "..")),
  395. addons_file,
  396. )
  397. if not os.path.exists(addons_paths):
  398. return
  399. with open(addons_paths) as f:
  400. addons_paths = json.load(f)
  401. for addon in addons_paths["tree"]:
  402. if pgm == pathlib.Path(addon["path"]).name:
  403. return addon["path"]
# process header: extract the program name/description from the meta
# comments embedded in the source html snippet
src_data = read_file(src_file)
name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
pgm_desc = None
if name:
    # meta tag overrides the CLI-provided name (multi-module pages)
    pgm = name.group(2).strip().split('-', 1)[0].strip()
    name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
    if name_desc:
        pgm_desc = name_desc.group(2).strip()
desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                 re.IGNORECASE)
if desc:
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

# only emit a header when the snippet is not already a complete html page
if not re.search('<html>', src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    """
    Adjusting keywords html pages paths if add-on html man page
    stored on the server
    """
    if html_page_footer_pages_path:
        new_keywords_paths = []
        orig_keywords_paths = re.search(
            r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>',
            tmp_data, re.DOTALL,
        )
        if orig_keywords_paths:
            search_txt = 'href="'
            for i in orig_keywords_paths.group(1).split(','):
                if search_txt in i:
                    # prefix every keyword link with the server path
                    index = i.index(search_txt) + len(search_txt)
                    new_keywords_paths.append(
                        i[:index] + html_page_footer_pages_path + i[index:],
                    )
        if new_keywords_paths:
            tmp_data = tmp_data.replace(
                orig_keywords_paths.group(1),
                ','.join(new_keywords_paths),
            )
    if not re.search('<html>', tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
    if tmp_data:
        # copy the generated interface description, dropping closing tags
        # so the page can be continued below
        for line in tmp_data.splitlines(True):
            if not re.search('</body>|</html>', line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search('</html>', src_data, re.IGNORECASE):
    sys.exit()
# mapping from module name prefix (e.g. 'r' in r.mapcalc) to the name of
# the manual index it belongs to
index_names = {
    'd' : 'display',
    'db': 'database',
    'g' : 'general',
    'i' : 'imagery',
    'm' : 'miscellaneous',
    'ps': 'postscript',
    'p' : 'paint',
    'r' : 'raster',
    'r3': 'raster3d',
    's' : 'sites',
    't' : 'temporal',
    'v' : 'vector'
}
  476. def to_title(name):
  477. """Convert name of command class/family to form suitable for title"""
  478. if name == 'raster3d':
  479. return '3D raster'
  480. elif name == 'postscript':
  481. return 'PostScript'
  482. else:
  483. return name.capitalize()
  484. index_titles = {}
  485. for key, name in index_names.items():
  486. index_titles[key] = to_title(name)
# process footer: determine which family index the page belongs to, either
# from an explicit meta comment or from the module name prefix
index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
if index:
    index_name = index.group(2).strip()
    if '|' in index_name:
        # 'name|Capitalized title' form overrides the generated title
        index_name, index_name_cap = index_name.split('|', 1)
    else:
        index_name_cap = to_title(index_name)
else:
    mod_class = pgm.split('.', 1)[0]
    index_name = index_names.get(mod_class, '')
    index_name_cap = index_titles.get(mod_class, '')

# copyright year for the footer; VERSION_DATE is set by the build system
year = os.getenv("VERSION_DATE")
if not year:
    year = str(datetime.now().year)
# check the names of scripts to assign the right folder
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
if curdir.startswith(topdir):
    # building inside the core source tree
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
else:
    # addons
    source_url = addons_url
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])

url_source = ""
addon_path = None
if os.getenv("SOURCE_URL", ""):
    addon_path = get_addon_path()
    if addon_path:
        # Addon is installed from the local dir
        if os.path.exists(os.getenv("SOURCE_URL")):
            url_source = urlparse.urljoin(
                addons_url,
                addon_path,
            )
        else:
            url_source = urlparse.urljoin(
                os.environ["SOURCE_URL"].split("src")[0],
                addon_path,
            )
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == 'win32':
    # URLs always use forward slashes
    url_source = url_source.replace(os.path.sep, '/')
if index_name:
    # derive the commit-history URL from the source URL by swapping the
    # repository path segment
    branches = "branches"
    tree = "tree"
    commits = "commits"
    if branches in url_source:
        url_log = url_source.replace(branches, commits)
        url_source = url_source.replace(branches, tree)
    else:
        url_log = url_source.replace(tree, commits)
    git_commit = get_last_git_commit(
        src_dir=curdir,
        addon_path=addon_path if addon_path else None,
        is_addon=True if addon_path else False,
    )
    if git_commit["commit"] == "unknown":
        # no commit info available; fall back to the access date
        date_tag = "Accessed: {date}".format(date=git_commit["date"])
    else:
        date_tag = "Latest change: {date} in commit: {commit}".format(
            date=git_commit["date"], commit=git_commit["commit"]
        )
    # SOURCE CODE section followed by the footer with the family index link
    sys.stdout.write(
        sourcecode.substitute(
            URL_SOURCE=url_source,
            PGM=pgm,
            URL_LOG=url_log,
            DATE_TAG=date_tag,
        )
    )
    sys.stdout.write(
        footer_index.substitute(
            INDEXNAME=index_name,
            INDEXNAMECAP=index_name_cap,
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
else:
    # page belongs to no family index; write the reduced footer
    sys.stdout.write(
        footer_noindex.substitute(
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )