mkhtml.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2022 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import http
  18. import sys
  19. import os
  20. import string
  21. import re
  22. from datetime import datetime
  23. import locale
  24. import json
  25. import pathlib
  26. import shutil
  27. import subprocess
  28. import time
  29. try:
  30. # Python 2 import
  31. from HTMLParser import HTMLParser
  32. except:
  33. # Python 3 import
  34. from html.parser import HTMLParser
  35. from six.moves.urllib import request as urlrequest
  36. from six.moves.urllib.error import HTTPError, URLError
  37. try:
  38. import urlparse
  39. except:
  40. import urllib.parse as urlparse
  41. try:
  42. import grass.script as gs
  43. except ImportError:
  44. # During compilation GRASS GIS
  45. gs = None
  46. HEADERS = {
  47. "User-Agent": "Mozilla/5.0",
  48. }
  49. HTTP_STATUS_CODES = list(http.HTTPStatus)
  50. if sys.version_info[0] == 2:
  51. PY2 = True
  52. else:
  53. PY2 = False
  54. if not PY2:
  55. unicode = str
  56. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  57. trunk_url = ""
  58. addons_url = ""
  59. if grass_version != "unknown":
  60. major, minor, patch = grass_version.split(".")
  61. grass_git_branch = "releasebranch_{major}_{minor}".format(
  62. major=major,
  63. minor=minor,
  64. )
  65. base_url = "https://github.com/OSGeo"
  66. trunk_url = "{base_url}/grass/tree/{branch}/".format(
  67. base_url=base_url, branch=grass_git_branch
  68. )
  69. addons_url = "{base_url}/grass-addons/tree/grass{major}/".format(
  70. base_url=base_url, major=major
  71. )
  72. def _get_encoding():
  73. encoding = locale.getdefaultlocale()[1]
  74. if not encoding:
  75. encoding = 'UTF-8'
  76. return encoding
  77. def decode(bytes_):
  78. """Decode bytes with default locale and return (unicode) string
  79. No-op if parameter is not bytes (assumed unicode string).
  80. :param bytes bytes_: the bytes to decode
  81. """
  82. if isinstance(bytes_, unicode):
  83. return bytes_
  84. if isinstance(bytes_, bytes):
  85. enc = _get_encoding()
  86. return bytes_.decode(enc)
  87. return unicode(bytes_)
  88. def urlopen(url, *args, **kwargs):
  89. """Wrapper around urlopen. Same function as 'urlopen', but with the
  90. ability to define headers.
  91. """
  92. request = urlrequest.Request(url, headers=HEADERS)
  93. return urlrequest.urlopen(request, *args, **kwargs)
  94. def set_proxy():
  95. """Set proxy"""
  96. proxy = os.getenv("GRASS_PROXY")
  97. if proxy:
  98. proxies = {}
  99. for ptype, purl in (p.split("=") for p in proxy.split(",")):
  100. proxies[ptype] = purl
  101. urlrequest.install_opener(
  102. urlrequest.build_opener(urlrequest.ProxyHandler(proxies))
  103. )
  104. set_proxy()
def download_git_commit(url, response_format, *args, **kwargs):
    """Download module/addon last commit from GitHub API

    Calls gs.fatal() (which does not return) on a non-200 status or on an
    unexpected Content-Type; returns None after warning when the request
    itself fails.

    :param str url: url address
    :param str response_format: expected content type (e.g. "application/json")

    :return urllib.request.urlopen or None response: response object or
                                                     None
    """
    # NOTE(review): relies on module globals `gs` (grass.script), `pgm`
    # (module name) and the translation function `_` — presumably installed
    # by the GRASS runtime; verify before reusing this function elsewhere.
    try:
        response = urlopen(url, *args, **kwargs)
        if not response.code == 200:
            # Map the status code to its human-readable description.
            index = HTTP_STATUS_CODES.index(response.code)
            desc = HTTP_STATUS_CODES[index].description
            gs.fatal(
                _(
                    "Download commit from <{url}>, return status code "
                    "{code}, {desc}".format(
                        url=url,
                        code=response.code,
                        desc=desc,
                    ),
                ),
            )
        if response_format not in response.getheader("Content-Type"):
            gs.fatal(
                _(
                    "Wrong downloaded commit file format. "
                    "Check url <{url}>. Allowed file format is "
                    "{response_format}.".format(
                        url=url,
                        response_format=response_format,
                    ),
                ),
            )
        return response
    except HTTPError as err:
        # Best-effort: the page is still generated, just without commit info.
        gs.warning(
            _(
                "The download of the commit from the GitHub API "
                "server wasn't successful, <{}>. Commit and commit "
                "date will not be included in the <{}> addon html manual "
                "page.".format(err.msg, pgm)
            ),
        )
    except URLError:
        gs.warning(
            _(
                "Download file from <{url}>, failed. Check internet "
                "connection. Commit and commit date will not be included "
                "in the <{pgm}> addon manual page.".format(url=url, pgm=pgm)
            ),
        )
  156. def get_last_git_commit(src_dir, is_addon, addon_path):
  157. """Get last module/addon git commit
  158. :param str src_dir: module/addon source dir
  159. :param bool is_addon: True if it is addon
  160. :param str addon_path: addon path
  161. :return dict git_log: dict with key commit and date, if not
  162. possible download commit from GitHub API server
  163. values of keys have "unknown" string
  164. """
  165. unknown = "unknown"
  166. git_log = {"commit": unknown, "date": unknown}
  167. datetime_format = "%A %b %d %H:%M:%S %Y" # e.g. Sun Jan 16 23:09:35 2022
  168. if is_addon:
  169. grass_addons_url = (
  170. "https://api.github.com/repos/osgeo/grass-addons/commits?path={path}"
  171. "&page=1&per_page=1&sha=grass{major}".format(
  172. path=addon_path,
  173. major=major,
  174. )
  175. ) # sha=git_branch_name
  176. else:
  177. core_module_path = os.path.join(
  178. *(set(src_dir.split(os.path.sep)) ^ set(topdir.split(os.path.sep)))
  179. )
  180. grass_modules_url = (
  181. "https://api.github.com/repos/osgeo/grass/commits?path={path}"
  182. "&page=1&per_page=1&sha={branch}".format(
  183. branch=grass_git_branch,
  184. path=core_module_path,
  185. )
  186. ) # sha=git_branch_name
  187. if shutil.which("git"):
  188. if os.path.exists(src_dir):
  189. git_log["date"] = time.ctime(os.path.getmtime(src_dir))
  190. stdout, stderr = subprocess.Popen(
  191. args=["git", "log", "-1", src_dir],
  192. stdout=subprocess.PIPE,
  193. stderr=subprocess.PIPE,
  194. ).communicate()
  195. stdout = decode(stdout)
  196. stderr = decode(stderr)
  197. if stderr and "fatal: not a git repository" in stderr:
  198. response = download_git_commit(
  199. url=grass_addons_url if is_addon else grass_modules_url,
  200. response_format="application/json",
  201. )
  202. if response:
  203. commit = json.loads(response.read())
  204. if commit:
  205. git_log["commit"] = commit[0]["sha"]
  206. git_log["date"] = datetime.strptime(
  207. commit[0]["commit"]["author"]["date"],
  208. "%Y-%m-%dT%H:%M:%SZ",
  209. ).strftime(datetime_format)
  210. else:
  211. if stdout:
  212. commit = stdout.splitlines()
  213. git_log["commit"] = commit[0].split(" ")[-1]
  214. commit_date = commit[2].lstrip("Date:").strip()
  215. git_log["date"] = commit_date.rsplit(" ", 1)[0]
  216. return git_log
  217. html_page_footer_pages_path = (
  218. os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  219. if os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  220. else ""
  221. )
  222. pgm = sys.argv[1]
  223. src_file = "%s.html" % pgm
  224. tmp_file = "%s.tmp.html" % pgm
  225. header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  226. <html>
  227. <head>
  228. <title>GRASS GIS Manual: ${PGM}</title>
  229. <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  230. <link rel="stylesheet" href="grassdocs.css" type="text/css">
  231. </head>
  232. <body bgcolor="white">
  233. <div id="container">
  234. <a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
  235. <hr class="header">
  236. """
  237. header_nopgm = """<h2>${PGM}</h2>
  238. """
  239. header_pgm = """<h2>NAME</h2>
  240. <em><b>${PGM}</b></em>
  241. """
  242. header_pgm_desc = """<h2>NAME</h2>
  243. <em><b>${PGM}</b></em> - ${PGM_DESC}
  244. """
  245. sourcecode = string.Template(
  246. """<h2>SOURCE CODE</h2>
  247. <p>
  248. Available at:
  249. <a href="${URL_SOURCE}">${PGM} source code</a>
  250. (<a href="${URL_LOG}">history</a>)
  251. </p>
  252. <p>
  253. ${DATE_TAG}
  254. </p>
  255. """
  256. )
  257. footer_index = string.Template(
  258. """<hr class="header">
  259. <p>
  260. <a href="index.html">Main index</a> |
  261. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
  262. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  263. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  264. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  265. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  266. </p>
  267. <p>
  268. &copy; 2003-${YEAR}
  269. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  270. GRASS GIS ${GRASS_VERSION} Reference Manual
  271. </p>
  272. </div>
  273. </body>
  274. </html>
  275. """)
  276. footer_noindex = string.Template(
  277. """<hr class="header">
  278. <p>
  279. <a href="index.html">Main index</a> |
  280. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  281. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  282. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  283. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  284. </p>
  285. <p>
  286. &copy; 2003-${YEAR}
  287. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  288. GRASS GIS ${GRASS_VERSION} Reference Manual
  289. </p>
  290. </div>
  291. </body>
  292. </html>
  293. """)
  294. def read_file(name):
  295. try:
  296. f = open(name, 'rb')
  297. s = f.read()
  298. f.close()
  299. if PY2:
  300. return s
  301. else:
  302. return decode(s)
  303. except IOError:
  304. return ""
  305. def create_toc(src_data):
  306. class MyHTMLParser(HTMLParser):
  307. def __init__(self):
  308. HTMLParser.__init__(self)
  309. self.reset()
  310. self.idx = 1
  311. self.tag_curr = ''
  312. self.tag_last = ''
  313. self.process_text = False
  314. self.data = []
  315. self.tags_allowed = ('h1', 'h2', 'h3')
  316. self.tags_ignored = ('img')
  317. self.text = ''
  318. def handle_starttag(self, tag, attrs):
  319. if tag in self.tags_allowed:
  320. self.process_text = True
  321. self.tag_last = self.tag_curr
  322. self.tag_curr = tag
  323. def handle_endtag(self, tag):
  324. if tag in self.tags_allowed:
  325. self.data.append((tag, '%s_%d' % (tag, self.idx),
  326. self.text))
  327. self.idx += 1
  328. self.process_text = False
  329. self.text = ''
  330. self.tag_curr = self.tag_last
  331. def handle_data(self, data):
  332. if not self.process_text:
  333. return
  334. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  335. self.text += data
  336. else:
  337. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  338. # instantiate the parser and fed it some HTML
  339. parser = MyHTMLParser()
  340. parser.feed(src_data)
  341. return parser.data
  342. def escape_href(label):
  343. # remove html tags
  344. label = re.sub('<[^<]+?>', '', label)
  345. # fix &nbsp;
  346. label = label.replace('&nbsp;', '')
  347. # fix "
  348. label = label.replace('"', '')
  349. # replace space with underscore + lower
  350. return label.replace(' ', '-').lower()
def write_toc(data):
    """Write a nested <ul> table of contents to stdout.

    *data* is the [(tag, anchor_id, text), ...] list produced by
    create_toc(). h3 entries following an h2 are rendered as a nested
    sub-list; note the anchor_id element is unused here — links are
    re-derived from the text via escape_href().
    """
    if not data:
        return

    fd = sys.stdout
    fd.write('<div class="toc">\n')
    fd.write('<h4 class="toc">Table of contents</h4>\n')
    fd.write('<ul class="toc">\n')
    # State machine: `first` suppresses the closing </li> before the first
    # item, `has_h2`/`in_h3` track whether we are inside an h3 sub-list.
    first = True
    has_h2 = False
    in_h3 = False
    indent = 4
    for tag, href, text in data:
        if tag == 'h3' and not in_h3 and has_h2:
            # Open a nested sub-list for a run of h3 entries under an h2.
            fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
            indent += 4
            in_h3 = True
        elif not first:
            fd.write('</li>\n')

        if tag == 'h2':
            has_h2 = True
            if in_h3:
                # Back out of the h3 sub-list before starting a new h2 item.
                indent -= 4
                fd.write('%s</ul></li>\n' % (' ' * indent))
                in_h3 = False

        # Replace non-breaking spaces so escape_href/display stay clean.
        text = text.replace(u'\xa0', u' ')
        fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
                     (' ' * indent, escape_href(text), text))
        first = False

    fd.write('</li>\n</ul>\n')
    fd.write('</div>\n')
  381. def update_toc(data):
  382. ret_data = []
  383. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  384. idx = 1
  385. for line in data.splitlines():
  386. if pat.search(line):
  387. xline = pat.split(line)
  388. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  389. idx += 1
  390. ret_data.append(line)
  391. return '\n'.join(ret_data)
  392. def get_addon_path():
  393. """Check if pgm is in the addons list and get addon path
  394. return: pgm path if pgm is addon else None
  395. """
  396. addon_base = os.getenv('GRASS_ADDON_BASE')
  397. if addon_base:
  398. # addons_paths.json is file created during install extension
  399. # check get_addons_paths() function in the g.extension.py file
  400. addons_file = "addons_paths.json"
  401. addons_paths = os.path.join(addon_base, addons_file)
  402. if not os.path.exists(addons_paths):
  403. # Compiled addon has own dir e.g. ~/.grass7/addons/db.join/
  404. # with bin/ docs/ etc/ scripts/ subdir, required for compilation
  405. # addons on osgeo lxd container server and generation of
  406. # modules.xml file (build-xml.py script), when addons_paths.json
  407. # file is stored one level dir up
  408. addons_paths = os.path.join(
  409. os.path.abspath(os.path.join(addon_base, "..")),
  410. addons_file,
  411. )
  412. if not os.path.exists(addons_paths):
  413. return
  414. with open(addons_paths) as f:
  415. addons_paths = json.load(f)
  416. for addon in addons_paths["tree"]:
  417. if pgm == pathlib.Path(addon["path"]).name:
  418. return addon["path"]
# process header
src_data = read_file(src_file)

# '<!-- meta page name: ... -->' overrides the program name taken from argv;
# only the part before the first '-' is used as the name.
name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
pgm_desc = None
if name:
    pgm = name.group(2).strip().split('-', 1)[0].strip()
    name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
    if name_desc:
        pgm_desc = name_desc.group(2).strip()

# Pick the header template: a plain page title when a page description is
# given, otherwise a NAME section (with or without the short description).
desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                 re.IGNORECASE)
if desc:
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

# Only emit the generated header when the page is not already complete HTML.
if not re.search('<html>', src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    """
    Adjusting keywords html pages paths if add-on html man page
    stored on the server
    """
    if html_page_footer_pages_path:
        new_keywords_paths = []
        orig_keywords_paths = re.search(
            r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>',
            tmp_data, re.DOTALL,
        )
        if orig_keywords_paths:
            # Prefix every keyword link's href with the server path.
            search_txt = 'href="'
            for i in orig_keywords_paths.group(1).split(','):
                if search_txt in i:
                    index = i.index(search_txt) + len(search_txt)
                    new_keywords_paths.append(
                        i[:index] + html_page_footer_pages_path + i[index:],
                    )
            if new_keywords_paths:
                tmp_data = tmp_data.replace(
                    orig_keywords_paths.group(1),
                    ','.join(new_keywords_paths),
                )
    if not re.search('<html>', tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
    if tmp_data:
        # Copy the generated interface description, dropping any closing tags
        # so the footer can still be appended below.
        for line in tmp_data.splitlines(True):
            if not re.search('</body>|</html>', line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search('</html>', src_data, re.IGNORECASE):
    sys.exit()
# Mapping from the module-name prefix (e.g. the "r" in r.slope.aspect) to
# the command family name used for the per-family manual index pages.
index_names = {
    'd' : 'display',
    'db': 'database',
    'g' : 'general',
    'i' : 'imagery',
    'm' : 'miscellaneous',
    'ps': 'postscript',
    'p' : 'paint',
    'r' : 'raster',
    'r3': 'raster3d',
    's' : 'sites',
    't' : 'temporal',
    'v' : 'vector'
}
  491. def to_title(name):
  492. """Convert name of command class/family to form suitable for title"""
  493. if name == 'raster3d':
  494. return '3D raster'
  495. elif name == 'postscript':
  496. return 'PostScript'
  497. else:
  498. return name.capitalize()
# Title form of every family name, built once up front.
index_titles = {}
for key, name in index_names.items():
    index_titles[key] = to_title(name)

# process footer
# An explicit '<!-- meta page index: name -->' comment overrides the family
# derived from the module prefix; 'name|Capitalized Name' sets both forms.
index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
if index:
    index_name = index.group(2).strip()
    if '|' in index_name:
        index_name, index_name_cap = index_name.split('|', 1)
    else:
        index_name_cap = to_title(index_name)
else:
    # Fall back to the module family prefix, e.g. 'r' from 'r.slope.aspect'.
    mod_class = pgm.split('.', 1)[0]
    index_name = index_names.get(mod_class, '')
    index_name_cap = index_titles.get(mod_class, '')

# Copyright year: VERSION_DATE when provided by the build, else current year.
year = os.getenv("VERSION_DATE")
if not year:
    year = str(datetime.now().year)

# check the names of scripts to assign the right folder
# NOTE(review): assumes MODULE_TOPDIR is always set by the build system —
# os.path.abspath(None) would raise otherwise; confirm against the Makefiles.
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
if curdir.startswith(topdir):
    # Core module inside the source tree.
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
else:
    # addons
    source_url = addons_url
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
url_source = ""
addon_path = None
if os.getenv("SOURCE_URL", ""):
    addon_path = get_addon_path()
    if addon_path:
        # Addon is installed from the local dir
        if os.path.exists(os.getenv("SOURCE_URL")):
            url_source = urlparse.urljoin(
                addons_url,
                addon_path,
            )
        else:
            url_source = urlparse.urljoin(
                os.environ["SOURCE_URL"].split("src")[0],
                addon_path,
            )
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == 'win32':
    # urljoin may have picked up backslashes from os.path.sep on Windows.
    url_source = url_source.replace(os.path.sep, '/')

if index_name:
    # Derive the matching 'tree' (browse) and 'commits' (history) GitHub URLs.
    branches = "branches"
    tree = "tree"
    commits = "commits"
    if branches in url_source:
        url_log = url_source.replace(branches, commits)
        url_source = url_source.replace(branches, tree)
    else:
        url_log = url_source.replace(tree, commits)
    git_commit = get_last_git_commit(
        src_dir=curdir,
        addon_path=addon_path if addon_path else None,
        is_addon=True if addon_path else False,
    )
    if git_commit["commit"] == "unknown":
        date_tag = "Accessed: {date}".format(date=git_commit["date"])
    else:
        date_tag = "Latest change: {date} in commit: {commit}".format(
            date=git_commit["date"], commit=git_commit["commit"]
        )
    sys.stdout.write(
        sourcecode.substitute(
            URL_SOURCE=url_source,
            PGM=pgm,
            URL_LOG=url_log,
            DATE_TAG=date_tag,
        )
    )
    sys.stdout.write(
        footer_index.substitute(
            INDEXNAME=index_name,
            INDEXNAMECAP=index_name_cap,
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
else:
    sys.stdout.write(
        footer_noindex.substitute(
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )