#!/usr/bin/env python3
############################################################################
#
# MODULE:    Builds manual pages
# AUTHOR(S): Markus Neteler
#            Glynn Clements
#            Martin Landa <landa.martin gmail.com>
# PURPOSE:   Create HTML manual page snippets
# COPYRIGHT: (C) 2007-2022 by Glynn Clements
#            and the GRASS Development Team
#
#            This program is free software under the GNU General
#            Public License (>=v2). Read the file COPYING that
#            comes with GRASS for details.
#
#############################################################################
  17. import http
  18. import sys
  19. import os
  20. import string
  21. import re
  22. from datetime import datetime
  23. import locale
  24. import json
  25. import pathlib
  26. import shutil
  27. import subprocess
  28. import time
  29. try:
  30. # Python 2 import
  31. from HTMLParser import HTMLParser
  32. except:
  33. # Python 3 import
  34. from html.parser import HTMLParser
  35. from six.moves.urllib import request as urlrequest
  36. from six.moves.urllib.error import HTTPError, URLError
  37. try:
  38. import urlparse
  39. except:
  40. import urllib.parse as urlparse
  41. try:
  42. import grass.script as gs
  43. except ImportError:
  44. # During compilation GRASS GIS
  45. gs = None
  46. HEADERS = {
  47. "User-Agent": "Mozilla/5.0",
  48. }
  49. HTTP_STATUS_CODES = list(http.HTTPStatus)
  50. if sys.version_info[0] == 2:
  51. PY2 = True
  52. else:
  53. PY2 = False
  54. if not PY2:
  55. unicode = str
  56. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  57. trunk_url = ""
  58. addons_url = ""
  59. if grass_version != "unknown":
  60. major, minor, patch = grass_version.split(".")
  61. base_url = "https://github.com/OSGeo"
  62. trunk_url = "{base_url}/grass/tree/releasebranch_{major}_{minor}/".format(
  63. base_url=base_url, major=major, minor=minor
  64. )
  65. addons_url = "{base_url}/grass-addons/tree/grass{major}/".format(
  66. base_url=base_url, major=major
  67. )
  68. def _get_encoding():
  69. encoding = locale.getdefaultlocale()[1]
  70. if not encoding:
  71. encoding = 'UTF-8'
  72. return encoding
  73. def decode(bytes_):
  74. """Decode bytes with default locale and return (unicode) string
  75. No-op if parameter is not bytes (assumed unicode string).
  76. :param bytes bytes_: the bytes to decode
  77. """
  78. if isinstance(bytes_, unicode):
  79. return bytes_
  80. if isinstance(bytes_, bytes):
  81. enc = _get_encoding()
  82. return bytes_.decode(enc)
  83. return unicode(bytes_)
  84. def urlopen(url, *args, **kwargs):
  85. """Wrapper around urlopen. Same function as 'urlopen', but with the
  86. ability to define headers.
  87. """
  88. request = urlrequest.Request(url, headers=HEADERS)
  89. return urlrequest.urlopen(request, *args, **kwargs)
  90. def set_proxy():
  91. """Set proxy"""
  92. proxy = os.getenv("GRASS_PROXY")
  93. if proxy:
  94. proxies = {}
  95. for ptype, purl in (p.split("=") for p in proxy.split(",")):
  96. proxies[ptype] = purl
  97. urlrequest.install_opener(
  98. urlrequest.build_opener(urlrequest.ProxyHandler(proxies))
  99. )
  100. set_proxy()
def download_git_commit(url, response_format, *args, **kwargs):
    """Download module/addon last commit from GitHub API

    :param str url: url address
    :param str response_format: content type expected in the response
                                Content-Type header (e.g. "application/json")

    :return urllib.request.urlopen or None response: response object or
                                                     None (falls through
                                                     after a warning on
                                                     HTTP/URL errors)
    """
    try:
        response = urlopen(url, *args, **kwargs)
        # Any status other than 200 OK is fatal; the http.HTTPStatus enum
        # supplies the human-readable description for the message.
        if not response.code == 200:
            index = HTTP_STATUS_CODES.index(response.code)
            desc = HTTP_STATUS_CODES[index].description
            gs.fatal(
                _(
                    "Download commit from <{url}>, return status code "
                    "{code}, {desc}".format(
                        url=url,
                        code=response.code,
                        desc=desc,
                    ),
                ),
            )
        # Guard against e.g. an HTML error page where JSON was expected.
        if response_format not in response.getheader("Content-Type"):
            gs.fatal(
                _(
                    "Wrong downloaded commit file format. "
                    "Check url <{url}>. Allowed file format is "
                    "{response_format}.".format(
                        url=url,
                        response_format=response_format,
                    ),
                ),
            )
        return response
    except HTTPError as err:
        # Non-fatal: the manual page is still generated, just without the
        # commit info.  "pgm" is the module-level program name set from
        # sys.argv[1] before this function is called.
        gs.warning(
            _(
                "The download of the commit from the GitHub API "
                "server wasn't successful, <{}>. Commit and commit "
                "date will not be included in the <{}> addon html manual "
                "page.".format(err.msg, pgm)
            ),
        )
    except URLError:
        gs.warning(
            _(
                "Download file from <{url}>, failed. Check internet "
                "connection. Commit and commit date will not be included "
                "in the <{pgm}> addon manual page.".format(url=url, pgm=pgm)
            ),
        )
def get_last_git_commit(src_dir, is_addon, addon_path):
    """Get last module/addon git commit

    :param str src_dir: module/addon source dir
    :param bool is_addon: True if it is addon
    :param str addon_path: addon path

    :return dict git_log: dict with key commit and date, if not
                          possible download commit from GitHub API server
                          values of keys have "unknown" string
    """
    unknown = "unknown"
    git_log = {"commit": unknown, "date": unknown}
    cwd = os.getcwd()
    datetime_format = "%A %b %d %H:%M:%S %Y"  # e.g. Sun Jan 16 23:09:35 2022
    # REST endpoints returning only the newest commit touching the path;
    # "major" is the module-level GRASS major version number.
    grass_modules_url = (
        "https://api.github.com/repos/osgeo/grass/commits?path={path}"
        "&page=1&per_page=1&sha=main".format(path=src_dir)
    )  # sha=git_branch_name
    grass_addons_url = (
        "https://api.github.com/repos/osgeo/grass-addons/commits?path={path}"
        "&page=1&per_page=1&sha=grass{major}".format(
            path=addon_path,
            major=major,
        )
    )  # sha=git_branch_name
    if shutil.which("git"):
        if os.path.exists(src_dir):
            os.chdir(src_dir)
            # Fallback date: mtime of the source dir, kept when neither
            # git nor the GitHub API yields a real commit date.
            git_log["date"] = time.ctime(os.path.getmtime(src_dir))
        stdout, stderr = subprocess.Popen(
            args=["git", "log", "-1"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ).communicate()
        stdout = decode(stdout)
        stderr = decode(stderr)
        # NOTE(review): cwd is restored only on this straight-line path;
        # an exception above would leave the process chdir'ed to src_dir.
        os.chdir(cwd)
        if stderr and "fatal: not a git repository" in stderr:
            # Not a git checkout (e.g. release tarball): query GitHub.
            response = download_git_commit(
                url=grass_addons_url if is_addon else grass_modules_url,
                response_format="application/json",
            )
            if response:
                commit = json.loads(response.read())
                if commit:
                    git_log["commit"] = commit[0]["sha"]
                    git_log["date"] = datetime.strptime(
                        commit[0]["commit"]["author"]["date"], "%Y-%m-%dT%H:%M:%SZ"
                    ).strftime(datetime_format)
        else:
            if stdout:
                # "git log -1" output: line 0 is "commit <sha>",
                # line 2 is "Date:   <ctime> <tz offset>".
                commit = stdout.splitlines()
                git_log["commit"] = commit[0].split(" ")[-1]
                # NOTE(review): lstrip("Date:") strips a character SET,
                # not a prefix; it works because the text after "Date:"
                # starts with spaces, which stop the stripping.
                commit_date = commit[2].lstrip("Date:").strip()
                # Drop the trailing timezone offset.
                git_log["date"] = commit_date.rsplit(" ", 1)[0]
    return git_log
  207. html_page_footer_pages_path = (
  208. os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  209. if os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  210. else ""
  211. )
  212. pgm = sys.argv[1]
  213. src_file = "%s.html" % pgm
  214. tmp_file = "%s.tmp.html" % pgm
# Static page header: doctype, title, stylesheet and logo.  ${PGM} is
# substituted with the module name via string.Template.
header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>GRASS GIS Manual: ${PGM}</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">
<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""
# Header variant for pages without a NAME section (meta page description).
header_nopgm = """<h2>${PGM}</h2>
"""
# Header variant: module name only.
header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""
# Header variant: module name plus its one-line description.
header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""
# SOURCE CODE section: links to the source tree and its git history, plus
# the last-commit/accessed date tag.
sourcecode = string.Template(
    """<h2>SOURCE CODE</h2>
<p>
  Available at:
  <a href="${URL_SOURCE}">${PGM} source code</a>
  (<a href="${URL_LOG}">history</a>)
</p>
<p>
  ${DATE_TAG}
</p>
"""
)
# Page footer used when the module belongs to a known command family
# (includes the per-family index link).
footer_index = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
"""
)
# Page footer used when no command-family index applies.
footer_noindex = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
"""
)
  284. def read_file(name):
  285. try:
  286. f = open(name, 'rb')
  287. s = f.read()
  288. f.close()
  289. if PY2:
  290. return s
  291. else:
  292. return decode(s)
  293. except IOError:
  294. return ""
  295. def create_toc(src_data):
  296. class MyHTMLParser(HTMLParser):
  297. def __init__(self):
  298. HTMLParser.__init__(self)
  299. self.reset()
  300. self.idx = 1
  301. self.tag_curr = ''
  302. self.tag_last = ''
  303. self.process_text = False
  304. self.data = []
  305. self.tags_allowed = ('h1', 'h2', 'h3')
  306. self.tags_ignored = ('img')
  307. self.text = ''
  308. def handle_starttag(self, tag, attrs):
  309. if tag in self.tags_allowed:
  310. self.process_text = True
  311. self.tag_last = self.tag_curr
  312. self.tag_curr = tag
  313. def handle_endtag(self, tag):
  314. if tag in self.tags_allowed:
  315. self.data.append((tag, '%s_%d' % (tag, self.idx),
  316. self.text))
  317. self.idx += 1
  318. self.process_text = False
  319. self.text = ''
  320. self.tag_curr = self.tag_last
  321. def handle_data(self, data):
  322. if not self.process_text:
  323. return
  324. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  325. self.text += data
  326. else:
  327. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  328. # instantiate the parser and fed it some HTML
  329. parser = MyHTMLParser()
  330. parser.feed(src_data)
  331. return parser.data
  332. def escape_href(label):
  333. # remove html tags
  334. label = re.sub('<[^<]+?>', '', label)
  335. # fix &nbsp;
  336. label = label.replace('&nbsp;', '')
  337. # fix "
  338. label = label.replace('"', '')
  339. # replace space with underscore + lower
  340. return label.replace(' ', '-').lower()
def write_toc(data):
    """Write a nested <ul> table of contents to stdout.

    :param list data: (tag, anchor_id, text) tuples from create_toc();
                      nothing is written when the list is empty.
                      h3 entries that follow an h2 are rendered as a
                      nested sub-list.  Note: the anchor_id element is
                      unused here — anchors are recomputed from the text
                      via escape_href() to match update_toc().
    """
    if not data:
        return

    fd = sys.stdout
    fd.write('<div class="toc">\n')
    fd.write('<h4 class="toc">Table of contents</h4>\n')
    fd.write('<ul class="toc">\n')
    first = True       # no </li> needs closing before the first entry
    has_h2 = False     # h3 entries nest only under a previously seen h2
    in_h3 = False      # currently inside a nested h3 sub-list
    indent = 4
    for tag, href, text in data:
        if tag == 'h3' and not in_h3 and has_h2:
            # open a nested sub-list for a run of h3 headings
            fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
            indent += 4
            in_h3 = True
        elif not first:
            fd.write('</li>\n')

        if tag == 'h2':
            has_h2 = True
            if in_h3:
                # close the h3 sub-list before starting the next h2 entry
                indent -= 4
                fd.write('%s</ul></li>\n' % (' ' * indent))
                in_h3 = False

        # replace non-breaking spaces so the link text is plain ASCII space
        text = text.replace(u'\xa0', u' ')
        fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
                 (' ' * indent, escape_href(text), text))
        first = False

    fd.write('</li>\n</ul>\n')
    fd.write('</div>\n')
  371. def update_toc(data):
  372. ret_data = []
  373. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  374. idx = 1
  375. for line in data.splitlines():
  376. if pat.search(line):
  377. xline = pat.split(line)
  378. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  379. idx += 1
  380. ret_data.append(line)
  381. return '\n'.join(ret_data)
def get_addon_path():
    """Check if pgm is in the addons list and get addon path

    Looks up the module-level "pgm" name in the addons_paths.json index
    under $GRASS_ADDON_BASE (or one directory above it).

    return: pgm path if pgm is addon else None
    """
    addon_base = os.getenv('GRASS_ADDON_BASE')
    if addon_base:
        # addons_paths.json is file created during install extension
        # check get_addons_paths() function in the g.extension.py file
        addons_file = "addons_paths.json"
        addons_paths = os.path.join(addon_base, addons_file)
        if not os.path.exists(addons_paths):
            # Compiled addon has own dir e.g. ~/.grass7/addons/db.join/
            # with bin/ docs/ etc/ scripts/ subdir, required for compilation
            # addons on osgeo lxd container server and generation of
            # modules.xml file (build-xml.py script), when addons_paths.json
            # file is stored one level dir up
            addons_paths = os.path.join(
                os.path.abspath(os.path.join(addon_base, "..")),
                addons_file,
            )
            if not os.path.exists(addons_paths):
                # no index anywhere -> treat pgm as a core module
                return
        with open(addons_paths) as f:
            addons_paths = json.load(f)
        # "tree" lists every addon with its repository-relative path;
        # match on the final path component (the addon's name).
        for addon in addons_paths["tree"]:
            if pgm == pathlib.Path(addon["path"]).name:
                return addon["path"]
# process header
src_data = read_file(src_file)

# The generated HTML snippet may carry metadata in HTML comments;
# "meta page name" overrides the program name taken from argv.
name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
pgm_desc = None
if name:
    pgm = name.group(2).strip().split('-', 1)[0].strip()
    name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
    if name_desc:
        pgm_desc = name_desc.group(2).strip()
desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                 re.IGNORECASE)
if desc:
    # A page-level description replaces the NAME section entirely.
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

# A full <html> page in src_file means the header/TOC machinery is skipped.
if not re.search('<html>', src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    """
    Adjusting keywords html pages paths if add-on html man page
    stored on the server
    """
    if html_page_footer_pages_path:
        # Prefix every keyword href in the KEYWORDS section with the
        # server subdirectory path.
        new_keywords_paths = []
        orig_keywords_paths = re.search(
            r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>',
            tmp_data, re.DOTALL,
        )
        if orig_keywords_paths:
            search_txt = 'href="'
            for i in orig_keywords_paths.group(1).split(','):
                if search_txt in i:
                    index = i.index(search_txt) + len(search_txt)
                    new_keywords_paths.append(
                        i[:index] + html_page_footer_pages_path + i[index:],
                    )
        if new_keywords_paths:
            tmp_data = tmp_data.replace(
                orig_keywords_paths.group(1),
                ','.join(new_keywords_paths),
            )
    if not re.search('<html>', tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
    if tmp_data:
        # Emit the g.parser-generated body, dropping its closing tags so
        # the footer can be appended below.
        for line in tmp_data.splitlines(True):
            if not re.search('</body>|</html>', line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search('</html>', src_data, re.IGNORECASE):
    sys.exit()
  467. index_names = {
  468. 'd' : 'display',
  469. 'db': 'database',
  470. 'g' : 'general',
  471. 'i' : 'imagery',
  472. 'm' : 'miscellaneous',
  473. 'ps': 'postscript',
  474. 'p' : 'paint',
  475. 'r' : 'raster',
  476. 'r3': 'raster3d',
  477. 's' : 'sites',
  478. 't' : 'temporal',
  479. 'v' : 'vector'
  480. }
  481. def to_title(name):
  482. """Convert name of command class/family to form suitable for title"""
  483. if name == 'raster3d':
  484. return '3D raster'
  485. elif name == 'postscript':
  486. return 'PostScript'
  487. else:
  488. return name.capitalize()
  489. index_titles = {}
  490. for key, name in index_names.items():
  491. index_titles[key] = to_title(name)
# process footer
# The family index can be forced (optionally with a custom capitalized
# name after "|") via a "meta page index" comment; otherwise it is
# derived from the module-name prefix.
index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
if index:
    index_name = index.group(2).strip()
    if '|' in index_name:
        index_name, index_name_cap = index_name.split('|', 1)
    else:
        index_name_cap = to_title(index_name)
else:
    mod_class = pgm.split('.', 1)[0]
    index_name = index_names.get(mod_class, '')
    index_name_cap = index_titles.get(mod_class, '')

year = os.getenv("VERSION_DATE")
if not year:
    year = str(datetime.now().year)

# check the names of scripts to assign the right folder
# NOTE(review): os.path.abspath(None) raises if MODULE_TOPDIR is unset —
# presumably the build system always exports it; confirm.
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
if curdir.startswith(topdir):
    # building inside the core source tree
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
else:
    # addons: keep the last three path components as the repo-relative dir
    source_url = addons_url
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])

url_source = ""
addon_path = None
if os.getenv("SOURCE_URL", ""):
    addon_path = get_addon_path()
    if addon_path:
        # Addon is installed from the local dir
        if os.path.exists(os.getenv("SOURCE_URL")):
            url_source = urlparse.urljoin(
                addons_url,
                addon_path,
            )
        else:
            # strip everything from "src" onwards to get the repo base URL
            url_source = urlparse.urljoin(
                os.environ["SOURCE_URL"].split("src")[0],
                addon_path,
            )
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == 'win32':
    # URLs always use forward slashes
    url_source = url_source.replace(os.path.sep, '/')

if index_name:
    # Derive the git-history URL from the source URL: GitHub uses
    # ".../tree/..." for browsing and ".../commits/..." for history.
    branches = "branches"
    tree = "tree"
    commits = "commits"
    if branches in url_source:
        url_log = url_source.replace(branches, commits)
        url_source = url_source.replace(branches, tree)
    else:
        url_log = url_source.replace(tree, commits)
    git_commit = get_last_git_commit(
        src_dir=curdir,
        addon_path=addon_path if addon_path else None,
        is_addon=True if addon_path else False,
    )
    if git_commit["commit"] == "unknown":
        # no commit info available: fall back to the access/mtime date
        date_tag = "Accessed: {date}".format(date=git_commit["date"])
    else:
        date_tag = "Latest change: {date} in commit: {commit}".format(
            date=git_commit["date"], commit=git_commit["commit"]
        )
    sys.stdout.write(
        sourcecode.substitute(
            URL_SOURCE=url_source,
            PGM=pgm,
            URL_LOG=url_log,
            DATE_TAG=date_tag,
        )
    )
    sys.stdout.write(
        footer_index.substitute(
            INDEXNAME=index_name,
            INDEXNAMECAP=index_name_cap,
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
else:
    sys.stdout.write(
        footer_noindex.substitute(
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )