mkhtml.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2021 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. import locale
  23. import json
  24. import pathlib
  25. try:
  26. # Python 2 import
  27. from HTMLParser import HTMLParser
  28. except ImportError:
  29. # Python 3 import
  30. from html.parser import HTMLParser
  31. try:
  32. import urlparse
  33. except ImportError:
  34. import urllib.parse as urlparse
  35. if sys.version_info[0] == 2:
  36. PY2 = True
  37. else:
  38. PY2 = False
  39. if not PY2:
  40. unicode = str
  41. def _get_encoding():
  42. encoding = locale.getdefaultlocale()[1]
  43. if not encoding:
  44. encoding = "UTF-8"
  45. return encoding
  46. def decode(bytes_):
  47. """Decode bytes with default locale and return (unicode) string
  48. No-op if parameter is not bytes (assumed unicode string).
  49. :param bytes bytes_: the bytes to decode
  50. """
  51. if isinstance(bytes_, unicode):
  52. return bytes_
  53. if isinstance(bytes_, bytes):
  54. enc = _get_encoding()
  55. return bytes_.decode(enc)
  56. return unicode(bytes_)
  57. html_page_footer_pages_path = (
  58. os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  59. if os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  60. else ""
  61. )
  62. pgm = sys.argv[1]
  63. src_file = "%s.html" % pgm
  64. tmp_file = "%s.tmp.html" % pgm
  65. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  66. trunk_url = ""
  67. addons_url = ""
  68. if grass_version != "unknown":
  69. major, minor, patch = grass_version.split(".")
  70. trunk_url = "https://github.com/OSGeo/grass/tree/main/"
  71. addons_url = f"https://github.com/OSGeo/grass-addons/tree/grass{major}/"
# Page header pieces.  They are plain strings (not yet string.Template
# objects): the header-processing code below concatenates header_base with
# exactly one of the three heading variants and wraps the result in
# string.Template.  ${PGM} and ${PGM_DESC} are the placeholders filled in at
# substitution time.

# Common document prologue: doctype, <head> and the logo banner.
header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<title>${PGM} - GRASS GIS Manual</title>
<meta name="Author" content="GRASS Development Team">
<meta name="description" content="${PGM}: ${PGM_DESC}">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">
<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""
# Heading used when the source carries a "meta page description" comment:
# ${PGM} is then the page title itself, without a NAME section.
header_nopgm = """<h2>${PGM}</h2>
"""
# NAME section without a description.
header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""
# NAME section with the program name followed by its description.
header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""
# "SOURCE CODE" section linking to the source tree (${URL_SOURCE}) and to
# its commit history (${URL_LOG}) for ${PGM}.
sourcecode = string.Template(
    """<h2>SOURCE CODE</h2>
<p>
  Available at:
  <a href="${URL_SOURCE}">${PGM} source code</a>
  (<a href="${URL_LOG}">history</a>)
</p>
"""
)
# Page footer used when an index page name is known: besides the common
# index links it links to the ${INDEXNAME}.html index.  It also closes the
# container <div>, <body> and <html> elements.
footer_index = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="https://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
"""
)
# Same footer without the ${INDEXNAME} index link, used when no index page
# name is available.
footer_noindex = string.Template(
    """<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="https://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
"""
)
  142. def read_file(name):
  143. try:
  144. f = open(name, "rb")
  145. s = f.read()
  146. f.close()
  147. if PY2:
  148. return s
  149. else:
  150. return decode(s)
  151. except IOError:
  152. return ""
  153. def create_toc(src_data):
  154. class MyHTMLParser(HTMLParser):
  155. def __init__(self):
  156. HTMLParser.__init__(self)
  157. self.reset()
  158. self.idx = 1
  159. self.tag_curr = ""
  160. self.tag_last = ""
  161. self.process_text = False
  162. self.data = []
  163. self.tags_allowed = ("h1", "h2", "h3")
  164. self.tags_ignored = "img"
  165. self.text = ""
  166. def handle_starttag(self, tag, attrs):
  167. if tag in self.tags_allowed:
  168. self.process_text = True
  169. self.tag_last = self.tag_curr
  170. self.tag_curr = tag
  171. def handle_endtag(self, tag):
  172. if tag in self.tags_allowed:
  173. self.data.append((tag, "%s_%d" % (tag, self.idx), self.text))
  174. self.idx += 1
  175. self.process_text = False
  176. self.text = ""
  177. self.tag_curr = self.tag_last
  178. def handle_data(self, data):
  179. if not self.process_text:
  180. return
  181. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  182. self.text += data
  183. else:
  184. self.text += "<%s>%s</%s>" % (self.tag_curr, data, self.tag_curr)
  185. # instantiate the parser and fed it some HTML
  186. parser = MyHTMLParser()
  187. parser.feed(src_data)
  188. return parser.data
  189. def escape_href(label):
  190. # remove html tags
  191. label = re.sub("<[^<]+?>", "", label)
  192. # fix &nbsp;
  193. label = label.replace("&nbsp;", "")
  194. # fix "
  195. label = label.replace('"', "")
  196. # replace space with underscore + lower
  197. return label.replace(" ", "-").lower()
  198. def write_toc(data):
  199. if not data:
  200. return
  201. fd = sys.stdout
  202. fd.write('<div class="toc">\n')
  203. fd.write('<h4 class="toc">Table of contents</h4>\n')
  204. fd.write('<ul class="toc">\n')
  205. first = True
  206. has_h2 = False
  207. in_h3 = False
  208. indent = 4
  209. for tag, href, text in data:
  210. if tag == "h3" and not in_h3 and has_h2:
  211. fd.write('\n%s<ul class="toc">\n' % (" " * indent))
  212. indent += 4
  213. in_h3 = True
  214. elif not first:
  215. fd.write("</li>\n")
  216. if tag == "h2":
  217. has_h2 = True
  218. if in_h3:
  219. indent -= 4
  220. fd.write("%s</ul></li>\n" % (" " * indent))
  221. in_h3 = False
  222. text = text.replace("\xa0", " ")
  223. fd.write(
  224. '%s<li class="toc"><a href="#%s" class="toc">%s</a>'
  225. % (" " * indent, escape_href(text), text)
  226. )
  227. first = False
  228. fd.write("</li>\n</ul>\n")
  229. fd.write("</div>\n")
  230. def update_toc(data):
  231. ret_data = []
  232. pat = re.compile(r"(<(h[2|3])>)(.+)(</h[2|3]>)")
  233. idx = 1
  234. for line in data.splitlines():
  235. if pat.search(line):
  236. xline = pat.split(line)
  237. line = (
  238. xline[1]
  239. + '<a name="%s">' % escape_href(xline[3])
  240. + xline[3]
  241. + "</a>"
  242. + xline[4]
  243. )
  244. idx += 1
  245. ret_data.append(line)
  246. return "\n".join(ret_data)
  247. def get_addon_path():
  248. """Check if pgm is in the addons list and get addon path
  249. return: pgm path if pgm is addon else None
  250. """
  251. addon_base = os.getenv("GRASS_ADDON_BASE")
  252. if addon_base:
  253. # addons_paths.json is file created during install extension
  254. # check get_addons_paths() function in the g.extension.py file
  255. addons_file = "addons_paths.json"
  256. addons_paths = os.path.join(addon_base, addons_file)
  257. if not os.path.exists(addons_paths):
  258. # Compiled addon has own dir e.g. ~/.grass8/addons/db.join/
  259. # with bin/ docs/ etc/ scripts/ subdir, required for compilation
  260. # addons on osgeo lxd container server and generation of
  261. # modules.xml file (build-xml.py script), when addons_paths.json
  262. # file is stored one level dir up
  263. addons_paths = os.path.join(
  264. os.path.abspath(os.path.join(addon_base, "..")),
  265. addons_file,
  266. )
  267. if not os.path.exists(addons_paths):
  268. return
  269. with open(addons_paths) as f:
  270. addons_paths = json.load(f)
  271. for addon in addons_paths["tree"]:
  272. if pgm == pathlib.Path(addon["path"]).name:
  273. return addon["path"]
# process header
# Content of the page's HTML source; read_file() returns "" if it cannot be
# read.
src_data = read_file(src_file)
# An optional "<!-- meta page name: ... -->" comment overrides the program
# name; only the text before the first "-" is kept.
name = re.search("(<!-- meta page name:)(.*)(-->)", src_data, re.IGNORECASE)
pgm_desc = "GRASS GIS Reference Manual"
if name:
    pgm = name.group(2).strip().split("-", 1)[0].strip()
    # The description override is only looked for when a name override is
    # present.
    name_desc = re.search(
        "(<!-- meta page name description:)(.*)(-->)", src_data, re.IGNORECASE
    )
    if name_desc:
        pgm_desc = name_desc.group(2).strip()
# A "<!-- meta page description: ... -->" comment replaces the program name
# altogether and selects the heading without a NAME section.
desc = re.search("(<!-- meta page description:)(.*)(-->)", src_data, re.IGNORECASE)
if desc:
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    # pgm_desc is empty only when the name description comment was present
    # but blank.
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)
# The generated header and the temporary HTML file are written only when the
# source is not already a complete document (no <html> tag in it).
if not re.search("<html>", src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    """
    Adjusting keywords html pages paths if add-on html man page
    stored on the server
    """
    if html_page_footer_pages_path:
        new_keywords_paths = []
        # Text between the KEYWORDS heading and the next heading.
        orig_keywords_paths = re.search(
            r"<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>",
            tmp_data,
            re.DOTALL,
        )
        if orig_keywords_paths:
            search_txt = 'href="'
            # Insert the path prefix right after href=" in every
            # comma-separated piece; pieces without a link are not carried
            # over to the rewritten section.
            for i in orig_keywords_paths.group(1).split(","):
                if search_txt in i:
                    index = i.index(search_txt) + len(search_txt)
                    new_keywords_paths.append(
                        i[:index] + html_page_footer_pages_path + i[index:],
                    )
        if new_keywords_paths:
            tmp_data = tmp_data.replace(
                orig_keywords_paths.group(1),
                ",".join(new_keywords_paths),
            )
    # Skip the generated header if the temporary file brings its own.
    if not re.search("<html>", tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))

    if tmp_data:
        # Copy the temporary file line by line (line endings kept), dropping
        # every line that contains a closing </body> or </html> tag.
        for line in tmp_data.splitlines(True):
            if not re.search("</body>|</html>", line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search("</html>", src_data, re.IGNORECASE):
    sys.exit()
# Maps the prefix of a program name (the part before the first ".") to the
# name of its index page; used for the footer when the source does not
# declare an index itself.
index_names = {
    "d": "display",
    "db": "database",
    "g": "general",
    "i": "imagery",
    "m": "miscellaneous",
    "ps": "postscript",
    "p": "paint",
    "r": "raster",
    "r3": "raster3d",
    "s": "sites",
    "t": "temporal",
    "v": "vector",
}
  348. def to_title(name):
  349. """Convert name of command class/family to form suitable for title"""
  350. if name == "raster3d":
  351. return "3D raster"
  352. elif name == "postscript":
  353. return "PostScript"
  354. else:
  355. return name.capitalize()
  356. index_titles = {}
  357. for key, name in index_names.items():
  358. index_titles[key] = to_title(name)
# process footer
# A "<!-- meta page index: NAME -->" or "<!-- meta page index: NAME|TITLE -->"
# comment selects the index page explicitly; without an explicit TITLE the
# title is derived from NAME.
index = re.search("(<!-- meta page index:)(.*)(-->)", src_data, re.IGNORECASE)
if index:
    index_name = index.group(2).strip()
    if "|" in index_name:
        index_name, index_name_cap = index_name.split("|", 1)
    else:
        index_name_cap = to_title(index_name)
else:
    # Otherwise the index follows from the program-name prefix; unknown
    # prefixes give empty strings, which select the footer without an
    # index link further below.
    mod_class = pgm.split(".", 1)[0]
    index_name = index_names.get(mod_class, "")
    index_name_cap = index_titles.get(mod_class, "")

# Year shown in the copyright line: VERSION_DATE from the environment if
# set and non-empty, otherwise the current year.
year = os.getenv("VERSION_DATE")
if not year:
    year = str(datetime.now().year)
# check the names of scripts to assign the right folder
# NOTE(review): os.getenv() returns None when MODULE_TOPDIR is unset, which
# os.path.abspath() does not accept -- presumably the build always sets it;
# confirm.
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
if curdir.startswith(topdir + os.path.sep):
    # Current directory is below the top directory: link into the main
    # repository using the path relative to the top directory.
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, "").lstrip(os.path.sep)
else:
    # addons
    # Use the last three components of the current directory as the path.
    source_url = addons_url
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
url_source = ""
if os.getenv("SOURCE_URL", ""):
    # With SOURCE_URL set, the URL is built only for programs found in the
    # addons list; otherwise url_source stays empty.
    addon_path = get_addon_path()
    if addon_path:
        # Addon is installed from the local dir
        if os.path.exists(os.getenv("SOURCE_URL")):
            url_source = urlparse.urljoin(
                addons_url,
                addon_path,
            )
        else:
            # SOURCE_URL is not a local path: join the addon path onto the
            # part of SOURCE_URL that precedes the first "src".
            url_source = urlparse.urljoin(
                os.environ["SOURCE_URL"].split("src")[0],
                addon_path,
            )
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == "win32":
    # pgmdir was built with the native path separator; URLs need "/".
    url_source = url_source.replace(os.path.sep, "/")
# With a known index page: write the SOURCE CODE section followed by the
# footer that links to that index.  Otherwise write only the footer without
# the index link.
if index_name:
    branches = "branches"
    tree = "tree"
    commits = "commits"

    # Derive the history URL from the source URL by replacing the substring
    # "branches" (or else "tree") with "commits"; a "branches" source URL is
    # itself rewritten to use "tree".
    if branches in url_source:
        url_log = url_source.replace(branches, commits)
        url_source = url_source.replace(branches, tree)
    else:
        url_log = url_source.replace(tree, commits)

    sys.stdout.write(
        sourcecode.substitute(
            URL_SOURCE=url_source,
            PGM=pgm,
            URL_LOG=url_log,
        )
    )
    sys.stdout.write(
        footer_index.substitute(
            INDEXNAME=index_name,
            INDEXNAMECAP=index_name_cap,
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
else:
    sys.stdout.write(
        footer_noindex.substitute(
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
  433. HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
  434. ),
  435. )