mkhtml.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
#!/usr/bin/env python3

############################################################################
#
# MODULE:       Builds manual pages
# AUTHOR(S):    Markus Neteler
#               Glynn Clements
#               Martin Landa <landa.martin gmail.com>
# PURPOSE:      Create HTML manual page snippets
# COPYRIGHT:    (C) 2007-2021 by Glynn Clements
#               and the GRASS Development Team
#
#               This program is free software under the GNU General
#               Public License (>=v2). Read the file COPYING that
#               comes with GRASS for details.
#
#############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. import locale
  23. import json
  24. try:
  25. # Python 2 import
  26. from HTMLParser import HTMLParser
  27. except:
  28. # Python 3 import
  29. from html.parser import HTMLParser
  30. try:
  31. import urlparse
  32. except:
  33. import urllib.parse as urlparse
  34. if sys.version_info[0] == 2:
  35. PY2 = True
  36. else:
  37. PY2 = False
  38. if not PY2:
  39. unicode = str
  40. def _get_encoding():
  41. encoding = locale.getdefaultlocale()[1]
  42. if not encoding:
  43. encoding = "UTF-8"
  44. return encoding
  45. def decode(bytes_):
  46. """Decode bytes with default locale and return (unicode) string
  47. No-op if parameter is not bytes (assumed unicode string).
  48. :param bytes bytes_: the bytes to decode
  49. """
  50. if isinstance(bytes_, unicode):
  51. return bytes_
  52. if isinstance(bytes_, bytes):
  53. enc = _get_encoding()
  54. return bytes_.decode(enc)
  55. return unicode(bytes_)
  56. html_page_footer_pages_path = (
  57. os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  58. if os.getenv("HTML_PAGE_FOOTER_PAGES_PATH")
  59. else ""
  60. )
  61. pgm = sys.argv[1]
  62. src_file = "%s.html" % pgm
  63. tmp_file = "%s.tmp.html" % pgm
  64. trunk_url = "https://github.com/OSGeo/grass/tree/master/"
  65. addons_url = "https://github.com/OSGeo/grass-addons/tree/master/"
  66. header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  67. <html>
  68. <head>
  69. <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  70. <title>${PGM} - GRASS GIS Manual</title>
  71. <meta name="Author" content="GRASS Development Team">
  72. <meta name="description" content="${PGM}: ${PGM_DESC}">
  73. <link rel="stylesheet" href="grassdocs.css" type="text/css">
  74. </head>
  75. <body bgcolor="white">
  76. <div id="container">
  77. <a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
  78. <hr class="header">
  79. """
  80. header_nopgm = """<h2>${PGM}</h2>
  81. """
  82. header_pgm = """<h2>NAME</h2>
  83. <em><b>${PGM}</b></em>
  84. """
  85. header_pgm_desc = """<h2>NAME</h2>
  86. <em><b>${PGM}</b></em> - ${PGM_DESC}
  87. """
  88. sourcecode = string.Template(
  89. """<h2>SOURCE CODE</h2>
  90. <p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
  91. """
  92. )
  93. footer_index = string.Template(
  94. """<hr class="header">
  95. <p>
  96. <a href="index.html">Main index</a> |
  97. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
  98. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  99. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  100. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  101. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  102. </p>
  103. <p>
  104. &copy; 2003-${YEAR}
  105. <a href="https://grass.osgeo.org">GRASS Development Team</a>,
  106. GRASS GIS ${GRASS_VERSION} Reference Manual
  107. </p>
  108. </div>
  109. </body>
  110. </html>
  111. """
  112. )
  113. footer_noindex = string.Template(
  114. """<hr class="header">
  115. <p>
  116. <a href="index.html">Main index</a> |
  117. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  118. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  119. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  120. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  121. </p>
  122. <p>
  123. &copy; 2003-${YEAR}
  124. <a href="https://grass.osgeo.org">GRASS Development Team</a>,
  125. GRASS GIS ${GRASS_VERSION} Reference Manual
  126. </p>
  127. </div>
  128. </body>
  129. </html>
  130. """
  131. )
  132. def read_file(name):
  133. try:
  134. f = open(name, "rb")
  135. s = f.read()
  136. f.close()
  137. if PY2:
  138. return s
  139. else:
  140. return decode(s)
  141. except IOError:
  142. return ""
  143. def create_toc(src_data):
  144. class MyHTMLParser(HTMLParser):
  145. def __init__(self):
  146. HTMLParser.__init__(self)
  147. self.reset()
  148. self.idx = 1
  149. self.tag_curr = ""
  150. self.tag_last = ""
  151. self.process_text = False
  152. self.data = []
  153. self.tags_allowed = ("h1", "h2", "h3")
  154. self.tags_ignored = "img"
  155. self.text = ""
  156. def handle_starttag(self, tag, attrs):
  157. if tag in self.tags_allowed:
  158. self.process_text = True
  159. self.tag_last = self.tag_curr
  160. self.tag_curr = tag
  161. def handle_endtag(self, tag):
  162. if tag in self.tags_allowed:
  163. self.data.append((tag, "%s_%d" % (tag, self.idx), self.text))
  164. self.idx += 1
  165. self.process_text = False
  166. self.text = ""
  167. self.tag_curr = self.tag_last
  168. def handle_data(self, data):
  169. if not self.process_text:
  170. return
  171. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  172. self.text += data
  173. else:
  174. self.text += "<%s>%s</%s>" % (self.tag_curr, data, self.tag_curr)
  175. # instantiate the parser and fed it some HTML
  176. parser = MyHTMLParser()
  177. parser.feed(src_data)
  178. return parser.data
  179. def escape_href(label):
  180. # remove html tags
  181. label = re.sub("<[^<]+?>", "", label)
  182. # fix &nbsp;
  183. label = label.replace("&nbsp;", "")
  184. # fix "
  185. label = label.replace('"', "")
  186. # replace space with underscore + lower
  187. return label.replace(" ", "-").lower()
  188. def write_toc(data):
  189. if not data:
  190. return
  191. fd = sys.stdout
  192. fd.write('<div class="toc">\n')
  193. fd.write('<h4 class="toc">Table of contents</h4>\n')
  194. fd.write('<ul class="toc">\n')
  195. first = True
  196. has_h2 = False
  197. in_h3 = False
  198. indent = 4
  199. for tag, href, text in data:
  200. if tag == "h3" and not in_h3 and has_h2:
  201. fd.write('\n%s<ul class="toc">\n' % (" " * indent))
  202. indent += 4
  203. in_h3 = True
  204. elif not first:
  205. fd.write("</li>\n")
  206. if tag == "h2":
  207. has_h2 = True
  208. if in_h3:
  209. indent -= 4
  210. fd.write("%s</ul></li>\n" % (" " * indent))
  211. in_h3 = False
  212. text = text.replace(u"\xa0", u" ")
  213. fd.write(
  214. '%s<li class="toc"><a href="#%s" class="toc">%s</a>'
  215. % (" " * indent, escape_href(text), text)
  216. )
  217. first = False
  218. fd.write("</li>\n</ul>\n")
  219. fd.write("</div>\n")
  220. def update_toc(data):
  221. ret_data = []
  222. pat = re.compile(r"(<(h[2|3])>)(.+)(</h[2|3]>)")
  223. idx = 1
  224. for line in data.splitlines():
  225. if pat.search(line):
  226. xline = pat.split(line)
  227. line = (
  228. xline[1]
  229. + '<a name="%s">' % escape_href(xline[3])
  230. + xline[3]
  231. + "</a>"
  232. + xline[4]
  233. )
  234. idx += 1
  235. ret_data.append(line)
  236. return "\n".join(ret_data)
  237. def get_addon_path(pgm):
  238. """Check if pgm is in addons list and get addon path
  239. :param pgm str: pgm
  240. :return tuple: (True, path) if pgm is addon else (None, None)
  241. """
  242. addon_base = os.getenv("GRASS_ADDON_BASE")
  243. if addon_base:
  244. """'addons_paths.json' is file created during install extension
  245. check get_addons_paths() function in the g.extension.py file
  246. """
  247. addons_paths = os.path.join(addon_base, "addons_paths.json")
  248. if os.path.exists(addons_paths):
  249. with open(addons_paths, "r") as f:
  250. addons_paths = json.load(f)
  251. for addon in addons_paths["tree"]:
  252. split_path = addon["path"].split("/")
  253. root_dir, module_dir = split_path[0], split_path[-1]
  254. if "grass7" == root_dir and pgm == module_dir:
  255. return True, addon["path"]
  256. return None, None
  257. # process header
  258. src_data = read_file(src_file)
  259. name = re.search("(<!-- meta page name:)(.*)(-->)", src_data, re.IGNORECASE)
  260. pgm_desc = "GRASS GIS Reference Manual"
  261. if name:
  262. pgm = name.group(2).strip().split("-", 1)[0].strip()
  263. name_desc = re.search(
  264. "(<!-- meta page name description:)(.*)(-->)", src_data, re.IGNORECASE
  265. )
  266. if name_desc:
  267. pgm_desc = name_desc.group(2).strip()
  268. desc = re.search("(<!-- meta page description:)(.*)(-->)", src_data, re.IGNORECASE)
  269. if desc:
  270. pgm = desc.group(2).strip()
  271. header_tmpl = string.Template(header_base + header_nopgm)
  272. else:
  273. if not pgm_desc:
  274. header_tmpl = string.Template(header_base + header_pgm)
  275. else:
  276. header_tmpl = string.Template(header_base + header_pgm_desc)
  277. if not re.search("<html>", src_data, re.IGNORECASE):
  278. tmp_data = read_file(tmp_file)
  279. """
  280. Adjusting keywords html pages paths if add-on html man page
  281. stored on the server
  282. """
  283. if html_page_footer_pages_path:
  284. new_keywords_paths = []
  285. orig_keywords_paths = re.search(
  286. r"<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>",
  287. tmp_data,
  288. re.DOTALL,
  289. )
  290. if orig_keywords_paths:
  291. search_txt = 'href="'
  292. for i in orig_keywords_paths.group(1).split(","):
  293. if search_txt in i:
  294. index = i.index(search_txt) + len(search_txt)
  295. new_keywords_paths.append(
  296. i[:index] + html_page_footer_pages_path + i[index:],
  297. )
  298. if new_keywords_paths:
  299. tmp_data = tmp_data.replace(
  300. orig_keywords_paths.group(1),
  301. ",".join(new_keywords_paths),
  302. )
  303. if not re.search("<html>", tmp_data, re.IGNORECASE):
  304. sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
  305. if tmp_data:
  306. for line in tmp_data.splitlines(True):
  307. if not re.search("</body>|</html>", line, re.IGNORECASE):
  308. sys.stdout.write(line)
  309. # create TOC
  310. write_toc(create_toc(src_data))
  311. # process body
  312. sys.stdout.write(update_toc(src_data))
  313. # if </html> is found, suppose a complete html is provided.
  314. # otherwise, generate module class reference:
  315. if re.search("</html>", src_data, re.IGNORECASE):
  316. sys.exit()
  317. index_names = {
  318. "d": "display",
  319. "db": "database",
  320. "g": "general",
  321. "i": "imagery",
  322. "m": "miscellaneous",
  323. "ps": "postscript",
  324. "p": "paint",
  325. "r": "raster",
  326. "r3": "raster3d",
  327. "s": "sites",
  328. "t": "temporal",
  329. "v": "vector",
  330. }
  331. def to_title(name):
  332. """Convert name of command class/family to form suitable for title"""
  333. if name == "raster3d":
  334. return "3D raster"
  335. elif name == "postscript":
  336. return "PostScript"
  337. else:
  338. return name.capitalize()
  339. index_titles = {}
  340. for key, name in index_names.items():
  341. index_titles[key] = to_title(name)
  342. # process footer
  343. index = re.search("(<!-- meta page index:)(.*)(-->)", src_data, re.IGNORECASE)
  344. if index:
  345. index_name = index.group(2).strip()
  346. if "|" in index_name:
  347. index_name, index_name_cap = index_name.split("|", 1)
  348. else:
  349. index_name_cap = to_title(index_name)
  350. else:
  351. mod_class = pgm.split(".", 1)[0]
  352. index_name = index_names.get(mod_class, "")
  353. index_name_cap = index_titles.get(mod_class, "")
  354. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  355. year = os.getenv("VERSION_DATE")
  356. if not year:
  357. year = str(datetime.now().year)
  358. # check the names of scripts to assign the right folder
  359. topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
  360. curdir = os.path.abspath(os.path.curdir)
  361. if curdir.startswith(topdir):
  362. source_url = trunk_url
  363. pgmdir = curdir.replace(topdir, "").lstrip(os.path.sep)
  364. else:
  365. # addons
  366. source_url = addons_url
  367. pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
  368. url_source = ""
  369. if os.getenv("SOURCE_URL", ""):
  370. # addons
  371. for prefix in index_names.keys():
  372. cwd = os.getcwd()
  373. idx = cwd.find("{0}{1}.".format(os.path.sep, prefix))
  374. if idx > -1:
  375. pgmname = cwd[idx + 1 :]
  376. classname = index_names[prefix]
  377. url_source = urlparse.urljoin(
  378. "{0}{1}/".format(os.environ["SOURCE_URL"], classname), pgmname
  379. )
  380. break
  381. else:
  382. url_source = urlparse.urljoin(source_url, pgmdir)
  383. if sys.platform == "win32":
  384. url_source = url_source.replace(os.path.sep, "/")
  385. if index_name:
  386. tree = "grass/tree"
  387. commits = "grass/commits"
  388. is_addon, addon_path = get_addon_path(pgm=pgm)
  389. if is_addon:
  390. # Fix gui/wxpython addon url path
  391. url_source = urlparse.urljoin(
  392. os.environ["SOURCE_URL"],
  393. addon_path.split("/", 1)[1],
  394. )
  395. tree = "grass-addons/tree"
  396. commits = "grass-addons/commits"
  397. sys.stdout.write(
  398. sourcecode.substitute(
  399. URL_SOURCE=url_source, PGM=pgm, URL_LOG=url_source.replace(tree, commits)
  400. )
  401. )
  402. sys.stdout.write(
  403. footer_index.substitute(
  404. INDEXNAME=index_name,
  405. INDEXNAMECAP=index_name_cap,
  406. YEAR=year,
  407. GRASS_VERSION=grass_version,
  408. HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
  409. ),
  410. )
  411. else:
  412. sys.stdout.write(
  413. footer_noindex.substitute(
  414. YEAR=year,
  415. GRASS_VERSION=grass_version,
  416. HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
  417. ),
  418. )