mkhtml.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2017 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. import locale
  23. import json
  24. try:
  25. # Python 2 import
  26. from HTMLParser import HTMLParser
  27. except:
  28. # Python 3 import
  29. from html.parser import HTMLParser
  30. try:
  31. import urlparse
  32. except:
  33. import urllib.parse as urlparse
  34. if sys.version_info[0] == 2:
  35. PY2 = True
  36. else:
  37. PY2 = False
  38. if not PY2:
  39. unicode = str
  40. def _get_encoding():
  41. encoding = locale.getdefaultlocale()[1]
  42. if not encoding:
  43. encoding = 'UTF-8'
  44. return encoding
  45. def decode(bytes_):
  46. """Decode bytes with default locale and return (unicode) string
  47. No-op if parameter is not bytes (assumed unicode string).
  48. :param bytes bytes_: the bytes to decode
  49. """
  50. if isinstance(bytes_, unicode):
  51. return bytes_
  52. if isinstance(bytes_, bytes):
  53. enc = _get_encoding()
  54. return bytes_.decode(enc)
  55. return unicode(bytes_)
  56. html_page_footer_pages_path = os.getenv('HTML_PAGE_FOOTER_PAGES_PATH') if \
  57. os.getenv('HTML_PAGE_FOOTER_PAGES_PATH') else ''
  58. pgm = sys.argv[1]
  59. src_file = "%s.html" % pgm
  60. tmp_file = "%s.tmp.html" % pgm
  61. trunk_url = "https://github.com/OSGeo/grass/tree/master/"
  62. addons_url = "https://github.com/OSGeo/grass-addons/tree/master/"
  63. header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  64. <html>
  65. <head>
  66. <title>GRASS GIS Manual: ${PGM}</title>
  67. <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  68. <link rel="stylesheet" href="grassdocs.css" type="text/css">
  69. </head>
  70. <body bgcolor="white">
  71. <div id="container">
  72. <a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
  73. <hr class="header">
  74. """
  75. header_nopgm = """<h2>${PGM}</h2>
  76. """
  77. header_pgm = """<h2>NAME</h2>
  78. <em><b>${PGM}</b></em>
  79. """
  80. header_pgm_desc = """<h2>NAME</h2>
  81. <em><b>${PGM}</b></em> - ${PGM_DESC}
  82. """
  83. sourcecode = string.Template(
  84. """<h2>SOURCE CODE</h2>
  85. <p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
  86. """
  87. )
  88. footer_index = string.Template(
  89. """<hr class="header">
  90. <p>
  91. <a href="index.html">Main index</a> |
  92. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
  93. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  94. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  95. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  96. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  97. </p>
  98. <p>
  99. &copy; 2003-${YEAR}
  100. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  101. GRASS GIS ${GRASS_VERSION} Reference Manual
  102. </p>
  103. </div>
  104. </body>
  105. </html>
  106. """)
  107. footer_noindex = string.Template(
  108. """<hr class="header">
  109. <p>
  110. <a href="index.html">Main index</a> |
  111. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
  112. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
  113. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
  114. <a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
  115. </p>
  116. <p>
  117. &copy; 2003-${YEAR}
  118. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  119. GRASS GIS ${GRASS_VERSION} Reference Manual
  120. </p>
  121. </div>
  122. </body>
  123. </html>
  124. """)
  125. def read_file(name):
  126. try:
  127. f = open(name, 'rb')
  128. s = f.read()
  129. f.close()
  130. if PY2:
  131. return s
  132. else:
  133. return decode(s)
  134. except IOError:
  135. return ""
  136. def create_toc(src_data):
  137. class MyHTMLParser(HTMLParser):
  138. def __init__(self):
  139. HTMLParser.__init__(self)
  140. self.reset()
  141. self.idx = 1
  142. self.tag_curr = ''
  143. self.tag_last = ''
  144. self.process_text = False
  145. self.data = []
  146. self.tags_allowed = ('h1', 'h2', 'h3')
  147. self.tags_ignored = ('img')
  148. self.text = ''
  149. def handle_starttag(self, tag, attrs):
  150. if tag in self.tags_allowed:
  151. self.process_text = True
  152. self.tag_last = self.tag_curr
  153. self.tag_curr = tag
  154. def handle_endtag(self, tag):
  155. if tag in self.tags_allowed:
  156. self.data.append((tag, '%s_%d' % (tag, self.idx),
  157. self.text))
  158. self.idx += 1
  159. self.process_text = False
  160. self.text = ''
  161. self.tag_curr = self.tag_last
  162. def handle_data(self, data):
  163. if not self.process_text:
  164. return
  165. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  166. self.text += data
  167. else:
  168. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  169. # instantiate the parser and fed it some HTML
  170. parser = MyHTMLParser()
  171. parser.feed(src_data)
  172. return parser.data
  173. def escape_href(label):
  174. # remove html tags
  175. label = re.sub('<[^<]+?>', '', label)
  176. # fix &nbsp;
  177. label = label.replace('&nbsp;', '')
  178. # fix "
  179. label = label.replace('"', '')
  180. # replace space with underscore + lower
  181. return label.replace(' ', '-').lower()
  182. def write_toc(data):
  183. if not data:
  184. return
  185. fd = sys.stdout
  186. fd.write('<div class="toc">\n')
  187. fd.write('<h4 class="toc">Table of contents</h4>\n')
  188. fd.write('<ul class="toc">\n')
  189. first = True
  190. has_h2 = False
  191. in_h3 = False
  192. indent = 4
  193. for tag, href, text in data:
  194. if tag == 'h3' and not in_h3 and has_h2:
  195. fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
  196. indent += 4
  197. in_h3 = True
  198. elif not first:
  199. fd.write('</li>\n')
  200. if tag == 'h2':
  201. has_h2 = True
  202. if in_h3:
  203. indent -= 4
  204. fd.write('%s</ul></li>\n' % (' ' * indent))
  205. in_h3 = False
  206. text = text.replace(u'\xa0', u' ')
  207. fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
  208. (' ' * indent, escape_href(text), text))
  209. first = False
  210. fd.write('</li>\n</ul>\n')
  211. fd.write('</div>\n')
  212. def update_toc(data):
  213. ret_data = []
  214. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  215. idx = 1
  216. for line in data.splitlines():
  217. if pat.search(line):
  218. xline = pat.split(line)
  219. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  220. idx += 1
  221. ret_data.append(line)
  222. return '\n'.join(ret_data)
  223. def get_addon_path(pgm):
  224. """Check if pgm is in addons list and get addon path
  225. :param pgm str: pgm
  226. :return tuple: (True, path) if pgm is addon else (None, None)
  227. """
  228. addon_base = os.getenv('GRASS_ADDON_BASE')
  229. if addon_base:
  230. """'addons_paths.json' is file created during install extension
  231. check get_addons_paths() function in the g.extension.py file
  232. """
  233. addons_paths = os.path.join(addon_base, 'addons_paths.json')
  234. if os.path.exists(addons_paths):
  235. with open(addons_paths, 'r') as f:
  236. addons_paths = json.load(f)
  237. for addon in addons_paths['tree']:
  238. split_path = addon['path'].split('/')
  239. root_dir, module_dir = split_path[0], split_path[-1]
  240. if 'grass7' == root_dir and pgm == module_dir:
  241. return True, addon['path']
  242. return None, None
  243. # process header
  244. src_data = read_file(src_file)
  245. name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
  246. pgm_desc = None
  247. if name:
  248. pgm = name.group(2).strip().split('-', 1)[0].strip()
  249. name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
  250. if name_desc:
  251. pgm_desc = name_desc.group(2).strip()
  252. desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
  253. re.IGNORECASE)
  254. if desc:
  255. pgm = desc.group(2).strip()
  256. header_tmpl = string.Template(header_base + header_nopgm)
  257. else:
  258. if not pgm_desc:
  259. header_tmpl = string.Template(header_base + header_pgm)
  260. else:
  261. header_tmpl = string.Template(header_base + header_pgm_desc)
  262. if not re.search('<html>', src_data, re.IGNORECASE):
  263. tmp_data = read_file(tmp_file)
  264. """
  265. Adjusting keywords html pages paths if add-on html man page
  266. stored on the server
  267. """
  268. if html_page_footer_pages_path:
  269. new_keywords_paths = []
  270. orig_keywords_paths = re.search(
  271. r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>',
  272. tmp_data, re.DOTALL,
  273. ).group(1)
  274. for i in orig_keywords_paths.split(','):
  275. index = i.index('href="') + len('href="')
  276. new_keywords_paths.append(
  277. i[:index] + html_page_footer_pages_path + i[index:],
  278. )
  279. if new_keywords_paths:
  280. tmp_data = tmp_data.replace(
  281. orig_keywords_paths,
  282. ','.join(new_keywords_paths) if len(new_keywords_paths) > 1
  283. else new_keywords_paths[0],
  284. )
  285. if not re.search('<html>', tmp_data, re.IGNORECASE):
  286. sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
  287. if tmp_data:
  288. for line in tmp_data.splitlines(True):
  289. if not re.search('</body>|</html>', line, re.IGNORECASE):
  290. sys.stdout.write(line)
  291. # create TOC
  292. write_toc(create_toc(src_data))
  293. # process body
  294. sys.stdout.write(update_toc(src_data))
  295. # if </html> is found, suppose a complete html is provided.
  296. # otherwise, generate module class reference:
  297. if re.search('</html>', src_data, re.IGNORECASE):
  298. sys.exit()
  299. index_names = {
  300. 'd' : 'display',
  301. 'db': 'database',
  302. 'g' : 'general',
  303. 'i' : 'imagery',
  304. 'm' : 'miscellaneous',
  305. 'ps': 'postscript',
  306. 'p' : 'paint',
  307. 'r' : 'raster',
  308. 'r3': 'raster3d',
  309. 's' : 'sites',
  310. 't' : 'temporal',
  311. 'v' : 'vector'
  312. }
  313. def to_title(name):
  314. """Convert name of command class/family to form suitable for title"""
  315. if name == 'raster3d':
  316. return '3D raster'
  317. elif name == 'postscript':
  318. return 'PostScript'
  319. else:
  320. return name.capitalize()
  321. index_titles = {}
  322. for key, name in index_names.items():
  323. index_titles[key] = to_title(name)
  324. # process footer
  325. index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
  326. if index:
  327. index_name = index.group(2).strip()
  328. if '|' in index_name:
  329. index_name, index_name_cap = index_name.split('|', 1)
  330. else:
  331. index_name_cap = to_title(index_name)
  332. else:
  333. mod_class = pgm.split('.', 1)[0]
  334. index_name = index_names.get(mod_class, '')
  335. index_name_cap = index_titles.get(mod_class, '')
  336. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  337. year = os.getenv("VERSION_DATE")
  338. if not year:
  339. year = str(datetime.now().year)
  340. # check the names of scripts to assign the right folder
  341. topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
  342. curdir = os.path.abspath(os.path.curdir)
  343. if curdir.startswith(topdir):
  344. source_url = trunk_url
  345. pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
  346. else:
  347. # addons
  348. source_url = addons_url
  349. pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
  350. url_source = ''
  351. if os.getenv('SOURCE_URL', ''):
  352. # addons
  353. for prefix in index_names.keys():
  354. cwd = os.getcwd()
  355. idx = cwd.find('{0}{1}.'.format(os.path.sep, prefix))
  356. if idx > -1:
  357. pgmname = cwd[idx+1:]
  358. classname = index_names[prefix]
  359. url_source = urlparse.urljoin('{0}{1}/'.format(
  360. os.environ['SOURCE_URL'], classname),
  361. pgmname
  362. )
  363. break
  364. else:
  365. url_source = urlparse.urljoin(source_url, pgmdir)
  366. if sys.platform == 'win32':
  367. url_source = url_source.replace(os.path.sep, '/')
  368. if index_name:
  369. tree = 'grass/tree'
  370. commits = 'grass/commits'
  371. is_addon, addon_path = get_addon_path(pgm=pgm)
  372. if is_addon:
  373. # Fix gui/wxpython addon url path
  374. url_source = urlparse.urljoin(
  375. os.environ['SOURCE_URL'], addon_path.split('/', 1)[1],
  376. )
  377. tree = 'grass-addons/tree'
  378. commits = 'grass-addons/commits'
  379. sys.stdout.write(sourcecode.substitute(
  380. URL_SOURCE=url_source, PGM=pgm, URL_LOG=url_source.replace(
  381. tree, commits)))
  382. sys.stdout.write(
  383. footer_index.substitute(
  384. INDEXNAME=index_name,
  385. INDEXNAMECAP=index_name_cap,
  386. YEAR=year,
  387. GRASS_VERSION=grass_version,
  388. HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
  389. ),
  390. )
  391. else:
  392. sys.stdout.write(
  393. footer_noindex.substitute(
  394. YEAR=year,
  395. GRASS_VERSION=grass_version,
  396. HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
  397. ),
  398. )