mkhtml.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2017 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. import locale
  23. try:
  24. # Python 2 import
  25. from HTMLParser import HTMLParser
  26. except:
  27. # Python 3 import
  28. from html.parser import HTMLParser
  29. try:
  30. import urlparse
  31. except:
  32. import urllib.parse as urlparse
  33. if sys.version_info[0] == 2:
  34. PY2 = True
  35. else:
  36. PY2 = False
  37. if not PY2:
  38. unicode = str
  39. def _get_encoding():
  40. encoding = locale.getdefaultlocale()[1]
  41. if not encoding:
  42. encoding = 'UTF-8'
  43. return encoding
  44. def decode(bytes_):
  45. """Decode bytes with default locale and return (unicode) string
  46. No-op if parameter is not bytes (assumed unicode string).
  47. :param bytes bytes_: the bytes to decode
  48. """
  49. if isinstance(bytes_, unicode):
  50. return bytes_
  51. if isinstance(bytes_, bytes):
  52. enc = _get_encoding()
  53. return bytes_.decode(enc)
  54. return unicode(bytes_)
  55. pgm = sys.argv[1]
  56. src_file = "%s.html" % pgm
  57. tmp_file = "%s.tmp.html" % pgm
  58. trunk_url = "https://github.com/OSGeo/grass/tree/master/"
  59. addons_url = "https://github.com/OSGeo/grass-addons/tree/master/"
  60. header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  61. <html>
  62. <head>
  63. <title>GRASS GIS Manual: ${PGM}</title>
  64. <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  65. <link rel="stylesheet" href="grassdocs.css" type="text/css">
  66. </head>
  67. <body bgcolor="white">
  68. <div id="container">
  69. <a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
  70. <hr class="header">
  71. """
  72. header_nopgm = """<h2>${PGM}</h2>
  73. """
  74. header_pgm = """<h2>NAME</h2>
  75. <em><b>${PGM}</b></em>
  76. """
  77. header_pgm_desc = """<h2>NAME</h2>
  78. <em><b>${PGM}</b></em> - ${PGM_DESC}
  79. """
  80. sourcecode = string.Template(
  81. """<h2>SOURCE CODE</h2>
  82. <p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
  83. """
  84. )
  85. footer_index = string.Template(
  86. """<hr class="header">
  87. <p>
  88. <a href="index.html">Main index</a> |
  89. <a href="${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
  90. <a href="topics.html">Topics index</a> |
  91. <a href="keywords.html">Keywords index</a> |
  92. <a href="graphical_index.html">Graphical index</a> |
  93. <a href="full_index.html">Full index</a>
  94. </p>
  95. <p>
  96. &copy; 2003-${YEAR}
  97. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  98. GRASS GIS ${GRASS_VERSION} Reference Manual
  99. </p>
  100. </div>
  101. </body>
  102. </html>
  103. """)
  104. footer_noindex = string.Template(
  105. """<hr class="header">
  106. <p>
  107. <a href="index.html">Main index</a> |
  108. <a href="topics.html">Topics index</a> |
  109. <a href="keywords.html">Keywords index</a> |
  110. <a href="graphical_index.html">Graphical index</a> |
  111. <a href="full_index.html">Full index</a>
  112. </p>
  113. <p>
  114. &copy; 2003-${YEAR}
  115. <a href="http://grass.osgeo.org">GRASS Development Team</a>,
  116. GRASS GIS ${GRASS_VERSION} Reference Manual
  117. </p>
  118. </div>
  119. </body>
  120. </html>
  121. """)
  122. def read_file(name):
  123. try:
  124. f = open(name, 'rb')
  125. s = f.read()
  126. f.close()
  127. if PY2:
  128. return s
  129. else:
  130. return decode(s)
  131. except IOError:
  132. return ""
  133. def create_toc(src_data):
  134. class MyHTMLParser(HTMLParser):
  135. def __init__(self):
  136. HTMLParser.__init__(self)
  137. self.reset()
  138. self.idx = 1
  139. self.tag_curr = ''
  140. self.tag_last = ''
  141. self.process_text = False
  142. self.data = []
  143. self.tags_allowed = ('h1', 'h2', 'h3')
  144. self.tags_ignored = ('img')
  145. self.text = ''
  146. def handle_starttag(self, tag, attrs):
  147. if tag in self.tags_allowed:
  148. self.process_text = True
  149. self.tag_last = self.tag_curr
  150. self.tag_curr = tag
  151. def handle_endtag(self, tag):
  152. if tag in self.tags_allowed:
  153. self.data.append((tag, '%s_%d' % (tag, self.idx),
  154. self.text))
  155. self.idx += 1
  156. self.process_text = False
  157. self.text = ''
  158. self.tag_curr = self.tag_last
  159. def handle_data(self, data):
  160. if not self.process_text:
  161. return
  162. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  163. self.text += data
  164. else:
  165. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  166. # instantiate the parser and fed it some HTML
  167. parser = MyHTMLParser()
  168. parser.feed(src_data)
  169. return parser.data
  170. def escape_href(label):
  171. # remove html tags
  172. label = re.sub('<[^<]+?>', '', label)
  173. # fix &nbsp;
  174. label = label.replace('&nbsp;', '')
  175. # fix "
  176. label = label.replace('"', '')
  177. # replace space with underscore + lower
  178. return label.replace(' ', '-').lower()
  179. def write_toc(data):
  180. if not data:
  181. return
  182. fd = sys.stdout
  183. fd.write('<div class="toc">\n')
  184. fd.write('<h4 class="toc">Table of contents</h4>\n')
  185. fd.write('<ul class="toc">\n')
  186. first = True
  187. has_h2 = False
  188. in_h3 = False
  189. indent = 4
  190. for tag, href, text in data:
  191. if tag == 'h3' and not in_h3 and has_h2:
  192. fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
  193. indent += 4
  194. in_h3 = True
  195. elif not first:
  196. fd.write('</li>\n')
  197. if tag == 'h2':
  198. has_h2 = True
  199. if in_h3:
  200. indent -= 4
  201. fd.write('%s</ul></li>\n' % (' ' * indent))
  202. in_h3 = False
  203. text = text.replace(u'\xa0', u' ')
  204. fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
  205. (' ' * indent, escape_href(text), text))
  206. first = False
  207. fd.write('</li>\n</ul>\n')
  208. fd.write('</div>\n')
  209. def update_toc(data):
  210. ret_data = []
  211. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  212. idx = 1
  213. for line in data.splitlines():
  214. if pat.search(line):
  215. xline = pat.split(line)
  216. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  217. idx += 1
  218. ret_data.append(line)
  219. return '\n'.join(ret_data)
# process header
src_data = read_file(src_file)
# An optional "<!-- meta page name: ... -->" comment replaces the program
# name given on the command line; only the part before the first '-' is used.
name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
pgm_desc = None
if name:
    pgm = name.group(2).strip().split('-', 1)[0].strip()
    # NOTE(review): indentation was lost in this copy of the file; the
    # description lookup is read as nested under "if name" -- confirm.
    name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
    if name_desc:
        pgm_desc = name_desc.group(2).strip()
# An optional "<!-- meta page description: ... -->" comment turns the page
# into a title-only page: its text becomes the <h2> title instead of a
# "NAME" section.
desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                 re.IGNORECASE)
if desc:
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

# The generated header and the content of tmp_file are written only when the
# source page does not contain an <html> tag of its own.
if not re.search('<html>', src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    # tmp_file may already provide the page prologue
    if not re.search('<html>', tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
    if tmp_data:
        # copy tmp_file without the lines that would close the document
        for line in tmp_data.splitlines(True):
            if not re.search('</body>|</html>', line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search('</html>', src_data, re.IGNORECASE):
    sys.exit()
  255. index_names = {
  256. 'd' : 'display',
  257. 'db': 'database',
  258. 'g' : 'general',
  259. 'i' : 'imagery',
  260. 'm' : 'miscellaneous',
  261. 'ps': 'postscript',
  262. 'p' : 'paint',
  263. 'r' : 'raster',
  264. 'r3': 'raster3d',
  265. 's' : 'sites',
  266. 't' : 'temporal',
  267. 'v' : 'vector'
  268. }
  269. def to_title(name):
  270. """Convert name of command class/family to form suitable for title"""
  271. if name == 'raster3d':
  272. return '3D raster'
  273. elif name == 'postscript':
  274. return 'PostScript'
  275. else:
  276. return name.capitalize()
  277. index_titles = {}
  278. for key, name in index_names.items():
  279. index_titles[key] = to_title(name)
  280. # process footer
  281. index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
  282. if index:
  283. index_name = index.group(2).strip()
  284. if '|' in index_name:
  285. index_name, index_name_cap = index_name.split('|', 1)
  286. else:
  287. index_name_cap = to_title(index_name)
  288. else:
  289. mod_class = pgm.split('.', 1)[0]
  290. index_name = index_names.get(mod_class, '')
  291. index_name_cap = index_titles.get(mod_class, '')
  292. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  293. year = os.getenv("VERSION_DATE")
  294. if not year:
  295. year = str(datetime.now().year)
# check the names of scripts to assign the right folder
# NOTE(review): os.getenv() returns None when MODULE_TOPDIR is not set, and
# os.path.abspath(None) raises -- the variable is effectively required.
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
if curdir.startswith(topdir):
    # current directory is below MODULE_TOPDIR: link to the main repository
    # using the path relative to MODULE_TOPDIR
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
else:
    # addons
    # use the last three components of the current directory
    source_url = addons_url
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
url_source = ''
if os.getenv('SOURCE_URL', ''):
    # addons
    # Look for "<sep><prefix>." in the working directory; the first prefix
    # found selects the class directory below SOURCE_URL and the rest of the
    # path becomes the program part of the URL. If no prefix matches,
    # url_source stays empty.
    for prefix in index_names.keys():
        cwd = os.getcwd()
        idx = cwd.find('{0}{1}.'.format(os.path.sep, prefix))
        if idx > -1:
            pgmname = cwd[idx+1:]
            classname = index_names[prefix]
            url_source = urlparse.urljoin('{0}{1}/'.format(
                os.environ['SOURCE_URL'], classname),
                pgmname
            )
            break
# NOTE(review): indentation was lost in this copy of the file; this "else"
# is read as belonging to the SOURCE_URL check above, not to the "for" loop
# -- confirm.
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == 'win32':
    # the URL was built from file system paths; use forward slashes
    url_source = url_source.replace(os.path.sep, '/')

if index_name:
    # pages that belong to an index get the "SOURCE CODE" section, with the
    # history link derived from the source URL, and the footer with the
    # index link
    sys.stdout.write(sourcecode.substitute(URL_SOURCE=url_source, PGM=pgm,
                                           URL_LOG=url_source.replace('grass/tree', 'grass/commits')))
    sys.stdout.write(footer_index.substitute(INDEXNAME=index_name,
                                             INDEXNAMECAP=index_name_cap,
                                             YEAR=year, GRASS_VERSION=grass_version))
else:
    sys.stdout.write(footer_noindex.substitute(YEAR=year,
                                               GRASS_VERSION=grass_version))
  330. else:
  331. sys.stdout.write(footer_noindex.substitute(YEAR=year,
  332. GRASS_VERSION=grass_version))