mkhtml.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2014 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. from HTMLParser import HTMLParser
  23. import urlparse
  24. pgm = sys.argv[1]
  25. src_file = "%s.html" % pgm
  26. tmp_file = "%s.tmp.html" % pgm
  27. source_url = "https://trac.osgeo.org/grass/browser/grass/trunk/"
# Page preamble common to all header variants: doctype, <head>, the opening
# of <body> and of the "container" <div>, and the logo. ${PGM} is filled in
# through string.Template when the header is written.
header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>GRASS GIS Manual: ${PGM}</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">
<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""
# Heading variant used when the source has a "meta page description"
# comment: ${PGM} is shown as a plain page title.
header_nopgm = """<h2>${PGM}</h2>
"""
# NAME section variant without a description.
header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""
# NAME section variant with a description (${PGM_DESC}).
header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""
# "SOURCE CODE" section: links ${PGM} to its source (${URL_SOURCE}) and to
# the history of that source (${URL_LOG}).
sourcecode = string.Template(
"""<h2>SOURCE CODE</h2>
<p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
"""
)
# Page footer for pages that belong to an index: besides the general index
# links it links to "${INDEXNAME}.html", then closes the "container" <div>,
# <body> and <html>. Placeholders: INDEXNAME, INDEXNAMECAP, YEAR,
# GRASS_VERSION.
footer_index = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="topics.html">Topics index</a> |
<a href="keywords.html">Keywords index</a> |
<a href="graphical_index.html">Graphical index</a> |
<a href="full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
""")
# Page footer for pages without an index of their own: only the general
# index links, then the closing of the "container" <div>, <body> and <html>.
# Placeholders: YEAR, GRASS_VERSION.
footer_noindex = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="topics.html">Topics index</a> |
<a href="keywords.html">Keywords index</a> |
<a href="graphical_index.html">Graphical index</a> |
<a href="full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>
</div>
</body>
</html>
""")
  90. def read_file(name):
  91. try:
  92. f = open(name, 'rb')
  93. s = f.read()
  94. f.close()
  95. return s
  96. except IOError:
  97. return ""
  98. def create_toc(src_data):
  99. class MyHTMLParser(HTMLParser):
  100. def __init__(self):
  101. self.reset()
  102. self.idx = 1
  103. self.tag_curr = ''
  104. self.tag_last = ''
  105. self.process_text = False
  106. self.data = []
  107. self.tags_allowed = ('h1', 'h2', 'h3')
  108. self.tags_ignored = ('img')
  109. self.text = ''
  110. def handle_starttag(self, tag, attrs):
  111. if tag in self.tags_allowed:
  112. self.process_text = True
  113. self.tag_last = self.tag_curr
  114. self.tag_curr = tag
  115. def handle_endtag(self, tag):
  116. if tag in self.tags_allowed:
  117. self.data.append((tag, '%s_%d' % (tag, self.idx),
  118. self.text))
  119. self.idx += 1
  120. self.process_text = False
  121. self.text = ''
  122. self.tag_curr = self.tag_last
  123. def handle_data(self, data):
  124. if not self.process_text:
  125. return
  126. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  127. self.text += data
  128. else:
  129. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  130. # instantiate the parser and fed it some HTML
  131. parser = MyHTMLParser()
  132. parser.feed(src_data)
  133. return parser.data
  134. def escape_href(label):
  135. # remove html tags
  136. label = re.sub('<[^<]+?>', '', label)
  137. # fix &nbsp;
  138. label = label.replace('&nbsp;', '')
  139. # fix "
  140. label = label.replace('"', '')
  141. # replace space with underscore + lower
  142. return label.replace(' ', '-').lower()
  143. def write_toc(data):
  144. if not data:
  145. return
  146. fd = sys.stdout
  147. fd.write('<div class="toc">\n')
  148. fd.write('<h4 class="toc">Table of contents</h4>\n')
  149. fd.write('<ul class="toc">\n')
  150. first = True
  151. has_h2 = False
  152. in_h3 = False
  153. indent = 4
  154. for tag, href, text in data:
  155. if tag == 'h3' and not in_h3 and has_h2:
  156. fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
  157. indent += 4
  158. in_h3 = True
  159. elif not first:
  160. fd.write('</li>\n')
  161. if tag == 'h2':
  162. has_h2 = True
  163. if in_h3:
  164. indent -= 4
  165. fd.write('%s</ul></li>\n' % (' ' * indent))
  166. in_h3 = False
  167. fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
  168. (' ' * indent, escape_href(text), text))
  169. first = False
  170. fd.write('</li>\n</ul>\n')
  171. fd.write('</div>\n')
  172. def update_toc(data):
  173. ret_data = []
  174. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  175. idx = 1
  176. for line in data.splitlines():
  177. if pat.search(line):
  178. xline = pat.split(line)
  179. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  180. idx += 1
  181. ret_data.append(line)
  182. return '\n'.join(ret_data)
  183. # process header
  184. src_data = read_file(src_file)
  185. name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
  186. pgm_desc = None
  187. if name:
  188. pgm = name.group(2).strip().split('-', 1)[0].strip()
  189. name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
  190. if name_desc:
  191. pgm_desc = name_desc.group(2).strip()
  192. desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
  193. re.IGNORECASE)
  194. if desc:
  195. pgm = desc.group(2).strip()
  196. header_tmpl = string.Template(header_base + header_nopgm)
  197. else:
  198. if not pgm_desc:
  199. header_tmpl = string.Template(header_base + header_pgm)
  200. else:
  201. header_tmpl = string.Template(header_base + header_pgm_desc)
  202. if not re.search('<html>', src_data, re.IGNORECASE):
  203. tmp_data = read_file(tmp_file)
  204. if not re.search('<html>', tmp_data, re.IGNORECASE):
  205. sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
  206. if tmp_data:
  207. for line in tmp_data.splitlines(True):
  208. if not re.search('</body>|</html>', line, re.IGNORECASE):
  209. sys.stdout.write(line)
  210. # create TOC
  211. write_toc(create_toc(src_data))
  212. # process body
  213. sys.stdout.write(update_toc(src_data))
  214. # if </html> is found, suppose a complete html is provided.
  215. # otherwise, generate module class reference:
  216. if re.search('</html>', src_data, re.IGNORECASE):
  217. sys.exit()
  218. index_names = {
  219. 'd' : 'display',
  220. 'db': 'database',
  221. 'g' : 'general',
  222. 'i' : 'imagery',
  223. 'm' : 'misc',
  224. 'ps': 'postscript',
  225. 'p' : 'paint',
  226. 'r' : 'raster',
  227. 'r3': 'raster3d',
  228. 's' : 'sites',
  229. 't' : 'temporal',
  230. 'v' : 'vector'
  231. }
# TODO: special code for ps/postscript/PostScript and m/misc/Miscellaneous
  233. def to_title(name):
  234. """Convert name of command class/family to form suitable for title"""
  235. if name == 'raster3d':
  236. return '3D raster'
  237. else:
  238. return name.capitalize()
  239. index_titles = {}
  240. for key, name in index_names.iteritems():
  241. index_titles[key] = to_title(name)
  242. # process footer
  243. index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
  244. if index:
  245. index_name = index.group(2).strip()
  246. if '|' in index_name:
  247. index_name, index_name_cap = index_name.split('|', 1)
  248. else:
  249. index_name_cap = to_title(index_name)
  250. else:
  251. mod_class = pgm.split('.', 1)[0]
  252. index_name = index_names.get(mod_class, '')
  253. index_name_cap = index_titles.get(mod_class, '')
  254. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  255. year = os.getenv("VERSION_DATE")
  256. if not year:
  257. year = str(datetime.now().year)
  258. # check the names of scripts to assign the right folder
  259. topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
  260. curdir = os.path.abspath(os.path.curdir)
  261. pgmdir = curdir.replace(topdir, '').lstrip('/')
  262. url_source = urlparse.urljoin(source_url, pgmdir)
  263. if index_name:
  264. sys.stdout.write(sourcecode.substitute(URL_SOURCE=url_source, PGM=pgm,
  265. URL_LOG=url_source.replace('browser', 'log')))
  266. sys.stdout.write(footer_index.substitute(INDEXNAME=index_name,
  267. INDEXNAMECAP=index_name_cap,
  268. YEAR=year, GRASS_VERSION=grass_version))
  269. else:
  270. sys.stdout.write(footer_noindex.substitute(YEAR=year,
  271. GRASS_VERSION=grass_version))