mkhtml.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: Builds manual pages
  5. # AUTHOR(S): Markus Neteler
  6. # Glynn Clements
  7. # Martin Landa <landa.martin gmail.com>
  8. # PURPOSE: Create HTML manual page snippets
  9. # COPYRIGHT: (C) 2007-2014 by Glynn Clements
  10. # and the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General
  13. # Public License (>=v2). Read the file COPYING that
  14. # comes with GRASS for details.
  15. #
  16. #############################################################################
  17. import sys
  18. import os
  19. import string
  20. import re
  21. from datetime import datetime
  22. from HTMLParser import HTMLParser
  23. pgm = sys.argv[1]
  24. src_file = "%s.html" % pgm
  25. tmp_file = "%s.tmp.html" % pgm
  26. header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  27. <html>
  28. <head>
  29. <title>GRASS GIS Manual: ${PGM}</title>
  30. <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  31. <link rel="stylesheet" href="grassdocs.css" type="text/css">
  32. </head>
  33. <body bgcolor="white">
  34. <img src="grass_logo.png" alt="GRASS logo">
  35. <hr class="header">
  36. """
  37. header_nopgm = """<h2>${PGM}</h2>
  38. """
  39. header_pgm = """<h2>NAME</h2>
  40. <em><b>${PGM}</b></em>
  41. """
  42. footer_index = string.Template(\
  43. """<hr class="header">
  44. <p><a href="index.html">Main index</a> | <a href="${INDEXNAME}.html">${INDEXNAMECAP} index</a> | <a href="topics.html">Topics index</a> | <a href="keywords.html">Keywords Index</a> | <a href="full_index.html">Full index</a></p>
  45. <p>&copy; 2003-${YEAR} <a href="http://grass.osgeo.org">GRASS Development Team</a>, GRASS GIS ${GRASS_VERSION} Reference Manual</p>
  46. </body>
  47. </html>
  48. """)
  49. footer_noindex = string.Template(\
  50. """<hr class="header">
  51. <p><a href="index.html">Main index</a> | <a href="topics.html">Topics index</a> | <a href="keywords.html">Keywords Index</a> | <a href="full_index.html">Full index</a></p>
  52. <p>&copy; 2003-${YEAR} <a href="http://grass.osgeo.org">GRASS Development Team</a>, GRASS GIS ${GRASS_VERSION} Reference Manual</p>
  53. </body>
  54. </html>
  55. """)
  56. def read_file(name):
  57. try:
  58. f = open(name, 'rb')
  59. s = f.read()
  60. f.close()
  61. return s
  62. except IOError:
  63. return ""
  64. def create_toc(src_data):
  65. class MyHTMLParser(HTMLParser):
  66. def __init__(self):
  67. self.reset()
  68. self.idx = 1
  69. self.tag_curr = ''
  70. self.tag_last = ''
  71. self.process_text = False
  72. self.data = []
  73. self.tags_allowed = ('h1', 'h2', 'h3')
  74. self.tags_ignored = ('img')
  75. self.text = ''
  76. def handle_starttag(self, tag, attrs):
  77. if tag in self.tags_allowed:
  78. self.process_text = True
  79. self.tag_last = self.tag_curr
  80. self.tag_curr = tag
  81. def handle_endtag(self, tag):
  82. if tag in self.tags_allowed:
  83. self.data.append((tag, '%s_%d' % (tag, self.idx),
  84. self.text))
  85. self.idx += 1
  86. self.process_text = False
  87. self.text = ''
  88. self.tag_curr = self.tag_last
  89. def handle_data(self, data):
  90. if not self.process_text:
  91. return
  92. if self.tag_curr in self.tags_allowed or self.tag_curr in self.tags_ignored:
  93. self.text += data
  94. else:
  95. self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)
  96. # instantiate the parser and fed it some HTML
  97. parser = MyHTMLParser()
  98. parser.feed(src_data)
  99. return parser.data
  100. def escape_href(label):
  101. # remove html tags
  102. label = re.sub('<[^<]+?>', '', label)
  103. # replace space with underscore + lower
  104. return label.replace(' ', '_').lower()
  105. def write_toc(data):
  106. if not data:
  107. return
  108. fd = sys.stdout
  109. fd.write('<div class="toc">\n')
  110. fd.write('<ul class="toc">\n')
  111. first = True
  112. has_h2 = False
  113. in_h3 = False
  114. indent = 4
  115. for tag, href, text in data:
  116. if tag == 'h3' and not in_h3 and has_h2:
  117. fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
  118. indent += 4
  119. in_h3 = True
  120. elif not first:
  121. fd.write('</li>\n')
  122. if tag == 'h2':
  123. has_h2 = True
  124. if in_h3:
  125. indent -= 4
  126. fd.write('%s</ul></li>\n' % (' ' * indent))
  127. in_h3 = False
  128. fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' % \
  129. (' ' * indent, escape_href(text), text))
  130. first = False
  131. fd.write('</li>\n</ul>\n')
  132. fd.write('</div>\n')
  133. def update_toc(data):
  134. ret_data = []
  135. pat = re.compile(r'(<(h[2|3])>)(.+)(</h[2|3]>)')
  136. idx = 1
  137. for line in data.splitlines():
  138. if pat.search(line):
  139. xline = pat.split(line)
  140. line = xline[1] + '<a name="%s">' % escape_href(xline[3]) + xline[3] + '</a>' + xline[4]
  141. idx += 1
  142. ret_data.append(line)
  143. return '\n'.join(ret_data)
  144. # process header
  145. src_data = read_file(src_file)
  146. name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
  147. if name:
  148. pgm = name.group(2).strip().split('-', 1)[0].strip()
  149. desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
  150. re.IGNORECASE)
  151. if desc:
  152. pgm = desc.group(2).strip()
  153. header_tmpl = string.Template(header_base + header_nopgm)
  154. else:
  155. header_tmpl = string.Template(header_base + header_pgm)
  156. if not re.search('<html>', src_data, re.IGNORECASE):
  157. tmp_data = read_file(tmp_file)
  158. if not re.search('<html>', tmp_data, re.IGNORECASE):
  159. sys.stdout.write(header_tmpl.substitute(PGM=pgm))
  160. if tmp_data:
  161. for line in tmp_data.splitlines(True):
  162. if not re.search('</body>|</html>', line, re.IGNORECASE):
  163. sys.stdout.write(line)
  164. # create TOC
  165. write_toc(create_toc(src_data))
  166. # process body
  167. sys.stdout.write(update_toc(src_data))
  168. # if </html> is found, suppose a complete html is provided.
  169. # otherwise, generate module class reference:
  170. if re.search('</html>', src_data, re.IGNORECASE):
  171. sys.exit()
  172. index_names = {
  173. 'd' : 'display',
  174. 'db': 'database',
  175. 'g' : 'general',
  176. 'i' : 'imagery',
  177. 'm' : 'misc',
  178. 'ps': 'postscript',
  179. 'p' : 'paint',
  180. 'r' : 'raster',
  181. 'r3': 'raster3D',
  182. 's' : 'sites',
  183. 't' : 'temporal',
  184. 'v' : 'vector'
  185. }
  186. # process footer
  187. index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
  188. if index:
  189. index_name_cap = index_name = index.group(2).strip()
  190. else:
  191. mod_class = pgm.split('.', 1)[0]
  192. index_name = index_names.get(mod_class, '')
  193. index_name_cap = index_name.title()
  194. grass_version = os.getenv("VERSION_NUMBER", "unknown")
  195. year = os.getenv("VERSION_DATE")
  196. if not year:
  197. year = str(datetime.now().year)
  198. if index_name:
  199. sys.stdout.write(footer_index.substitute(INDEXNAME=index_name,
  200. INDEXNAMECAP=index_name_cap,
  201. YEAR=year,
  202. GRASS_VERSION=grass_version))
  203. else:
  204. sys.stdout.write(footer_noindex.substitute(YEAR=year,
  205. GRASS_VERSION=grass_version))