g.html2man.py 1.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/usr/bin/env python
  2. import sys
  3. import re
  4. from html import HTMLParser, HTMLParseError
  5. from groff import Formatter
  6. from StringIO import StringIO
  7. entities = {
  8. 'nbsp': " ",
  9. 'bull': "*"
  10. }
  11. def main():
  12. # parse HTML
  13. infile = sys.argv[1]
  14. inf = file(infile)
  15. p = HTMLParser(entities)
  16. for n, line in enumerate(inf):
  17. try:
  18. p.feed(line)
  19. except HTMLParseError, err:
  20. sys.stderr.write('%s:%d:%d: Parse error: %s\n' % (infile, err.lineno, err.offset, err.msg))
  21. sys.exit(1)
  22. except Exception, err:
  23. sys.stderr.write('%s:%d:0: Error (%s): %s\n' % (infile, n + 1, repr(err), line))
  24. sys.exit(1)
  25. p.close()
  26. inf.close()
  27. # generate groff
  28. sf = StringIO()
  29. f = Formatter(infile, sf)
  30. f.pp(p.data)
  31. s = sf.getvalue()
  32. sf.close()
  33. # strip excess whitespace
  34. blank_re = re.compile("[ \t\n]*\n([ \t]*\n)*")
  35. s = blank_re.sub('\n', s)
  36. s = s.lstrip()
  37. # write groff
  38. outf = file(sys.argv[2], 'w')
  39. outf.write(s)
  40. outf.close()
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()