render_parse_tree_graphviz.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # -*- coding: utf-8 -*-
  2. """Renders parse trees with Graphviz."""
  3. from __future__ import absolute_import
  4. from __future__ import division
  5. from __future__ import print_function
  6. import base64
  7. import warnings
  8. import pygraphviz
  9. def parse_tree_graph(sentence):
  10. """Constructs a parse tree graph.
  11. Args:
  12. sentence: syntaxnet.Sentence instance.
  13. Returns:
  14. HTML graph contents, as a string.
  15. """
  16. graph = pygraphviz.AGraph(directed=True, strict=False, rankdir="TB")
  17. for i, token in enumerate(sentence.token):
  18. node_id = "tok_{}".format(i)
  19. graph.add_node(node_id, label=token.word)
  20. if token.head >= 0:
  21. src_id = "tok_{}".format(token.head)
  22. graph.add_edge(
  23. src_id,
  24. node_id,
  25. label=token.label,
  26. key="parse_{}_{}".format(node_id, src_id))
  27. with warnings.catch_warnings():
  28. # Fontconfig spews some warnings, suppress them for now. (Especially because
  29. # they can clutter IPython notebooks).
  30. warnings.simplefilter("ignore")
  31. svg = graph.draw(format="svg", prog="dot")
  32. svg = unicode(svg, "utf-8")
  33. # For both inline and "new window" displays, we show the tokens with the
  34. # graph. (The sentence order of nodes is sometimes difficult to read.)
  35. image_and_text = u"<p><em>Text:</em> {}</p>{}".format(" ".join(
  36. token.word for token in sentence.token), svg)
  37. # We generate a base64 URI. This is not too big, but older browsers may not
  38. # handle it well.
  39. new_window_html = (u"<style type='text/css'>svg { max-width: 100%; }</style>"
  40. + image_and_text).encode("utf-8")
  41. as_uri = "data:text/html;charset=utf-8;base64,{}".format(
  42. base64.b64encode(new_window_html))
  43. return u"{}<p><a target='_blank' href='{}'>Open in new window</a></p>".format(
  44. image_and_text, as_uri)