# lexicon_test.py
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for SyntaxNet lexicon."""
  16. import os
  17. import os.path
  18. import tensorflow as tf
  19. from google.protobuf import text_format
  20. from dragnn.python import lexicon
  21. # Imported for FLAGS.tf_master, which is used in the lexicon module.
  22. from syntaxnet import parser_trainer
  23. from syntaxnet import task_spec_pb2
  24. import syntaxnet.load_parser_ops
  25. FLAGS = tf.app.flags.FLAGS
  26. if not hasattr(FLAGS, 'test_srcdir'):
  27. FLAGS.test_srcdir = ''
  28. if not hasattr(FLAGS, 'test_tmpdir'):
  29. FLAGS.test_tmpdir = tf.test.get_temp_dir()
  30. _EXPECTED_CONTEXT = r"""
  31. input { name: "word-map" Part { file_pattern: "/tmp/word-map" } }
  32. input { name: "tag-map" Part { file_pattern: "/tmp/tag-map" } }
  33. input { name: "tag-to-category" Part { file_pattern: "/tmp/tag-to-category" } }
  34. input { name: "lcword-map" Part { file_pattern: "/tmp/lcword-map" } }
  35. input { name: "category-map" Part { file_pattern: "/tmp/category-map" } }
  36. input { name: "char-map" Part { file_pattern: "/tmp/char-map" } }
  37. input { name: "char-ngram-map" Part { file_pattern: "/tmp/char-ngram-map" } }
  38. input { name: "label-map" Part { file_pattern: "/tmp/label-map" } }
  39. input { name: "prefix-table" Part { file_pattern: "/tmp/prefix-table" } }
  40. input { name: "suffix-table" Part { file_pattern: "/tmp/suffix-table" } }
  41. """
  42. class LexiconTest(tf.test.TestCase):
  43. def testCreateLexiconContext(self):
  44. expected_context = task_spec_pb2.TaskSpec()
  45. text_format.Parse(_EXPECTED_CONTEXT, expected_context)
  46. self.assertProtoEquals(
  47. lexicon.create_lexicon_context('/tmp'), expected_context)
  48. def testBuildLexicon(self):
  49. empty_input_path = os.path.join(FLAGS.test_tmpdir, 'empty-input')
  50. lexicon_output_path = os.path.join(FLAGS.test_tmpdir, 'lexicon-output')
  51. with open(empty_input_path, 'w'):
  52. pass
  53. # The directory may already exist when running locally multiple times.
  54. if not os.path.exists(lexicon_output_path):
  55. os.mkdir(lexicon_output_path)
  56. # Just make sure this doesn't crash; the lexicon builder op is already
  57. # exercised in its own unit test.
  58. lexicon.build_lexicon(lexicon_output_path, empty_input_path)
  59. if __name__ == '__main__':
  60. tf.test.main()