evaluation_test.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. # Copyright 2017 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Tests for parser evaluation."""
  16. import tensorflow as tf
  17. from dragnn.python import evaluation
  18. from syntaxnet import sentence_pb2
  19. class EvaluationTest(tf.test.TestCase):
  20. def _add_sentence(self, tags, heads, labels, corpus):
  21. """Adds a sentence to the corpus."""
  22. sentence = sentence_pb2.Sentence()
  23. for tag, head, label in zip(tags, heads, labels):
  24. sentence.token.add(word='x', start=0, end=0,
  25. tag=tag, head=head, label=label)
  26. corpus.append(sentence.SerializeToString())
  27. def setUp(self):
  28. self._gold_corpus = []
  29. self._test_corpus = []
  30. # A correct sentence.
  31. self._add_sentence(['DT'], [-1], ['ROOT'], self._gold_corpus)
  32. self._add_sentence(['DT'], [-1], ['ROOT'], self._test_corpus)
  33. # An incorrect sentence. There is one POS mistake, two head mistakes, and
  34. # one label mistake. NB: Since the label mistake occurs on the one token
  35. # with a correct head, this sentence has three mistakes w.r.t. LAS.
  36. self._add_sentence(['DT', 'JJ', 'NN'], [2, 2, -1], ['det', 'amod', 'ROOT'],
  37. self._gold_corpus)
  38. self._add_sentence(['xx', 'JJ', 'NN'], [1, 0, -1], ['det', 'amod', 'xxxx'],
  39. self._test_corpus)
  40. def testCalculateParseMetrics(self):
  41. pos, uas, las = evaluation.calculate_parse_metrics(self._gold_corpus,
  42. self._test_corpus)
  43. self.assertEqual(75, pos)
  44. self.assertEqual(50, uas)
  45. self.assertEqual(25, las)
  46. def testCalculateSegmentationMetrics(self):
  47. self._gold_corpus = []
  48. self._test_corpus = []
  49. def add_sentence_for_segment_eval(starts, ends, corpus):
  50. """Adds a sentence to the corpus."""
  51. sentence = sentence_pb2.Sentence()
  52. for start, end in zip(starts, ends):
  53. sentence.token.add(word='x', start=start, end=end)
  54. corpus.append(sentence.SerializeToString())
  55. # A test case with 5 gold words, 4 test words and 3 are correct.
  56. # -gold tokens: 'This is a gold sentence'
  57. # -test tokens: 'Thisis a gold sentence'
  58. add_sentence_for_segment_eval(
  59. [0, 5, 8, 10, 15], [3, 6, 8, 13, 22], self._gold_corpus)
  60. add_sentence_for_segment_eval(
  61. [0, 8, 10, 15], [6, 8, 13, 22], self._test_corpus)
  62. # Another test case with 3 gold words, 5 test words and 2 correct words.
  63. # -gold tokens: 'another gold sentence'
  64. # -test tokens: 'another gold sen tence'
  65. add_sentence_for_segment_eval([0, 8, 13], [6, 11, 20], self._gold_corpus)
  66. add_sentence_for_segment_eval([0, 8, 13, 17, 21], [6, 11, 15, 19, 22],
  67. self._test_corpus)
  68. prec, rec, f1 = evaluation.calculate_segmentation_metrics(self._gold_corpus,
  69. self._test_corpus)
  70. self.assertEqual(55.56, prec)
  71. self.assertEqual(62.50, rec)
  72. self.assertEqual(58.82, f1)
  73. summaries = evaluation.segmentation_summaries(self._gold_corpus,
  74. self._test_corpus)
  75. self.assertEqual({
  76. 'precision': 55.56,
  77. 'recall': 62.50,
  78. 'f1': 58.82,
  79. 'eval_metric': 58.82
  80. }, summaries)
  81. def testParserSummaries(self):
  82. summaries = evaluation.parser_summaries(self._gold_corpus,
  83. self._test_corpus)
  84. self.assertEqual({
  85. 'POS': 75,
  86. 'UAS': 50,
  87. 'LAS': 25,
  88. 'eval_metric': 25 # equals LAS
  89. }, summaries)
  90. if __name__ == '__main__':
  91. tf.test.main()