123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249 |
- # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """Tests for vgsl_model."""
- import os
- import numpy as np
- import tensorflow as tf
- import vgsl_input
- import vgsl_model
- def _testdata(filename):
- return os.path.join('../testdata/', filename)
- def _rand(*size):
- return np.random.uniform(size=size).astype('f')
class VgslModelTest(tf.test.TestCase):
  """Tests the spec parsers, label padding and end-to-end sizes of vgsl_model."""

  def testParseInputSpec(self):
    """The parser must return the numbers in the correct order."""
    shape = vgsl_model._ParseInputSpec(input_spec='32,42,256,3')
    self.assertEqual(
        shape,
        vgsl_input.ImageShape(
            batch_size=32, height=42, width=256, depth=3))
    # Nones must be inserted for zero sizes.
    shape = vgsl_model._ParseInputSpec(input_spec='1,0,0,3')
    self.assertEqual(
        shape,
        vgsl_input.ImageShape(
            batch_size=1, height=None, width=None, depth=3))

  def testParseOutputSpec(self):
    """The parser must return the correct args in the correct order."""
    # (output_spec, (expected out_dims, expected out_func, expected classes)).
    cases = [
        ('O1c142', (1, 'c', 142)),
        ('O2s99', (2, 's', 99)),
        ('O0l12', (0, 'l', 12)),
    ]
    for output_spec, (want_dims, want_func, want_classes) in cases:
      out_dims, out_func, num_classes = vgsl_model._ParseOutputSpec(
          output_spec=output_spec)
      self.assertEqual(out_dims, want_dims, output_spec)
      self.assertEqual(out_func, want_func, output_spec)
      self.assertEqual(num_classes, want_classes, output_spec)

  def testPadLabels2d(self):
    """Must pad or crop timesteps in labels to match logits."""
    with self.test_session() as sess:
      # Make placeholders for logits and labels.
      ph_logits = tf.placeholder(tf.float32, shape=(None, None, 42))
      ph_labels = tf.placeholder(tf.int64, shape=(None, None))
      padded_labels = vgsl_model._PadLabels2d(tf.shape(ph_logits)[1], ph_labels)
      # Make actual inputs. Only the shapes are checked below.
      real_logits = _rand(4, 97, 42)
      # Labels shorter than, equal to and longer than the 97 logit timesteps
      # must all come out with exactly 97 timesteps.
      for num_steps in (85, 97, 100):
        real_labels = _rand(4, num_steps)
        np_array = sess.run(padded_labels,
                            feed_dict={ph_logits: real_logits,
                                       ph_labels: real_labels})
        self.assertEqual(tuple(np_array.shape), (4, 97), num_steps)

  def testPadLabels3d(self):
    """Must pad or crop height and width in labels to match logits.

    The tricky thing with 3-d is that the rows and columns need to remain
    intact, so we'll test it with small known data.
    """
    # All 9 combinations of height x width in [small, ok, big] relative to the
    # 3 x 4 logits, as (label height, label width, expected 3 x 4 labels).
    # The input labels are 0, 1, 2, ... laid out row by row.
    cases = [
        (2, 3, [[0, 1, 2, 0], [3, 4, 5, 0], [0, 0, 0, 0]]),
        (2, 4, [[0, 1, 2, 3], [4, 5, 6, 7], [0, 0, 0, 0]]),
        (2, 5, [[0, 1, 2, 3], [5, 6, 7, 8], [0, 0, 0, 0]]),
        (3, 3, [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0]]),
        (3, 4, [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]),
        (3, 5, [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]]),
        (4, 3, [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0]]),
        (4, 4, [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]),
        (4, 5, [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]]),
    ]
    with self.test_session() as sess:
      # Make placeholders for logits and labels.
      ph_logits = tf.placeholder(tf.float32, shape=(None, None, None, 42))
      ph_labels = tf.placeholder(tf.int64, shape=(None, None, None))
      padded_labels = vgsl_model._PadLabels3d(ph_logits, ph_labels)
      # Make actual inputs.
      real_logits = _rand(1, 3, 4, 42)
      for height, width, expected in cases:
        real_labels = np.arange(height * width).reshape((1, height, width))
        np_array = sess.run(padded_labels,
                            feed_dict={ph_logits: real_logits,
                                       ph_labels: real_labels})
        self.assertEqual(tuple(np_array.shape), (1, 3, 4), (height, width))
        self.assertAllEqual(np_array[0, :, :], expected)

  def _RunTrainAndEvalStep(self, testdata_name, model_spec):
    """Builds a network, trains it for one step and then runs it for one step.

    Args:
      testdata_name: Name of the input data within the testdata directory.
      model_spec: Model specification string passed to vgsl_model.InitNetwork.

    Returns:
      The (output, labels) pair returned by the model's RunAStep.
    """
    filename = _testdata(testdata_name)
    with self.test_session() as sess:
      model = vgsl_model.InitNetwork(
          filename, model_spec=model_spec, mode='train')
      tf.global_variables_initializer().run(session=sess)
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      try:
        _, step = model.TrainAStep(sess)
        self.assertEqual(step, 1)
        return model.RunAStep(sess)
      finally:
        # Stop and join the queue-runner threads while the session is still
        # open, so that they are not left running against a closed session.
        coord.request_stop()
        coord.join(threads)

  def testEndToEndSizes0d(self):
    """Tests that the output sizes match when training/running real 0d data.

    Uses mnist with dual summarizing LSTMs to reduce to a single value.
    """
    output, labels = self._RunTrainAndEvalStep(
        'mnist-tiny',
        model_spec='4,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lfxs16]O0s12')
    self.assertEqual(len(output.shape), 2)
    self.assertEqual(len(labels.shape), 1)
    self.assertEqual(output.shape[0], labels.shape[0])
    self.assertEqual(output.shape[1], 12)

  # TODO(rays) Support logistic and test with Imagenet (as 0d, multi-object.)

  def testEndToEndSizes1dCTC(self):
    """Tests that the output sizes match when training with CTC.

    Basic bidi LSTM on top of convolution and summarizing LSTM with CTC.
    """
    output, labels = self._RunTrainAndEvalStep(
        'arial-32-tiny',
        model_spec='2,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lbx100]O1c105')
    self.assertEqual(len(output.shape), 3)
    self.assertEqual(len(labels.shape), 2)
    self.assertEqual(output.shape[0], labels.shape[0])
    # This is ctc - the only cast-iron guarantee is labels <= output.
    self.assertLessEqual(labels.shape[1], output.shape[1])
    self.assertEqual(output.shape[2], 105)

  def testEndToEndSizes1dFixed(self):
    """Tests that the output sizes match when training/running 1-d data.

    Convolution, summarizing LSTM with fwd rev fwd to allow no CTC.
    """
    output, labels = self._RunTrainAndEvalStep(
        'numbers-16-tiny',
        model_spec='8,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lfx64 Lrx64 Lfx64]O1s12')
    self.assertEqual(len(output.shape), 3)
    self.assertEqual(len(labels.shape), 2)
    self.assertEqual(output.shape[0], labels.shape[0])
    # Not CTC, output lengths match.
    self.assertEqual(output.shape[1], labels.shape[1])
    self.assertEqual(output.shape[2], 12)

  # TODO(rays) Get a 2-d dataset and support 2d (heat map) outputs.
# Run all the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
|