# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """String network description language to define network layouts.""" import collections import tensorflow as tf from tensorflow.python.ops import parsing_ops # Named tuple for the standard tf image tensor Shape. # batch_size: Number of images to batch-up for training. # height: Fixed height of image or None for variable. # width: Fixed width of image or None for variable. # depth: Desired depth in bytes per pixel of input images. ImageShape = collections.namedtuple('ImageTensorDims', ['batch_size', 'height', 'width', 'depth']) def ImageInput(input_pattern, num_threads, shape, using_ctc, reader=None): """Creates an input image tensor from the input_pattern filenames. TODO(rays) Expand for 2-d labels, 0-d labels, and logistic targets. Args: input_pattern: Filenames of the dataset(s) to read. num_threads: Number of preprocessing threads. shape: ImageShape with the desired shape of the input. using_ctc: Take the unpadded_class labels instead of padded. reader: Function that returns an actual reader to read Examples from input files. If None, uses tf.TFRecordReader(). Returns: images: Float Tensor containing the input image scaled to [-1.28, 1.27]. heights: Tensor int64 containing the heights of the images. widths: Tensor int64 containing the widths of the images. labels: Serialized SparseTensor containing the int64 labels. sparse_labels: Serialized SparseTensor containing the int64 labels. truths: Tensor string of the utf8 truth texts. Raises: ValueError: if the optimizer type is unrecognized. """ data_files = tf.gfile.Glob(input_pattern) assert data_files, 'no files found for dataset ' + input_pattern queue_capacity = shape.batch_size * num_threads * 2 filename_queue = tf.train.string_input_producer( data_files, capacity=queue_capacity) # Create a subgraph with its own reader (but sharing the # filename_queue) for each preprocessing thread. images_and_label_lists = [] for _ in range(num_threads): image, height, width, labels, text = _ReadExamples(filename_queue, shape, using_ctc, reader) images_and_label_lists.append([image, height, width, labels, text]) # Create a queue that produces the examples in batches. images, heights, widths, labels, truths = tf.train.batch_join( images_and_label_lists, batch_size=shape.batch_size, capacity=16 * shape.batch_size, dynamic_pad=True) # Deserialize back to sparse, because the batcher doesn't do sparse. labels = tf.deserialize_many_sparse(labels, tf.int64) sparse_labels = tf.cast(labels, tf.int32) labels = tf.sparse_tensor_to_dense(labels) labels = tf.reshape(labels, [shape.batch_size, -1], name='Labels') # Crush the other shapes to just the batch dimension. heights = tf.reshape(heights, [-1], name='Heights') widths = tf.reshape(widths, [-1], name='Widths') truths = tf.reshape(truths, [-1], name='Truths') # Give the images a nice name as well. images = tf.identity(images, name='Images') tf.image_summary('Images', images) return images, heights, widths, labels, sparse_labels, truths def _ReadExamples(filename_queue, shape, using_ctc, reader=None): """Builds network input tensor ops for TF Example. Args: filename_queue: Queue of filenames, from tf.train.string_input_producer shape: ImageShape with the desired shape of the input. using_ctc: Take the unpadded_class labels instead of padded. reader: Function that returns an actual reader to read Examples from input files. If None, uses tf.TFRecordReader(). Returns: image: Float Tensor containing the input image scaled to [-1.28, 1.27]. height: Tensor int64 containing the height of the image. width: Tensor int64 containing the width of the image. labels: Serialized SparseTensor containing the int64 labels. text: Tensor string of the utf8 truth text. """ if reader: reader = reader() else: reader = tf.TFRecordReader() _, example_serialized = reader.read(filename_queue) example_serialized = tf.reshape(example_serialized, shape=[]) features = tf.parse_single_example( example_serialized, {'image/encoded': parsing_ops.FixedLenFeature( [1], dtype=tf.string, default_value=''), 'image/text': parsing_ops.FixedLenFeature( [1], dtype=tf.string, default_value=''), 'image/class': parsing_ops.VarLenFeature(dtype=tf.int64), 'image/unpadded_class': parsing_ops.VarLenFeature(dtype=tf.int64), 'image/height': parsing_ops.FixedLenFeature( [1], dtype=tf.int64, default_value=1), 'image/width': parsing_ops.FixedLenFeature( [1], dtype=tf.int64, default_value=1)}) if using_ctc: labels = features['image/unpadded_class'] else: labels = features['image/class'] labels = tf.serialize_sparse(labels) image = tf.reshape(features['image/encoded'], shape=[], name='encoded') image = _ImageProcessing(image, shape) height = tf.reshape(features['image/height'], [-1]) width = tf.reshape(features['image/width'], [-1]) text = tf.reshape(features['image/text'], shape=[]) return image, height, width, labels, text def _ImageProcessing(image_buffer, shape): """Convert a PNG string into an input tensor. We allow for fixed and variable sizes. Does fixed conversion to floats in the range [-1.28, 1.27]. Args: image_buffer: Tensor containing a PNG encoded image. shape: ImageShape with the desired shape of the input. Returns: image: Decoded, normalized image in the range [-1.28, 1.27]. """ image = tf.image.decode_png(image_buffer, channels=shape.depth) image.set_shape([shape.height, shape.width, shape.depth]) image = tf.cast(image, tf.float32) image = tf.sub(image, 128.0) image = tf.mul(image, 1 / 100.0) return image