123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371 |
- # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """Provides utilities to preprocess images.
- The preprocessing steps for VGG were introduced in the following technical
- report:
- Very Deep Convolutional Networks For Large-Scale Image Recognition
- Karen Simonyan and Andrew Zisserman
- arXiv technical report, 2015
- PDF: http://arxiv.org/pdf/1409.1556.pdf
- ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
- CC-BY-4.0
- More information can be obtained from the VGG website:
- www.robots.ox.ac.uk/~vgg/research/very_deep/
- """
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- import tensorflow as tf
- from tensorflow.python.ops import control_flow_ops
# Alias kept at module level; unused inside this file, but callers may
# reference it as `vgg_preprocessing.slim` — do not remove.
slim = tf.contrib.slim

# Per-channel means (RGB order) subtracted from images during preprocessing.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Bounds for the randomly sampled smallest side used by aspect-preserving
# resizing at training time; the minimum is also the eval-time default.
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    the cropped (and resized) image.

  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  # with_dependencies guarantees the rank check runs before the cropped
  # shape is built (assertions are otherwise dead nodes in the graph).
  cropped_shape = control_flow_ops.with_dependencies(
      [rank_assertion],
      tf.stack([crop_height, crop_width, original_shape[2]]))

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  # Offsets may arrive as float tensors (e.g. from a division); force int32
  # since tf.slice requires integer begin indices.
  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  image = control_flow_ops.with_dependencies(
      [size_assertion],
      tf.slice(image, offsets, cropped_shape))
  # Reshape so any statically-known crop dimensions propagate to the output.
  return tf.reshape(image, cropped_shape)
def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  # The first image's shape is the reference all others must match; gate it
  # on the rank assertion so indexing [0]/[1] below is valid.
  image_shape = control_flow_ops.with_dependencies(
      [rank_assertions[0]],
      tf.shape(image_list[0]))
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  # Assert that every remaining image matches the first one's spatial size,
  # accumulating all checks so they run before the offsets are drawn.
  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    shape = control_flow_ops.with_dependencies([rank_assertions[i]],
                                               tf.shape(image))
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  max_offset_height = control_flow_ops.with_dependencies(
      asserts, tf.reshape(image_height - crop_height + 1, []))
  max_offset_width = control_flow_ops.with_dependencies(
      asserts, tf.reshape(image_width - crop_width + 1, []))
  # A single offset pair is drawn once and reused, so every image in the
  # list receives the identical crop window.
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.

  Returns:
    the list of cropped images.
  """
  outputs = []
  for image in image_list:
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]

    # Use integer floor division: with Python 3 semantics, `/` on int32
    # tensors produces float tensors that `_crop` then has to truncate back
    # to int32.  `//` keeps the offsets integral end-to-end and yields the
    # same values for the non-negative differences `_crop` asserts.
    offset_height = (image_height - crop_height) // 2
    offset_width = (image_width - crop_width) // 2

    outputs.append(_crop(image, offset_height, offset_width,
                         crop_height, crop_width))
  return outputs
def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  # Split along the channel axis, shift each slice by its mean, and stitch
  # the centered slices back together.
  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  centered = [channel - mean for channel, mean in zip(channels, means)]
  return tf.concat(axis=2, values=centered)
def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.

  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.

  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: an int32 scalar tensor indicating the new width.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height = tf.to_float(height)
  width = tf.to_float(width)
  smallest_side = tf.to_float(smallest_side)

  # Scale so that the smaller of (height, width) lands on smallest_side.
  scale = tf.cond(tf.greater(height, width),
                  lambda: smallest_side / width,
                  lambda: smallest_side / height)
  # Round to the nearest integer before the int cast: a bare to_int32
  # truncates, so float error in `height * scale` can produce
  # `smallest_side - 1` on the small side and later trip the crop-size
  # assertions in `_crop`/`_random_crop`.
  new_height = tf.to_int32(tf.rint(height * scale))
  new_width = tf.to_int32(tf.rint(width * scale))
  return new_height, new_width
def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.

  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  shape = tf.shape(image)
  height = shape[0]
  width = shape[1]
  new_height, new_width = _smallest_size_at_least(height, width,
                                                  smallest_side)
  # resize_bilinear operates on batches; add a batch dimension of 1.
  image = tf.expand_dims(image, 0)
  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                           align_corners=False)
  # Squeeze only the batch dimension added above: a bare tf.squeeze would
  # also drop any other size-1 dimension (e.g. a single-channel image or a
  # degenerate spatial dim), silently changing the result's rank.
  resized_image = tf.squeeze(resized_image, [0])
  resized_image.set_shape([None, None, 3])
  return resized_image
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
  [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  # Draw the resize side uniformly from [min, max] (maxval is exclusive).
  side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)

  resized = _aspect_preserving_resize(image, side)
  cropped = _random_crop([resized], output_height, output_width)[0]
  cropped.set_shape([output_height, output_width, 3])

  flipped = tf.image.random_flip_left_right(tf.to_float(cropped))
  return _mean_image_subtraction(flipped, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side: The smallest side of the image for aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  # Deterministic pipeline: fixed resize, center crop, mean subtraction.
  resized = _aspect_preserving_resize(image, resize_side)
  cropped = _central_crop([resized], output_height, output_width)[0]
  cropped.set_shape([output_height, output_width, 3])
  return _mean_image_subtraction(tf.to_float(cropped),
                                 [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
      [resize_size_min, resize_size_max].

  Returns:
    A preprocessed image.
  """
  # Guard-clause dispatch: eval path uses the fixed min side only.
  if not is_training:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min)
  return preprocess_for_train(image, output_height, output_width,
                              resize_side_min, resize_side_max)
|