123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """Provides utilities to preprocess images for the ResNet networks."""
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- import tensorflow as tf
- from tensorflow.contrib.slim import nets
- from tensorflow.python.ops import control_flow_ops
# Short alias for the TF-Slim library.
slim = tf.contrib.slim

# Per-channel (R, G, B) values that preprocess_image subtracts from each pixel.
_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94

# Default crop height and width used by preprocess_image; both are taken from
# the default input size declared by the resnet_v1 network.
_CROP_HEIGHT = _CROP_WIDTH = nets.resnet_v1.resnet_v1.default_image_size

# Default length of the shorter image side after the aspect-preserving resize.
_RESIZE_SIDE = 256
def _mean_image_subtraction(image, means):
    """Centers an image by removing a fixed value from every channel.

    The channel count is read from the static shape of `image`, so its rank
    must be known when the graph is built.

    Example:
      means = [123.68, 116.779, 103.939]
      image = _mean_image_subtraction(image, means)

    Args:
      image: a tensor of size [height, width, C].
      means: a C-vector holding the value to subtract from each channel.

    Returns:
      the centered image.

    Raises:
      ValueError: If the rank of `image` is unknown or is not three, or if the
        number of channels in `image` differs from the number of values in
        `means`.
    """
    static_shape = image.get_shape()
    if static_shape.ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')

    num_channels = static_shape.as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    # Split along the channel axis (2), shift every slice by its mean, then
    # stitch the slices back together along the same axis.
    # NOTE(review): the axis-first argument order matches the pre-1.0
    # tf.split / tf.concat signatures -- confirm the targeted TF version.
    per_channel = tf.split(2, num_channels, image)
    centered = [channel - mean for channel, mean in zip(per_channel, means)]
    return tf.concat(2, centered)
def _smallest_size_at_least(height, width, smallest_side):
    """Computes new shape with the smallest side equal to `smallest_side`.

    Both sides are multiplied by the same factor, so the original aspect ratio
    is preserved. The factor is chosen so that the shorter of the two sides
    maps onto `smallest_side`.

    Args:
      height: an int32 scalar tensor indicating the current height.
      width: an int32 scalar tensor indicating the current width.
      smallest_side: a python integer indicating the smallest side of the new
        shape.

    Returns:
      new_height: an int32 scalar tensor indicating the new height.
      new_width: an int32 scalar tensor indicating the new width.
    """
    height = tf.to_float(height)
    width = tf.to_float(width)
    smallest_side = float(smallest_side)

    # Divide by whichever side is shorter so that side lands on smallest_side.
    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest_side / width,
                    lambda: smallest_side / height)

    # Round to the nearest integer instead of truncating: floating-point error
    # can leave `side * scale` a hair below the intended integer (for example
    # 255.99998), and a plain int cast would then return smallest_side - 1.
    new_height = tf.to_int32(tf.round(height * scale))
    new_width = tf.to_int32(tf.round(width * scale))
    return new_height, new_width
def _aspect_preserving_resize(image, smallest_side):
    """Resize images preserving the original aspect ratio.

    Args:
      image: a 3-D image tensor.
      smallest_side: a python integer indicating the size of the smallest side
        after resize.

    Returns:
      resized_image: a 3-D tensor containing the resized image. Its static
        shape is declared as [None, None, 3].
    """
    shape = tf.shape(image)
    new_height, new_width = _smallest_size_at_least(
        shape[0], shape[1], smallest_side)

    # The image is given a leading batch dimension of one before it is passed
    # to tf.image.resize_bilinear.
    batched = tf.expand_dims(image, 0)
    resized = tf.image.resize_bilinear(batched, [new_height, new_width],
                                       align_corners=False)

    # Remove only the batch dimension added above. An unrestricted squeeze
    # would also drop any height, width or channel dimension of size one and
    # silently change the rank of the result.
    resized_image = tf.squeeze(resized, [0])
    resized_image.set_shape([None, None, 3])
    return resized_image
def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Cuts a window of the requested size out of an image.

    The image size does not have to be known statically, but the image is
    expected to have rank 3; both the rank and the size are checked with
    assertions attached to the returned tensor.

    Args:
      image: an image of shape [height, width, channels].
      offset_height: a scalar tensor indicating the height offset.
      offset_width: a scalar tensor indicating the width offset.
      crop_height: the height of the cropped image.
      crop_width: the width of the cropped image.

    Returns:
      the cropped image, of shape [crop_height, crop_width, channels].

    Raises:
      InvalidArgumentError: if the rank is not 3 or if the image dimensions are
        less than the crop size.
    """
    image_shape = tf.shape(image)

    rank_check = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    # The target shape is tied to the rank check, so anything consuming it
    # depends on that assertion.
    target_shape = control_flow_ops.with_dependencies(
        [rank_check],
        tf.pack([crop_height, crop_width, image_shape[2]]))

    tall_enough = tf.greater_equal(image_shape[0], crop_height)
    wide_enough = tf.greater_equal(image_shape[1], crop_width)
    size_check = tf.Assert(
        tf.logical_and(tall_enough, wide_enough),
        ['Crop size greater than the image size.'])

    # Start of the window along (height, width, channel), cast to int32.
    begin = tf.to_int32(tf.pack([offset_height, offset_width, 0]))

    # tf.slice is used rather than crop_to_bounding_box because it accepts
    # tensors for the crop size.
    window = control_flow_ops.with_dependencies(
        [size_check],
        tf.slice(image, begin, target_shape))
    return tf.reshape(window, target_shape)
def _central_crop(image_list, crop_height, crop_width):
    """Crops the central region out of every image in a list.

    Args:
      image_list: a list of image tensors of the same dimension but possibly
        varying channel.
      crop_height: the height of the image following the crop.
      crop_width: the width of the image following the crop.

    Returns:
      the list of cropped images.
    """

    def _crop_center(img):
        """Crops one image so the window is centered in height and width."""
        dims = tf.shape(img)
        # The module enables `from __future__ import division`, so `/` is true
        # division here; _crop casts the offsets to int32 before slicing.
        top = (dims[0] - crop_height) / 2
        left = (dims[1] - crop_width) / 2
        return _crop(img, top, left, crop_height, crop_width)

    return [_crop_center(img) for img in image_list]
def preprocess_image(image,
                     height=_CROP_HEIGHT,
                     width=_CROP_WIDTH,
                     is_training=False,  # pylint: disable=unused-argument
                     resize_side=_RESIZE_SIDE):
    """Resizes, center-crops and mean-centers an image for a ResNet network.

    Args:
      image: a 3-D image tensor.
      height: the height of the image after the central crop.
      width: the width of the image after the central crop.
      is_training: accepted but not used by this function.
      resize_side: the length of the smallest image side after the
        aspect-preserving resize that precedes the crop.

    Returns:
      a float tensor with static shape [height, width, 3] from which the
      per-channel means have been subtracted.
    """
    resized = _aspect_preserving_resize(image, resize_side)

    cropped = _central_crop([resized], height, width)[0]
    # The crop size is known here, so record it in the static shape.
    cropped.set_shape([height, width, 3])

    channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
    return _mean_image_subtraction(tf.to_float(cropped), channel_means)
|