- # Copyright 2016 The TensorFlow Authors All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """Contains different architectures for the different DSN parts.
- We define here the modules that can be used in the different parts of the DSN
- model.
- - shared encoder (dsn_cropped_linemod, dann_xxxx)
- - private encoder (default_encoder)
- - decoder (large_decoder, gtsrb_decoder, small_decoder)
- """
- import tensorflow as tf
- # Local utility module (models/domain_adaptation/domain_separation/utils.py).
- import utils
- slim = tf.contrib.slim
- def default_batch_norm_params(is_training=False):
- """Returns default batch normalization parameters for DSNs.
- Args:
- is_training: whether or not the model is training.
- Returns:
- a dictionary that maps batch norm parameter names (strings) to values.
- """
- return {
- # Decay for the moving averages.
- 'decay': 0.5,
- # epsilon to prevent 0s in variance.
- 'epsilon': 0.001,
- 'is_training': is_training
- }
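- # Illustrative usage sketch: the dictionary returned above is meant to be
- # passed as `normalizer_params` together with `slim.batch_norm`, which is how
- # the encoders and decoders below consume it. The layer sizes here are
- # arbitrary assumptions chosen only for the example.
- def _example_batch_norm_usage(images, is_training=True):
-   bn_params = default_batch_norm_params(is_training=is_training)
-   with slim.arg_scope([slim.conv2d],
-                       normalizer_fn=slim.batch_norm,
-                       normalizer_params=bn_params):
-     return slim.conv2d(images, 32, [3, 3], scope='example_conv')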
- ################################################################################
- # PRIVATE ENCODERS
- ################################################################################
- def default_encoder(images, code_size, batch_norm_params=None,
- weight_decay=0.0):
- """Encodes the given images to codes of the given size.
- Args:
- images: a tensor of size [batch_size, height, width, channels].
- code_size: the number of hidden units in the code layer of the classifier.
- batch_norm_params: a dictionary that maps batch norm parameter names to
- values.
- weight_decay: the value for the weight decay coefficient.
- Returns:
- end_points: a dictionary of end-point tensors; the code is exposed under the key 'fc3'.
- """
- end_points = {}
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,
- normalizer_fn=slim.batch_norm,
- normalizer_params=batch_norm_params):
- with slim.arg_scope([slim.conv2d], kernel_size=[5, 5], padding='SAME'):
- net = slim.conv2d(images, 32, scope='conv1')
- net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
- net = slim.conv2d(net, 64, scope='conv2')
- net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
- net = slim.flatten(net)
- end_points['flatten'] = net
- net = slim.fully_connected(net, code_size, scope='fc1')
- end_points['fc3'] = net
- return end_points
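- # Illustrative usage sketch for the private encoder: the batch size, image
- # shape and code size below are arbitrary assumptions (MNIST-sized inputs).
- def _example_default_encoder():
-   images = tf.zeros([32, 28, 28, 1])
-   end_points = default_encoder(
-       images, code_size=100,
-       batch_norm_params=default_batch_norm_params(is_training=True))
-   return end_points['fc3']  # The [32, 100] private code tensor.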
- ################################################################################
- # DECODERS
- ################################################################################
- def large_decoder(codes,
- height,
- width,
- channels,
- batch_norm_params=None,
- weight_decay=0.0):
- """Decodes the codes to a fixed output size.
- Args:
- codes: a tensor of size [batch_size, code_size].
- height: the height of the output images.
- width: the width of the output images.
- channels: the number of the output channels.
- batch_norm_params: a dictionary that maps batch norm parameter names to
- values.
- weight_decay: the value for the weight decay coefficient.
- Returns:
- recons: the reconstruction tensor of shape [batch_size, height, width, channels].
- """
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,
- normalizer_fn=slim.batch_norm,
- normalizer_params=batch_norm_params):
- net = slim.fully_connected(codes, 600, scope='fc1')
- batch_size = net.get_shape().as_list()[0]
- net = tf.reshape(net, [batch_size, 10, 10, 6])
- net = slim.conv2d(net, 32, [5, 5], scope='conv1_1')
- net = tf.image.resize_nearest_neighbor(net, (16, 16))
- net = slim.conv2d(net, 32, [5, 5], scope='conv2_1')
- net = tf.image.resize_nearest_neighbor(net, (32, 32))
- net = slim.conv2d(net, 32, [5, 5], scope='conv3_2')
- output_size = [height, width]
- net = tf.image.resize_nearest_neighbor(net, output_size)
- with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]):
- net = slim.conv2d(net, channels, activation_fn=None, scope='conv4_1')
- return net
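- # Illustrative usage sketch: reconstructing 28 x 28 x 1 images from a batch of
- # codes. The shapes are arbitrary assumptions; note that `codes` needs a
- # statically known batch size because the decoder reshapes with the value
- # returned by get_shape().
- def _example_large_decoder():
-   codes = tf.zeros([32, 100])
-   recons = large_decoder(
-       codes, height=28, width=28, channels=1,
-       batch_norm_params=default_batch_norm_params(is_training=True))
-   return recons  # A [32, 28, 28, 1] reconstruction tensor.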
- def gtsrb_decoder(codes,
- height,
- width,
- channels,
- batch_norm_params=None,
- weight_decay=0.0):
- """Decodes the codes to a fixed output size. This decoder is specific to GTSRB
- Args:
- codes: a tensor of size [batch_size, 100].
- height: the height of the output images.
- width: the width of the output images.
- channels: the number of the output channels.
- batch_norm_params: a dictionary that maps batch norm parameter names to
- values.
- weight_decay: the value for the weight decay coefficient.
- Returns:
- recons: the reconstruction tensor of shape [batch_size, height, width, channels].
- Raises:
- ValueError: When the input code size is not 100.
- """
- batch_size, code_size = codes.get_shape().as_list()
- if code_size != 100:
- raise ValueError('The code size used as an input to the GTSRB decoder is '
- 'expected to be 100.')
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,
- normalizer_fn=slim.batch_norm,
- normalizer_params=batch_norm_params):
- net = codes
- net = tf.reshape(net, [batch_size, 10, 10, 1])
- net = slim.conv2d(net, 32, [3, 3], scope='conv1_1')
- # First upsampling 20x20
- net = tf.image.resize_nearest_neighbor(net, [20, 20])
- net = slim.conv2d(net, 32, [3, 3], scope='conv2_1')
- output_size = [height, width]
- # Final upsampling to the output size (40 x 40 for GTSRB).
- net = tf.image.resize_nearest_neighbor(net, output_size)
- with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]):
- net = slim.conv2d(net, 16, scope='conv3_1')
- net = slim.conv2d(net, channels, activation_fn=None, scope='conv3_2')
- return net
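- # Illustrative usage sketch: this decoder requires a 100-dimensional code
- # (it is reshaped to 10 x 10 x 1); any other code size raises the ValueError
- # above. The 40 x 40 x 3 output matches the GTSRB images used by dann_gtsrb.
- def _example_gtsrb_decoder():
-   codes = tf.zeros([32, 100])
-   return gtsrb_decoder(
-       codes, height=40, width=40, channels=3,
-       batch_norm_params=default_batch_norm_params(is_training=True))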
- def small_decoder(codes,
- height,
- width,
- channels,
- batch_norm_params=None,
- weight_decay=0.0):
- """Decodes the codes to a fixed output size.
- Args:
- codes: a tensor of size [batch_size, code_size].
- height: the height of the output images.
- width: the width of the output images.
- channels: the number of the output channels.
- batch_norm_params: a dictionary that maps batch norm parameter names to
- values.
- weight_decay: the value for the weight decay coefficient.
- Returns:
- recons: the reconstruction tensor of shape [batch_size, height, width, channels].
- """
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,
- normalizer_fn=slim.batch_norm,
- normalizer_params=batch_norm_params):
- net = slim.fully_connected(codes, 300, scope='fc1')
- batch_size = net.get_shape().as_list()[0]
- net = tf.reshape(net, [batch_size, 10, 10, 3])
- net = slim.conv2d(net, 16, [3, 3], scope='conv1_1')
- net = slim.conv2d(net, 16, [3, 3], scope='conv1_2')
- output_size = [height, width]
- net = tf.image.resize_nearest_neighbor(net, output_size)
- with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]):
- net = slim.conv2d(net, 16, scope='conv2_1')
- net = slim.conv2d(net, channels, activation_fn=None, scope='conv2_2')
- return net
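- # Sketch of a shared caveat (TF 1.x placeholder API assumed): all three
- # decoders recover the batch size via get_shape(), so their inputs need a
- # statically known batch dimension; a None batch dimension would make the
- # tf.reshape call fail.
- def _example_small_decoder_with_placeholder(batch_size=32):
-   codes = tf.placeholder(tf.float32, shape=[batch_size, 300], name='codes')
-   return small_decoder(
-       codes, height=28, width=28, channels=1,
-       batch_norm_params=default_batch_norm_params(is_training=False))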
- ################################################################################
- # SHARED ENCODERS
- ################################################################################
- def dann_mnist(images,
- weight_decay=0.0,
- prefix='model',
- num_classes=10,
- **kwargs):
- """Creates a convolution MNIST model.
- Note that this model implements the architecture for MNIST proposed in:
- Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN),
- JMLR 2015
- Args:
- images: the MNIST digits, a tensor of size [batch_size, 28, 28, 1].
- weight_decay: the value for the weight decay coefficient.
- prefix: name of the model to use when prefixing tags.
- num_classes: the number of output classes to use.
- **kwargs: Placeholder for keyword arguments used by other shared encoders.
- Returns:
- the output logits, a tensor of size [batch_size, num_classes].
- a dictionary with key/values the layer names and tensors.
- """
- end_points = {}
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,):
- with slim.arg_scope([slim.conv2d], padding='SAME'):
- end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1')
- end_points['pool1'] = slim.max_pool2d(
- end_points['conv1'], [2, 2], 2, scope='pool1')
- end_points['conv2'] = slim.conv2d(
- end_points['pool1'], 48, [5, 5], scope='conv2')
- end_points['pool2'] = slim.max_pool2d(
- end_points['conv2'], [2, 2], 2, scope='pool2')
- end_points['fc3'] = slim.fully_connected(
- slim.flatten(end_points['pool2']), 100, scope='fc3')
- end_points['fc4'] = slim.fully_connected(
- slim.flatten(end_points['fc3']), 100, scope='fc4')
- logits = slim.fully_connected(
- end_points['fc4'], num_classes, activation_fn=None, scope='fc5')
- return logits, end_points
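- # Illustrative usage sketch: all shared encoders follow the same calling
- # convention and return (logits, end_points); dann_svhn and dann_gtsrb below
- # are used the same way on 32 x 32 x 3 and 40 x 40 x 3 inputs respectively.
- # The batch size and weight decay are arbitrary assumptions.
- def _example_dann_mnist():
-   images = tf.zeros([32, 28, 28, 1])
-   logits, end_points = dann_mnist(images, weight_decay=1e-4, num_classes=10)
-   return logits, end_points['fc4']  # [32, 10] logits, [32, 100] features.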
- def dann_svhn(images,
- weight_decay=0.0,
- prefix='model',
- num_classes=10,
- **kwargs):
- """Creates the convolutional SVHN model.
- Note that this model implements the architecture proposed for SVHN in:
- Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN),
- JMLR 2016.
- Args:
- images: the SVHN digits, a tensor of size [batch_size, 32, 32, 3].
- weight_decay: the value for the weight decay coefficient.
- prefix: name of the model to use when prefixing tags.
- num_classes: the number of output classes to use.
- **kwargs: Placeholder for keyword arguments used by other shared encoders.
- Returns:
- the output logits, a tensor of size [batch_size, num_classes].
- a dictionary with key/values the layer names and tensors.
- """
- end_points = {}
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,):
- with slim.arg_scope([slim.conv2d], padding='SAME'):
- end_points['conv1'] = slim.conv2d(images, 64, [5, 5], scope='conv1')
- end_points['pool1'] = slim.max_pool2d(
- end_points['conv1'], [3, 3], 2, scope='pool1')
- end_points['conv2'] = slim.conv2d(
- end_points['pool1'], 64, [5, 5], scope='conv2')
- end_points['pool2'] = slim.max_pool2d(
- end_points['conv2'], [3, 3], 2, scope='pool2')
- end_points['conv3'] = slim.conv2d(
- end_points['pool2'], 128, [5, 5], scope='conv3')
- end_points['fc3'] = slim.fully_connected(
- slim.flatten(end_points['conv3']), 3072, scope='fc3')
- end_points['fc4'] = slim.fully_connected(
- slim.flatten(end_points['fc3']), 2048, scope='fc4')
- logits = slim.fully_connected(
- end_points['fc4'], num_classes, activation_fn=None, scope='fc5')
- return logits, end_points
- def dann_gtsrb(images,
- weight_decay=0.0,
- prefix='model',
- num_classes=43,
- **kwargs):
- """Creates the convolutional GTSRB model.
- Note that this model implements the architecture proposed for GTSRB in:
- Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN),
- JMLR 2016.
- Args:
- images: the GTSRB images, a tensor of size [batch_size, 40, 40, 3].
- weight_decay: the value for the weight decay coefficient.
- prefix: name of the model to use when prefixing tags.
- num_classes: the number of output classes to use.
- **kwargs: Placeholder for keyword arguments used by other shared encoders.
- Returns:
- the output logits, a tensor of size [batch_size, num_classes].
- a dictionary with key/values the layer names and tensors.
- """
- end_points = {}
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,):
- with slim.arg_scope([slim.conv2d], padding='SAME'):
- end_points['conv1'] = slim.conv2d(images, 96, [5, 5], scope='conv1')
- end_points['pool1'] = slim.max_pool2d(
- end_points['conv1'], [2, 2], 2, scope='pool1')
- end_points['conv2'] = slim.conv2d(
- end_points['pool1'], 144, [3, 3], scope='conv2')
- end_points['pool2'] = slim.max_pool2d(
- end_points['conv2'], [2, 2], 2, scope='pool2')
- end_points['conv3'] = slim.conv2d(
- end_points['pool2'], 256, [5, 5], scope='conv3')
- end_points['pool3'] = slim.max_pool2d(
- end_points['conv3'], [2, 2], 2, scope='pool3')
- end_points['fc3'] = slim.fully_connected(
- slim.flatten(end_points['pool3']), 512, scope='fc3')
- logits = slim.fully_connected(
- end_points['fc3'], num_classes, activation_fn=None, scope='fc4')
- return logits, end_points
- def dsn_cropped_linemod(images,
- weight_decay=0.0,
- prefix='model',
- num_classes=11,
- batch_norm_params=None,
- is_training=False):
- """Creates the convolutional pose estimation model for Cropped Linemod.
- Args:
- images: the Cropped Linemod samples, a tensor of size
- [batch_size, 64, 64, 4].
- weight_decay: the value for the weight decay coefficient.
- prefix: name of the model to use when prefixing tags.
- num_classes: the number of output classes to use.
- batch_norm_params: a dictionary that maps batch norm parameter names to
- values.
- is_training: specifies whether or not we're currently training the model.
- This variable will determine the behaviour of the dropout layer.
- Returns:
- the output logits, a tensor of size [batch_size, num_classes].
- a dictionary with key/values the layer names and tensors.
- """
- end_points = {}
- tf.summary.image('{}/input_images'.format(prefix), images)
- with slim.arg_scope(
- [slim.conv2d, slim.fully_connected],
- weights_regularizer=slim.l2_regularizer(weight_decay),
- activation_fn=tf.nn.relu,
- normalizer_fn=slim.batch_norm if batch_norm_params else None,
- normalizer_params=batch_norm_params):
- with slim.arg_scope([slim.conv2d], padding='SAME'):
- end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1')
- end_points['pool1'] = slim.max_pool2d(
- end_points['conv1'], [2, 2], 2, scope='pool1')
- end_points['conv2'] = slim.conv2d(
- end_points['pool1'], 64, [5, 5], scope='conv2')
- end_points['pool2'] = slim.max_pool2d(
- end_points['conv2'], [2, 2], 2, scope='pool2')
- net = slim.flatten(end_points['pool2'])
- end_points['fc3'] = slim.fully_connected(net, 128, scope='fc3')
- net = slim.dropout(
- end_points['fc3'], 0.5, is_training=is_training, scope='dropout')
- with tf.variable_scope('quaternion_prediction'):
- predicted_quaternion = slim.fully_connected(
- net, 4, activation_fn=tf.nn.tanh)
- predicted_quaternion = tf.nn.l2_normalize(predicted_quaternion, 1)
- logits = slim.fully_connected(
- net, num_classes, activation_fn=None, scope='fc4')
- end_points['quaternion_pred'] = predicted_quaternion
- return logits, end_points
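- # Illustrative usage sketch: unlike the DANN encoders, this model also emits a
- # unit-norm quaternion pose prediction under the 'quaternion_pred' end point.
- # The 64 x 64 x 4 input shape matches the Cropped Linemod samples described in
- # the docstring; the batch size is an arbitrary assumption.
- def _example_dsn_cropped_linemod():
-   images = tf.zeros([32, 64, 64, 4])
-   logits, end_points = dsn_cropped_linemod(
-       images,
-       batch_norm_params=default_batch_norm_params(is_training=True),
-       is_training=True)
-   return logits, end_points['quaternion_pred']  # [32, 11] and [32, 4].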