# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
  15. """Contains model definitions for versions of the Oxford VGG network.
  16. These model definitions were introduced in the following technical report:
  17. Very Deep Convolutional Networks For Large-Scale Image Recognition
  18. Karen Simonyan and Andrew Zisserman
  19. arXiv technical report, 2015
  20. PDF: http://arxiv.org/pdf/1409.1556.pdf
  21. ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  22. CC-BY-4.0
  23. More information can be obtained from the VGG website:
  24. www.robots.ox.ac.uk/~vgg/research/very_deep/
  25. Usage:
  26. with slim.arg_scope(vgg.vgg_arg_scope()):
  27. outputs, end_points = vgg.vgg_a(inputs)
  28. with slim.arg_scope(vgg.vgg_arg_scope()):
  29. outputs, end_points = vgg.vgg_16(inputs)
  30. @@vgg_a
  31. @@vgg_16
  32. @@vgg_19
  33. """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def vgg_arg_scope(weight_decay=0.0005):
  """Defines the VGG arg scope.

  Args:
    weight_decay: The l2 regularization coefficient.

  Returns:
    An arg_scope.
  """
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_initializer=tf.zeros_initializer()):
    with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
      return arg_sc
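
# A minimal usage sketch (not part of the original file; `images` is an
# illustrative placeholder): every conv2d/fully_connected layer built inside
# the scope picks up ReLU activations, L2 weight decay, and zero-initialized
# biases, and conv2d layers default to 'SAME' padding.
#
#   with slim.arg_scope(vgg_arg_scope(weight_decay=0.0005)):
#     net = slim.conv2d(images, 64, [3, 3], scope='conv_example')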


def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the logits and the end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_a.default_image_size = 224
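
# Usage sketch (not from the original file; `images` is illustrative): building
# VGG-A for 224x224 RGB inputs and reading the class logits.
#
#   images = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   with slim.arg_scope(vgg_arg_scope()):
#     logits, end_points = vgg_a(images, num_classes=1000, is_training=False)
#   # With spatial_squeeze=True (the default), `logits` has shape
#   # [batch_size, num_classes].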


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the logits and the end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_16.default_image_size = 224
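
# Usage sketch (an assumption, not part of the original file): because the
# fully connected layers are expressed as convolutions, vgg_16 can also run
# fully convolutionally on larger inputs by disabling the spatial squeeze,
# yielding a spatial map of logits instead of a single vector per image.
#
#   large_images = tf.placeholder(tf.float32, [None, 384, 384, 3])
#   with slim.arg_scope(vgg_arg_scope()):
#     logits_map, _ = vgg_16(large_images, num_classes=1000,
#                            is_training=False, spatial_squeeze=False)
#   # `logits_map` keeps spatial dimensions (one prediction per location).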


def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the logits and the end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_19.default_image_size = 224
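
# Usage sketch (an assumption): intermediate activations can be read back from
# the returned end_points dict, which is keyed by variable-scope name. The
# exact key strings depend on slim's collection naming; they typically look
# like '<scope>/<block>/<op>'.
#
#   with slim.arg_scope(vgg_arg_scope()):
#     _, end_points = vgg_19(images, is_training=False)  # `images` as above
#   conv5 = end_points['vgg_19/conv5/conv5_4']  # assumed key format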


# Alias
vgg_d = vgg_16
vgg_e = vgg_19