alexnet_benchmark.py
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Timing benchmark for AlexNet inference.

To run, use:
  bazel run -c opt --config=cuda \
      models/tutorials/image/alexnet:alexnet_benchmark

Across 100 steps on batch size = 128.

Forward pass:
Run on Tesla K40c: 145 +/- 1.5 ms / batch
Run on Titan X:     70 +/- 0.1 ms / batch

Forward-backward pass:
Run on Tesla K40c: 480 +/- 48 ms / batch
Run on Titan X:    244 +/- 30 ms / batch
"""
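
# The benchmark can also be launched directly with Python (assuming a
# TensorFlow 1.x environment), using the flags defined at the bottom of
# this file, e.g.:
#   python alexnet_benchmark.py --batch_size=64 --num_batches=50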

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
from datetime import datetime
import math
import sys
import time

from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

FLAGS = None


def print_activations(t):
  print(t.op.name, ' ', t.get_shape().as_list())


def inference(images):
  """Build the AlexNet model.

  Args:
    images: Images Tensor

  Returns:
    pool5: the last Tensor in the convolutional component of AlexNet.
    parameters: a list of Tensors corresponding to the weights and biases of the
        AlexNet model.
  """
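  # Rough activation sizes for a 224x224 input (a sketch derived from the
  # strides and 'SAME'/'VALID' padding rules below; e.g. conv1 uses 'SAME'
  # padding with stride 4, so its spatial size is ceil(224 / 4) = 56):
  #   conv1: 56x56x64    pool1: 27x27x64
  #   conv2: 27x27x192   pool2: 13x13x192
  #   conv3: 13x13x384   conv4: 13x13x256
  #   conv5: 13x13x256   pool5: 6x6x256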
  parameters = []
  # conv1
  with tf.name_scope('conv1') as scope:
    kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
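    # Strides are given in NHWC order: one step over batch and channels,
    # four pixels over height and width.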
    conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope)
    print_activations(conv1)
    parameters += [kernel, biases]

  # lrn1
  # TODO(shlens, jiayq): Add a GPU version of local response normalization.

  # pool1
  pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool1')
  print_activations(pool1)

  # conv2
  with tf.name_scope('conv2') as scope:
    kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv2)

  # pool2
  pool2 = tf.nn.max_pool(conv2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool2')
  print_activations(pool2)

  # conv3
  with tf.name_scope('conv3') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv3 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv3)

  # conv4
  with tf.name_scope('conv4') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv4 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv4)

  # conv5
  with tf.name_scope('conv5') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv5 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv5)

  # pool5
  pool5 = tf.nn.max_pool(conv5,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool5')
  print_activations(pool5)

  return pool5, parameters
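
# A minimal sketch of calling inference() on its own (hypothetical usage, not
# part of the benchmark): build dummy images, then inspect the returned
# tensors. pool5 has shape [1, 6, 6, 256] and parameters holds the 10
# kernel/bias Variables of the five conv layers.
#
#   imgs = tf.random_normal([1, 224, 224, 3], dtype=tf.float32)
#   pool5, params = inference(imgs)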


def time_tensorflow_run(session, target, info_string):
  """Run the computation to obtain the target tensor and print timing stats.

  Args:
    session: the TensorFlow session to run the computation under.
    target: the target Tensor that is passed to the session's run() function.
    info_string: a string summarizing this run, to be printed with the stats.

  Returns:
    None
  """
  num_steps_burn_in = 10
  total_duration = 0.0
  total_duration_squared = 0.0
  for i in xrange(FLAGS.num_batches + num_steps_burn_in):
    start_time = time.time()
    _ = session.run(target)
    duration = time.time() - start_time
    if i >= num_steps_burn_in:
      if not i % 10:
        print('%s: step %d, duration = %.3f' %
              (datetime.now(), i - num_steps_burn_in, duration))
      total_duration += duration
      total_duration_squared += duration * duration
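  # Mean and standard deviation from the two running sums, using the identity
  # Var[X] = E[X^2] - (E[X])^2 over the timed (post-burn-in) steps.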
  mn = total_duration / FLAGS.num_batches
  vr = total_duration_squared / FLAGS.num_batches - mn * mn
  sd = math.sqrt(vr)
  print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
        (datetime.now(), info_string, FLAGS.num_batches, mn, sd))


def run_benchmark():
  """Run the benchmark on AlexNet."""
  with tf.Graph().as_default():
    # Generate some dummy images.
    image_size = 224
    # Note that our padding definition is slightly different from cuda-convnet.
    # In order to force the model to start with the same activation sizes,
    # we add 3 to the image_size and employ VALID padding above.
    images = tf.Variable(tf.random_normal([FLAGS.batch_size,
                                           image_size,
                                           image_size, 3],
                                          dtype=tf.float32,
                                          stddev=1e-1))

    # Build a Graph that computes the logits predictions from the
    # inference model.
    pool5, parameters = inference(images)

    # Build an initialization operation.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph.
    config = tf.ConfigProto()
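    # 'BFC' selects TensorFlow's best-fit-with-coalescing GPU memory
    # allocator.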
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    sess.run(init)

    # Run the forward benchmark.
    time_tensorflow_run(sess, pool5, "Forward")

    # Add a simple objective so we can calculate the backward pass.
    objective = tf.nn.l2_loss(pool5)
    # Compute the gradient with respect to all the parameters.
    grad = tf.gradients(objective, parameters)
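    # Fetching these gradient tensors executes the forward pass and then
    # backpropagation, hence the "Forward-backward" label below.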
    # Run the backward benchmark.
    time_tensorflow_run(sess, grad, "Forward-backward")


def main(_):
  run_benchmark()


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--batch_size',
      type=int,
      default=128,
      help='Batch size.'
  )
  parser.add_argument(
      '--num_batches',
      type=int,
      default=100,
      help='Number of batches to run.'
  )
  FLAGS, unparsed = parser.parse_known_args()
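  # tf.app.run (TF 1.x) calls main() with the argv assembled here; unparsed
  # arguments are forwarded so TensorFlow's own flag handling still sees them.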
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)