# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
- """Timing benchmark for AlexNet inference.
- To run, use:
- bazel run -c opt --config=cuda \
- models/tutorials/image/alexnet:alexnet_benchmark
- Across 100 steps on batch size = 128.
- Forward pass:
- Run on Tesla K40c: 145 +/- 1.5 ms / batch
- Run on Titan X: 70 +/- 0.1 ms / batch
- Forward-backward pass:
- Run on Tesla K40c: 480 +/- 48 ms / batch
- Run on Titan X: 244 +/- 30 ms / batch
- """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
from datetime import datetime
import math
import sys
import time

from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

FLAGS = None


def print_activations(t):
  """Prints the name and output shape of a layer's tensor."""
  print(t.op.name, ' ', t.get_shape().as_list())
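# With batch_size=128 this prints lines such as:
#   conv1   [128, 56, 56, 64]
# (SAME padding with stride 4 maps the 224x224 input to ceil(224 / 4) = 56).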


def inference(images):
  """Build the AlexNet model.

  Args:
    images: Images Tensor

  Returns:
    pool5: the last Tensor in the convolutional component of AlexNet.
    parameters: a list of Tensors corresponding to the weights and biases of the
        AlexNet model.
  """
  parameters = []
  # conv1
  with tf.name_scope('conv1') as scope:
    kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope)
    print_activations(conv1)
    parameters += [kernel, biases]

  # lrn1
  # TODO(shlens, jiayq): Add a GPU version of local response normalization.
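  # If normalization were enabled here, a CPU-backed sketch would look roughly
  # like the call below; the constants are taken from the AlexNet paper and
  # should be treated as assumptions, not part of this benchmark:
  #   lrn1 = tf.nn.local_response_normalization(
  #       conv1, depth_radius=2, bias=2.0, alpha=2e-05, beta=0.75, name='lrn1')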
  # pool1
  pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool1')
  print_activations(pool1)
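  # With VALID padding the output side is floor((in - ksize) / stride) + 1, so
  # the 56x56 conv1 activations shrink to floor((56 - 3) / 2) + 1 = 27 here;
  # the same arithmetic applies to pool2 and pool5 below.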

  # conv2
  with tf.name_scope('conv2') as scope:
    kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv2)

  # pool2
  pool2 = tf.nn.max_pool(conv2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool2')
  print_activations(pool2)

  # conv3
  with tf.name_scope('conv3') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv3 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv3)

  # conv4
  with tf.name_scope('conv4') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv4 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv4)

  # conv5
  with tf.name_scope('conv5') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv5 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv5)

  # pool5
  pool5 = tf.nn.max_pool(conv5,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool5')
  print_activations(pool5)
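  # For 224x224 inputs pool5 comes out as [batch_size, 6, 6, 256]; the
  # 6 * 6 * 256 = 9216 values per image are what the fully connected layers
  # of the full AlexNet (omitted from this benchmark) would consume.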

  return pool5, parameters


def time_tensorflow_run(session, target, info_string):
  """Run the computation to obtain the target tensor and print timing stats.

  Args:
    session: the TensorFlow session to run the computation under.
    target: the target Tensor that is passed to the session's run() function.
    info_string: a string summarizing this run, to be printed with the stats.

  Returns:
    None
  """
  # The first num_steps_burn_in iterations are excluded from the statistics:
  # they are dominated by one-off startup costs such as memory allocation.
  num_steps_burn_in = 10
  total_duration = 0.0
  total_duration_squared = 0.0
  for i in xrange(FLAGS.num_batches + num_steps_burn_in):
    start_time = time.time()
    _ = session.run(target)
    duration = time.time() - start_time
    if i >= num_steps_burn_in:
      if not i % 10:
        print('%s: step %d, duration = %.3f' %
              (datetime.now(), i - num_steps_burn_in, duration))
      total_duration += duration
      total_duration_squared += duration * duration
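  # Population statistics from the running sums: with N = num_batches,
  # mean = sum(d) / N and variance = sum(d^2) / N - mean^2, i.e.
  # E[d^2] - E[d]^2.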
  mn = total_duration / FLAGS.num_batches
  vr = total_duration_squared / FLAGS.num_batches - mn * mn
  sd = math.sqrt(vr)
  print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
        (datetime.now(), info_string, FLAGS.num_batches, mn, sd))


def run_benchmark():
  """Run the benchmark on AlexNet."""
  with tf.Graph().as_default():
    # Generate some dummy images.
    image_size = 224
    # Note that our padding definition is slightly different from
    # cuda-convnet's. In order to force the model to start with the same
    # activation sizes, we add 3 to the image_size and employ VALID padding
    # above.
    images = tf.Variable(tf.random_normal([FLAGS.batch_size,
                                           image_size,
                                           image_size, 3],
                                          dtype=tf.float32,
                                          stddev=1e-1))
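    # Using synthetic in-graph random data rather than real images keeps file
    # I/O and preprocessing out of the timings, so only graph execution is
    # measured.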
    # Build a Graph that computes the activations of the convolutional part
    # of the model.
    pool5, parameters = inference(images)

    # Build an initialization operation.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph.
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    sess.run(init)

    # Run the forward benchmark.
    time_tensorflow_run(sess, pool5, "Forward")

    # Add a simple objective so we can calculate the backward pass.
    objective = tf.nn.l2_loss(pool5)
    # Compute the gradient with respect to all the parameters.
    grad = tf.gradients(objective, parameters)
    # Run the backward benchmark. Evaluating the gradients re-runs the forward
    # ops as well, so these timings include a forward pass.
    time_tensorflow_run(sess, grad, "Forward-backward")


def main(_):
  run_benchmark()


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--batch_size',
      type=int,
      default=128,
      help='Batch size.'
  )
  parser.add_argument(
      '--num_batches',
      type=int,
      default=100,
      help='Number of batches to run.'
  )
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
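# Example invocation outside of bazel (assuming the file is saved as
# alexnet_benchmark.py):
#   python alexnet_benchmark.py --batch_size=64 --num_batches=50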