multigpu_cnn2.py

''' Multi-GPU Training Example.

Train a convolutional neural network on multiple GPUs with TensorFlow.

Note: Unlike previous examples, we are using the TensorFlow Slim API instead of
the TensorFlow layers API, mainly because it is easier to place variables on the
CPU with Slim. Apart from that, TF and Slim layers are very similar.

This example is using the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import division, print_function, absolute_import

import time

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
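
# Note: this script targets the TensorFlow 1.x API; tf.contrib.slim and the
# tutorials MNIST loader used above are not available in TensorFlow 2.x.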

# Training Parameters
num_gpus = 1
num_steps = 200
learning_rate = 0.001
batch_size = 1024
display_step = 10

# Network Parameters
num_input = 784  # MNIST data input (img shape: 28*28)
num_classes = 10  # MNIST total classes (0-9 digits)
dropout = 1.  # Dropout, probability to keep units
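
# Note (clarification, not in the original script): batch_size is the per-GPU
# batch size; each training step feeds batch_size * num_gpus examples and every
# tower takes its own batch_size-sized slice of that global batch.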

# Build a convolutional neural network
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
        # Reshape to match picture format [Height x Width x Channel]
        # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
        x = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Convolution Layer with 64 filters and a kernel size of 5
        x = slim.conv2d(x, 64, 5, activation_fn=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = slim.max_pool2d(x, 2, 2)

        # Convolution Layer with 256 filters and a kernel size of 3
        x = slim.conv2d(x, 256, 3, activation_fn=tf.nn.relu)
        # Convolution Layer with 512 filters and a kernel size of 3
        x = slim.conv2d(x, 512, 3, activation_fn=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = slim.max_pool2d(x, 2, 2)

        # Flatten the data to a 1-D vector for the fully connected layer
        x = slim.flatten(x)

        # Fully connected layer (in contrib folder for now)
        x = slim.fully_connected(x, 2048, activation_fn=tf.nn.relu)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = slim.dropout(x, keep_prob=dropout, is_training=is_training)

        # Fully connected layer (in contrib folder for now)
        x = slim.fully_connected(x, 1024, activation_fn=tf.nn.relu)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = slim.dropout(x, keep_prob=dropout, is_training=is_training)

        # Output layer, class prediction, linear activation
        out = slim.fully_connected(x, n_classes, activation_fn=None)
        # Because 'softmax_cross_entropy_with_logits' already applies softmax,
        # we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out

    return out
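
# Shape walk-through (for reference; assumes Slim's default padding of 'SAME'
# for conv2d and 'VALID' for max_pool2d):
#   input 28x28x1 -> conv5/64 -> 28x28x64 -> pool2 -> 14x14x64
#   -> conv3/256 -> 14x14x256 -> conv3/512 -> 14x14x512 -> pool2 -> 7x7x512
#   -> flatten (25088) -> fc 2048 -> fc 1024 -> fc n_classes (logits).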

def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers, so we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
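
# Example of what average_gradients consumes and produces (hypothetical values,
# for illustration only): with 2 towers and shared variables v0, v1,
#   tower_grads = [[(g0_gpu0, v0), (g1_gpu0, v1)],
#                  [(g0_gpu1, v0), (g1_gpu1, v1)]]
# and the result is [(mean(g0_gpu0, g0_gpu1), v0), (mean(g1_gpu0, g1_gpu1), v1)],
# i.e. one averaged gradient per shared variable, ready for apply_gradients().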

# Place all ops on CPU by default
with tf.device('/cpu:0'):
    tower_grads = []
    reuse_vars = False

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])

    # Loop over all GPUs and construct their own computation graph
    for i in range(num_gpus):
        with tf.device('/gpu:%d' % i):

            # Split data between GPUs
            _x = X[i * batch_size: (i + 1) * batch_size]
            _y = Y[i * batch_size: (i + 1) * batch_size]

            # Because Dropout has different behavior at training and prediction time,
            # we need to create 2 distinct computation graphs that share the same weights.

            # We need to set all layer variables on cpu0
            # (otherwise they would be assigned to gpu0 by default)
            with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
                # Create a graph for training
                logits_train = conv_net(_x, num_classes, dropout,
                                        reuse=reuse_vars, is_training=True)
                # Create another graph for testing that reuses the same weights
                logits_test = conv_net(_x, num_classes, dropout,
                                       reuse=True, is_training=False)

            # Define loss and optimizer (with train logits, for dropout to take effect)
            loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logits_train, labels=_y))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads = optimizer.compute_gradients(loss_op)

            # Only the first GPU computes accuracy
            if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            reuse_vars = True
            tower_grads.append(grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
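
    # Optional (not in the original script): with more GPUs you may want a session
    # config such as tf.ConfigProto(allow_soft_placement=True) so that ops without
    # a GPU kernel fall back to the CPU instead of raising a placement error, e.g.
    #   config = tf.ConfigProto(allow_soft_placement=True)
    #   with tf.Session(config=config) as sess: ...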

    # Start Training
    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)

        # Train for num_steps iterations
        for step in range(1, num_steps + 1):
            # Get a batch for each GPU
            batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
            # Run optimization op (backprop)
            ts = time.time()
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            te = time.time() - ts
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy],
                                     feed_dict={X: batch_x, Y: batch_y})
                print("Step " + str(step) + ": Minibatch Loss= " +
                      "{:.4f}".format(loss) + ", Training Accuracy= " +
                      "{:.3f}".format(acc) + ", %i Examples/sec" % int(len(batch_x) / te))

        print("Optimization Finished!")

        # Calculate accuracy for MNIST test images
        print("Testing Accuracy:",
              np.mean([sess.run(accuracy,
                                feed_dict={X: mnist.test.images[i:i + batch_size],
                                           Y: mnist.test.labels[i:i + batch_size]})
                       for i in range(0, len(mnist.test.images), batch_size)]))
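
        # A minimal follow-up sketch (not part of the original script): the last
        # test batch is smaller than batch_size, so the plain mean above weights
        # it the same as the full batches. For an exactly weighted figure:
        correct, total = 0.0, 0
        for i in range(0, len(mnist.test.images), batch_size):
            xb = mnist.test.images[i:i + batch_size]
            yb = mnist.test.labels[i:i + batch_size]
            correct += sess.run(accuracy, feed_dict={X: xb, Y: yb}) * len(xb)
            total += len(xb)
        print("Weighted Testing Accuracy:", correct / total)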