multigpu_cnn.py

''' Multi-GPU Training Example.

Train a convolutional neural network on multiple GPUs with TensorFlow.
This example uses TensorFlow layers; see the 'convolutional_network_raw'
example for a raw TensorFlow implementation with variables.

This example uses the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/).

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import division, print_function, absolute_import

import time

import numpy as np
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
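# 'read_data_sets' downloads MNIST to /tmp/data/ on the first run and returns
# train/validation/test splits; 'one_hot=True' encodes labels as one-hot vectors.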

# Training Parameters
num_gpus = 2
num_steps = 200
learning_rate = 0.001
batch_size = 1024
display_step = 10

# Network Parameters
num_input = 784  # MNIST data input (img shape: 28*28)
num_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout, probability to keep units
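
# Note: each optimization step below draws batch_size * num_gpus examples
# (2048 with these settings) and gives each GPU its own batch_size-sized slice,
# so 'batch_size' is the per-GPU batch, not the global one.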

# Build a convolutional neural network
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST data input is a 1-D vector of 784 features (28*28 pixels).
        # Reshape to match picture format [Height x Width x Channel];
        # the tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
        x = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Convolution layer with 64 filters and a kernel size of 5
        x = tf.layers.conv2d(x, 64, 5, activation=tf.nn.relu)
        # Max pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Convolution layer with 256 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 256, 3, activation=tf.nn.relu)
        # Convolution layer with 512 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 512, 3, activation=tf.nn.relu)
        # Max pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Flatten the data to a 1-D vector for the fully connected layers
        x = tf.contrib.layers.flatten(x)

        # Fully connected layer
        x = tf.layers.dense(x, 2048)
        # Apply dropout (if is_training is False, dropout is not applied).
        # Note: 'rate' is the probability to DROP a unit, so the keep
        # probability defined above is inverted here.
        x = tf.layers.dropout(x, rate=1.0 - dropout, training=is_training)

        # Fully connected layer
        x = tf.layers.dense(x, 1024)
        # Apply dropout (if is_training is False, dropout is not applied)
        x = tf.layers.dropout(x, rate=1.0 - dropout, training=is_training)

        # Output layer, class prediction
        out = tf.layers.dense(x, n_classes)
        # Because 'softmax_cross_entropy_with_logits' already applies softmax,
        # we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out

    return out
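
# Shape flow through 'conv_net' (with the default 'valid' padding), assuming a
# 28x28x1 input:
#   conv 5x5, 64  -> 24x24x64
#   max pool 2x2  -> 12x12x64
#   conv 3x3, 256 -> 10x10x256
#   conv 3x3, 512 -> 8x8x512
#   max pool 2x2  -> 4x4x512
#   flatten       -> 8192
#   dense         -> 2048 -> 1024 -> n_classes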

def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add a 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers, so we just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
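
# Example of what 'average_gradients' returns for two towers (sketch only,
# with hypothetical gradient/variable names):
#
#   tower_grads = [[(g0_gpu0, w0), (g1_gpu0, w1)],    # from /gpu:0
#                  [(g0_gpu1, w0), (g1_gpu1, w1)]]    # from /gpu:1
#   average_gradients(tower_grads)
#   # -> [((g0_gpu0 + g0_gpu1) / 2, w0), ((g1_gpu0 + g1_gpu1) / 2, w1)]
#
# Gradients are averaged element-wise across towers; the variable in each pair
# is taken from the first tower, since the variables are shared anyway.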

# By default, all variables would be placed on '/gpu:0', so we need a custom
# device function to assign all variables to '/cpu:0' instead.
# Note: If GPUs are peered, '/gpu:0' can be a faster option.
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']


def assign_to_device(device, ps_device='/cpu:0'):
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in PS_OPS:
            return ps_device
        else:
            return device
    return _assign
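
# With this device function, wrapping graph construction in
# tf.device(assign_to_device(...)) sends variable-creating ops (those in
# PS_OPS) to the parameter-server device and everything else to the GPU.
# A minimal sketch (hypothetical tensors, not used by this script):
#
#   with tf.device(assign_to_device('/gpu:1', ps_device='/cpu:0')):
#       w = tf.Variable(tf.zeros([10]))    # 'VariableV2' op -> placed on /cpu:0
#       y = tf.reduce_sum(w * 2.0)         # compute ops     -> placed on /gpu:1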

# Place all ops on CPU by default
with tf.device('/cpu:0'):
    tower_grads = []
    reuse_vars = False

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])

    # Loop over all GPUs and construct their own computation graph
    for i in range(num_gpus):
        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):

            # Split data between GPUs
            _x = X[i * batch_size: (i + 1) * batch_size]
            _y = Y[i * batch_size: (i + 1) * batch_size]

            # Because Dropout has different behavior at training and prediction
            # time, we need to create 2 distinct computation graphs that share
            # the same weights.

            # Create a graph for training
            logits_train = conv_net(_x, num_classes, dropout,
                                    reuse=reuse_vars, is_training=True)
            # Create another graph for testing that reuses the same weights
            logits_test = conv_net(_x, num_classes, dropout,
                                   reuse=True, is_training=False)

            # Define loss and optimizer (with train logits, for dropout to take effect)
            loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logits_train, labels=_y))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads = optimizer.compute_gradients(loss_op)

            # Only the first GPU computes accuracy
            if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            reuse_vars = True
            tower_grads.append(grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)
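
    # This is synchronous data parallelism: every tower computes gradients on
    # its own slice of the fed batch, the gradients are averaged across towers,
    # and a single apply_gradients updates the shared (CPU-resident) variables.
    # Note that 'optimizer' here is the instance created for the last tower;
    # since all towers use identical hyper-parameters, any of them would do.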

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    # Start Training
    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)

        # Keep training until reaching the max number of steps
        for step in range(1, num_steps + 1):
            # Get a batch for each GPU
            batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
            # Run optimization op (backprop), timing the step
            ts = time.time()
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            te = time.time() - ts
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy],
                                     feed_dict={X: batch_x, Y: batch_y})
                print("Step " + str(step) + ": Minibatch Loss= " +
                      "{:.4f}".format(loss) + ", Training Accuracy= " +
                      "{:.3f}".format(acc) +
                      ", %i Examples/sec" % int(len(batch_x) / te))

        print("Optimization Finished!")

        # Calculate accuracy for MNIST test images, one batch at a time
        print("Testing Accuracy:",
              np.mean([sess.run(accuracy,
                                feed_dict={X: mnist.test.images[i:i + batch_size],
                                           Y: mnist.test.labels[i:i + batch_size]})
                       for i in range(0, len(mnist.test.images), batch_size)]))