# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
import tensorflow as tf
from spatial_transformer import transformer
import numpy as np
from tf_utils import weight_variable, bias_variable, dense_to_one_hot
# %% Load data
mnist_cluttered = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz')

X_train = mnist_cluttered['X_train']
y_train = mnist_cluttered['y_train']
X_valid = mnist_cluttered['X_valid']
y_valid = mnist_cluttered['y_valid']
X_test = mnist_cluttered['X_test']
y_test = mnist_cluttered['y_test']

# %% Turn the dense labels into a one-hot representation
Y_train = dense_to_one_hot(y_train, n_classes=10)
Y_valid = dense_to_one_hot(y_valid, n_classes=10)
Y_test = dense_to_one_hot(y_test, n_classes=10)
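# Added illustration: dense_to_one_hot is assumed to map each integer label
# to a length-10 indicator row, e.g. a label of 3 becomes
# [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], which is the form the softmax
# cross-entropy below expects for `y`.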
# %% Graph representation of our network

# %% Placeholders for 40x40 resolution
x = tf.placeholder(tf.float32, [None, 1600])
y = tf.placeholder(tf.float32, [None, 10])

# %% Since x is currently [batch, height*width], we need to reshape to a
# 4-D tensor to use it in a convolutional graph. If one component of
# `shape` is the special value -1, the size of that dimension is
# computed so that the total size remains constant. Since we haven't
# defined the batch dimension's shape yet, we use -1 to denote this
# dimension should not change size.
x_tensor = tf.reshape(x, [-1, 40, 40, 1])
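# Shape check (added for clarity): a batch of N flattened vectors of length
# 1600 = 40 * 40 becomes an [N, 40, 40, 1] tensor here, i.e. single-channel
# 40x40 images in NHWC layout.
# print(x_tensor.get_shape())  # -> (?, 40, 40, 1)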
# %% We'll set up the two-layer localisation network to figure out the
# %% parameters for an affine transformation of the input

# %% Create variables for fully connected layer
W_fc_loc1 = weight_variable([1600, 20])
b_fc_loc1 = bias_variable([20])

W_fc_loc2 = weight_variable([20, 6])
# Use identity transformation as starting point
initial = np.array([[1., 0, 0], [0, 1., 0]])
initial = initial.astype('float32')
initial = initial.flatten()
b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')
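# Added note: the six outputs of the localisation network are treated by
# `transformer` as the entries of a 2x3 affine matrix flattened row-wise,
# which is why the bias is initialised to the flattened identity
# [[1, 0, 0], [0, 1, 0]]: training starts near the identity transformation.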
# %% Define the two-layer localisation network
h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)

# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)

# %% Second layer
h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

# %% We'll create a spatial transformer module to identify discriminative
# %% patches
out_size = (40, 40)
h_trans = transformer(x_tensor, h_fc_loc2, out_size)
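# Added note: `transformer` resamples the input `x_tensor` using the affine
# parameters in `h_fc_loc2`, so the classifier below operates on a
# [batch, 40, 40, 1] warped view of the image rather than the raw pixels.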
# %% We'll set up the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 3
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
h_conv1 = tf.nn.relu(
    tf.nn.conv2d(input=h_trans,
                 filter=W_conv1,
                 strides=[1, 2, 2, 1],
                 padding='SAME') +
    b_conv1)
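# Shape note (added): with stride 2 and SAME padding, the 40x40 transformer
# output is downsampled to 20x20 here, so h_conv1 has shape
# [batch, 20, 20, n_filters_1].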
# %% And just like the first layer, add additional layers to create
# a deep net
n_filters_2 = 16
W_conv2 = weight_variable([filter_size, filter_size, n_filters_1, n_filters_2])
b_conv2 = bias_variable([n_filters_2])
h_conv2 = tf.nn.relu(
    tf.nn.conv2d(input=h_conv1,
                 filter=W_conv2,
                 strides=[1, 2, 2, 1],
                 padding='SAME') +
    b_conv2)

# %% We'll now reshape so we can connect to a fully-connected layer:
h_conv2_flat = tf.reshape(h_conv2, [-1, 10 * 10 * n_filters_2])
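# Added arithmetic check: two stride-2 convolutions take 40x40 -> 20x20 ->
# 10x10, so each example flattens to 10 * 10 * 16 = 1600 features, matching
# the first dimension of W_fc1 below.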
# %% Create a fully-connected layer:
n_fc = 1024
W_fc1 = weight_variable([10 * 10 * n_filters_2, n_fc])
b_fc1 = bias_variable([n_fc])
h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# %% And finally our softmax layer:
W_fc2 = weight_variable([n_fc, 10])
b_fc2 = bias_variable([10])
y_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
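# Added note: y_logits holds unnormalized class scores; the softmax is
# applied inside tf.nn.softmax_cross_entropy_with_logits below, and taking
# tf.argmax of the logits yields the same predicted class as taking it of
# the softmax probabilities.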
# %% Define loss/eval/training functions
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y))
opt = tf.train.AdamOptimizer()
optimizer = opt.minimize(cross_entropy)
grads = opt.compute_gradients(cross_entropy, [b_fc_loc2])

# %% Monitor accuracy
correct_prediction = tf.equal(tf.argmax(y_logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
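# Added note: `grads` (the gradients of the loss w.r.t. the transformer bias
# b_fc_loc2) is built here but never fetched in the training loop below; it
# can be passed to sess.run if you want to inspect the learning signal
# reaching the localisation network. `accuracy` is the fraction of examples
# whose argmax over the logits matches the one-hot label.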
# %% We now create a new session to actually initialize the variables:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# %% We'll now train in minibatches and report accuracy and loss:
iter_per_epoch = 100
n_epochs = 500
train_size = 10000

indices = np.linspace(0, train_size - 1, iter_per_epoch)
indices = indices.astype('int')
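# Added note: np.linspace over [0, 9999] with 100 points yields indices
# 0, 101, 202, ..., 9999, so consecutive pairs delimit mini-batches of 101
# training examples and the inner loop below runs iter_per_epoch - 1 times
# per epoch.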
for epoch_i in range(n_epochs):
    for iter_i in range(iter_per_epoch - 1):
        batch_xs = X_train[indices[iter_i]:indices[iter_i + 1]]
        batch_ys = Y_train[indices[iter_i]:indices[iter_i + 1]]

        if iter_i % 10 == 0:
            loss = sess.run(cross_entropy,
                            feed_dict={
                                x: batch_xs,
                                y: batch_ys,
                                keep_prob: 1.0
                            })
            print('Iteration: ' + str(iter_i) + ' Loss: ' + str(loss))

        sess.run(optimizer, feed_dict={
            x: batch_xs, y: batch_ys, keep_prob: 0.8})

    print('Accuracy (%d): ' % epoch_i + str(sess.run(accuracy,
                                                     feed_dict={
                                                         x: X_valid,
                                                         y: Y_valid,
                                                         keep_prob: 1.0
                                                     })))

    # theta = sess.run(h_fc_loc2, feed_dict={
    #     x: batch_xs, keep_prob: 1.0})
    # print(theta[0])