dynamic_rnn.py

'''
A Dynamic Recurrent Neural Network (LSTM) implementation example using the
TensorFlow library. This example uses a toy dataset to classify linear
sequences. The generated sequences have variable length.

Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import print_function

import random

import tensorflow as tf


# ====================
#  TOY DATA GENERATOR
# ====================
class ToySequenceData(object):
    """ Generate sequences of data with dynamic length.

    This class generates samples for training:
    - Class 0: linear sequences (i.e. [0, 1, 2, 3, ...])
    - Class 1: random sequences (i.e. [1, 3, 10, 7, ...])

    NOTICE:
    We have to pad each sequence to reach 'max_seq_len' for TensorFlow
    consistency (we cannot feed a numpy array with inconsistent
    dimensions). The dynamic calculation is then performed thanks to the
    'seqlen' attribute, which records the actual length of every sequence.
    (See the usage sketch after this class.)
    """
    def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3,
                 max_value=1000):
        self.data = []
        self.labels = []
        self.seqlen = []
        for i in range(n_samples):
            # Random sequence length
            seq_len = random.randint(min_seq_len, max_seq_len)
            # Monitor sequence length for TensorFlow dynamic calculation
            self.seqlen.append(seq_len)
            # Add a random or linear int sequence (50% prob)
            if random.random() < .5:
                # Generate a linear sequence
                rand_start = random.randint(0, max_value - seq_len)
                s = [[float(i) / max_value] for i in
                     range(rand_start, rand_start + seq_len)]
                # Pad sequence for dimension consistency
                s += [[0.] for i in range(max_seq_len - seq_len)]
                self.data.append(s)
                self.labels.append([1., 0.])
            else:
                # Generate a random sequence
                s = [[float(random.randint(0, max_value)) / max_value]
                     for i in range(seq_len)]
                # Pad sequence for dimension consistency
                s += [[0.] for i in range(max_seq_len - seq_len)]
                self.data.append(s)
                self.labels.append([0., 1.])
        self.batch_id = 0
    def next(self, batch_size):
        """ Return a batch of data. When dataset end is reached, start over.
        """
        if self.batch_id == len(self.data):
            self.batch_id = 0
        batch_data = (self.data[self.batch_id:min(self.batch_id +
                                                  batch_size, len(self.data))])
        batch_labels = (self.labels[self.batch_id:min(self.batch_id +
                                                      batch_size, len(self.data))])
        batch_seqlen = (self.seqlen[self.batch_id:min(self.batch_id +
                                                      batch_size, len(self.data))])
        self.batch_id = min(self.batch_id + batch_size, len(self.data))
        return batch_data, batch_labels, batch_seqlen
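
# A minimal usage sketch of the generator above (the values shown are
# illustrative, not actual output):
#
#     demo = ToySequenceData(n_samples=4, max_seq_len=20)
#     data, labels, seqlen = demo.next(2)
#     # data   -> 2 sequences, each padded to 20 steps of 1 feature,
#     #           e.g. [[0.23], [0.231], ..., [0.], [0.]]
#     # labels -> one-hot pairs, e.g. [[1., 0.], [0., 1.]]
#     # seqlen -> true (pre-padding) lengths, e.g. [7, 12]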


# ==========
#   MODEL
# ==========

# Parameters
learning_rate = 0.01
training_iters = 1000000
batch_size = 128
display_step = 10

# Network Parameters
seq_max_len = 20  # Sequence max length
n_hidden = 64  # hidden layer num of features
n_classes = 2  # linear sequence or not

trainset = ToySequenceData(n_samples=1000, max_seq_len=seq_max_len)
testset = ToySequenceData(n_samples=500, max_seq_len=seq_max_len)
# tf Graph input
x = tf.placeholder("float", [None, seq_max_len, 1])
y = tf.placeholder("float", [None, n_classes])
# A placeholder for indicating each sequence length
seqlen = tf.placeholder(tf.int32, [None])
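# For example, a training batch of 128 samples is fed as x: (128, 20, 1),
# y: (128, 2) and seqlen: (128,); leaving the batch dimension as None also
# accommodates the 500-sample test set at evaluation time.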
# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
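# Note: this single 'out' layer projects the last relevant LSTM output,
# of shape (batch_size, n_hidden), to class scores of shape
# (batch_size, n_classes); dynamicRNN below applies it via tf.matmul.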


def dynamicRNN(x, seqlen, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, 1])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, seq_max_len, x)
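
    # To make the reshaping concrete (shapes only, as an illustration):
    # with batch_size=128, n_steps=seq_max_len=20 and n_input=1, the input
    # goes (128, 20, 1) -> transpose -> (20, 128, 1) -> reshape -> (2560, 1)
    # -> split -> a Python list of 20 tensors, each of shape (128, 1).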

    # Define a lstm cell with tensorflow
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)

    # Get lstm cell output; providing 'sequence_length' will perform dynamic
    # calculation.
    outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32,
                                sequence_length=seqlen)

    # When performing dynamic calculation, we must retrieve the last
    # dynamically computed output, i.e., if a sequence length is 10, we need
    # to retrieve the 10th output.
    # However TensorFlow doesn't support advanced indexing yet, so we build
    # a custom indexing scheme: for each sample in the batch, take its length
    # and gather the corresponding relevant output.

    # 'outputs' is a list of outputs at every timestep; we pack them into a
    # Tensor and change the dimensions back to [batch_size, n_steps, n_hidden]
    outputs = tf.pack(outputs)
    outputs = tf.transpose(outputs, [1, 0, 2])

    # Hack to build the indexing and retrieve the right output.
    batch_size = tf.shape(outputs)[0]
    # Start indices for each sample
    index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)
    # Indexing
    outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)
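
    # As a small worked example (illustrative numbers, not the real
    # parameters): with seq_max_len=3, a batch of 2 samples and
    # seqlen=[2, 3], the flattened outputs have rows
    # [s0_t0, s0_t1, s0_t2, s1_t0, s1_t1, s1_t2], and
    # index = [0, 1]*3 + [1, 2] = [1, 5], i.e. the last valid output of
    # each sequence.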

    # Linear activation, using outputs computed above
    return tf.matmul(outputs, weights['out']) + biases['out']


pred = dynamicRNN(x, seqlen, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                       seqlen: batch_seqlen})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y,
                                                seqlen: batch_seqlen})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y,
                                             seqlen: batch_seqlen})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy
    test_data = testset.data
    test_label = testset.labels
    test_seqlen = testset.seqlen
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_data, y: test_label,
                                        seqlen: test_seqlen}))