@@ -6,44 +6,64 @@
"collapsed": true
},
"source": [
- "'''\n",
- "A Bidirectional Reccurent Neural Network (LSTM) implementation example using TensorFlow library.\n",
- "This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)\n",
- "Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf\n",
+ "# Bi-directional Recurrent Neural Network Example\n",
"\n",
- "Author: Aymeric Damien\n",
- "Project: https://github.com/aymericdamien/TensorFlow-Examples/\n",
- "'''"
+ "Build a bi-directional recurrent neural network (LSTM) with TensorFlow.\n",
+ "\n",
+ "- Author: Aymeric Damien\n",
+ "- Project: https://github.com/aymericdamien/TensorFlow-Examples/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## BiRNN Overview\n",
+ "\n",
+ "<img src=\"https://ai2-s2-public.s3.amazonaws.com/figures/2016-11-08/191dd7df9cb91ac22f56ed0dfa4a5651e8767a51/1-Figure2-1.png\" alt=\"nn\" style=\"width: 600px;\"/>\n",
+ "\n",
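+ "A BiRNN runs two LSTMs over each input sequence, one reading it forward and one backward, and concatenates their outputs at every timestep; this is why the output layer below takes `2*num_hidden` input features. A shape-only sketch of that concatenation (NumPy stand-ins with hypothetical values, not part of the graph built below):\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "fw = np.zeros((128, 128))  # forward LSTM output: (batch_size, num_hidden)\n",
+ "bw = np.zeros((128, 128))  # backward LSTM output: (batch_size, num_hidden)\n",
+ "out = np.concatenate([fw, bw], axis=1)  # (batch_size, 2*num_hidden)\n",
+ "```\n",
+ "\n",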
+ "References:\n",
|
|
|
+ "- [Long Short Term Memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf), Sepp Hochreiter & Jurgen Schmidhuber, Neural Computation 9(8): 1735-1780, 1997.\n",
|
|
|
+ "\n",
|
|
|
+ "## MNIST Dataset Overview\n",
|
|
|
+ "\n",
|
|
|
+ "This example is using MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 1. For simplicity, each image has been flatten and converted to a 1-D numpy array of 784 features (28*28).\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "To classify images using a recurrent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 timesteps for every sample.\n",
|
|
|
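+ "\n",
+ "A minimal sketch of that reshape (NumPy only, with a hypothetical random batch; not part of the graph built below):\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "batch = np.random.rand(128, 784)    # flattened batch: (batch_size, 28*28)\n",
+ "seq = batch.reshape((128, 28, 28))  # 28 timesteps of 28 pixels each\n",
+ "```\n",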
+ "\n",
|
|
|
+ "More info: http://yann.lecun.com/exdb/mnist/"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 1,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
|
- "outputs": [],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Extracting /tmp/data/train-images-idx3-ubyte.gz\n",
|
|
|
+ "Extracting /tmp/data/train-labels-idx1-ubyte.gz\n",
|
|
|
+ "Extracting /tmp/data/t10k-images-idx3-ubyte.gz\n",
|
|
|
+ "Extracting /tmp/data/t10k-labels-idx1-ubyte.gz\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
+ "from __future__ import print_function\n",
|
|
|
+ "\n",
|
|
|
"import tensorflow as tf\n",
|
|
|
"from tensorflow.contrib import rnn\n",
|
|
|
"import numpy as np\n",
|
|
|
"\n",
|
|
|
- "# Import MINST data\n",
|
|
|
+ "# Import MNIST data\n",
|
|
|
"from tensorflow.examples.tutorials.mnist import input_data\n",
|
|
|
- "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "markdown",
|
|
|
- "metadata": {
|
|
|
- "collapsed": true
|
|
|
- },
|
|
|
- "source": [
|
|
|
- "'''\n",
|
|
|
- "To classify images using a bidirectional reccurent neural network, we consider\n",
|
|
|
- "every image row as a sequence of pixels. Because MNIST image shape is 28*28px,\n",
|
|
|
- "we will then handle 28 sequences of 28 steps for every sample.\n",
|
|
|
- "'''"
|
|
|
+ "mnist = input_data.read_data_sets(\"/tmp/data/\", one_hot=True)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -54,35 +74,44 @@
|
|
|
},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "# Parameters\n",
|
|
|
+ "# Training Parameters\n",
|
|
|
"learning_rate = 0.001\n",
|
|
|
- "training_iters = 100000\n",
|
|
|
+ "training_steps = 10000\n",
|
|
|
"batch_size = 128\n",
|
|
|
- "display_step = 10\n",
|
|
|
+ "display_step = 200\n",
|
|
|
"\n",
|
|
|
"# Network Parameters\n",
|
|
|
- "n_input = 28 # MNIST data input (img shape: 28*28)\n",
|
|
|
- "n_steps = 28 # timesteps\n",
|
|
|
- "n_hidden = 128 # hidden layer num of features\n",
|
|
|
- "n_classes = 10 # MNIST total classes (0-9 digits)\n",
|
|
|
+ "num_input = 28 # MNIST data input (img shape: 28*28)\n",
|
|
|
+ "timesteps = 28 # timesteps\n",
|
|
|
+ "num_hidden = 128 # hidden layer num of features\n",
|
|
|
+ "num_classes = 10 # MNIST total classes (0-9 digits)\n",
|
|
|
"\n",
|
|
|
"# tf Graph input\n",
|
|
|
- "x = tf.placeholder(\"float\", [None, n_steps, n_input])\n",
|
|
|
- "y = tf.placeholder(\"float\", [None, n_classes])\n",
|
|
|
- "\n",
|
|
|
+ "X = tf.placeholder(\"float\", [None, timesteps, num_input])\n",
|
|
|
+ "Y = tf.placeholder(\"float\", [None, num_classes])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
"# Define weights\n",
|
|
|
"weights = {\n",
|
|
|
- " # Hidden layer weights => 2*n_hidden because of foward + backward cells\n",
|
|
|
- " 'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))\n",
|
|
|
+ " # Hidden layer weights => 2*n_hidden because of forward + backward cells\n",
|
|
|
+ " 'out': tf.Variable(tf.random_normal([2*num_hidden, num_classes]))\n",
|
|
|
"}\n",
|
|
|
"biases = {\n",
|
|
|
- " 'out': tf.Variable(tf.random_normal([n_classes]))\n",
|
|
|
+ " 'out': tf.Variable(tf.random_normal([num_classes]))\n",
|
|
|
"}"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -90,18 +119,18 @@
|
|
|
"source": [
|
|
|
"def BiRNN(x, weights, biases):\n",
|
|
|
"\n",
|
|
|
- " # Prepare data shape to match `bidirectional_rnn` function requirements\n",
|
|
|
- " # Current data input shape: (batch_size, n_steps, n_input)\n",
|
|
|
- " # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)\n",
|
|
|
- " \n",
|
|
|
- " # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)\n",
|
|
|
- " x = tf.unstack(x, n_steps, 1)\n",
|
|
|
+ " # Prepare data shape to match `rnn` function requirements\n",
|
|
|
+ " # Current data input shape: (batch_size, timesteps, n_input)\n",
|
|
|
+ " # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)\n",
|
|
|
+ "\n",
|
|
|
+ " # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)\n",
|
|
|
+ " x = tf.unstack(x, timesteps, 1)\n",
|
|
|
"\n",
|
|
|
" # Define lstm cells with tensorflow\n",
|
|
|
" # Forward direction cell\n",
|
|
|
- " lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)\n",
|
|
|
+ " lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)\n",
|
|
|
" # Backward direction cell\n",
|
|
|
- " lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)\n",
|
|
|
+ " lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)\n",
|
|
|
"\n",
|
|
|
" # Get lstm cell output\n",
|
|
|
" try:\n",
|
|
@@ -112,25 +141,37 @@
|
|
|
" dtype=tf.float32)\n",
|
|
|
"\n",
|
|
|
" # Linear activation, using rnn inner loop last output\n",
|
|
|
- " return tf.matmul(outputs[-1], weights['out']) + biases['out']\n",
|
|
|
- "\n",
|
|
|
- "pred = BiRNN(x, weights, biases)\n",
|
|
|
+ " return tf.matmul(outputs[-1], weights['out']) + biases['out']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "logits = BiRNN(X, weights, biases)\n",
|
|
|
+ "prediction = tf.nn.softmax(logits)\n",
|
|
|
"\n",
|
|
|
"# Define loss and optimizer\n",
|
|
|
- "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))\n",
|
|
|
- "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n",
|
|
|
+ "loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n",
|
|
|
+ " logits=logits, labels=Y))\n",
|
|
|
+ "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
|
|
+ "train_op = optimizer.minimize(loss_op)\n",
|
|
|
"\n",
|
|
|
- "# Evaluate model\n",
|
|
|
- "correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))\n",
|
|
|
+ "# Evaluate model (with test logits, for dropout to be disabled)\n",
|
|
|
+ "correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))\n",
|
|
|
"accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))\n",
|
|
|
"\n",
|
|
|
- "# Initializing the variables\n",
|
|
|
+ "# Initialize the variables (i.e. assign their default value)\n",
|
|
|
"init = tf.global_variables_initializer()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": 6,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -139,118 +180,91 @@
|
|
|
"name": "stdout",
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
- "Iter 1280, Minibatch Loss= 1.557283, Training Accuracy= 0.49219\n",
|
|
|
- "Iter 2560, Minibatch Loss= 1.358445, Training Accuracy= 0.56250\n",
|
|
|
- "Iter 3840, Minibatch Loss= 1.043732, Training Accuracy= 0.64062\n",
|
|
|
- "Iter 5120, Minibatch Loss= 0.796770, Training Accuracy= 0.72656\n",
|
|
|
- "Iter 6400, Minibatch Loss= 0.626206, Training Accuracy= 0.72656\n",
|
|
|
- "Iter 7680, Minibatch Loss= 1.025919, Training Accuracy= 0.65625\n",
|
|
|
- "Iter 8960, Minibatch Loss= 0.744850, Training Accuracy= 0.76562\n",
|
|
|
- "Iter 10240, Minibatch Loss= 0.530111, Training Accuracy= 0.84375\n",
|
|
|
- "Iter 11520, Minibatch Loss= 0.383806, Training Accuracy= 0.86719\n",
|
|
|
- "Iter 12800, Minibatch Loss= 0.607816, Training Accuracy= 0.82812\n",
|
|
|
- "Iter 14080, Minibatch Loss= 0.410879, Training Accuracy= 0.89062\n",
|
|
|
- "Iter 15360, Minibatch Loss= 0.335351, Training Accuracy= 0.89844\n",
|
|
|
- "Iter 16640, Minibatch Loss= 0.428004, Training Accuracy= 0.91406\n",
|
|
|
- "Iter 17920, Minibatch Loss= 0.307468, Training Accuracy= 0.91406\n",
|
|
|
- "Iter 19200, Minibatch Loss= 0.249527, Training Accuracy= 0.92188\n",
|
|
|
- "Iter 20480, Minibatch Loss= 0.148163, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 21760, Minibatch Loss= 0.445275, Training Accuracy= 0.83594\n",
|
|
|
- "Iter 23040, Minibatch Loss= 0.173083, Training Accuracy= 0.93750\n",
|
|
|
- "Iter 24320, Minibatch Loss= 0.373696, Training Accuracy= 0.87500\n",
|
|
|
- "Iter 25600, Minibatch Loss= 0.509869, Training Accuracy= 0.85938\n",
|
|
|
- "Iter 26880, Minibatch Loss= 0.198096, Training Accuracy= 0.92969\n",
|
|
|
- "Iter 28160, Minibatch Loss= 0.228221, Training Accuracy= 0.92188\n",
|
|
|
- "Iter 29440, Minibatch Loss= 0.280088, Training Accuracy= 0.89844\n",
|
|
|
- "Iter 30720, Minibatch Loss= 0.300495, Training Accuracy= 0.91406\n",
|
|
|
- "Iter 32000, Minibatch Loss= 0.171746, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 33280, Minibatch Loss= 0.263745, Training Accuracy= 0.89844\n",
|
|
|
- "Iter 34560, Minibatch Loss= 0.177300, Training Accuracy= 0.93750\n",
|
|
|
- "Iter 35840, Minibatch Loss= 0.160621, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 37120, Minibatch Loss= 0.321745, Training Accuracy= 0.91406\n",
|
|
|
- "Iter 38400, Minibatch Loss= 0.188322, Training Accuracy= 0.93750\n",
|
|
|
- "Iter 39680, Minibatch Loss= 0.104025, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 40960, Minibatch Loss= 0.291053, Training Accuracy= 0.89062\n",
|
|
|
- "Iter 42240, Minibatch Loss= 0.131189, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 43520, Minibatch Loss= 0.154949, Training Accuracy= 0.92969\n",
|
|
|
- "Iter 44800, Minibatch Loss= 0.150411, Training Accuracy= 0.93750\n",
|
|
|
- "Iter 46080, Minibatch Loss= 0.117008, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 47360, Minibatch Loss= 0.181344, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 48640, Minibatch Loss= 0.209197, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 49920, Minibatch Loss= 0.159350, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 51200, Minibatch Loss= 0.124001, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 52480, Minibatch Loss= 0.165183, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 53760, Minibatch Loss= 0.046438, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 55040, Minibatch Loss= 0.199995, Training Accuracy= 0.91406\n",
|
|
|
- "Iter 56320, Minibatch Loss= 0.057071, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 57600, Minibatch Loss= 0.177065, Training Accuracy= 0.92188\n",
|
|
|
- "Iter 58880, Minibatch Loss= 0.091666, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 60160, Minibatch Loss= 0.069232, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 61440, Minibatch Loss= 0.127353, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 62720, Minibatch Loss= 0.095795, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 64000, Minibatch Loss= 0.202651, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 65280, Minibatch Loss= 0.118779, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 66560, Minibatch Loss= 0.043173, Training Accuracy= 0.98438\n",
|
|
|
- "Iter 67840, Minibatch Loss= 0.152280, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 69120, Minibatch Loss= 0.085301, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 70400, Minibatch Loss= 0.093421, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 71680, Minibatch Loss= 0.096358, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 72960, Minibatch Loss= 0.053386, Training Accuracy= 0.98438\n",
|
|
|
- "Iter 74240, Minibatch Loss= 0.065237, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 75520, Minibatch Loss= 0.228090, Training Accuracy= 0.92188\n",
|
|
|
- "Iter 76800, Minibatch Loss= 0.106751, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 78080, Minibatch Loss= 0.187795, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 79360, Minibatch Loss= 0.092611, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 80640, Minibatch Loss= 0.137386, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 81920, Minibatch Loss= 0.106634, Training Accuracy= 0.98438\n",
|
|
|
- "Iter 83200, Minibatch Loss= 0.111749, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 84480, Minibatch Loss= 0.191184, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 85760, Minibatch Loss= 0.063982, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 87040, Minibatch Loss= 0.092380, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 88320, Minibatch Loss= 0.089899, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 89600, Minibatch Loss= 0.141107, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 90880, Minibatch Loss= 0.075549, Training Accuracy= 0.96094\n",
|
|
|
- "Iter 92160, Minibatch Loss= 0.186539, Training Accuracy= 0.94531\n",
|
|
|
- "Iter 93440, Minibatch Loss= 0.079639, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 94720, Minibatch Loss= 0.156895, Training Accuracy= 0.95312\n",
|
|
|
- "Iter 96000, Minibatch Loss= 0.088042, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 97280, Minibatch Loss= 0.076670, Training Accuracy= 0.96875\n",
|
|
|
- "Iter 98560, Minibatch Loss= 0.051336, Training Accuracy= 0.97656\n",
|
|
|
- "Iter 99840, Minibatch Loss= 0.086923, Training Accuracy= 0.98438\n",
|
|
|
+ "Step 1, Minibatch Loss= 2.6218, Training Accuracy= 0.086\n",
|
|
|
+ "Step 200, Minibatch Loss= 2.1900, Training Accuracy= 0.211\n",
|
|
|
+ "Step 400, Minibatch Loss= 2.0144, Training Accuracy= 0.375\n",
|
|
|
+ "Step 600, Minibatch Loss= 1.8729, Training Accuracy= 0.445\n",
|
|
|
+ "Step 800, Minibatch Loss= 1.8000, Training Accuracy= 0.469\n",
|
|
|
+ "Step 1000, Minibatch Loss= 1.7244, Training Accuracy= 0.453\n",
|
|
|
+ "Step 1200, Minibatch Loss= 1.5657, Training Accuracy= 0.523\n",
|
|
|
+ "Step 1400, Minibatch Loss= 1.5473, Training Accuracy= 0.547\n",
|
|
|
+ "Step 1600, Minibatch Loss= 1.5288, Training Accuracy= 0.500\n",
|
|
|
+ "Step 1800, Minibatch Loss= 1.4203, Training Accuracy= 0.555\n",
|
|
|
+ "Step 2000, Minibatch Loss= 1.2525, Training Accuracy= 0.641\n",
|
|
|
+ "Step 2200, Minibatch Loss= 1.2696, Training Accuracy= 0.594\n",
|
|
|
+ "Step 2400, Minibatch Loss= 1.2000, Training Accuracy= 0.664\n",
|
|
|
+ "Step 2600, Minibatch Loss= 1.1017, Training Accuracy= 0.625\n",
|
|
|
+ "Step 2800, Minibatch Loss= 1.2656, Training Accuracy= 0.578\n",
|
|
|
+ "Step 3000, Minibatch Loss= 1.0830, Training Accuracy= 0.656\n",
|
|
|
+ "Step 3200, Minibatch Loss= 1.1522, Training Accuracy= 0.633\n",
|
|
|
+ "Step 3400, Minibatch Loss= 0.9484, Training Accuracy= 0.680\n",
|
|
|
+ "Step 3600, Minibatch Loss= 1.0470, Training Accuracy= 0.641\n",
|
|
|
+ "Step 3800, Minibatch Loss= 1.0609, Training Accuracy= 0.586\n",
|
|
|
+ "Step 4000, Minibatch Loss= 1.1853, Training Accuracy= 0.648\n",
|
|
|
+ "Step 4200, Minibatch Loss= 0.9438, Training Accuracy= 0.750\n",
|
|
|
+ "Step 4400, Minibatch Loss= 0.7986, Training Accuracy= 0.766\n",
|
|
|
+ "Step 4600, Minibatch Loss= 0.8070, Training Accuracy= 0.750\n",
|
|
|
+ "Step 4800, Minibatch Loss= 0.8382, Training Accuracy= 0.734\n",
|
|
|
+ "Step 5000, Minibatch Loss= 0.7397, Training Accuracy= 0.766\n",
|
|
|
+ "Step 5200, Minibatch Loss= 0.7870, Training Accuracy= 0.727\n",
|
|
|
+ "Step 5400, Minibatch Loss= 0.6380, Training Accuracy= 0.828\n",
|
|
|
+ "Step 5600, Minibatch Loss= 0.7975, Training Accuracy= 0.719\n",
|
|
|
+ "Step 5800, Minibatch Loss= 0.7934, Training Accuracy= 0.766\n",
|
|
|
+ "Step 6000, Minibatch Loss= 0.6628, Training Accuracy= 0.805\n",
|
|
|
+ "Step 6200, Minibatch Loss= 0.7958, Training Accuracy= 0.672\n",
|
|
|
+ "Step 6400, Minibatch Loss= 0.6582, Training Accuracy= 0.773\n",
|
|
|
+ "Step 6600, Minibatch Loss= 0.5908, Training Accuracy= 0.812\n",
|
|
|
+ "Step 6800, Minibatch Loss= 0.6182, Training Accuracy= 0.820\n",
|
|
|
+ "Step 7000, Minibatch Loss= 0.5513, Training Accuracy= 0.812\n",
|
|
|
+ "Step 7200, Minibatch Loss= 0.6683, Training Accuracy= 0.789\n",
|
|
|
+ "Step 7400, Minibatch Loss= 0.5337, Training Accuracy= 0.828\n",
|
|
|
+ "Step 7600, Minibatch Loss= 0.6428, Training Accuracy= 0.805\n",
|
|
|
+ "Step 7800, Minibatch Loss= 0.6708, Training Accuracy= 0.797\n",
|
|
|
+ "Step 8000, Minibatch Loss= 0.4664, Training Accuracy= 0.852\n",
|
|
|
+ "Step 8200, Minibatch Loss= 0.4249, Training Accuracy= 0.859\n",
|
|
|
+ "Step 8400, Minibatch Loss= 0.7723, Training Accuracy= 0.773\n",
|
|
|
+ "Step 8600, Minibatch Loss= 0.4706, Training Accuracy= 0.859\n",
|
|
|
+ "Step 8800, Minibatch Loss= 0.4800, Training Accuracy= 0.867\n",
|
|
|
+ "Step 9000, Minibatch Loss= 0.4636, Training Accuracy= 0.891\n",
|
|
|
+ "Step 9200, Minibatch Loss= 0.5734, Training Accuracy= 0.828\n",
|
|
|
+ "Step 9400, Minibatch Loss= 0.5548, Training Accuracy= 0.875\n",
|
|
|
+ "Step 9600, Minibatch Loss= 0.3575, Training Accuracy= 0.922\n",
|
|
|
+ "Step 9800, Minibatch Loss= 0.4566, Training Accuracy= 0.844\n",
|
|
|
+ "Step 10000, Minibatch Loss= 0.5125, Training Accuracy= 0.844\n",
|
|
|
"Optimization Finished!\n",
|
|
|
- "Testing Accuracy: 0.960938\n"
|
|
|
+ "Testing Accuracy: 0.890625\n"
|
|
|
]
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "# Launch the graph\n",
|
|
|
+ "# Start training\n",
|
|
|
"with tf.Session() as sess:\n",
|
|
|
+ "\n",
|
|
|
+ " # Run the initializer\n",
|
|
|
" sess.run(init)\n",
|
|
|
- " step = 1\n",
|
|
|
- " # Keep training until reach max iterations\n",
|
|
|
- " while step * batch_size < training_iters:\n",
|
|
|
+ "\n",
|
|
|
+ " for step in range(1, training_steps+1):\n",
|
|
|
" batch_x, batch_y = mnist.train.next_batch(batch_size)\n",
|
|
|
" # Reshape data to get 28 seq of 28 elements\n",
|
|
|
- " batch_x = batch_x.reshape((batch_size, n_steps, n_input))\n",
|
|
|
+ " batch_x = batch_x.reshape((batch_size, timesteps, num_input))\n",
|
|
|
" # Run optimization op (backprop)\n",
|
|
|
- " sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})\n",
|
|
|
- " if step % display_step == 0:\n",
|
|
|
- " # Calculate batch accuracy\n",
|
|
|
- " acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})\n",
|
|
|
- " # Calculate batch loss\n",
|
|
|
- " loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})\n",
|
|
|
- " print \"Iter \" + str(step*batch_size) + \", Minibatch Loss= \" + \\\n",
|
|
|
- " \"{:.6f}\".format(loss) + \", Training Accuracy= \" + \\\n",
|
|
|
- " \"{:.5f}\".format(acc)\n",
|
|
|
- " step += 1\n",
|
|
|
- " print \"Optimization Finished!\"\n",
|
|
|
+ " sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})\n",
|
|
|
+ " if step % display_step == 0 or step == 1:\n",
|
|
|
+ " # Calculate batch loss and accuracy\n",
|
|
|
+ " loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,\n",
|
|
|
+ " Y: batch_y})\n",
|
|
|
+ " print(\"Step \" + str(step) + \", Minibatch Loss= \" + \\\n",
|
|
|
+ " \"{:.4f}\".format(loss) + \", Training Accuracy= \" + \\\n",
|
|
|
+ " \"{:.3f}\".format(acc))\n",
|
|
|
+ "\n",
|
|
|
+ " print(\"Optimization Finished!\")\n",
|
|
|
"\n",
|
|
|
" # Calculate accuracy for 128 mnist test images\n",
|
|
|
" test_len = 128\n",
|
|
|
- " test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))\n",
|
|
|
+ " test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))\n",
|
|
|
" test_label = mnist.test.labels[:test_len]\n",
|
|
|
- " print \"Testing Accuracy:\", \\\n",
|
|
|
- " sess.run(accuracy, feed_dict={x: test_data, y: test_label})"
|
|
|
+ " print(\"Testing Accuracy:\", \\\n",
|
|
|
+ " sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))\n"
|
|
|
]
|
|
|
},
|
|
|
{
|