{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Auto-Encoder Example\n", "\n", "Build a 2-layer auto-encoder with TensorFlow v2 to compress images into a lower-dimensional latent space and then reconstruct them.\n", "\n", "- Author: Aymeric Damien\n", "- Project: https://github.com/aymericdamien/TensorFlow-Examples/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Auto-Encoder Overview\n", "\n", "An auto-encoder pairs an encoder, which compresses the input into a low-dimensional latent code, with a decoder, which reconstructs the input from that code. In this example the encoder maps 784 -> 128 -> 64 features and the decoder mirrors it (64 -> 128 -> 784).\n", "\n", "References:\n", "- [Gradient-based learning applied to document recognition](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Proceedings of the IEEE, 86(11):2278-2324, November 1998.\n", "\n", "## MNIST Dataset Overview\n", "\n", "This example uses the MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 255.\n", "\n", "In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28).\n", "\n", "![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png)\n", "\n", "More info: http://yann.lecun.com/exdb/mnist/" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import, division, print_function\n", "\n", "import tensorflow as tf\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# MNIST Dataset parameters.\n", "num_features = 784 # data features (img shape: 28*28).\n", "\n", "# Training parameters.\n", "learning_rate = 0.01\n", "training_steps = 20000\n", "batch_size = 256\n", "display_step = 1000\n", "\n", "# Network Parameters\n", "num_hidden_1 = 128 # 1st layer num features.\n", "num_hidden_2 = 64 # 2nd layer num features (the latent dim)." 
# --- Prepare MNIST data -----------------------------------------------------
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


def _prepare_images(images):
    """Return `images` as float32 rows of `num_features` values scaled by 1/255.

    The steps run in this order: cast to float32, flatten every image to a
    1-D vector of `num_features` entries, then divide by 255 so the pixel
    values go from [0, 255] to [0, 1].
    """
    flat = images.astype(np.float32).reshape([-1, num_features])
    return flat / 255.


x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)

# --- Input pipelines (tf.data) ----------------------------------------------
# Training stream: repeated indefinitely, shuffled with a 10000-element
# buffer, grouped into batches of `batch_size`, one batch prefetched.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()
    .shuffle(10000)
    .batch(batch_size)
    .prefetch(1)
)

# Test stream: same batching and prefetching, but no shuffling.
test_data = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .repeat()
    .batch(batch_size)
    .prefetch(1)
)

# --- Layer parameters -------------------------------------------------------
# A random value generator to initialize weights and biases.
random_normal = tf.initializers.RandomNormal()

# (name, shape) tables; the variables are created in the listed order.
_weight_shapes = [
    ('encoder_h1', [num_features, num_hidden_1]),
    ('encoder_h2', [num_hidden_1, num_hidden_2]),
    ('decoder_h1', [num_hidden_2, num_hidden_1]),
    ('decoder_h2', [num_hidden_1, num_features]),
]
_bias_shapes = [
    ('encoder_b1', [num_hidden_1]),
    ('encoder_b2', [num_hidden_2]),
    ('decoder_b1', [num_hidden_1]),
    ('decoder_b2', [num_features]),
]

weights = {name: tf.Variable(random_normal(shape)) for name, shape in _weight_shapes}
biases = {name: tf.Variable(random_normal(shape)) for name, shape in _bias_shapes}
# Building the encoder.
def encoder(x):
    """Encode a batch of flattened images into the latent representation.

    Args:
        x: batch of inputs whose last dimension matches the first dimension
            of `weights['encoder_h1']`.

    Returns:
        The output of the second encoder layer (sigmoid-activated).
    """
    # Encoder hidden layer 1 with sigmoid activation.
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder hidden layer 2 with sigmoid activation (the latent code).
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2


# Building the decoder.
def decoder(x):
    """Decode a batch of latent codes back into flattened images.

    Args:
        x: batch of latent codes whose last dimension matches the first
            dimension of `weights['decoder_h1']`.

    Returns:
        The output of the second decoder layer; the final sigmoid keeps
        every value in (0, 1).
    """
    # Decoder hidden layer 1 with sigmoid activation.
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder hidden layer 2 with sigmoid activation (the reconstruction).
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2


# Mean square loss between original images and reconstructed ones.
def mean_square(reconstructed, original):
    """Return the mean of the element-wise squared difference of the inputs."""
    return tf.reduce_mean(tf.pow(original - reconstructed, 2))


# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)


# Optimization process.
def run_optimization(x):
    """Run one optimization step on batch `x` and return its loss.

    Args:
        x: batch of flattened input images.

    Returns:
        The reconstruction loss computed on `x` before the variables are
        updated.
    """
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        reconstructed_image = decoder(encoder(x))
        loss = mean_square(reconstructed_image, x)

    # Variables to update, i.e. trainable variables.
    # On Python 3 `dict.values()` returns a view that does not support `+`,
    # so both views are materialized as lists before concatenating.
    trainable_variables = list(weights.values()) + list(biases.values())

    # Compute gradients.
    gradients = g.gradient(loss, trainable_variables)

    # Update W and b following gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    return loss


# Run training for the given number of steps.
# `training_steps + 1` batches are taken so that the final step index equals
# `training_steps` and its loss is reported as well.
for step, (batch_x, _) in enumerate(train_data.take(training_steps + 1)):

    # Run the optimization.
    loss = run_optimization(batch_x)

    if step % display_step == 0:
        print("step: %i, loss: %f" % (step, loss))


# Testing and Visualization.
import matplotlib.pyplot as plt


def _paste_row(canvas, row, images, count):
    """Copy the first `count` images of a batch into tile-row `row` of `canvas`.

    Each image is reshaped to 28x28 and written into its own 28x28 tile.
    """
    for col in range(count):
        tile = images[col].numpy().reshape([28, 28])
        canvas[row * 28:(row + 1) * 28, col * 28:(col + 1) * 28] = tile


# Encode and decode images from test set and visualize their reconstruction.
n = 4
canvas_orig = np.empty((28 * n, 28 * n))
canvas_recon = np.empty((28 * n, 28 * n))
for i, (batch_x, _) in enumerate(test_data.take(n)):
    # Encode and decode the digit images.
    reconstructed_images = decoder(encoder(batch_x))
    # Row i of each canvas holds the first n images of the i-th test batch.
    _paste_row(canvas_orig, i, batch_x, n)
    _paste_row(canvas_recon, i, reconstructed_images, n)

for caption, canvas in (("Original Images", canvas_orig),
                        ("Reconstructed Images", canvas_recon)):
    print(caption)
    plt.figure(figsize=(n, n))
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.show()