|
@@ -0,0 +1,94 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ "# MNIST Dataset Introduction\n",
|
|
|
+ "\n",
|
|
|
+ "Most examples use the MNIST dataset of handwritten digits. It has 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image, so each sample is represented as a matrix of size 28x28 with values from 0 to 1.\n",
|
|
|
+ "\n",
|
|
|
+ "## Overview\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "## Usage\n",
|
|
|
+ "In our examples, we use TensorFlow's [input_data.py](https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/mnist/input_data.py) script to load the dataset.\n",
|
|
|
+ "It is quite useful for managing our data, and handles:\n",
|
|
|
+ "\n",
|
|
|
+ "- Dataset downloading\n",
|
|
|
+ "\n",
|
|
|
+ "- Loading the entire dataset into numpy arrays:\n",
|
|
|
+ "\n",
|
|
|
+ "\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Import MNIST\n",
|
|
|
+ "from tensorflow.examples.tutorials.mnist import input_data\n",
|
|
|
+ "mnist = input_data.read_data_sets(\"/tmp/data/\", one_hot=True)\n",
|
|
|
+ "\n",
|
|
|
+ "# Load data\n",
|
|
|
+ "X_train = mnist.train.images\n",
|
|
|
+ "Y_train = mnist.train.labels\n",
|
|
|
+ "X_test = mnist.test.images\n",
|
|
|
+ "Y_test = mnist.test.labels"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "- A `next_batch` function that iterates over the whole dataset and returns only the desired portion of the dataset samples (in order to save memory and avoid loading the entire dataset)."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Get the next batch of 64 images and their labels\n",
|
|
|
+ "batch_X, batch_Y = mnist.train.next_batch(64)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "Link: http://yann.lecun.com/exdb/mnist/"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "IPython (Python 2.7)",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python2"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 2
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython2",
|
|
|
+ "version": "2.7.11"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 0
|
|
|
+}
|