|
@@ -0,0 +1,530 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Load and parse data with TensorFlow 2.0 (tf.data)\n",
|
|
|
+ "\n",
|
|
|
+ "A TensorFlow 2.0 example to build input pipelines for loading data efficiently.\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "- Numpy Arrays\n",
|
|
|
+ "- Images\n",
|
|
|
+ "- CSV file\n",
|
|
|
+ "- Custom data from a Generator\n",
|
|
|
+ "\n",
|
|
|
+ "For more information about creating and loading TensorFlow's `TFRecords` data format, see: [tfrecords.ipynb](tfrecords.ipynb)\n",
|
|
|
+ "\n",
|
|
|
+ "- Author: Aymeric Damien\n",
|
|
|
+ "- Project: https://github.com/aymericdamien/TensorFlow-Examples/"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "from __future__ import absolute_import, division, print_function\n",
|
|
|
+ "\n",
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import random\n",
|
|
|
+ "import requests\n",
|
|
|
+ "import string\n",
|
|
|
+ "import tarfile\n",
|
|
|
+ "import tensorflow as tf"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Load Numpy Arrays\n",
|
|
|
+ "\n",
|
|
|
+ "Build a data pipeline over numpy arrays."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Create a toy dataset (even and odd numbers, with respective labels of 0 and 1).\n",
|
|
|
+ "evens = np.arange(0, 100, step=2, dtype=np.int32)\n",
|
|
|
+ "evens_label = np.zeros(50, dtype=np.int32)\n",
|
|
|
+ "odds = np.arange(1, 100, step=2, dtype=np.int32)\n",
|
|
|
+ "odds_label = np.ones(50, dtype=np.int32)\n",
|
|
|
+ "# Concatenate arrays\n",
|
|
|
+ "features = np.concatenate([evens, odds])\n",
|
|
|
+ "labels = np.concatenate([evens_label, odds_label])\n",
|
|
|
+ "\n",
|
|
|
+ "# Load a numpy array using tf data api with `from_tensor_slices`.\n",
|
|
|
+ "data = tf.data.Dataset.from_tensor_slices((features, labels))\n",
|
|
|
+ "# Refill data indefinitely. \n",
|
|
|
+ "data = data.repeat()\n",
|
|
|
+ "# Shuffle data.\n",
|
|
|
+ "data = data.shuffle(buffer_size=100)\n",
|
|
|
+ "# Batch data (aggregate records together).\n",
|
|
|
+ "data = data.batch(batch_size=4)\n",
|
|
|
+ "# Prefetch batch (pre-load batch for faster consumption).\n",
|
|
|
+ "data = data.prefetch(buffer_size=1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "tf.Tensor([ 9 94 29 85], shape=(4,), dtype=int32) tf.Tensor([1 0 1 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([68 57 88 41], shape=(4,), dtype=int32) tf.Tensor([0 1 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([51 19 18 56], shape=(4,), dtype=int32) tf.Tensor([1 1 0 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([70 84 99 32], shape=(4,), dtype=int32) tf.Tensor([0 0 1 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([40 0 25 28], shape=(4,), dtype=int32) tf.Tensor([0 0 1 0], shape=(4,), dtype=int32)\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "for batch_x, batch_y in data.take(5):\n",
|
|
|
+ " print(batch_x, batch_y)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "tf.Tensor([ 9 94 29 85], shape=(4,), dtype=int32) tf.Tensor([1 0 1 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([68 57 88 41], shape=(4,), dtype=int32) tf.Tensor([0 1 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([51 19 18 56], shape=(4,), dtype=int32) tf.Tensor([1 1 0 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([70 84 99 32], shape=(4,), dtype=int32) tf.Tensor([0 0 1 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([40 0 25 28], shape=(4,), dtype=int32) tf.Tensor([0 0 1 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([20 38 22 79], shape=(4,), dtype=int32) tf.Tensor([0 0 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([20 22 96 27], shape=(4,), dtype=int32) tf.Tensor([0 0 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([34 58 86 67], shape=(4,), dtype=int32) tf.Tensor([0 0 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([ 2 98 24 21], shape=(4,), dtype=int32) tf.Tensor([0 0 0 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor([16 45 18 35], shape=(4,), dtype=int32) tf.Tensor([0 1 0 1], shape=(4,), dtype=int32)\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Note: If you are planning on calling multiple time,\n",
|
|
|
+ "# you can user the iterator way:\n",
|
|
|
+ "ite_data = iter(data)\n",
|
|
|
+ "for i in range(5):\n",
|
|
|
+ " batch_x, batch_y = next(ite_data)\n",
|
|
|
+ " print(batch_x, batch_y)\n",
|
|
|
+ "\n",
|
|
|
+ "for i in range(5):\n",
|
|
|
+ " batch_x, batch_y = next(ite_data)\n",
|
|
|
+ " print(batch_x, batch_y)"
|
|
|
+ ]
|
|
|
+ },
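+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a side note, the same pipeline can also be written by chaining the transformations. This is purely stylistic; the sketch below reuses the `features` and `labels` arrays from above and builds an identical dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Equivalent chained form of the numpy pipeline above.\n",
+ "data = (tf.data.Dataset.from_tensor_slices((features, labels))\n",
+ "        .repeat()\n",
+ "        .shuffle(buffer_size=100)\n",
+ "        .batch(batch_size=4)\n",
+ "        .prefetch(buffer_size=1))"
+ ]
+ },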
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Load CSV files\n",
|
|
|
+ "\n",
|
|
|
+ "Build a data pipeline from features stored in a CSV file. For this example, Titanic dataset will be used as a toy dataset stored in CSV format."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "#### Titanic Dataset\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "survived|pclass|name|sex|age|sibsp|parch|ticket|fare\n",
|
|
|
+ "--------|------|----|---|---|-----|-----|------|----\n",
|
|
|
+ "1|1|\"Allen, Miss. Elisabeth Walton\"|female|29|0|0|24160|211.3375\n",
|
|
|
+ "1|1|\"Allison, Master. Hudson Trevor\"|male|0.9167|1|2|113781|151.5500\n",
|
|
|
+ "0|1|\"Allison, Miss. Helen Loraine\"|female|2|1|2|113781|151.5500\n",
|
|
|
+ "0|1|\"Allison, Mr. Hudson Joshua Creighton\"|male|30|1|2|113781|151.5500\n",
|
|
|
+ "...|...|...|...|...|...|...|...|..."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Download Titanic dataset (in csv format).\n",
|
|
|
+ "d = requests.get(\"https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/titanic_dataset.csv\")\n",
|
|
|
+ "with open(\"titanic_dataset.csv\", \"wb\") as f:\n",
|
|
|
+ " f.write(d.content)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Load Titanic dataset.\n",
|
|
|
+ "# Original features: survived,pclass,name,sex,age,sibsp,parch,ticket,fare\n",
|
|
|
+ "# Select specific columns: survived,pclass,name,sex,age,fare\n",
|
|
|
+ "column_to_use = [0, 1, 2, 3, 4, 8]\n",
|
|
|
+ "record_defaults = [tf.int32, tf.int32, tf.string, tf.string, tf.float32, tf.float32]\n",
|
|
|
+ "\n",
|
|
|
+ "# Load the whole dataset file, and slice each line.\n",
|
|
|
+ "data = tf.data.experimental.CsvDataset(\"titanic_dataset.csv\", record_defaults, header=True, select_cols=column_to_use)\n",
|
|
|
+ "# Refill data indefinitely.\n",
|
|
|
+ "data = data.repeat()\n",
|
|
|
+ "# Shuffle data.\n",
|
|
|
+ "data = data.shuffle(buffer_size=1000)\n",
|
|
|
+ "# Batch data (aggregate records together).\n",
|
|
|
+ "data = data.batch(batch_size=2)\n",
|
|
|
+ "# Prefetch batch (pre-load batch for faster consumption).\n",
|
|
|
+ "data = data.prefetch(buffer_size=1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "[1 1]\n",
|
|
|
+ "[2 2]\n",
|
|
|
+ "['Richards, Master. George Sibley' 'Rugg, Miss. Emily']\n",
|
|
|
+ "['male' 'female']\n",
|
|
|
+ "[ 0.8333 21. ]\n",
|
|
|
+ "[18.75 10.5 ]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "for survived, pclass, name, sex, age, fare in data.take(1):\n",
|
|
|
+ " print(survived.numpy())\n",
|
|
|
+ " print(pclass.numpy())\n",
|
|
|
+ " print(name.numpy())\n",
|
|
|
+ " print(sex.numpy())\n",
|
|
|
+ " print(age.numpy())\n",
|
|
|
+ " print(fare.numpy())"
|
|
|
+ ]
|
|
|
+ },
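+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If the pipeline feeds a model, it can be convenient to pack the parsed columns into a `(features, label)` structure. Below is a minimal sketch reusing the `data` pipeline above; `pack_row` and `titanic` are hypothetical names, not part of the original example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hypothetical helper (not in the original example): pack the parsed CSV\n",
+ "# columns into a (features_dict, label) pair, a convenient training format.\n",
+ "def pack_row(survived, pclass, name, sex, age, fare):\n",
+ "    features = {\"pclass\": pclass, \"sex\": sex, \"age\": age, \"fare\": fare}\n",
+ "    return features, survived\n",
+ "\n",
+ "# `map` unpacks the components of each record as separate arguments.\n",
+ "titanic = data.map(pack_row)"
+ ]
+ },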
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Load Images\n",
|
|
|
+ "\n",
|
|
|
+ "Build a data pipeline by loading images from disk. For this example, Oxford Flowers dataset will be used."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Download Oxford 17 flowers dataset\n",
|
|
|
+ "d = requests.get(\"http://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz\")\n",
|
|
|
+ "with open(\"17flowers.tgz\", \"wb\") as f:\n",
|
|
|
+ " f.write(d.content)\n",
|
|
|
+ "# Extract archive.\n",
|
|
|
+ "with tarfile.open(\"17flowers.tgz\") as t:\n",
|
|
|
+ " t.extractall()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "with open('jpg/dataset.csv', 'w') as f:\n",
|
|
|
+ " c = 0\n",
|
|
|
+ " for i in range(1360):\n",
|
|
|
+ " f.write(\"jpg/image_%04i.jpg,%i\\n\" % (i+1, c))\n",
|
|
|
+ " if (i+1) % 80 == 0:\n",
|
|
|
+ " c += 1"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Load Images\n",
|
|
|
+ "with open(\"jpg/dataset.csv\") as f:\n",
|
|
|
+ " dataset_file = f.read().splitlines()\n",
|
|
|
+ "\n",
|
|
|
+ "# Load the whole dataset file, and slice each line.\n",
|
|
|
+ "data = tf.data.Dataset.from_tensor_slices(dataset_file)\n",
|
|
|
+ "# Refill data indefinitely.\n",
|
|
|
+ "data = data.repeat()\n",
|
|
|
+ "# Shuffle data.\n",
|
|
|
+ "data = data.shuffle(buffer_size=1000)\n",
|
|
|
+ "\n",
|
|
|
+ "# Load and pre-process images.\n",
|
|
|
+ "def load_image(path):\n",
|
|
|
+ " # Read image from path.\n",
|
|
|
+ " image = tf.io.read_file(path)\n",
|
|
|
+ " # Decode the jpeg image to array [0, 255].\n",
|
|
|
+ " image = tf.image.decode_jpeg(image)\n",
|
|
|
+ " # Resize images to a common size of 256x256.\n",
|
|
|
+ " image = tf.image.resize(image, [256, 256])\n",
|
|
|
+ " # Rescale values to [-1, 1].\n",
|
|
|
+ " image = 1. - image / 127.5\n",
|
|
|
+ " return image\n",
|
|
|
+ "# Decode each line from the dataset file.\n",
|
|
|
+ "def parse_records(line):\n",
|
|
|
+ " # File is in csv format: \"image_path,label_id\".\n",
|
|
|
+ " # TensorFlow requires a default value, but it will never be used.\n",
|
|
|
+ " image_path, image_label = tf.io.decode_csv(line, [\"\", 0])\n",
|
|
|
+ " # Apply the function to load images.\n",
|
|
|
+ " image = load_image(image_path)\n",
|
|
|
+ " return image, image_label\n",
|
|
|
+ "# Use 'map' to apply the above functions in parallel.\n",
|
|
|
+ "data = data.map(parse_records, num_parallel_calls=4)\n",
|
|
|
+ "\n",
|
|
|
+ "# Batch data (aggregate images-array together).\n",
|
|
|
+ "data = data.batch(batch_size=2)\n",
|
|
|
+ "# Prefetch batch (pre-load batch for faster consumption).\n",
|
|
|
+ "data = data.prefetch(buffer_size=1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "scrolled": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "tf.Tensor(\n",
|
|
|
+ "[[[[-0.90260804 -0.9550551 -0.9444355 ]\n",
|
|
|
+ " [-0.9538603 -0.9715073 -0.9136642 ]\n",
|
|
|
+ " [-0.41687727 -0.37570083 -0.25462234]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.4617647 0.422549 0.3754902 ]\n",
|
|
|
+ " [ 0.4934436 0.45422792 0.4071691 ]\n",
|
|
|
+ " [ 0.5530829 0.5138672 0.46680838]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.9301815 -0.98563874 -0.9595933 ]\n",
|
|
|
+ " [-0.9379289 -0.95557594 -0.89773285]\n",
|
|
|
+ " [-0.68581116 -0.6446346 -0.5305033 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.46960783 0.43039215 0.38333333]\n",
|
|
|
+ " [ 0.5009191 0.46170342 0.4146446 ]\n",
|
|
|
+ " [ 0.56071925 0.52150357 0.4744447 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.9480392 -0.9862745 -0.96889937]\n",
|
|
|
+ " [-0.93367803 -0.9485103 -0.8916054 ]\n",
|
|
|
+ " [-0.9224341 -0.9033165 -0.7915518 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.48045343 0.44123775 0.39417893]\n",
|
|
|
+ " [ 0.51623774 0.47702205 0.42996323]\n",
|
|
|
+ " [ 0.5740809 0.5348652 0.48780638]]\n",
|
|
|
+ "\n",
|
|
|
+ " ...\n",
|
|
|
+ "\n",
|
|
|
+ " [[ 0.0824219 0.37201285 0.5615885 ]\n",
|
|
|
+ " [ 0.09744179 0.3858226 0.57758886]\n",
|
|
|
+ " [ 0.1170305 0.4023859 0.59906554]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.02599955 0.65661 0.7460593 ]\n",
|
|
|
+ " [-0.0751493 0.6735256 0.7022212 ]\n",
|
|
|
+ " [-0.06794965 0.73861444 0.7482958 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[ 0.10942864 0.39136028 0.5135914 ]\n",
|
|
|
+ " [ 0.18471968 0.4658088 0.5954542 ]\n",
|
|
|
+ " [ 0.21578586 0.4813496 0.6320619 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.22432214 0.676777 0.8324946 ]\n",
|
|
|
+ " [ 0.10089612 0.73174024 0.7959444 ]\n",
|
|
|
+ " [ 0.00907248 0.74025357 0.7495098 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[ 0.15197992 0.43433285 0.54413676]\n",
|
|
|
+ " [ 0.20049018 0.48284316 0.60343134]\n",
|
|
|
+ " [ 0.2664752 0.5252987 0.6713772 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.24040669 0.6644263 0.8296224 ]\n",
|
|
|
+ " [ 0.10060894 0.7192364 0.78786385]\n",
|
|
|
+ " [ 0.05363435 0.77765393 0.78206575]]]\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ " [[[-0.49571514 -0.2133621 0.6807555 ]\n",
|
|
|
+ " [-0.52243936 -0.2322433 0.66971743]\n",
|
|
|
+ " [-0.5502666 -0.24438429 0.6732628 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [-0.61084557 -0.22653186 0.7019608 ]\n",
|
|
|
+ " [-0.60784316 -0.21568632 0.65843004]\n",
|
|
|
+ " [-0.6197916 -0.22585356 0.6411722 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.5225973 -0.24024439 0.6538732 ]\n",
|
|
|
+ " [-0.54144406 -0.26501226 0.64094764]\n",
|
|
|
+ " [-0.56139374 -0.27119768 0.6341878 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [-0.6186887 -0.22824419 0.67053366]\n",
|
|
|
+ " [-0.59662986 -0.22015929 0.6358456 ]\n",
|
|
|
+ " [-0.6119485 -0.23387194 0.6130515 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.54999995 -0.26764703 0.61539805]\n",
|
|
|
+ " [-0.56739867 -0.28504562 0.6056473 ]\n",
|
|
|
+ " [-0.58733106 -0.297135 0.5988358 ]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [-0.62097263 -0.22653186 0.62466395]\n",
|
|
|
+ " [-0.60171235 -0.21739864 0.5984136 ]\n",
|
|
|
+ " [-0.614951 -0.23063731 0.579271 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " ...\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.49420047 -0.25567698 -0.29812205]\n",
|
|
|
+ " [-0.5336498 -0.31243873 -0.34749448]\n",
|
|
|
+ " [-0.5600954 -0.35433567 -0.38869584]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.4558211 0.22837007 0.47150737]\n",
|
|
|
+ " [ 0.49019605 0.24705881 0.4980392 ]\n",
|
|
|
+ " [ 0.5021446 0.25900733 0.5099877 ]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.50617576 -0.29696214 -0.31009734]\n",
|
|
|
+ " [-0.47532892 -0.28324962 -0.28901553]\n",
|
|
|
+ " [-0.45759463 -0.28628123 -0.28675795]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.46366423 0.2362132 0.4793505 ]\n",
|
|
|
+ " [ 0.4980392 0.25490195 0.5058824 ]\n",
|
|
|
+ " [ 0.5099877 0.26685047 0.51783085]]\n",
|
|
|
+ "\n",
|
|
|
+ " [[-0.45882356 -0.254902 -0.26274514]\n",
|
|
|
+ " [-0.4185791 -0.23034382 -0.23034382]\n",
|
|
|
+ " [-0.37365198 -0.21194851 -0.20410538]\n",
|
|
|
+ " ...\n",
|
|
|
+ " [ 0.46366423 0.2362132 0.4793505 ]\n",
|
|
|
+ " [ 0.4980392 0.25490195 0.5058824 ]\n",
|
|
|
+ " [ 0.5099877 0.26685047 0.51783085]]]], shape=(2, 256, 256, 3), dtype=float32) tf.Tensor([8 8], shape=(2,), dtype=int32)\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "for batch_x, batch_y in data.take(1):\n",
|
|
|
+ " print(batch_x, batch_y)"
|
|
|
+ ]
|
|
|
+ },
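+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For training, images are often augmented at load time. Below is a minimal sketch of such a step; `augment` is a hypothetical helper, not part of the original pipeline, and would be mapped over the dataset before batching (e.g. right after `parse_records`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hypothetical augmentation step (assumption, not in the original pipeline).\n",
+ "def augment(image, label):\n",
+ "    # Randomly flip each image horizontally at load time.\n",
+ "    image = tf.image.random_flip_left_right(image)\n",
+ "    return image, label\n",
+ "\n",
+ "# Applied per image, before batching, e.g.:\n",
+ "# data = data.map(parse_records).map(augment).batch(batch_size=2)"
+ ]
+ },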
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Load data from a Generator"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Create a dummy generator.\n",
|
|
|
+ "def generate_features():\n",
|
|
|
+ " # Function to generate a random string.\n",
|
|
|
+ " def random_string(length):\n",
|
|
|
+ " return ''.join(random.choice(string.ascii_letters) for m in xrange(length))\n",
|
|
|
+ " # Return a random string, a random vector, and a random int.\n",
|
|
|
+ " yield random_string(4), np.random.uniform(size=4), random.randint(0, 10)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Load a numpy array using tf data api with `from_tensor_slices`.\n",
|
|
|
+ "data = tf.data.Dataset.from_generator(generate_features, output_types=(tf.string, tf.float32, tf.int32))\n",
|
|
|
+ "# Refill data indefinitely.\n",
|
|
|
+ "data = data.repeat()\n",
|
|
|
+ "# Shuffle data.\n",
|
|
|
+ "data = data.shuffle(buffer_size=100)\n",
|
|
|
+ "# Batch data (aggregate records together).\n",
|
|
|
+ "data = data.batch(batch_size=4)\n",
|
|
|
+ "# Prefetch batch (pre-load batch for faster consumption).\n",
|
|
|
+ "data = data.prefetch(buffer_size=1)"
|
|
|
+ ]
|
|
|
+ },
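+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note: in more recent TensorFlow releases (2.4+), `output_types` is deprecated in favor of `output_signature`, which also pins down the shapes. A sketch of the equivalent call, assuming the same generator as above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Equivalent declaration with `output_signature` (TF 2.4+); a sketch only.\n",
+ "data_sig = tf.data.Dataset.from_generator(\n",
+ "    generate_features,\n",
+ "    output_signature=(tf.TensorSpec(shape=(), dtype=tf.string),\n",
+ "                      tf.TensorSpec(shape=(4,), dtype=tf.float32),\n",
+ "                      tf.TensorSpec(shape=(), dtype=tf.int32)))"
+ ]
+ },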
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "tf.Tensor(['snDw' 'NvMp' 'sXsw' 'qwuk'], shape=(4,), dtype=string) tf.Tensor(\n",
|
|
|
+ "[[0.22296238 0.03515657 0.3893014 0.6875752 ]\n",
|
|
|
+ " [0.05003363 0.27605608 0.23262134 0.10671499]\n",
|
|
|
+ " [0.8992419 0.34516433 0.29739627 0.8413017 ]\n",
|
|
|
+ " [0.91913974 0.7142106 0.48333576 0.04300505]], shape=(4, 4), dtype=float32) tf.Tensor([ 2 10 4 1], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor(['vdUx' 'InFi' 'nLzy' 'oklE'], shape=(4,), dtype=string) tf.Tensor(\n",
|
|
|
+ "[[0.6512162 0.8695475 0.7012295 0.6849636 ]\n",
|
|
|
+ " [0.00812997 0.01264008 0.7774404 0.44849646]\n",
|
|
|
+ " [0.92055863 0.894824 0.3628448 0.85603875]\n",
|
|
|
+ " [0.32219294 0.9767527 0.0307372 0.12051418]], shape=(4, 4), dtype=float32) tf.Tensor([9 7 4 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor(['ULGI' 'dBbm' 'URgs' 'Pkpt'], shape=(4,), dtype=string) tf.Tensor(\n",
|
|
|
+ "[[0.39586228 0.7472 0.3759462 0.9277406 ]\n",
|
|
|
+ " [0.44489694 0.38694733 0.9592599 0.82675934]\n",
|
|
|
+ " [0.12597603 0.299358 0.6940909 0.34155408]\n",
|
|
|
+ " [0.3401377 0.97620344 0.6047712 0.51667166]], shape=(4, 4), dtype=float32) tf.Tensor([ 4 10 0 0], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor(['kvao' 'wWvG' 'vrzf' 'cMgG'], shape=(4,), dtype=string) tf.Tensor(\n",
|
|
|
+ "[[0.8090979 0.65837437 0.9732402 0.9298921 ]\n",
|
|
|
+ " [0.67059356 0.91655296 0.52894515 0.8964492 ]\n",
|
|
|
+ " [0.05753202 0.45829964 0.74948853 0.41164723]\n",
|
|
|
+ " [0.42602295 0.8696292 0.57220364 0.9475169 ]], shape=(4, 4), dtype=float32) tf.Tensor([6 7 6 2], shape=(4,), dtype=int32)\n",
|
|
|
+ "tf.Tensor(['kyLQ' 'kxbI' 'CkQD' 'PHlJ'], shape=(4,), dtype=string) tf.Tensor(\n",
|
|
|
+ "[[0.29089147 0.6438517 0.31005543 0.31286424]\n",
|
|
|
+ " [0.0937152 0.8887667 0.24011584 0.25746483]\n",
|
|
|
+ " [0.47577712 0.53731906 0.9178111 0.3249844 ]\n",
|
|
|
+ " [0.38328 0.39294246 0.08126572 0.5995307 ]], shape=(4, 4), dtype=float32) tf.Tensor([3 1 3 2], shape=(4,), dtype=int32)\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Display data.\n",
|
|
|
+ "for batch_str, batch_vector, batch_int in data.take(5):\n",
|
|
|
+ " print(batch_str, batch_vector, batch_int)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 2",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python2"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 2
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython2",
|
|
|
+ "version": "2.7.15"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|