#!/bin/bash
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess the flowers data set. This data set
# provides a demonstration for how to perform fine-tuning (i.e. transfer
# learning) from one model to a new data set.
#
# This script provides a demonstration for how to prepare an arbitrary
# data set for training an Inception v3 model.
#
# We demonstrate this with the flowers data set, which consists of
# labeled flower images from 5 classes:
#
# daisy, dandelion, roses, sunflowers, tulips
#
# The final output of this script is a set of sharded TFRecord files
# containing serialized Example protocol buffers. See build_image_data.py
# for details of how the Example protocol buffer contains image data.
#
# usage:
# ./download_and_preprocess_flowers.sh [data-dir]
- set -e
- if [ -z "$1" ]; then
- echo "usage download_and_preprocess_flowers.sh [data dir]"
- exit
- fi
- # Create the output and temporary directories.
- DATA_DIR="${1%/}"
- SCRATCH_DIR="${DATA_DIR}/raw-data/"
- mkdir -p "${DATA_DIR}"
- mkdir -p "${SCRATCH_DIR}"
- WORK_DIR="$0.runfiles/inception"
- # Download the flowers data.
- DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz"
- CURRENT_DIR=$(pwd)
- cd "${DATA_DIR}"
- TARBALL="flower_photos.tgz"
- if [ ! -f ${TARBALL} ]; then
- echo "Downloading flower data set."
- wget -O ${TARBALL} "${DATA_URL}"
- else
- echo "Skipping download of flower data."
- fi
- # Note the locations of the train and validation data.
- TRAIN_DIRECTORY="${SCRATCH_DIR}train/"
- VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"
- # Expands the data into the flower_photos/ directory and rename it as the
- # train directory.
- tar xf flower_photos.tgz
- rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}"
- mv flower_photos "${TRAIN_DIRECTORY}"
- # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips
- LABELS_FILE="${SCRATCH_DIR}/labels.txt"
- ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}"
- # Generate the validation data set.
- while read LABEL; do
- VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}"
- TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}"
- # Move the first randomly selected 100 images to the validation set.
- mkdir -p "${VALIDATION_DIR_FOR_LABEL}"
- VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | shuf | head -100)
- for IMAGE in ${VALIDATION_IMAGES}; do
- mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}"
- done
- done < "${LABELS_FILE}"
- # Build the TFRecords version of the image data.
- cd "${CURRENT_DIR}"
- BUILD_SCRIPT="${WORK_DIR}/build_image_data"
- OUTPUT_DIRECTORY="${DATA_DIR}"
- "${BUILD_SCRIPT}" \
- --train_directory="${TRAIN_DIRECTORY}" \
- --validation_directory="${VALIDATION_DIRECTORY}" \
- --output_directory="${OUTPUT_DIRECTORY}" \
- --labels_file="${LABELS_FILE}"