download_and_preprocess_flowers.sh 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/bin/bash
  2. # Copyright 2016 Google Inc. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # ==============================================================================
  16. # Script to download and preprocess the flowers data set. This data set
  17. # provides a demonstration for how to perform fine-tuning (i.e. transfer
  18. # learning) from one model to a new data set.
  19. #
  20. # This script provides a demonstration for how to prepare an arbitrary
  21. # data set for training an Inception v3 model.
  22. #
  23. # We demonstrate this with the flowers data set, which consists of labeled
  24. # flower images from 5 classes:
  25. #
  26. # daisy, dandelion, roses, sunflowers, tulips
  27. #
  28. # The final output of this script are sharded TFRecord files containing
  29. # serialized Example protocol buffers. See build_image_data.py for
  30. # details of how the Example protocol buffer contains image data.
  31. #
  32. # usage:
  33. # ./download_and_preprocess_flowers.sh [data-dir]
  # Exit as soon as any command fails.
  set -e

  # The data directory argument is required. Report the usage on stderr and
  # exit with a non-zero status so that callers can detect the usage error.
  if [ -z "$1" ]; then
    echo "usage download_and_preprocess_flowers.sh [data dir]" >&2
    exit 1
  fi
  # Create the output and temporary directories.
  #
  # The script later changes into the data directory and keeps using paths
  # derived from DATA_DIR, so DATA_DIR is resolved to an absolute path here.
  # A relative argument would otherwise refer to a different location once
  # the working directory has changed.
  DATA_DIR="${1%/}"  # Drop one trailing slash before joining path components.
  mkdir -p -- "${DATA_DIR}" || exit 1
  # CDPATH is cleared so that `cd` resolves the name against the current
  # directory only and prints nothing into the command substitution.
  DATA_DIR="$(CDPATH= cd -- "${DATA_DIR}" && pwd)" || exit 1
  SCRATCH_DIR="${DATA_DIR}/raw-data/"
  mkdir -p -- "${SCRATCH_DIR}" || exit 1
  # Directory, named after this script ($0), from which build_image_data is run.
  WORK_DIR="$0.runfiles/inception"
  # Download the flowers data.
  DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz"
  # Remember the starting directory; the script changes back to it later.
  CURRENT_DIR=$(pwd)
  cd "${DATA_DIR}"
  TARBALL="flower_photos.tgz"
  if [ ! -f "${TARBALL}" ]; then
    echo "Downloading flower data set."
    # Fetch into a temporary name and rename only on success: `wget -O`
    # creates the output file even when the transfer fails, and a truncated
    # tarball would make every later run skip the download.
    if ! wget -O "${TARBALL}.part" "${DATA_URL}"; then
      rm -f -- "${TARBALL}.part"
      echo "Failed to download ${DATA_URL}" >&2
      exit 1
    fi
    mv -f -- "${TARBALL}.part" "${TARBALL}"
  else
    echo "Skipping download of flower data."
  fi
  # Unpack the archive; it expands into a flower_photos/ directory inside the
  # current working directory.
  tar -x -f flower_photos.tgz

  # Locations of the train and validation images under the scratch directory.
  VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"
  TRAIN_DIRECTORY="${SCRATCH_DIR}train/"

  # Remove what an earlier run may have left behind, then rename the freshly
  # extracted flower_photos/ tree so that it becomes the train directory.
  for stale_dir in "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}"; do
    rm -rf "${stale_dir}"
  done
  mv flower_photos "${TRAIN_DIRECTORY}"
  # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips

  #######################################
  # Prints the label names found in a training directory, one per line, sorted.
  # A label is the name of an immediate sub-directory. Plain files are skipped,
  # and any name containing "LICENSE" is filtered out as well.
  # Arguments: $1 - training directory holding one sub-directory per label
  # Outputs:   label names on stdout
  # Returns:   non-zero if $1 is not a directory
  #######################################
  list_labels() {
    local train_dir="${1%/}"
    local entry label
    [ -d "${train_dir}" ] || return 1
    for entry in "${train_dir}"/*/; do
      # Without any sub-directory the glob stays literal; skip that non-match.
      [ -d "${entry}" ] || continue
      label="${entry%/}"
      label="${label##*/}"
      case "${label}" in
        *LICENSE*) continue ;;
      esac
      printf '%s\n' "${label}"
    done | sort
  }

  # SCRATCH_DIR already ends with a slash, so no extra separator is added.
  LABELS_FILE="${SCRATCH_DIR}labels.txt"
  list_labels "${TRAIN_DIRECTORY}" > "${LABELS_FILE}"
  # Generate the validation data set.

  #######################################
  # Moves a random sample of files from one directory into another.
  # File names are read line by line, so names that contain spaces or glob
  # characters are moved intact.
  # Arguments: $1 - source directory
  #            $2 - destination directory (created if missing)
  #            $3 - maximum number of files to move
  # Returns:   non-zero if the destination cannot be created or a move fails
  #######################################
  move_random_sample() {
    local src_dir="${1%/}"
    local dst_dir="${2%/}"
    local count="$3"
    local image
    mkdir -p -- "${dst_dir}" || return 1
    while IFS= read -r image; do
      mv -f -- "${src_dir}/${image}" "${dst_dir}/" || return 1
    done < <(ls -1 -- "${src_dir}" | shuf -n "${count}")
  }

  # Move 100 randomly selected images of every label to the validation set.
  while IFS= read -r LABEL; do
    move_random_sample \
      "${TRAIN_DIRECTORY}${LABEL}" "${VALIDATION_DIRECTORY}${LABEL}" 100
  done < "${LABELS_FILE}"
  # Build the TFRecords version of the image data.
  # Return to the directory the script was started from before running the
  # build tool.
  cd "${CURRENT_DIR}"
  OUTPUT_DIRECTORY="${DATA_DIR}"
  BUILD_SCRIPT="${WORK_DIR}/build_image_data"
  # Collect the flags in an array so that each one stays a single argument.
  build_flags=(
    "--train_directory=${TRAIN_DIRECTORY}"
    "--validation_directory=${VALIDATION_DIRECTORY}"
    "--output_directory=${OUTPUT_DIRECTORY}"
    "--labels_file=${LABELS_FILE}"
  )
  "${BUILD_SCRIPT}" "${build_flags[@]}"