#!/bin/bash
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download ImageNet Challenge 2012 training and validation data set.
#
# Downloads and decompresses raw images and bounding boxes.
#
# **IMPORTANT**
# To download the raw images, the user must create an account with image-net.org
# and generate a username and access_key. The latter two are required for
# downloading the raw images.
#
# usage:
#  ./download_imagenet.sh [dirname] [synsets_file]
#
#  dirname       directory that receives the extracted data
#                (default: ./imagenet-data)
#  synsets_file  text file with one synset ID per line; each ID names a
#                "<ID>.tar" member of the training tarball
#                (default: ./synsets.txt)
#
# environment:
#  IMAGENET_USERNAME, IMAGENET_ACCESS_KEY
#                prompted for interactively when either is unset or empty
#
# The validation and training tarballs themselves are downloaded into the
# parent directory of dirname; only their extracted contents go into dirname.
set -euo pipefail

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ -z "${IMAGENET_ACCESS_KEY:-}" || -z "${IMAGENET_USERNAME:-}" ]]; then
  cat <<END
In order to download the imagenet data, you have to create an account with
image-net.org. This will get you a username and an access key. You can set the
IMAGENET_USERNAME and IMAGENET_ACCESS_KEY environment variables, or you can
enter the credentials here.
END
  read -r -p "Username: " IMAGENET_USERNAME
  read -r -p "Access key: " IMAGENET_ACCESS_KEY
fi
# NOTE(review): none of the commands below reference IMAGENET_USERNAME or
# IMAGENET_ACCESS_KEY; confirm whether the download URLs still need them.

command -v wget >/dev/null || die "wget is required but was not found in PATH"

OUTDIR="${1:-./imagenet-data}"
SYNSETS_FILE="${2:-./synsets.txt}"
CURRENT_DIR=$(pwd)

# Anchor a relative synsets path to the invocation directory, because the
# script changes directory before the file is read. Absolute paths are kept.
if [[ "${SYNSETS_FILE}" != /* ]]; then
  SYNSETS_FILE="${CURRENT_DIR}/${SYNSETS_FILE}"
fi
# Fail before the large downloads rather than after them.
[[ -r "${SYNSETS_FILE}" ]] || die "Cannot read synsets file: ${SYNSETS_FILE}"

echo "Saving downloaded files to $OUTDIR"
mkdir -p "${OUTDIR}"
# Resolve to an absolute path without a trailing slash so that every path
# derived from it stays valid after the later 'cd', whether or not the caller
# supplied a trailing slash.
OUTDIR=$(cd "${OUTDIR}" && pwd)

BBOX_DIR="${OUTDIR}/bounding_boxes"
mkdir -p "${BBOX_DIR}"

# Download and process all of the ImageNet bounding boxes.
BASE_URL="http://www.image-net.org/challenges/LSVRC/2012/nonpub"

# See here for details: http://www.image-net.org/download-bboxes
BOUNDING_BOX_ANNOTATIONS="${BASE_URL}/ILSVRC2012_bbox_train_v2.tar.gz"
BBOX_TAR_BALL="${BBOX_DIR}/annotations.tar.gz"
echo "Downloading bounding box annotations."
wget "${BOUNDING_BOX_ANNOTATIONS}" -O "${BBOX_TAR_BALL}"
echo "Uncompressing bounding box annotations ..."
tar xzf "${BBOX_TAR_BALL}" -C "${BBOX_DIR}"

# Count only the extracted XML files; the tarball sitting in the same
# directory and the per-synset sub-directories must not be counted.
NUM_XML=$(find "${BBOX_DIR}" -type f -name '*.xml' | wc -l)
echo "Identified ${NUM_XML} bounding box annotations."

# The image tarballs are kept next to (not inside) the output directory.
cd "${OUTDIR}/.."

# Download and uncompress all images from the ImageNet 2012 validation dataset.
VALIDATION_TARBALL="ILSVRC2012_img_val.tar"
OUTPUT_PATH="${OUTDIR}/validation/"
mkdir -p "${OUTPUT_PATH}"
echo "Downloading ${VALIDATION_TARBALL} to ${OUTPUT_PATH}."
# -c resumes a partially downloaded tarball instead of starting over.
wget -nd -c "${BASE_URL}/${VALIDATION_TARBALL}"
tar xf "${VALIDATION_TARBALL}" -C "${OUTPUT_PATH}"

# Download all images from the ImageNet 2012 train dataset.
TRAIN_TARBALL="ILSVRC2012_img_train.tar"
OUTPUT_PATH="${OUTDIR}/train/"
mkdir -p "${OUTPUT_PATH}"
echo "Downloading ${TRAIN_TARBALL} to ${OUTPUT_PATH}."
wget -nd -c "${BASE_URL}/${TRAIN_TARBALL}"

# Un-compress the individual tar-files within the train tar-file.
echo "Uncompressing individual train tar-balls in the training data."
# '|| [[ -n ... ]]' also processes a final line that lacks a trailing newline.
while read -r SYNSET || [[ -n "${SYNSET}" ]]; do
  # Skip blank lines.
  [[ -n "${SYNSET}" ]] || continue

  # The entry is used to build a path that is removed recursively, so it must
  # be a plain directory name.
  case "${SYNSET}" in
    */* | . | ..) die "Invalid synset entry in ${SYNSETS_FILE}: ${SYNSET}" ;;
  esac

  echo "Processing: ${SYNSET}"
  SYNSET_DIR="${OUTPUT_PATH}${SYNSET}"

  # Start from an empty directory so files from an earlier run do not linger.
  rm -rf -- "${SYNSET_DIR:?}"
  mkdir -p "${SYNSET_DIR}"

  # Pull this synset's inner tarball out of the train tarball, unpack it into
  # its own directory, then discard the inner tarball.
  tar xf "${TRAIN_TARBALL}" "${SYNSET}.tar"
  tar xf "${SYNSET}.tar" -C "${SYNSET_DIR}/"
  rm -f -- "${SYNSET}.tar"

  echo "Finished processing: ${SYNSET}"
done < "${SYNSETS_FILE}"