imagenet.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
  16. Some images have one or more bounding boxes associated with the label of the
  17. image. See details here: http://image-net.org/download-bboxes
  18. ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
  19. "WordNet ID" (wnid), which is a concatenation of POS (i.e., part of speech)
  20. and SYNSET OFFSET of WordNet. For more information, please refer to the
  21. WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].
  22. "There are bounding boxes for over 3000 popular synsets available.
  23. For each synset, there are on average 150 images with bounding boxes."
  24. WARNING: Don't use for object detection, in this case all the bounding boxes
  25. of the image belong to just one class.
  26. """
  27. from __future__ import absolute_import
  28. from __future__ import division
  29. from __future__ import print_function
  30. import os
  31. import tensorflow as tf
  32. slim = tf.contrib.slim
  33. # TODO(nsilberman): Add tfrecord file type once the script is updated.
  34. _FILE_PATTERN = '%s-*'
  35. _SPLITS_TO_SIZES = {
  36. 'train': 1281167,
  37. 'validation': 50000,
  38. }
  39. _ITEMS_TO_DESCRIPTIONS = {
  40. 'image': 'A color image of varying height and width.',
  41. 'label': 'The label id of the image, integer between 0 and 999',
  42. 'label_text': 'The text of the label.',
  43. 'object/bbox': 'A list of bounding boxes.',
  44. 'object/label': 'A list of labels, one per each object.',
  45. }
  46. _NUM_CLASSES = 1001
  47. # TODO(nsilberman): Add _LABELS_TO_NAMES
  48. def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  49. """Gets a dataset tuple with instructions for reading ImageNet.
  50. Args:
  51. split_name: A train/test split name.
  52. dataset_dir: The base directory of the dataset sources.
  53. file_pattern: The file pattern to use when matching the dataset sources.
  54. It is assumed that the pattern contains a '%s' string so that the split
  55. name can be inserted.
  56. reader: The TensorFlow reader type.
  57. Returns:
  58. A `Dataset` namedtuple.
  59. Raises:
  60. ValueError: if `split_name` is not a valid train/test split.
  61. """
  62. if split_name not in _SPLITS_TO_SIZES:
  63. raise ValueError('split name %s was not recognized.' % split_name)
  64. if not file_pattern:
  65. file_pattern = _FILE_PATTERN
  66. file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
  67. # Allowing None in the signature so that dataset_factory can use the default.
  68. if reader is None:
  69. reader = tf.TFRecordReader
  70. keys_to_features = {
  71. 'image/encoded': tf.FixedLenFeature(
  72. (), tf.string, default_value=''),
  73. 'image/format': tf.FixedLenFeature(
  74. (), tf.string, default_value='jpeg'),
  75. 'image/class/label': tf.FixedLenFeature(
  76. [], dtype=tf.int64, default_value=-1),
  77. 'image/class/text': tf.FixedLenFeature(
  78. [], dtype=tf.string, default_value=''),
  79. 'image/object/bbox/xmin': tf.VarLenFeature(
  80. dtype=tf.float32),
  81. 'image/object/bbox/ymin': tf.VarLenFeature(
  82. dtype=tf.float32),
  83. 'image/object/bbox/xmax': tf.VarLenFeature(
  84. dtype=tf.float32),
  85. 'image/object/bbox/ymax': tf.VarLenFeature(
  86. dtype=tf.float32),
  87. 'image/object/class/label': tf.VarLenFeature(
  88. dtype=tf.int64),
  89. }
  90. items_to_handlers = {
  91. 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
  92. 'label': slim.tfexample_decoder.Tensor('image/class/label'),
  93. 'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
  94. 'object/bbox': slim.tfexample_decoder.BoundingBox(
  95. ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
  96. 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
  97. }
  98. decoder = slim.tfexample_decoder.TFExampleDecoder(
  99. keys_to_features, items_to_handlers)
  100. return slim.dataset.Dataset(
  101. data_sources=file_pattern,
  102. reader=reader,
  103. decoder=decoder,
  104. num_samples=_SPLITS_TO_SIZES[split_name],
  105. items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
  106. num_classes=_NUM_CLASSES)