# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
  15. """Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
  16. Some images have one or more bounding boxes associated with the label of the
  17. image. See details here: http://image-net.org/download-bboxes
  18. ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
  19. "WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech )
  20. and SYNSET OFFSET of WordNet. For more information, please refer to the
  21. WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].
  22. "There are bounding boxes for over 3000 popular synsets available.
  23. For each synset, there are on average 150 images with bounding boxes."
  24. WARNING: Don't use for object detection, in this case all the bounding boxes
  25. of the image belong to just one class.
  26. """
  27. from __future__ import absolute_import
  28. from __future__ import division
  29. from __future__ import print_function
  30. import os
  31. from six.moves import urllib
  32. import tensorflow as tf
  33. from datasets import dataset_utils
  34. slim = tf.contrib.slim
  35. # TODO(nsilberman): Add tfrecord file type once the script is updated.
  36. _FILE_PATTERN = '%s-*'
  37. _SPLITS_TO_SIZES = {
  38. 'train': 1281167,
  39. 'validation': 50000,
  40. }
  41. _ITEMS_TO_DESCRIPTIONS = {
  42. 'image': 'A color image of varying height and width.',
  43. 'label': 'The label id of the image, integer between 0 and 999',
  44. 'label_text': 'The text of the label.',
  45. 'object/bbox': 'A list of bounding boxes.',
  46. 'object/label': 'A list of labels, one per each object.',
  47. }
  48. _NUM_CLASSES = 1001
  49. def create_readable_names_for_imagenet_labels():
  50. """Create a dict mapping label id to human readable string.
  51. Returns:
  52. labels_to_names: dictionary where keys are integers from to 1000
  53. and values are human-readable names.
  54. We retrieve a synset file, which contains a list of valid synset labels used
  55. by ILSVRC competition. There is one synset one per line, eg.
  56. # n01440764
  57. # n01443537
  58. We also retrieve a synset_to_human_file, which contains a mapping from synsets
  59. to human-readable names for every synset in Imagenet. These are stored in a
  60. tsv format, as follows:
  61. # n02119247 black fox
  62. # n02119359 silver fox
  63. We assign each synset (in alphabetical order) an integer, starting from 1
  64. (since 0 is reserved for the background class).
  65. Code is based on
  66. https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
  67. """
  68. # pylint: disable=g-line-too-long
  69. base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/'
  70. synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
  71. synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)
  72. filename, _ = urllib.request.urlretrieve(synset_url)
  73. synset_list = [s.strip() for s in open(filename).readlines()]
  74. num_synsets_in_ilsvrc = len(synset_list)
  75. assert num_synsets_in_ilsvrc == 1000
  76. filename, _ = urllib.request.urlretrieve(synset_to_human_url)
  77. synset_to_human_list = open(filename).readlines()
  78. num_synsets_in_all_imagenet = len(synset_to_human_list)
  79. assert num_synsets_in_all_imagenet == 21842
  80. synset_to_human = {}
  81. for s in synset_to_human_list:
  82. parts = s.strip().split('\t')
  83. assert len(parts) == 2
  84. synset = parts[0]
  85. human = parts[1]
  86. synset_to_human[synset] = human
  87. label_index = 1
  88. labels_to_names = {0: 'background'}
  89. for synset in synset_list:
  90. name = synset_to_human[synset]
  91. labels_to_names[label_index] = name
  92. label_index += 1
  93. return labels_to_names
  94. def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  95. """Gets a dataset tuple with instructions for reading ImageNet.
  96. Args:
  97. split_name: A train/test split name.
  98. dataset_dir: The base directory of the dataset sources.
  99. file_pattern: The file pattern to use when matching the dataset sources.
  100. It is assumed that the pattern contains a '%s' string so that the split
  101. name can be inserted.
  102. reader: The TensorFlow reader type.
  103. Returns:
  104. A `Dataset` namedtuple.
  105. Raises:
  106. ValueError: if `split_name` is not a valid train/test split.
  107. """
  108. if split_name not in _SPLITS_TO_SIZES:
  109. raise ValueError('split name %s was not recognized.' % split_name)
  110. if not file_pattern:
  111. file_pattern = _FILE_PATTERN
  112. file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
  113. # Allowing None in the signature so that dataset_factory can use the default.
  114. if reader is None:
  115. reader = tf.TFRecordReader
  116. keys_to_features = {
  117. 'image/encoded': tf.FixedLenFeature(
  118. (), tf.string, default_value=''),
  119. 'image/format': tf.FixedLenFeature(
  120. (), tf.string, default_value='jpeg'),
  121. 'image/class/label': tf.FixedLenFeature(
  122. [], dtype=tf.int64, default_value=-1),
  123. 'image/class/text': tf.FixedLenFeature(
  124. [], dtype=tf.string, default_value=''),
  125. 'image/object/bbox/xmin': tf.VarLenFeature(
  126. dtype=tf.float32),
  127. 'image/object/bbox/ymin': tf.VarLenFeature(
  128. dtype=tf.float32),
  129. 'image/object/bbox/xmax': tf.VarLenFeature(
  130. dtype=tf.float32),
  131. 'image/object/bbox/ymax': tf.VarLenFeature(
  132. dtype=tf.float32),
  133. 'image/object/class/label': tf.VarLenFeature(
  134. dtype=tf.int64),
  135. }
  136. items_to_handlers = {
  137. 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
  138. 'label': slim.tfexample_decoder.Tensor('image/class/label'),
  139. 'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
  140. 'object/bbox': slim.tfexample_decoder.BoundingBox(
  141. ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
  142. 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
  143. }
  144. decoder = slim.tfexample_decoder.TFExampleDecoder(
  145. keys_to_features, items_to_handlers)
  146. labels_to_names = None
  147. if dataset_utils.has_labels(dataset_dir):
  148. labels_to_names = dataset_utils.read_label_file(dataset_dir)
  149. else:
  150. labels_to_names = create_readable_names_for_imagenet_labels()
  151. dataset_utils.write_label_file(labels_to_names, dataset_dir)
  152. return slim.dataset.Dataset(
  153. data_sources=file_pattern,
  154. reader=reader,
  155. decoder=decoder,
  156. num_samples=_SPLITS_TO_SIZES[split_name],
  157. items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
  158. num_classes=_NUM_CLASSES,
  159. labels_to_names=labels_to_names)