mnist_m.py

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the MNIST-M dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import tensorflow as tf

from slim.datasets import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'mnist_m_%s.tfrecord'

_SPLITS_TO_SIZES = {'train': 58001, 'valid': 1000, 'test': 9001}

_NUM_CLASSES = 10
_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [32 x 32 x 3] RGB image.',
    'label': 'A single integer between 0 and 9',
}

def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading MNIST-M.

  Args:
    split_name: A train/valid/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a recognized split name.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label':
          tf.FixedLenFeature(
              [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(shape=[32, 32, 3], channels=3),
      'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      num_classes=_NUM_CLASSES,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      labels_to_names=labels_to_names)
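

# Usage sketch (not part of the original module): a minimal example of how the
# `Dataset` returned by get_split() is typically consumed with the queue-based
# tf.contrib.slim input pipeline. The dataset directory '/tmp/mnist_m' is a
# hypothetical placeholder; point it at wherever the mnist_m_*.tfrecord files
# and the label file actually live.
if __name__ == '__main__':
  dataset = get_split('train', '/tmp/mnist_m')
  # DatasetDataProvider wires the reader, decoder and shuffling queues together.
  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset, num_readers=4, shuffle=True)
  image, label = provider.get(['image', 'label'])
  # Batch individual examples into [batch_size, 32, 32, 3] image tensors.
  images, labels = tf.train.batch([image, label], batch_size=32)
  with tf.train.MonitoredTrainingSession() as sess:
    # MonitoredTrainingSession starts the queue runners required by the provider.
    batch_images, batch_labels = sess.run([images, labels])
    print(batch_images.shape, batch_labels.shape)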