celeba_formatting.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. # Copyright 2016 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. r"""CelebA dataset formatting.
  16. Download img_align_celeba.zip from
  17. http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html under the
  18. link "Align&Cropped Images" in the "Img" directory and list_eval_partition.txt
  19. under the link "Train/Val/Test Partitions" in the "Eval" directory. Then do:
  20. unzip img_align_celeba.zip
  21. Use the script as follows:
  22. python celeba_formatting.py \
  23. --partition_fn [PARTITION_FILE_PATH] \
  24. --file_out [OUTPUT_FILE_PATH_PREFIX] \
  25. --fn_root [CELEBA_FOLDER] \
  26. --set [SUBSET_INDEX]
  27. """
  28. import os
  29. import os.path
  30. import scipy.io
  31. import scipy.io.wavfile
  32. import scipy.ndimage
  33. import tensorflow as tf
  # Command-line flags. Every flag is a string whose default is "".

  # Prefix of the output path; main() appends ".tfrecords" to it.
  tf.flags.DEFINE_string(
      "file_out", "", "Filename of the output .tfrecords file.")
  # Directory that image file names from the partition file are joined onto.
  tf.flags.DEFINE_string(
      "fn_root", "", "Name of root file path.")
  # Text file read by main(): one "<image file name> <subset>" pair per line.
  tf.flags.DEFINE_string(
      "partition_fn", "", "Partition file path.")
  # Compared as a string against the second column of the partition file.
  tf.flags.DEFINE_string(
      "set", "", "Name of subset.")

  FLAGS = tf.flags.FLAGS
  def _int64_feature(value):
      """Return a tf.train.Feature whose int64_list holds the single `value`."""
      int64_list = tf.train.Int64List(value=[value])
      return tf.train.Feature(int64_list=int64_list)
  def _bytes_feature(value):
      """Return a tf.train.Feature whose bytes_list holds the single `value`."""
      bytes_list = tf.train.BytesList(value=[value])
      return tf.train.Feature(bytes_list=bytes_list)
  def main():
      """Convert one CelebA subset into a single .tfrecords file.

      Reads the partition file given by --partition_fn, whose non-blank lines
      are whitespace-separated "<image file name> <subset>" pairs, and keeps
      the file names whose subset column equals --set. Each kept image is
      loaded from the --fn_root directory and written to
      "<--file_out>.tfrecords" as one tf.train.Example with the features:

        "height", "width", "depth": int64 image dimensions (depth is 1 for
            a 2-D, single-channel array).
        "image_raw": the raw bytes of the decoded pixel array.

      Progress is printed every 1000 images. The record writer is closed even
      if loading or serializing an image raises.
      """
      # Celeb A
      with open(FLAGS.partition_fn, "r") as infile:
          # Blank lines (e.g. a trailing newline) carry no entry; skip them
          # rather than fail on the missing subset column.
          entries = [line.split() for line in infile if line.strip()]
      img_fn_list = [entry[0] for entry in entries if entry[1] == FLAGS.set]
      fn_root = FLAGS.fn_root
      num_examples = len(img_fn_list)

      file_out = "%s.tfrecords" % FLAGS.file_out
      writer = tf.python_io.TFRecordWriter(file_out)
      # try/finally so the output file is closed when an image fails to load.
      try:
          for example_idx, img_fn in enumerate(img_fn_list):
              if example_idx % 1000 == 0:
                  # One pre-formatted string prints the same "i / n" text under
                  # both the Python 2 print statement and Python 3 print().
                  print("%d / %d" % (example_idx, num_examples))
              # NOTE(review): scipy.ndimage.imread was removed in SciPy 1.2;
              # this call needs an older SciPy (with PIL) or a replacement
              # image reader -- confirm the target environment.
              image = scipy.ndimage.imread(os.path.join(fn_root, img_fn))
              rows = image.shape[0]
              cols = image.shape[1]
              # A 2-D array has no channel axis; record it as one channel.
              depth = image.shape[2] if image.ndim > 2 else 1
              # tobytes() is the supported spelling of the deprecated
              # tostring(); both return the same raw buffer contents.
              image_raw = image.tobytes()
              example = tf.train.Example(
                  features=tf.train.Features(
                      feature={
                          "height": _int64_feature(rows),
                          "width": _int64_feature(cols),
                          "depth": _int64_feature(depth),
                          "image_raw": _bytes_feature(image_raw),
                      }
                  )
              )
              writer.write(example.SerializeToString())
      finally:
          writer.close()
  # Run the conversion only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()