create_lmdb.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. import os
  2. from argparse import ArgumentParser
  3. import cv2
  4. import lmdb
  5. import numpy as np
  6. from tools import get_images_paths
  7. def store_many_lmdb(images_list, save_path):
  8. num_images = len(images_list) # number of images in our folder
  9. file_sizes = [os.path.getsize(item) for item in images_list] # all file sizes
  10. max_size_index = np.argmax(file_sizes) # the maximum file size index
  11. # maximum database size in bytes
  12. map_size = num_images * cv2.imread(images_list[max_size_index]).nbytes * 10
  13. env = lmdb.open(save_path, map_size=map_size) # create lmdb environment
  14. with env.begin(write=True) as txn: # start writing to environment
  15. for i, image in enumerate(images_list):
  16. with open(image, "rb") as file:
  17. data = file.read() # read image as bytes
  18. key = f"{i:08}" # get image key
  19. txn.put(key.encode("ascii"), data) # put the key-value into database
  20. env.close() # close the environment
  21. if __name__ == "__main__":
  22. parser = ArgumentParser()
  23. parser.add_argument(
  24. "--path",
  25. "-p",
  26. type=str,
  27. required=True,
  28. help="path to the images folder to collect",
  29. )
  30. parser.add_argument(
  31. "--output",
  32. "-o",
  33. type=str,
  34. required=True,
  35. help='path to the output environment directory file i.e. "path/to/folder/env/"',
  36. )
  37. args = parser.parse_args()
  38. if not os.path.exists(args.output):
  39. os.makedirs(args.output)
  40. images = get_images_paths(args.path)
  41. store_many_lmdb(images, args.output)