# split_dataset.py
"""Split the real/spoofed face images into training and test sets.

Every regular file found directly inside ``dataset/real`` and
``dataset/spoofed`` is copied either to ``test_dataset/<class>`` (a random
10 % of the files, rounded down) or to ``training_dataset/<class>`` (the
rest).  The source files are left untouched.

NOTE: the destination directories are never cleared and the shuffle is not
seeded, so running the script twice can leave the same image in both the
training and the test directory.  Empty the destination directories before
re-splitting.
"""
import os
import random
import shutil

# Source directories holding the full image collection, one per class.
real_face_dir = os.path.join("dataset", "real")
spoofed_face_dir = os.path.join("dataset", "spoofed")

# Destination directories of the two splits, one per class.
training_real_dir = os.path.join("training_dataset", "real")
test_real_dir = os.path.join("test_dataset", "real")
training_spoof_dir = os.path.join("training_dataset", "spoofed")
test_spoof_dir = os.path.join("test_dataset", "spoofed")

# Share of each class that is copied to the test split.
TEST_FRACTION = 0.10


def _split_and_copy(source_dir, train_dir, test_dir,
                    test_fraction=TEST_FRACTION, rng=None):
    """Copy a random train/test split of the files in *source_dir*.

    Args:
        source_dir: Directory whose regular files are split.  Sub-directories
            are ignored; the directory must exist.
        train_dir: Destination for the training share (created if missing).
        test_dir: Destination for the test share (created if missing).
        test_fraction: Share of the files sent to *test_dir*, between 0 and 1.
            The count is truncated, so fewer than ``1 / test_fraction`` files
            yield an empty test split.
        rng: Optional ``random.Random`` instance used for shuffling; pass a
            seeded one for a reproducible split.  Defaults to the module-level
            ``random`` generator.

    Returns:
        A ``(test_names, train_names)`` tuple of the file names copied to
        each destination.

    Raises:
        ValueError: If *test_fraction* is outside ``[0, 1]``.
    """
    if not 0 <= test_fraction <= 1:
        raise ValueError(
            f"test_fraction must be between 0 and 1, got {test_fraction!r}"
        )

    # Regular files only: anything else (sub-directory, broken symlink)
    # cannot be copied by shutil.copy.  Sorting fixes the pre-shuffle order,
    # so a seeded *rng* always produces the same split.
    with os.scandir(source_dir) as entries:
        names = sorted(entry.name for entry in entries if entry.is_file())

    shuffle = random.shuffle if rng is None else rng.shuffle
    shuffle(names)

    split_idx = int(len(names) * test_fraction)
    test_names = names[:split_idx]
    train_names = names[split_idx:]

    for dest_dir, subset in ((test_dir, test_names), (train_dir, train_names)):
        os.makedirs(dest_dir, exist_ok=True)
        for name in subset:
            shutil.copy(os.path.join(source_dir, name), dest_dir)

    return test_names, train_names


def main():
    """Create the directory layout and split both image classes."""
    # The source directories are created as well, so a run on a fresh
    # checkout produces empty splits instead of failing on a missing path.
    for directory in (
        real_face_dir,
        spoofed_face_dir,
        training_real_dir,
        test_real_dir,
        training_spoof_dir,
        test_spoof_dir,
    ):
        os.makedirs(directory, exist_ok=True)

    _split_and_copy(real_face_dir, training_real_dir, test_real_dir)
    _split_and_copy(spoofed_face_dir, training_spoof_dir, test_spoof_dir)


if __name__ == "__main__":
    main()