# downloadOI.py
# Author: Sunita Nayak, Big Vision LLC
# Usage example: python3 downloadOI.py --classes 'Ice_cream,Cookie' --mode train
import argparse
import csv
import multiprocessing
import os
import shutil
import subprocess
from multiprocessing import Pool as thread_pool

from tqdm import tqdm
  10. cpu_count = multiprocessing.cpu_count()
  11. parser = argparse.ArgumentParser(description='Download Class specific images from OpenImagesV4')
  12. parser.add_argument("--mode", help="Dataset category - train, validation or test", required=True)
  13. parser.add_argument("--classes", help="Names of object classes to be downloaded", required=True)
  14. parser.add_argument("--nthreads", help="Number of threads to use", required=False, type=int, default=cpu_count*2)
  15. parser.add_argument("--occluded", help="Include occluded images", required=False, type=int, default=1)
  16. parser.add_argument("--truncated", help="Include truncated images", required=False, type=int, default=1)
  17. parser.add_argument("--groupOf", help="Include groupOf images", required=False, type=int, default=1)
  18. parser.add_argument("--depiction", help="Include depiction images", required=False, type=int, default=1)
  19. parser.add_argument("--inside", help="Include inside images", required=False, type=int, default=1)
  20. args = parser.parse_args()
  21. run_mode = args.mode
  22. threads = args.nthreads
  23. classes = []
  24. for class_name in args.classes.split(','):
  25. classes.append(class_name)
  26. with open('./class-descriptions-boxable.csv', mode='r') as infile:
  27. reader = csv.reader(infile)
  28. dict_list = {rows[1]:rows[0] for rows in reader}
  29. subprocess.run(['rm', '-rf', run_mode])
  30. subprocess.run([ 'mkdir', run_mode])
  31. pool = thread_pool(threads)
  32. commands = []
  33. cnt = 0
  34. for ind in range(0, len(classes)):
  35. class_name = classes[ind]
  36. print("Class "+str(ind) + " : " + class_name)
  37. subprocess.run([ 'mkdir', run_mode+'/'+class_name])
  38. command = "grep "+dict_list[class_name.replace('_', ' ')] + " ./" + run_mode + "-annotations-bbox.csv"
  39. class_annotations = subprocess.run(command.split(), stdout=subprocess.PIPE).stdout.decode('utf-8')
  40. class_annotations = class_annotations.splitlines()
  41. for line in class_annotations:
  42. line_parts = line.split(',')
  43. #IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
  44. if (args.occluded==0 and int(line_parts[8])>0):
  45. print("Skipped %s",line_parts[0])
  46. continue
  47. if (args.truncated==0 and int(line_parts[9])>0):
  48. print("Skipped %s",line_parts[0])
  49. continue
  50. if (args.groupOf==0 and int(line_parts[10])>0):
  51. print("Skipped %s",line_parts[0])
  52. continue
  53. if (args.depiction==0 and int(line_parts[11])>0):
  54. print("Skipped %s",line_parts[0])
  55. continue
  56. if (args.inside==0 and int(line_parts[12])>0):
  57. print("Skipped %s",line_parts[0])
  58. continue
  59. cnt = cnt + 1
  60. command = 'aws s3 --no-sign-request --only-show-errors cp s3://open-images-dataset/'+run_mode+'/'+line_parts[0]+'.jpg '+ run_mode+'/'+class_name+'/'+line_parts[0]+'.jpg'
  61. commands.append(command)
  62. with open('%s/%s/%s.txt'%(run_mode,class_name,line_parts[0]),'a') as f:
  63. f.write(','.join([class_name, line_parts[4], line_parts[5], line_parts[6], line_parts[7]])+'\n')
  64. print("Annotation Count : "+str(cnt))
  65. commands = list(set(commands))
  66. print("Number of images to be downloaded : "+str(len(commands)))
  67. list(tqdm(pool.imap(os.system, commands), total = len(commands) ))
  68. pool.close()
  69. pool.join()