object_detection_demo_coco.py

#!/usr/bin/env python3
"""
Copyright (C) 2018-2021 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import colorsys
import json
import logging
import os
import random
import sys
import time
from argparse import ArgumentParser, SUPPRESS
from pathlib import Path
from time import perf_counter

import cv2
import numpy as np
from openvino.inference_engine import IECore

from yolo_classes import yolo_cls_to_ssd

sys.path.append(str(Path(__file__).resolve().parents[2] / 'common/python'))
import models
import monitors
from pipelines import AsyncPipeline
from images_capture import open_images_capture
from performance_metrics import PerformanceMetrics

logging.basicConfig(format='[ %(levelname)s ] %(message)s', level=logging.INFO, stream=sys.stdout)
log = logging.getLogger()

def build_argparser():
    parser = ArgumentParser(add_help=False)
    args = parser.add_argument_group('Options')
    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
    args.add_argument('-m', '--model', help='Required. Path to an .xml file with a trained model.',
                      required=True, type=Path)
    args.add_argument('-at', '--architecture_type', help="Required. Specify the model's architecture type.",
                      type=str, required=True,
                      choices=('ssd', 'yolo', 'yolov4', 'faceboxes', 'centernet', 'ctpn', 'retinaface'))
    args.add_argument('-i', '--input', required=True,
                      help='Required. An input to process. The input must be a single image, '
                           'a folder of images, a video file, or a camera id.')
    args.add_argument('-d', '--device', default='CPU', type=str,
                      help='Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is '
                           'acceptable. The sample will look for a suitable plugin for the device specified. '
                           'Default value is CPU.')

    common_model_args = parser.add_argument_group('Common model options')
    common_model_args.add_argument('--labels', help='Optional. Labels mapping file.', default=None, type=str)
    common_model_args.add_argument('-t', '--prob_threshold', default=0.5, type=float,
                                   help='Optional. Probability threshold for detections filtering.')
    common_model_args.add_argument('--keep_aspect_ratio', action='store_true', default=False,
                                   help='Optional. Keeps aspect ratio on resize.')
    common_model_args.add_argument('--input_size', default=(600, 600), type=int, nargs=2,
                                   help='Optional. The first image size used for CTPN model reshaping. '
                                        'Default: 600 600. Note that submitted images should have the same resolution, '
                                        'otherwise predictions might be incorrect.')

    infer_args = parser.add_argument_group('Inference options')
    infer_args.add_argument('-nireq', '--num_infer_requests', help='Optional. Number of infer requests.',
                            default=1, type=int)
    infer_args.add_argument('-nstreams', '--num_streams',
                            help='Optional. Number of streams to use for inference on the CPU or/and GPU in throughput '
                                 'mode (for HETERO and MULTI device cases use format '
                                 '<device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).',
                            default='', type=str)
    infer_args.add_argument('-nthreads', '--num_threads', default=None, type=int,
                            help='Optional. Number of threads to use for inference on CPU (including HETERO cases).')

    io_args = parser.add_argument_group('Input/output options')
    io_args.add_argument('--loop', default=False, action='store_true',
                         help='Optional. Enable reading the input in a loop.')
    io_args.add_argument('-o', '--output', required=False,
                         help='Optional. Name of the output file to save.')
    io_args.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                         help='Optional. Number of frames to store in output. '
                              'If 0 is set, all frames are stored.')
    io_args.add_argument('--no_show', help="Optional. Don't show output.", action='store_true')
    io_args.add_argument('-u', '--utilization_monitors', default='', type=str,
                         help='Optional. List of monitors to show initially.')

    debug_args = parser.add_argument_group('Debug options')
    debug_args.add_argument('-r', '--raw_output_message', help='Optional. Show raw inference results.',
                            default=False, action='store_true')
    return parser
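
# Palette of visually distinct colors. Each new color is chosen by
# farthest-point sampling in HSV space: from 100 random candidates, keep the
# one whose minimum distance to the colors picked so far is largest. A fixed
# RNG seed keeps the palette reproducible across runs.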
class ColorPalette:
    def __init__(self, n, rng=None):
        assert n > 0

        if rng is None:
            rng = random.Random(0xACE)

        candidates_num = 100
        hsv_colors = [(1.0, 1.0, 1.0)]
        for _ in range(1, n):
            colors_candidates = [(rng.random(), rng.uniform(0.8, 1.0), rng.uniform(0.5, 1.0))
                                 for _ in range(candidates_num)]
            min_distances = [self.min_distance(hsv_colors, c) for c in colors_candidates]
            arg_max = np.argmax(min_distances)
            hsv_colors.append(colors_candidates[arg_max])

        self.palette = [self.hsv2rgb(*hsv) for hsv in hsv_colors]

    @staticmethod
    def dist(c1, c2):
        dh = min(abs(c1[0] - c2[0]), 1 - abs(c1[0] - c2[0])) * 2
        ds = abs(c1[1] - c2[1])
        dv = abs(c1[2] - c2[2])
        return dh * dh + ds * ds + dv * dv

    @classmethod
    def min_distance(cls, colors_set, color_candidate):
        distances = [cls.dist(o, color_candidate) for o in colors_set]
        return np.min(distances)

    @staticmethod
    def hsv2rgb(h, s, v):
        return tuple(round(c * 255) for c in colorsys.hsv_to_rgb(h, s, v))

    def __getitem__(self, n):
        return self.palette[n % len(self.palette)]

    def __len__(self):
        return len(self.palette)
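
# Build the model wrapper that matches the -at/--architecture_type choice.
# All wrappers come from the demo's common `models` package.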
def get_model(ie, args):
    if args.architecture_type == 'ssd':
        return models.SSD(ie, args.model, labels=args.labels, keep_aspect_ratio_resize=args.keep_aspect_ratio)
    elif args.architecture_type == 'ctpn':
        return models.CTPN(ie, args.model, input_size=args.input_size, threshold=args.prob_threshold)
    elif args.architecture_type == 'yolo':
        return models.YOLO(ie, args.model, labels=args.labels,
                           threshold=args.prob_threshold, keep_aspect_ratio=args.keep_aspect_ratio)
    elif args.architecture_type == 'yolov4':
        return models.YoloV4(ie, args.model, labels=args.labels,
                             threshold=args.prob_threshold, keep_aspect_ratio=args.keep_aspect_ratio)
    elif args.architecture_type == 'faceboxes':
        return models.FaceBoxes(ie, args.model, threshold=args.prob_threshold)
    elif args.architecture_type == 'centernet':
        return models.CenterNet(ie, args.model, labels=args.labels, threshold=args.prob_threshold)
    elif args.architecture_type == 'retinaface':
        return models.RetinaFace(ie, args.model, threshold=args.prob_threshold)
    else:
        raise RuntimeError('No model type or invalid model type (-at) provided: {}'.format(args.architecture_type))
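
# Translate the -nstreams/-nthreads CLI options into Inference Engine plugin
# configuration keys (CPU_THREADS_NUM, CPU_THROUGHPUT_STREAMS,
# GPU_THROUGHPUT_STREAMS).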
def get_plugin_configs(device, num_streams, num_threads):
    config_user_specified = {}

    devices_nstreams = {}
    if num_streams:
        # Note: the loop variable must not shadow `device`, otherwise the
        # `in` check is always true.
        devices_nstreams = {dev: num_streams for dev in ['CPU', 'GPU'] if dev in device} \
                           if num_streams.isdigit() \
                           else dict(item.split(':', 1) for item in num_streams.split(','))

    if 'CPU' in device:
        if num_threads is not None:
            config_user_specified['CPU_THREADS_NUM'] = str(num_threads)
        if 'CPU' in devices_nstreams:
            config_user_specified['CPU_THROUGHPUT_STREAMS'] = devices_nstreams['CPU'] \
                if int(devices_nstreams['CPU']) > 0 \
                else 'CPU_THROUGHPUT_AUTO'

    if 'GPU' in device:
        if 'GPU' in devices_nstreams:
            config_user_specified['GPU_THROUGHPUT_STREAMS'] = devices_nstreams['GPU'] \
                if int(devices_nstreams['GPU']) > 0 \
                else 'GPU_THROUGHPUT_AUTO'

    return config_user_specified
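
# Draw bounding boxes, class labels, scores and (for models that return them)
# facial landmarks for every detection above the probability threshold.
# `id` is the image file name reduced to its numeric COCO image id; it is only
# consumed by the commented-out create_json call below.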
def draw_detections(frame, detections, palette, labels, threshold, id):
    id = str(id.lstrip('0').split('.')[0])
    size = frame.shape[:2]
    for detection in detections:
        if detection.score > threshold:
            xmin = max(int(detection.xmin), 0)
            ymin = max(int(detection.ymin), 0)
            xmax = min(int(detection.xmax), size[1])
            ymax = min(int(detection.ymax), size[0])
            class_id = int(detection.id)
            color = palette[class_id]
            det_label = labels[class_id] if labels and len(labels) > class_id else '#{}'.format(class_id)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, '{} {:.1%}'.format(det_label, detection.score),
                        (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
            if isinstance(detection, models.DetectionWithLandmarks):
                for landmark in detection.landmarks:
                    cv2.circle(frame, (int(landmark[0]), int(landmark[1])), 2, (0, 255, 255), 2)
            # create_json(detection, size, id)
    return frame
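
# With -r/--raw_output_message, log one formatted row per detection above the
# threshold.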
def print_raw_results(size, detections, labels, threshold, id):
    id = str(id.lstrip('0').split('.')[0])
    log.info(' Class ID | Confidence | XMIN | YMIN | XMAX | YMAX ')
    for detection in detections:
        if detection.score > threshold:
            xmin = max(int(detection.xmin), 0)
            ymin = max(int(detection.ymin), 0)
            xmax = min(int(detection.xmax), size[1])
            ymax = min(int(detection.ymax), size[0])
            class_id = int(detection.id)
            det_label = labels[class_id] if labels and len(labels) > class_id else '#{}'.format(class_id)
            log.info('{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} '
                     .format(det_label, detection.score, xmin, ymin, xmax, ymax))
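
# Main loop: read frames from the input, submit them to the asynchronous
# detector pipeline, collect completed results, convert every detection into a
# COCO-format record (mapping YOLO class indices to COCO/SSD category ids via
# yolo_cls_to_ssd), and finally dump all records to results.json.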
def main():
    args = build_argparser().parse_args()

    log.info('Initializing Inference Engine...')
    ie = IECore()
    plugin_config = get_plugin_configs(args.device, args.num_streams, args.num_threads)

    log.info('Loading network...')
    model = get_model(ie, args)
    detector_pipeline = AsyncPipeline(ie, model, plugin_config,
                                      device=args.device, max_num_requests=args.num_infer_requests)

    ### READ TIME ###
    read_time_start = time.time()
    cap = open_images_capture(args.input, args.loop)
    read_time_end = time.time()

    next_frame_id = 0
    next_frame_id_to_show = 0
    image_id = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")

    palette = ColorPalette(len(model.labels) if model.labels else 100)
    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()
    results_list = []
    detection_ids = [1, 3, 4]
    all_starts = 0

    while True:
        print('NEXT FRAME ID', next_frame_id)
        id = images[image_id]
        if next_frame_id == 5000:  # Hard cap: COCO val2017 contains 5000 images.
            break
        if detector_pipeline.callback_exceptions:
            raise detector_pipeline.callback_exceptions[0]

        # Process all completed requests
        #### DETECTION TIME ####
        detect_time_start = time.time()
        results = detector_pipeline.get_result(next_frame_id_to_show)
        detect_time_end = time.time()
        detect_time_list.append(detect_time_end - detect_time_start)
        if results:
            objects, frame_meta = results
            # Convert each detection into a COCO results record:
            # bbox is [x, y, width, height] in absolute pixels, category_id
            # comes from the yolo_cls_to_ssd mapping.
            for detection in objects:
                x = float(detection.xmin)
                y = float(detection.ymin)
                w = float(detection.xmax - detection.xmin)
                h = float(detection.ymax - detection.ymin)
                cls = detection.id
                cls = yolo_cls_to_ssd[cls]
                id = str(id.lstrip('0').split('.')[0])
                conf = detection.score
                # if cls in detection_ids:
                results_list.append({'image_id': int(id),
                                     'category_id': cls,
                                     'bbox': [x, y, w, h],
                                     'score': float(conf)})
            frame = frame_meta['frame']

            post_process_start = time.time()
            start_time = frame_meta['start_time']
            all_starts += start_time
            if len(objects) and args.raw_output_message:
                print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold, images[image_id])

            presenter.drawGraphs(frame)
            frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold, images[image_id])
            metrics.update(start_time, frame)
            post_process_end = time.time()
            post_process_list.append(post_process_end - post_process_start)

            if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                # cv2.imshow('Detection Results', frame)
                cv2.imwrite(f"/home/sovit/my_data/Data_Science/Projects/openvino_experiments/model_quantization/data/images/image_{image_id}.jpg", frame)
                # key = cv2.waitKey(1)
                ESC_KEY = 27
                # Quit.
                # if key in {ord('q'), ord('Q'), ESC_KEY}:
                #     break
                # presenter.handleKey(key)

            next_frame_id_to_show += 1
            image_id += 1
            continue
        if detector_pipeline.is_ready():
            # Get new image/frame
            pre_process_start = time.time()
            start_time = perf_counter()
            frame = cap.read()
            if frame is None:
                if next_frame_id == 0:
                    raise ValueError("Can't read an image from the input")
                break
            if next_frame_id == 0:
                presenter = monitors.Presenter(args.utilization_monitors, 55,
                                               (round(frame.shape[1] / 4), round(frame.shape[0] / 8)))
                if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                         cap.fps(), (frame.shape[1], frame.shape[0])):
                    raise RuntimeError("Can't open video writer")
            # Submit for inference
            detector_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            pre_process_end = time.time()
            pre_process_list.append(pre_process_end - pre_process_start)
            next_frame_id += 1
        else:
            # Wait for empty request
            detector_pipeline.await_any()

    # Save all accumulated detections in COCO results format.
    results_file = 'results.json'
    with open(results_file, 'w') as f:
        f.write(json.dumps(results_list, indent=4))
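
    # A minimal sketch (an assumption, not part of this demo: it requires
    # pycocotools and a matching annotation file, here assumed to live at
    # mscoco/annotations/instances_val2017.json) of how the saved results.json
    # could be scored offline:
    #
    #   from pycocotools.coco import COCO
    #   from pycocotools.cocoeval import COCOeval
    #   coco_gt = COCO('mscoco/annotations/instances_val2017.json')
    #   coco_dt = coco_gt.loadRes(results_file)
    #   coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    #   coco_eval.evaluate()
    #   coco_eval.accumulate()
    #   coco_eval.summarize()
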
    detector_pipeline.await_all()
    # Process completed requests
    while detector_pipeline.has_completed_request():
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            objects, frame_meta = results
            frame = frame_meta['frame']
            post_process_two_start = time.time()
            start_time = frame_meta['start_time']

            if len(objects) and args.raw_output_message:
                print()
                # print_raw_results(frame.shape[:2], objects, model.labels, args.prob_threshold)

            presenter.drawGraphs(frame)
            # frame = draw_detections(frame, objects, palette, model.labels, args.prob_threshold)
            metrics.update(start_time, frame)
            post_process_two_end = time.time()
            post_process_list_two.append(post_process_two_end - post_process_two_start)

            if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id_to_show <= args.output_limit - 1):
                video_writer.write(frame)

            if not args.no_show:
                # cv2.imshow('Detection Results', frame)
                cv2.imwrite(f"/home/sovit/my_data/Data_Science/Projects/openvino_experiments/model_quantization/data/images/image_{next_frame_id_to_show}.jpg", frame)
                # key = cv2.waitKey(1)
                ESC_KEY = 27
                # Quit.
                # if key in {ord('q'), ord('Q'), ESC_KEY}:
                #     break
                # presenter.handleKey(key)

            next_frame_id_to_show += 1
        else:
            break

    metrics.print_total()
    print("Presenter", presenter.reportMeans())
if __name__ == '__main__':
    images = os.listdir('mscoco/val2017')
    images.sort()
    print(images)
    json_obj = {}
    json_obj[''] = []
    detect_time_list = []
    pre_process_list = []
    post_process_list = []
    post_process_list_two = []
    main()
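
# Example invocation (model path and flags are illustrative only, assuming a
# YOLOv4 IR and the COCO val2017 images under mscoco/val2017):
#   python3 object_detection_demo_coco.py -m yolo-v4-tf.xml -at yolov4 \
#       -i mscoco/val2017 --no_show -r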