# run-all.py

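"""Compare four face detectors side by side on a video file or webcam:
OpenCV Haar cascade, OpenCV DNN (Caffe or TensorFlow), dlib HOG, and
dlib MMOD. Each output is annotated with a running FPS figure, and the
combined 2x2 mosaic is displayed and written to disk."""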
import argparse
import os
import time

import cv2
import dlib
import numpy as np

# Model files
# OpenCV HAAR
faceCascade = cv2.CascadeClassifier("models/haarcascade_frontalface_default.xml")

# DLIB HOG
hogFaceDetector = dlib.get_frontal_face_detector()

# DLIB MMOD
dnnFaceDetector = dlib.cnn_face_detection_model_v1(
    "models/mmod_human_face_detector.dat",
)
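
# NOTE: the detectors above are constructed once at import time. The model
# files are assumed to live in a local "models/" directory (the Haar cascade
# XML also ships with OpenCV itself under cv2.data.haarcascades; the MMOD
# weights come from dlib's published model files).


# Run the Haar cascade on a copy of the frame downscaled to `inHeight` pixels
# tall, then map each detection back to the original resolution and draw it
# on the full-size frame.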
def detectFaceOpenCVHaar(faceCascade, frame, inHeight=300, inWidth=0):
    frameOpenCVHaar = frame.copy()
    frameHeight = frameOpenCVHaar.shape[0]
    frameWidth = frameOpenCVHaar.shape[1]
    if not inWidth:
        inWidth = int((frameWidth / frameHeight) * inHeight)
    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    frameOpenCVHaarSmall = cv2.resize(frameOpenCVHaar, (inWidth, inHeight))
    frameGray = cv2.cvtColor(frameOpenCVHaarSmall, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(frameGray)
    bboxes = []
    for (x, y, w, h) in faces:
        x1 = x
        y1 = y
        x2 = x + w
        y2 = y + h
        cvRect = [
            int(x1 * scaleWidth),
            int(y1 * scaleHeight),
            int(x2 * scaleWidth),
            int(y2 * scaleHeight),
        ]
        bboxes.append(cvRect)
        cv2.rectangle(
            frameOpenCVHaar,
            (cvRect[0], cvRect[1]),
            (cvRect[2], cvRect[3]),
            (0, 255, 0),
            int(round(frameHeight / 150)),
            4,
        )
    return frameOpenCVHaar, bboxes
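
# Run the OpenCV DNN face detector on a fixed 300x300 blob (mean-subtracted,
# no channel swap) and keep detections whose confidence exceeds the threshold.
# The network outputs normalized coordinates, so they scale directly to the
# original frame size.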
def detectFaceOpenCVDnn(net, frame, conf_threshold=0.7):
    frameOpencvDnn = frame.copy()
    frameHeight = frameOpencvDnn.shape[0]
    frameWidth = frameOpencvDnn.shape[1]
    blob = cv2.dnn.blobFromImage(
        frameOpencvDnn, 1.0, (300, 300), [104, 117, 123], False, False,
    )

    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            x1 = int(detections[0, 0, i, 3] * frameWidth)
            y1 = int(detections[0, 0, i, 4] * frameHeight)
            x2 = int(detections[0, 0, i, 5] * frameWidth)
            y2 = int(detections[0, 0, i, 6] * frameHeight)
            bboxes.append([x1, y1, x2, y2])
            cv2.rectangle(
                frameOpencvDnn,
                (x1, y1),
                (x2, y2),
                (0, 255, 0),
                int(round(frameHeight / 150)),
                8,
            )
    return frameOpencvDnn, bboxes
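
# Same downscale-and-rescale pattern as the Haar path, but with dlib's
# HOG+SVM frontal face detector. dlib expects RGB input, hence the BGR->RGB
# conversion; the detector's second argument (0) is the number of upsampling
# passes.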
def detectFaceDlibHog(detector, frame, inHeight=300, inWidth=0):
    frameDlibHog = frame.copy()
    frameHeight = frameDlibHog.shape[0]
    frameWidth = frameDlibHog.shape[1]
    if not inWidth:
        inWidth = int((frameWidth / frameHeight) * inHeight)
    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    frameDlibHogSmall = cv2.resize(frameDlibHog, (inWidth, inHeight))
    frameDlibHogSmall = cv2.cvtColor(frameDlibHogSmall, cv2.COLOR_BGR2RGB)

    faceRects = detector(frameDlibHogSmall, 0)
    bboxes = []
    for faceRect in faceRects:
        cvRect = [
            int(faceRect.left() * scaleWidth),
            int(faceRect.top() * scaleHeight),
            int(faceRect.right() * scaleWidth),
            int(faceRect.bottom() * scaleHeight),
        ]
        bboxes.append(cvRect)
        cv2.rectangle(
            frameDlibHog,
            (cvRect[0], cvRect[1]),
            (cvRect[2], cvRect[3]),
            (0, 255, 0),
            int(round(frameHeight / 150)),
            4,
        )
    return frameDlibHog, bboxes
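
# CNN-based MMOD detector. Unlike the HOG detector, it returns
# mmod_rectangle objects, so the bounding box lives under `.rect`. It is
# noticeably slower on CPU unless dlib was built with CUDA support.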
def detectFaceDlibMMOD(detector, frame, inHeight=300, inWidth=0):
    frameDlibMMOD = frame.copy()
    frameHeight = frameDlibMMOD.shape[0]
    frameWidth = frameDlibMMOD.shape[1]
    if not inWidth:
        inWidth = int((frameWidth / frameHeight) * inHeight)
    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    frameDlibMMODSmall = cv2.resize(frameDlibMMOD, (inWidth, inHeight))
    frameDlibMMODSmall = cv2.cvtColor(frameDlibMMODSmall, cv2.COLOR_BGR2RGB)

    faceRects = detector(frameDlibMMODSmall, 0)
    bboxes = []
    for faceRect in faceRects:
        cvRect = [
            int(faceRect.rect.left() * scaleWidth),
            int(faceRect.rect.top() * scaleHeight),
            int(faceRect.rect.right() * scaleWidth),
            int(faceRect.rect.bottom() * scaleHeight),
        ]
        bboxes.append(cvRect)
        cv2.rectangle(
            frameDlibMMOD,
            (cvRect[0], cvRect[1]),
            (cvRect[2], cvRect[3]),
            (0, 255, 0),
            int(round(frameHeight / 150)),
            4,
        )
    return frameDlibMMOD, bboxes
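
# Main driver: parse CLI options, load the chosen OpenCV DNN model, then run
# all four detectors on every frame and write out the annotated 2x2 mosaic.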
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Face detection")
    parser.add_argument("--video", type=str, help="Path to video file")
    parser.add_argument(
        "--device",
        type=str,
        default="gpu",
        choices=["cpu", "gpu"],
        help="Device to use",
    )
    parser.add_argument(
        "--net_type",
        type=str,
        default="caffe",
        choices=["caffe", "tf"],
        help="Type of network to run",
    )
    args = parser.parse_args()

    net_type = args.net_type
    source = args.video
    device = args.device
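
    # Example invocations (file names are placeholders):
    #   python run-all.py --video input.mp4 --device cpu --net_type caffe
    #   python run-all.py --device gpu --net_type tf   # no --video: use webcam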

    # OpenCV DNN supports two face detection networks:
    # 1. FP16 version of the original Caffe implementation (5.4 MB)
    # 2. 8-bit quantized version using TensorFlow (2.7 MB)
    if net_type == "caffe":
        modelFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
        configFile = "models/deploy.prototxt"
        net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    else:
        modelFile = "models/opencv_face_detector_uint8.pb"
        configFile = "models/opencv_face_detector.pbtxt"
        net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

    # Backend and target are separate settings: default OpenCV backend on
    # CPU, CUDA backend and target on GPU.
    if device == "cpu":
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    else:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
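
    # NOTE: DNN_BACKEND_CUDA / DNN_TARGET_CUDA require an OpenCV build with
    # CUDA enabled; the stock pip wheels are CPU-only, so --device gpu will
    # not work there.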
    if source:
        cap = cv2.VideoCapture(source)
    else:
        # cv2.CAP_V4L is Linux-specific; on other platforms use the default
        # backend, i.e. cv2.VideoCapture(0).
        cap = cv2.VideoCapture(0, cv2.CAP_V4L)
    hasFrame, frame = cap.read()

    outputFolder = "output-dnn-videos"
    if source:
        outputFile = os.path.splitext(os.path.basename(source))[0] + ".avi"
    else:
        outputFile = "grabbed_from_camera.avi"
    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)

    # The written frame is a 2x2 mosaic of the four detector outputs, so the
    # writer must be opened at twice the input frame size in each dimension.
    vid_writer = cv2.VideoWriter(
        os.path.join(outputFolder, outputFile),
        cv2.VideoWriter_fourcc("M", "J", "P", "G"),
        25,
        (2 * frame.shape[1], 2 * frame.shape[0]),
    )

    frame_count = 0
    tt_opencvHaar = 0
    tt_opencvDnn = 0
    tt_dlibHog = 0
    tt_dlibMmod = 0
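
    # Per-frame loop: time each detector separately and draw a running-average
    # FPS (frames so far / accumulated detector time) onto its output frame.
    # The first frame's timings are discarded below as warm-up.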
    while True:
        hasFrame, frame = cap.read()
        if not hasFrame:
            break
        frame_count += 1

        t = time.time()
        outOpencvHaar, bboxes = detectFaceOpenCVHaar(faceCascade, frame)
        tt_opencvHaar += time.time() - t
        fpsOpencvHaar = frame_count / tt_opencvHaar
        label = "OpenCV Haar; FPS : {:.2f}".format(fpsOpencvHaar)
        cv2.putText(
            outOpencvHaar,
            label,
            (10, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 0, 255),
            3,
            cv2.LINE_AA,
        )

        t = time.time()
        outOpencvDnn, bboxes = detectFaceOpenCVDnn(net, frame)
        tt_opencvDnn += time.time() - t
        fpsOpencvDnn = frame_count / tt_opencvDnn
        label = "OpenCV DNN {} FPS : {:.2f}".format(device.upper(), fpsOpencvDnn)
        cv2.putText(
            outOpencvDnn,
            label,
            (10, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 0, 255),
            3,
            cv2.LINE_AA,
        )

        t = time.time()
        outDlibHog, bboxes = detectFaceDlibHog(hogFaceDetector, frame)
        tt_dlibHog += time.time() - t
        fpsDlibHog = frame_count / tt_dlibHog
        label = "DLIB HoG; FPS : {:.2f}".format(fpsDlibHog)
        cv2.putText(
            outDlibHog,
            label,
            (10, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 0, 255),
            3,
            cv2.LINE_AA,
        )

        t = time.time()
        outDlibMMOD, bboxes = detectFaceDlibMMOD(dnnFaceDetector, frame)
        tt_dlibMmod += time.time() - t
        fpsDlibMmod = frame_count / tt_dlibMmod
        label = "DLIB MMOD; FPS : {:.2f}".format(fpsDlibMmod)
        cv2.putText(
            outDlibMMOD,
            label,
            (10, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.3,
            (0, 0, 255),
            3,
            cv2.LINE_AA,
        )

        # Tile the four annotated outputs into a 2x2 mosaic for display/saving.
        top = np.hstack([outOpencvHaar, outOpencvDnn])
        bottom = np.hstack([outDlibHog, outDlibMMOD])
        combined = np.vstack([top, bottom])
        cv2.imshow("Face Detection Comparison", combined)

        # Drop the first frame's timings so one-time warm-up costs (lazy
        # allocations, backend initialization) do not skew the FPS figures.
        if frame_count == 1:
            tt_opencvHaar = 0
            tt_opencvDnn = 0
            tt_dlibHog = 0
            tt_dlibMmod = 0

        vid_writer.write(combined)

        k = cv2.waitKey(5)
        if k == 27:  # Esc quits
            break

    cv2.destroyAllWindows()
    cap.release()
    vid_writer.release()