// face_detection_dlib_mmod.cpp
  #include <stdlib.h>

  #include <exception>
  #include <iostream>
  #include <string>
  #include <vector>

  #include <opencv2/core.hpp>
  #include <opencv2/imgproc.hpp>
  #include <opencv2/highgui.hpp>

  #include <dlib/opencv.h>
  #include <dlib/image_processing.h>
  #include <dlib/dnn.h>
  #include <dlib/data_io.h>

  using namespace cv;
  using namespace std;
  using namespace dlib;
  15. // Network Definition
  16. /////////////////////////////////////////////////////////////////////////////////////////////////////
  17. template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
  18. template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
  19. template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
  20. template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;
  21. using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
  22. /////////////////////////////////////////////////////////////////////////////////////////////////////
  23. void detectFaceDlibMMOD(net_type mmodFaceDetector, Mat &frameDlibMmod, int inHeight=300, int inWidth=0)
  24. {
  25. int frameHeight = frameDlibMmod.rows;
  26. int frameWidth = frameDlibMmod.cols;
  27. if (!inWidth)
  28. inWidth = (int)((frameWidth / (float)frameHeight) * inHeight);
  29. float scaleHeight = frameHeight / (float)inHeight;
  30. float scaleWidth = frameWidth / (float)inWidth;
  31. Mat frameDlibMmodSmall;
  32. resize(frameDlibMmod, frameDlibMmodSmall, Size(inWidth, inHeight));
  33. // Convert OpenCV image format to Dlib's image format
  34. cv_image<bgr_pixel> dlibIm(frameDlibMmodSmall);
  35. matrix<rgb_pixel> dlibMatrix;
  36. assign_image(dlibMatrix, dlibIm);
  37. // Detect faces in the image
  38. std::vector<dlib::mmod_rect> faceRects = mmodFaceDetector(dlibMatrix);
  39. for ( size_t i = 0; i < faceRects.size(); i++ )
  40. {
  41. int x1 = (int)(faceRects[i].rect.left() * scaleWidth);
  42. int y1 = (int)(faceRects[i].rect.top() * scaleHeight);
  43. int x2 = (int)(faceRects[i].rect.right() * scaleWidth);
  44. int y2 = (int)(faceRects[i].rect.bottom() * scaleHeight);
  45. cv::rectangle(frameDlibMmod, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), (int)(frameHeight/150.0), 4);
  46. }
  47. }
  48. int main( int argc, const char** argv )
  49. {
  50. String mmodModelPath = "models/mmod_human_face_detector.dat";
  51. net_type mmodFaceDetector;
  52. deserialize(mmodModelPath) >> mmodFaceDetector;
  53. VideoCapture source;
  54. if (argc == 1)
  55. source.open(0, CAP_V4L);
  56. else
  57. source.open(argv[1]);
  58. Mat frame;
  59. double tt_dlibMmod = 0;
  60. double fpsDlibMmod = 0;
  61. while (true)
  62. {
  63. source >> frame;
  64. if (frame.empty())
  65. break;
  66. double t = cv::getTickCount();
  67. detectFaceDlibMMOD ( mmodFaceDetector, frame );
  68. tt_dlibMmod = ((double)cv::getTickCount() - t)/cv::getTickFrequency();
  69. fpsDlibMmod = 1/tt_dlibMmod;
  70. putText(frame, format("DLIB MMOD; FPS = %.2f",fpsDlibMmod), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
  71. imshow("DLIB - MMOD Face Detection", frame);
  72. int k = waitKey(5);
  73. if(k == 27)
  74. {
  75. destroyAllWindows();
  76. break;
  77. }
  78. }
  79. }