# Face Detection

# Import required modules
import cv2
import depthai as dai
import time
import blobconverter

# Define the preview frame size
FRAME_SIZE = (640, 400)

# Define the NN model name and input size
# If you set blob_path directly, make sure model_name and zoo_type are None
# DET_INPUT_SIZE = (672, 384)
# model_name = None
# zoo_type = None
# blob_path = "models/face-detection-adas-0001.blob"

DET_INPUT_SIZE = (300, 300)
model_name = "face-detection-retail-0004"
zoo_type = "depthai"
blob_path = None

# DET_INPUT_SIZE = (672, 384)
# model_name = "face-detection-adas-0001"
# zoo_type = "intel"
# blob_path = None
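
# Note: DET_INPUT_SIZE must match the input resolution of the chosen model
# (300x300 for face-detection-retail-0004, 672x384 for face-detection-adas-0001).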

# Start defining a pipeline
pipeline = dai.Pipeline()

# Define a source - RGB camera
cam = pipeline.createColorCamera()
cam.setPreviewSize(FRAME_SIZE[0], FRAME_SIZE[1])
cam.setInterleaved(False)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)

# Define mono camera sources for stereo depth
mono_left = pipeline.createMonoCamera()
mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
mono_right = pipeline.createMonoCamera()
mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)

# Create stereo depth node
stereo = pipeline.createStereoDepth()
stereo.setLeftRightCheck(True)
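
# Note: left-right check runs disparity matching in both directions and
# discards inconsistent matches, which reduces artifacts around object edges.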

# Link mono cameras to the stereo node
mono_left.out.link(stereo.left)
mono_right.out.link(stereo.right)

# If a model name is given, fetch the model from the zoo and compile it to a blob
if model_name is not None:
    blob_path = blobconverter.from_zoo(
        name=model_name,
        shaves=6,
        zoo_type=zoo_type
    )
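
# Note: blobconverter compiles the model via Luxonis' online converter and
# caches the resulting blob locally, so later runs reuse the cached file.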

# Define face detection NN node
face_spatial_det_nn = pipeline.createMobileNetSpatialDetectionNetwork()
face_spatial_det_nn.setConfidenceThreshold(0.75)
face_spatial_det_nn.setBlobPath(blob_path)
face_spatial_det_nn.setDepthLowerThreshold(100)
face_spatial_det_nn.setDepthUpperThreshold(5000)
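
# The depth thresholds are in millimeters: depth pixels closer than 100 mm or
# farther than 5000 mm are ignored when computing spatial coordinates.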

# Define an ImageManip node to resize preview frames to the NN input size
face_det_manip = pipeline.createImageManip()
face_det_manip.initialConfig.setResize(DET_INPUT_SIZE[0], DET_INPUT_SIZE[1])
face_det_manip.initialConfig.setKeepAspectRatio(False)
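
# Because keepAspectRatio is False, the preview is simply stretched to the NN
# input size, so the normalized detection coordinates can be scaled directly
# back to FRAME_SIZE when drawing results below.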

# Link camera preview to the NN input via the resizer, and depth to the NN
cam.preview.link(face_det_manip.inputImage)
face_det_manip.out.link(face_spatial_det_nn.input)
stereo.depth.link(face_spatial_det_nn.inputDepth)

# Create preview output
x_preview_out = pipeline.createXLinkOut()
x_preview_out.setStreamName("preview")
cam.preview.link(x_preview_out.input)

# Create detection output
det_out = pipeline.createXLinkOut()
det_out.setStreamName('det_out')
face_spatial_det_nn.out.link(det_out.input)

# Create disparity output
disparity_out = pipeline.createXLinkOut()
disparity_out.setStreamName("disparity")
stereo.disparity.link(disparity_out.input)
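
# Pipeline topology:
#   cam.preview -> face_det_manip -> face_spatial_det_nn -> det_out (XLinkOut)
#   mono_left / mono_right -> stereo -> face_spatial_det_nn.inputDepth
#   cam.preview -> preview (XLinkOut), stereo.disparity -> disparity (XLinkOut)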

def display_info(frame, disp_frame, bbox, coordinates, status, status_color, fps):
    # Display bounding box, if a face was detected
    if bbox is not None:
        cv2.rectangle(frame, bbox, status_color[status], 2)

    # Create background for showing details
    cv2.rectangle(frame, (5, 5, 175, 100), (50, 0, 0), -1)

    # Display detection status on the frame
    cv2.putText(frame, status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, status_color[status])

    # Display FPS on the frame
    cv2.putText(frame, f'FPS: {fps:.2f}', (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))

    # Display bbox and depth value on the disparity frame
    if coordinates is not None:
        cv2.rectangle(disp_frame, bbox, status_color[status], 2)
        cv2.rectangle(disp_frame, (5, 5, 185, 50), (50, 0, 0), -1)
        _, _, coord_z = coordinates
        cv2.putText(disp_frame, f'Depth: {coord_z:.0f} mm', (15, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))

# Frame count
frame_count = 0

# Placeholder FPS value
fps = 0

# Time when the previous batch of frames was processed
prev_frame_time = 0

# Time when the current batch of frames was processed
new_frame_time = 0

# Set status colors (BGR)
status_color = {
    'Face Detected': (0, 255, 0),
    'No Face Detected': (0, 0, 255)
}

# Start pipeline
with dai.Device(pipeline) as device:
    # Output queue to get RGB preview frames from the output defined above
    q_cam = device.getOutputQueue(name="preview", maxSize=1, blocking=False)

    # Output queue to get NN detection results
    q_det = device.getOutputQueue(name="det_out", maxSize=1, blocking=False)

    # Output queue to get the disparity map from the stereo node
    q_disp = device.getOutputQueue(name="disparity", maxSize=1, blocking=False)
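
    # With maxSize=1 and blocking=False, each queue keeps only the newest
    # message and drops older ones, so the host always works on the latest frame.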

    # Multiplier for color-mapping the disparity map (constant for this pipeline)
    disparityMultiplier = 255 / stereo.getMaxDisparity()

    while True:
        # Get RGB preview frame
        in_cam = q_cam.get()
        frame = in_cam.getCvFrame()

        # Get disparity frame
        in_disp = q_disp.get()
        disp_frame = in_disp.getCvFrame()

        # Scale disparity to the 0-255 range for display
        disp_frame = (disp_frame * disparityMultiplier).astype('uint8')

        # Apply color map to disparity map
        disp_frame = cv2.applyColorMap(disp_frame, cv2.COLORMAP_JET)
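
        # Note: disparity and depth are inversely related (depth is roughly
        # focal_length_px * baseline / disparity); the stereo node also computes
        # metric depth internally, which the spatial detection NN consumes.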

        bbox = None
        coordinates = None

        inDet = q_det.tryGet()

        if inDet is not None:
            detections = inDet.detections

            # If a face was detected
            if len(detections) != 0:
                detection = detections[0]

                # Clamp the normalized bounding box to the [0, 1] range
                xmin = max(0, detection.xmin)
                ymin = max(0, detection.ymin)
                xmax = min(detection.xmax, 1)
                ymax = min(detection.ymax, 1)

                # Convert normalized coordinates to pixel coordinates
                x = int(xmin * FRAME_SIZE[0])
                y = int(ymin * FRAME_SIZE[1])
                w = int((xmax - xmin) * FRAME_SIZE[0])
                h = int((ymax - ymin) * FRAME_SIZE[1])
                bbox = (x, y, w, h)

                # Get spatial coordinates
                coord_x = detection.spatialCoordinates.x
                coord_y = detection.spatialCoordinates.y
                coord_z = detection.spatialCoordinates.z
                coordinates = (coord_x, coord_y, coord_z)
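
        # The spatial coordinates are reported in millimeters relative to the
        # camera; the device estimates them from depth pixels inside a scaled
        # region of the detection box, limited by the depth thresholds set above.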

        # Check if a face was detected in the frame
        if bbox:
            # Face detected
            status = 'Face Detected'
        else:
            # No face detected
            status = 'No Face Detected'

        # Display info on frame
        display_info(frame, disp_frame, bbox, coordinates, status, status_color, fps)

        # Calculate average FPS over the last 10 frames
        if frame_count % 10 == 0:
            # Time when we finished processing the last 10 frames
            new_frame_time = time.time()

            # FPS is the number of frames processed per second
            fps = 1 / ((new_frame_time - prev_frame_time) / 10)
            prev_frame_time = new_frame_time

        # Display the final frame
        cv2.imshow("Face Cam", frame)

        # Display the disparity frame
        cv2.imshow("Disparity Map", disp_frame)

        # Increment frame count
        frame_count += 1

        # Capture the pressed key
        key_pressed = cv2.waitKey(1) & 0xff

        # Stop the program if the Esc key was pressed
        if key_pressed == 27:
            break

cv2.destroyAllWindows()
|