yolov5.py

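"""Object detection with YOLOv5 and the OpenCV DNN module.

The script expects the files referenced in the code below: coco.names with the
80 COCO class labels, a test image sample.jpg, and a YOLOv5s model exported to
ONNX at models/yolov5s.onnx (e.g. produced by the export script in the
ultralytics/yolov5 repository).
"""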
import cv2
import numpy as np

# Constants.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45
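# How the thresholds are used: CONFIDENCE_THRESHOLD filters detections on the
# objectness score (row[4]), SCORE_THRESHOLD filters on the best class score,
# and NMS_THRESHOLD is the IoU threshold passed to cv2.dnn.NMSBoxes.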

# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# Colors.
BLACK = (0, 0, 0)
BLUE = (255, 178, 50)
YELLOW = (0, 255, 255)
RED = (0, 0, 255)


def draw_label(input_image, label, left, top):
    """Draw text onto image at location."""
    # Get text size.
    text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
    dim, baseline = text_size[0], text_size[1]
    # Use the text size to create a BLACK background rectangle.
    cv2.rectangle(input_image, (left, top), (left + dim[0], top + dim[1] + baseline), BLACK, cv2.FILLED)
    # Display the text inside the rectangle.
    cv2.putText(input_image, label, (left, top + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)


def pre_process(input_image, net):
    # Create a 4D blob from the frame: scale pixels to [0, 1], resize to
    # 640x640 and swap BGR to RGB.
    blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WIDTH, INPUT_HEIGHT), [0, 0, 0], swapRB=True, crop=False)
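    # Note: blobFromImage stretches the frame to the 640x640 network input; the
    # official YOLOv5 pipeline letterboxes (resize with padding) instead, so
    # results on strongly non-square images may differ slightly.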
    # Set the input to the network.
    net.setInput(blob)
    # Run the forward pass to get the output of the output layers.
    output_layers = net.getUnconnectedOutLayersNames()
    outputs = net.forward(output_layers)
    return outputs
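

# For the stock 640x640 YOLOv5s COCO export, outputs[0] has shape (1, 25200, 85):
# each of the 25200 candidate detections is [cx, cy, w, h, objectness, 80 class
# scores], with box coordinates in network-input pixels; post_process rescales
# them to the original image using x_factor and y_factor.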
def post_process(input_image, outputs):
    # Lists to hold respective values while unwrapping.
    class_ids = []
    confidences = []
    boxes = []
    # Rows.
    rows = outputs[0].shape[1]
    image_height, image_width = input_image.shape[:2]
    # Resizing factors from the network input size back to the original image.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT
    # Iterate through the 25200 detections.
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]
        # Discard bad detections and continue.
        if confidence >= CONFIDENCE_THRESHOLD:
            classes_scores = row[5:]
            # Get the index of the max class score.
            class_id = np.argmax(classes_scores)
            # Keep the detection only if the class score is above the threshold.
            if classes_scores[class_id] > SCORE_THRESHOLD:
                confidences.append(confidence)
                class_ids.append(class_id)
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                left = int((cx - w/2) * x_factor)
                top = int((cy - h/2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)
    # Perform non-maximum suppression to eliminate redundant overlapping boxes
    # with lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
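    # Depending on the OpenCV version, NMSBoxes returns either a flat array of
    # indices or an Nx1 array; with older versions you may need i[0] instead of
    # i in the loop below.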
    for i in indices:
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        # Draw the bounding box.
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3 * THICKNESS)
        # Class name and confidence, drawn at the top-left corner of the box.
        label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
        draw_label(input_image, label, left, top)
    return input_image


if __name__ == '__main__':
    # Load class names.
    classesFile = "coco.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')
    # Load image.
    frame = cv2.imread('sample.jpg')
    # Give the weight file to the model and load the network with it.
    modelWeights = "models/yolov5s.onnx"
    net = cv2.dnn.readNet(modelWeights)
    # Process image.
    detections = pre_process(frame, net)
    img = post_process(frame.copy(), detections)
    # Put efficiency information. getPerfProfile returns the overall time for
    # inference (t) and the timings for each of the layers (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    print(label)
    cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, RED, THICKNESS, cv2.LINE_AA)
    cv2.imshow('Output', img)
    cv2.waitKey(0)