# grabcut-scan.py (4.9 KB)
import csv
import glob
import os
import time

import cv2
import numpy as np
  6. def order_points(pts):
  7. """Rearrange coordinates to order:
  8. top-left, top-right, bottom-right, bottom-left"""
  9. rect = np.zeros((4, 2), dtype='float32')
  10. pts = np.array(pts)
  11. s = pts.sum(axis=1)
  12. # Top-left point will have the smallest sum.
  13. rect[0] = pts[np.argmin(s)]
  14. # Bottom-right point will have the largest sum.
  15. rect[2] = pts[np.argmax(s)]
  16. diff = np.diff(pts, axis=1)
  17. # Top-right point will have the smallest difference.
  18. rect[1] = pts[np.argmin(diff)]
  19. # Bottom-left will have the largest difference.
  20. rect[3] = pts[np.argmax(diff)]
  21. # return the ordered coordinates
  22. return rect.astype('int').tolist()
  23. def scan(img):
  24. # Resize image to workable size
  25. dim_limit = 1080
  26. max_dim = max(img.shape)
  27. if max_dim > dim_limit:
  28. resize_scale = dim_limit / max_dim
  29. img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)
  30. # Create a copy of resized original image for later use
  31. orig_img = img.copy()
  32. # cv2.imshow("original_resized", orig_img)
  33. # Repeated Closing operation to remove text from the document.
  34. kernel = np.ones((5, 5), np.uint8)
  35. img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)
  36. # cv2.imshow("morphologyEX", img)
  37. # GrabCut
  38. mask = np.zeros(img.shape[:2], np.uint8)
  39. bgdModel = np.zeros((1, 65), np.float64)
  40. fgdModel = np.zeros((1, 65), np.float64)
  41. rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
  42. cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
  43. mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
  44. img = img * mask2[:, :, np.newaxis]
  45. # cv2.imshow("grabcut", img)
  46. # Convert to grayscale.
  47. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  48. gray = cv2.GaussianBlur(gray, (11, 11), 0)
  49. # cv2.imshow("gray_blurred", gray)
  50. # Edge Detection.
  51. canny = cv2.Canny(gray, 0, 200)
  52. canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
  53. # cv2.imshow("canny_dilate", canny)
  54. # Finding contours for the detected edges.
  55. contours, hierarchy = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
  56. # Keeping only the largest detected contour.
  57. page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
  58. # Detecting Edges through Contour approximation.
  59. # Loop over the contours.
  60. if len(page) == 0:
  61. return orig_img
  62. for c in page:
  63. # Approximate the contour.
  64. epsilon = 0.02 * cv2.arcLength(c, True)
  65. corners = cv2.approxPolyDP(c, epsilon, True)
  66. # If our approximated contour has four points.
  67. if len(corners) == 4:
  68. break
  69. # Sorting the corners and converting them to desired shape.
  70. corners = sorted(np.concatenate(corners).tolist())
  71. # For 4 corner points being detected.
  72. # Rearranging the order of the corner points.
  73. corners = order_points(corners)
  74. (tl, tr, br, bl) = corners
  75. # Draw points
  76. points_img = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
  77. point_count = 0
  78. for corner in corners:
  79. cv2.circle(points_img, corner, 3, (255, 0, 0), -1)
  80. point_count += 1
  81. cv2.putText(points_img, str(point_count), corner, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
  82. # cv2.imshow("points", points_img)
  83. # Finding the maximum width.
  84. widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
  85. widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
  86. maxWidth = max(int(widthA), int(widthB))
  87. # Finding the maximum height.
  88. heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
  89. heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
  90. maxHeight = max(int(heightA), int(heightB))
  91. # Final destination co-ordinates.
  92. destination_corners = [[0, 0], [maxWidth, 0], [maxWidth, maxHeight], [0, maxHeight]]
  93. # Getting the homography.
  94. M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
  95. # Perspective transform using homography.
  96. final = cv2.warpPerspective(orig_img, M, (maxWidth, maxHeight), flags=cv2.INTER_LINEAR)
  97. return final
  98. runtime = []
  99. for img_path in glob.glob('inputs/*.jpg'):
  100. try:
  101. img = cv2.imread(img_path)
  102. print(img_path)
  103. t1 = time.time()
  104. scanned_img = scan(img)
  105. t2 = time.time()
  106. runtime.append({'image': img_path, 'time': t2 - t1})
  107. # cv2.imshow("scanner", scanned_img)
  108. cv2.imwrite('grabcutop/' + img_path.split('/')[-1], scanned_img)
  109. print("scanned")
  110. key = cv2.waitKey(0)
  111. if key == 27:
  112. break
  113. except:
  114. print('fail')
  115. csv_columns = ['image', 'time']
  116. with open('time.csv', 'w') as f:
  117. writer = csv.DictWriter(f, fieldnames=csv_columns)
  118. writer.writeheader()
  119. for data in runtime:
  120. writer.writerow(data)
  121. cv2.destroyAllWindows()