# ocr_simple.py
  1. import cv2
  2. import sys
  3. import pytesseract
  4. if __name__ == '__main__':
  5. if len(sys.argv) < 2:
  6. print('Usage: python ocr_simple.py image.jpg')
  7. sys.exit(1)
  8. # Read image path from command line
  9. imPath = sys.argv[1]
  10. # Uncomment the line below to provide path to tesseract manually
  11. # pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
  12. # Define config parameters.
  13. # '-l eng' for using the English language
  14. # '--oem 1' sets the OCR Engine Mode to LSTM only.
  15. #
  16. # There are four OCR Engine Mode (oem) available
  17. # 0 Legacy engine only.
  18. # 1 Neural nets LSTM engine only.
  19. # 2 Legacy + LSTM engines.
  20. # 3 Default, based on what is available.
  21. #
  22. # '--psm 3' sets the Page Segmentation Mode (psm) to auto.
  23. # Other important psm modes will be discussed in a future post.
  24. config = ('-l eng --oem 1 --psm 3')
  25. # Read image from disk
  26. im = cv2.imread(imPath, cv2.IMREAD_COLOR)
  27. # Run tesseract OCR on image
  28. text = pytesseract.image_to_string(im, config=config)
  29. # Print recognized text
  30. print(text)