// ocr_simple.cpp
  #include <cstdlib>
  #include <iostream>
  #include <memory>
  #include <string>

  #include <leptonica/allheaders.h>
  #include <opencv2/opencv.hpp>
  #include <tesseract/baseapi.h>

  using namespace std;
  using namespace cv;
  7. int main(int argc, char* argv[])
  8. {
  9. string outText;
  10. string imPath = argv[1];
  11. // Create Tesseract object
  12. tesseract::TessBaseAPI *ocr = new tesseract::TessBaseAPI();
  13. /*
  14. Initialize OCR engine to use English (eng) and The LSTM
  15. OCR engine.
  16. There are four OCR Engine Mode (oem) available
  17. OEM_TESSERACT_ONLY Legacy engine only.
  18. OEM_LSTM_ONLY Neural nets LSTM engine only.
  19. OEM_TESSERACT_LSTM_COMBINED Legacy + LSTM engines.
  20. OEM_DEFAULT Default, based on what is available.
  21. */
  22. ocr->Init(NULL, "eng", tesseract::OEM_LSTM_ONLY);
  23. // Set Page segmentation mode to PSM_AUTO (3)
  24. // Other important psm modes will be discussed in a future post.
  25. ocr->SetPageSegMode(tesseract::PSM_AUTO);
  26. // Open input image using OpenCV
  27. Mat im = cv::imread(imPath, IMREAD_COLOR);
  28. // Set image data
  29. ocr->SetImage(im.data, im.cols, im.rows, 3, im.step);
  30. // Run Tesseract OCR on image
  31. outText = string(ocr->GetUTF8Text());
  32. // print recognized text
  33. cout << outText << endl;
  34. // Destroy used object and release memory
  35. ocr->End();
  36. return EXIT_SUCCESS;
  37. }