193 lines
6.5 KiB
C++
193 lines
6.5 KiB
C++
|
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||
|
|
|
||
|
|
#include <include/paddleocr.h>

#include <utility>
|
||
|
|
|
||
|
|
namespace PaddleOCR {
|
||
|
|
|
||
|
|
// Default constructor: only seeds the cached tuning values. The body creates
// no detector, classifier or recognizer; those are built by Initialize().
PPOCR::PPOCR() {
  // Values forwarded to the detector's SetParameters().
  _limit_type = "max";
  _det_db_score_mode = "slow";
  _is_scale = true;
  _det_db_thresh = 0.3;
  _det_db_box_thresh = 0.6;
  _det_db_unclip_ratio = 1.5;
  _use_dilation = false;

  // Values forwarded to the classifier's SetParameters().
  _cls_batch_num = 1;
  _cls_thresh = 0.9;

  // Value forwarded to the recognizer's SetParameters().
  _rec_batch_num = 1;
}
|
||
|
|
|
||
|
|
bool PPOCR::Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir) {
|
||
|
|
this->detector_ = std::make_unique<Detector>(detectionModelDir);
|
||
|
|
if (!classifierModelDir.empty()) {
|
||
|
|
this->classifier_ = std::make_unique<Classifier>(classifierModelDir);
|
||
|
|
}
|
||
|
|
this->recognizer_ = std::make_unique<Recognizer>(recognizerModelDir, labelDir);
|
||
|
|
if (detector_) detector_->SetParameters(_limit_type, _det_db_score_mode, _is_scale, _det_db_thresh, _det_db_box_thresh, _det_db_unclip_ratio, _use_dilation);
|
||
|
|
if (classifier_) classifier_->SetParameters(_cls_batch_num, _cls_thresh);
|
||
|
|
if (recognizer_) recognizer_->SetParameters(_rec_batch_num);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
void PPOCR::SetParameters(std::string limit_type,
|
||
|
|
std::string det_db_score_mode,
|
||
|
|
bool is_scale,
|
||
|
|
double det_db_thresh,
|
||
|
|
double det_db_box_thresh,
|
||
|
|
double det_db_unclip_ratio,
|
||
|
|
bool use_dilation,
|
||
|
|
int cls_batch_num,
|
||
|
|
double cls_thresh,
|
||
|
|
int rec_batch_num)
|
||
|
|
{
|
||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||
|
|
this->_limit_type = limit_type;
|
||
|
|
this->_det_db_score_mode = det_db_score_mode;
|
||
|
|
this->_is_scale = is_scale;
|
||
|
|
this->_det_db_thresh = det_db_thresh;
|
||
|
|
this->_det_db_box_thresh = det_db_box_thresh;
|
||
|
|
this->_det_db_unclip_ratio = det_db_unclip_ratio;
|
||
|
|
this->_use_dilation = use_dilation;
|
||
|
|
this->_cls_batch_num = cls_batch_num;
|
||
|
|
this->_cls_thresh = cls_thresh;
|
||
|
|
this->_rec_batch_num = rec_batch_num;
|
||
|
|
if (detector_) detector_->SetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
|
||
|
|
if (classifier_) classifier_->SetParameters(cls_batch_num, cls_thresh);
|
||
|
|
if (recognizer_) recognizer_->SetParameters(rec_batch_num);
|
||
|
|
}
|
||
|
|
void PPOCR::GetParameters(std::string& limit_type,
|
||
|
|
std::string& det_db_score_mode,
|
||
|
|
bool& is_scale,
|
||
|
|
double& det_db_thresh,
|
||
|
|
double& det_db_box_thresh,
|
||
|
|
double& det_db_unclip_ratio,
|
||
|
|
bool& use_dilation,
|
||
|
|
int& cls_batch_num,
|
||
|
|
double& cls_thresh,
|
||
|
|
int& rec_batch_num)
|
||
|
|
{
|
||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||
|
|
if (detector_) detector_->GetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
|
||
|
|
if (classifier_) classifier_->GetParameters(cls_batch_num, cls_thresh);
|
||
|
|
if (recognizer_) recognizer_->GetParameters(rec_batch_num);
|
||
|
|
}
|
||
|
|
// Previous by-value implementation of PPOCR::ocr, disabled in favour of the
// const-reference version defined below.
//
//std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img)
//{
//    std::lock_guard<std::recursive_mutex> lock(_mutex);
//    try {
//        std::vector<OCRPredictResult> ocr_result;
//        // detect the sentence in input image
//        this->detector_->Run(img, ocr_result);
//        // crop image
//        std::vector<cv::Mat> img_list;
//        for (int j = 0; j < ocr_result.size(); j++) {
//            cv::Mat crop_img;
//            crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box);
//            img_list.push_back(crop_img);
//        }
//
//        if (this->classifier_ != nullptr) {
//            // find the reversed sentence and flip it
//            this->classifier_->Run(img_list, ocr_result);
//            for (int i = 0; i < img_list.size(); i++) {
//                if (ocr_result[i].cls_label % 2 == 1 &&
//                    ocr_result[i].cls_score > _cls_thresh) {
//                    cv::rotate(img_list[i], img_list[i], 1);
//                }
//            }
//        }
//
//        // recognize the words in sentence and print them
//        this->recognizer_->Run(img_list, ocr_result);
//
//        return ocr_result;
//    }
//    catch (const std::exception& e) {
//        std::cerr << e.what() << std::endl;
//        return std::vector<OCRPredictResult>();
//    }
//}
|
||
|
|
// Runs the OCR pipeline on one image: detection, per-box cropping, optional
// orientation classification (with 180-degree correction) and recognition.
//
// img - input image; an empty image is reported on std::cerr and yields an
//       empty result.
//
// Returns the vector filled in by the detector, classifier and recognizer.
// The function does not let std::exception (or any other exception) escape:
// failures are logged to std::cerr and whatever has been accumulated in the
// result vector up to that point is returned. The whole call runs while
// holding _mutex.
std::vector<OCRPredictResult> PPOCR::ocr(const cv::Mat& img) {
  std::lock_guard<std::recursive_mutex> lock(_mutex);
  std::vector<OCRPredictResult> ocr_result;

  try {
    // Guard clauses: nothing to do without an image or without the two
    // mandatory components.
    if (img.empty()) {
      std::cerr << "[PPOCR] Input image is empty!" << std::endl;
      return ocr_result;
    }
    if (!(detector_ && recognizer_)) {
      std::cerr << "[PPOCR] Detector or recognizer not initialized!" << std::endl;
      return ocr_result;
    }

    // Stage 1: detection.
    detector_->Run(img, ocr_result);

    // Stage 2: one crop per detection. A failed crop is replaced by an empty
    // cv::Mat so that crops[i] keeps corresponding to ocr_result[i].
    std::vector<cv::Mat> crops;
    for (size_t idx = 0; idx < ocr_result.size(); ++idx) {
      const OCRPredictResult& detection = ocr_result[idx];
      try {
        cv::Mat region = Utility::GetRotateCropImage(img, detection.box);
        crops.push_back(region);
      } catch (const std::exception& e) {
        std::cerr << "[PPOCR] Error cropping region: " << e.what() << std::endl;
        crops.push_back(cv::Mat());
      }
    }

    // Stage 3 (only when a classifier exists): classify, then rotate by 180
    // degrees every non-empty crop with an odd label and a score above
    // _cls_thresh.
    if (classifier_) {
      try {
        classifier_->Run(crops, ocr_result);
        for (size_t i = 0; i < crops.size(); ++i) {
          if (i >= ocr_result.size()) {
            break;
          }
          if (crops[i].empty()) {
            continue;
          }
          const bool odd_label = (ocr_result[i].cls_label % 2 == 1);
          if (odd_label && (ocr_result[i].cls_score > _cls_thresh)) {
            cv::rotate(crops[i], crops[i], cv::ROTATE_180);
          }
        }
      } catch (const std::exception& e) {
        std::cerr << "[PPOCR] Classifier error: " << e.what() << std::endl;
      }
    }

    // Stage 4: recognition.
    try {
      recognizer_->Run(crops, ocr_result);
    } catch (const std::exception& e) {
      std::cerr << "[PPOCR] Recognizer error: " << e.what() << std::endl;
    }
  } catch (const std::exception& e) {
    std::cerr << "[PPOCR] General exception: " << e.what() << std::endl;
  } catch (...) {
    std::cerr << "[PPOCR] Unknown exception occurred!" << std::endl;
  }

  return ocr_result;
}
|
||
|
|
// Releases the three components explicitly, in the order detector,
// classifier, recognizer. reset() on an empty pointer does nothing, so no
// null checks are needed.
PPOCR::~PPOCR() {
  detector_.reset();
  classifier_.reset();
  recognizer_.reset();
}
|
||
|
|
} // namespace PaddleOCR
|