Files
ANSCORE/modules/ANSOCR/ANSPaddleOCR/src/paddleocr.cpp

193 lines
6.5 KiB
C++

// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/paddleocr.h>

#include <utility>
namespace PaddleOCR {
// Default constructor: records the default tuning values on the object.
// No engine is created here; Initialize() builds the detector, classifier and
// recognizer and hands these values to them.
PPOCR::PPOCR() {
    // Values forwarded to the detector.
    _limit_type = "max";
    _det_db_score_mode = "slow";
    _is_scale = true;
    _det_db_thresh = 0.3;
    _det_db_box_thresh = 0.6;
    _det_db_unclip_ratio = 1.5;
    _use_dilation = false;

    // Values forwarded to the classifier.
    _cls_batch_num = 1;
    _cls_thresh = 0.9;

    // Value forwarded to the recognizer.
    _rec_batch_num = 1;
}
bool PPOCR::Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir) {
this->detector_ = std::make_unique<Detector>(detectionModelDir);
if (!classifierModelDir.empty()) {
this->classifier_ = std::make_unique<Classifier>(classifierModelDir);
}
this->recognizer_ = std::make_unique<Recognizer>(recognizerModelDir, labelDir);
if (detector_) detector_->SetParameters(_limit_type, _det_db_score_mode, _is_scale, _det_db_thresh, _det_db_box_thresh, _det_db_unclip_ratio, _use_dilation);
if (classifier_) classifier_->SetParameters(_cls_batch_num, _cls_thresh);
if (recognizer_) recognizer_->SetParameters(_rec_batch_num);
return true;
}
void PPOCR::SetParameters(std::string limit_type,
std::string det_db_score_mode,
bool is_scale,
double det_db_thresh,
double det_db_box_thresh,
double det_db_unclip_ratio,
bool use_dilation,
int cls_batch_num,
double cls_thresh,
int rec_batch_num)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
this->_limit_type = limit_type;
this->_det_db_score_mode = det_db_score_mode;
this->_is_scale = is_scale;
this->_det_db_thresh = det_db_thresh;
this->_det_db_box_thresh = det_db_box_thresh;
this->_det_db_unclip_ratio = det_db_unclip_ratio;
this->_use_dilation = use_dilation;
this->_cls_batch_num = cls_batch_num;
this->_cls_thresh = cls_thresh;
this->_rec_batch_num = rec_batch_num;
if (detector_) detector_->SetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
if (classifier_) classifier_->SetParameters(cls_batch_num, cls_thresh);
if (recognizer_) recognizer_->SetParameters(rec_batch_num);
}
void PPOCR::GetParameters(std::string& limit_type,
std::string& det_db_score_mode,
bool& is_scale,
double& det_db_thresh,
double& det_db_box_thresh,
double& det_db_unclip_ratio,
bool& use_dilation,
int& cls_batch_num,
double& cls_thresh,
int& rec_batch_num)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (detector_) detector_->GetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
if (classifier_) classifier_->GetParameters(cls_batch_num, cls_thresh);
if (recognizer_) recognizer_->GetParameters(rec_batch_num);
}
//std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img)
//{
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// try {
// std::vector<OCRPredictResult> ocr_result;
// // detect the sentence in input image
// this->detector_->Run(img, ocr_result);
// // crop image
// std::vector<cv::Mat> img_list;
// for (int j = 0; j < ocr_result.size(); j++) {
// cv::Mat crop_img;
// crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box);
// img_list.push_back(crop_img);
// }
// if (this->classifier_ != nullptr) {
// // find the reversed sentence and flip it
// this->classifier_->Run(img_list, ocr_result);
// for (int i = 0; i < img_list.size(); i++) {
// if (ocr_result[i].cls_label % 2 == 1 &&
// ocr_result[i].cls_score > _cls_thresh) {
// cv::rotate(img_list[i], img_list[i], 1);
// }
// }
// }
// // recognize the words in sentence and print them
// this->recognizer_->Run(img_list, ocr_result);
// return ocr_result;
// }
// catch (const std::exception& e) {
// std::cerr << e.what() << std::endl;
// return std::vector<OCRPredictResult>();
// }
//}
// Runs the OCR pipeline on one image and returns the per-region results.
//
// Stages, all executed under _mutex:
//   1. the detector receives the image and the result vector;
//   2. one crop is produced per result from its box (an empty cv::Mat stands in
//      for a crop that failed, so crop i always belongs to result i);
//   3. if a classifier is loaded it receives crops and results, after which
//      every non-empty crop whose result has an odd cls_label and a cls_score
//      above _cls_thresh is rotated by 180 degrees in place;
//   4. the recognizer receives crops and results.
//
// Nothing is thrown to the caller: every failure is logged to std::cerr and the
// result vector is returned in whatever state it had reached. An empty image
// or a missing detector/recognizer yields an empty vector.
std::vector<OCRPredictResult> PPOCR::ocr(const cv::Mat& img) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    std::vector<OCRPredictResult> results;
    try {
        if (img.empty()) {
            std::cerr << "[PPOCR] Input image is empty!" << std::endl;
            return results;
        }
        const bool engines_ready = detector_ && recognizer_;
        if (!engines_ready) {
            std::cerr << "[PPOCR] Detector or recognizer not initialized!" << std::endl;
            return results;
        }

        // Stage 1: detection.
        detector_->Run(img, results);

        // Stage 2: cut out every detected region.
        std::vector<cv::Mat> crops;
        for (size_t idx = 0; idx < results.size(); ++idx) {
            try {
                crops.push_back(Utility::GetRotateCropImage(img, results[idx].box));
            }
            catch (const std::exception& crop_err) {
                std::cerr << "[PPOCR] Error cropping region: " << crop_err.what() << std::endl;
                crops.emplace_back(); // empty placeholder keeps crops aligned with results
            }
        }

        // Stage 3: optional orientation classification.
        if (classifier_) {
            try {
                classifier_->Run(crops, results);
                const size_t paired = crops.size() < results.size() ? crops.size() : results.size();
                for (size_t idx = 0; idx < paired; ++idx) {
                    cv::Mat& patch = crops[idx];
                    if (patch.empty()) {
                        continue;
                    }
                    const auto& entry = results[idx];
                    if (entry.cls_label % 2 != 1) {
                        continue;
                    }
                    if (!(entry.cls_score > _cls_thresh)) {
                        continue;
                    }
                    cv::rotate(patch, patch, cv::ROTATE_180); // ROTATE_180 has the numeric value 1
                }
            }
            catch (const std::exception& cls_err) {
                std::cerr << "[PPOCR] Classifier error: " << cls_err.what() << std::endl;
            }
        }

        // Stage 4: text recognition.
        try {
            recognizer_->Run(crops, results);
        }
        catch (const std::exception& rec_err) {
            std::cerr << "[PPOCR] Recognizer error: " << rec_err.what() << std::endl;
        }
    }
    catch (const std::exception& err) {
        std::cerr << "[PPOCR] General exception: " << err.what() << std::endl;
    }
    catch (...) {
        std::cerr << "[PPOCR] Unknown exception occurred!" << std::endl;
    }
    return results;
}
// Destructor: releases the engines explicitly, detector first, then the
// classifier, then the recognizer.
PPOCR::~PPOCR() {
    // reset() on an empty pointer does nothing, so no null checks are needed.
    detector_.reset();
    classifier_.reset();
    recognizer_.reset();
}
} // namespace PaddleOCR