// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include namespace PaddleOCR { PPOCR::PPOCR() { this->_limit_type = "max"; this->_det_db_score_mode = "slow"; this->_is_scale = true; this->_det_db_thresh = 0.3; this->_det_db_box_thresh = 0.6; this->_det_db_unclip_ratio = 1.5; this->_use_dilation = false; this->_cls_batch_num = 1; this->_cls_thresh = 0.9; this->_rec_batch_num = 1; }; bool PPOCR::Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir) { this->detector_ = std::make_unique(detectionModelDir); if (!classifierModelDir.empty()) { this->classifier_ = std::make_unique(classifierModelDir); } this->recognizer_ = std::make_unique(recognizerModelDir, labelDir); if (detector_) detector_->SetParameters(_limit_type, _det_db_score_mode, _is_scale, _det_db_thresh, _det_db_box_thresh, _det_db_unclip_ratio, _use_dilation); if (classifier_) classifier_->SetParameters(_cls_batch_num, _cls_thresh); if (recognizer_) recognizer_->SetParameters(_rec_batch_num); return true; } void PPOCR::SetParameters(std::string limit_type, std::string det_db_score_mode, bool is_scale, double det_db_thresh, double det_db_box_thresh, double det_db_unclip_ratio, bool use_dilation, int cls_batch_num, double cls_thresh, int rec_batch_num) { std::lock_guard lock(_mutex); this->_limit_type = limit_type; this->_det_db_score_mode = det_db_score_mode; this->_is_scale = is_scale; this->_det_db_thresh = det_db_thresh; this->_det_db_box_thresh = det_db_box_thresh; this->_det_db_unclip_ratio = det_db_unclip_ratio; this->_use_dilation = use_dilation; this->_cls_batch_num = cls_batch_num; this->_cls_thresh = cls_thresh; this->_rec_batch_num = rec_batch_num; if (detector_) detector_->SetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation); if (classifier_) classifier_->SetParameters(cls_batch_num, cls_thresh); if (recognizer_) recognizer_->SetParameters(rec_batch_num); } void PPOCR::GetParameters(std::string& limit_type, std::string& det_db_score_mode, bool& is_scale, double& det_db_thresh, double& det_db_box_thresh, double& det_db_unclip_ratio, bool& use_dilation, int& cls_batch_num, double& cls_thresh, int& rec_batch_num) { std::lock_guard lock(_mutex); if (detector_) detector_->GetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation); if (classifier_) classifier_->GetParameters(cls_batch_num, cls_thresh); if (recognizer_) recognizer_->GetParameters(rec_batch_num); } //std::vector PPOCR::ocr(cv::Mat img) //{ // std::lock_guard lock(_mutex); // try { // std::vector ocr_result; // // detect the sentence in input image // this->detector_->Run(img, ocr_result); // // crop image // std::vector img_list; // for (int j = 0; j < ocr_result.size(); j++) { // cv::Mat crop_img; // crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box); // img_list.push_back(crop_img); // } // if (this->classifier_ != nullptr) { // // find the reversed sentence and flip it // this->classifier_->Run(img_list, ocr_result); // for (int i = 0; i < img_list.size(); i++) { // if (ocr_result[i].cls_label % 2 == 1 && // ocr_result[i].cls_score > _cls_thresh) { // cv::rotate(img_list[i], img_list[i], 1); // } // } // } // // recognize the words in sentence and print them // this->recognizer_->Run(img_list, ocr_result); // return ocr_result; // } // catch (const std::exception& e) { // std::cerr << e.what() << std::endl; // return std::vector(); // } //} std::vector PPOCR::ocr(const cv::Mat& img) { std::lock_guard lock(_mutex); std::vector ocr_result; try { if (img.empty()) { std::cerr << "[PPOCR] Input image is empty!" << std::endl; return ocr_result; } if (!this->detector_ || !this->recognizer_) { std::cerr << "[PPOCR] Detector or recognizer not initialized!" << std::endl; return ocr_result; } // Run detector this->detector_->Run(img, ocr_result); // Crop each detected region std::vector img_list; for (const auto& result : ocr_result) { try { cv::Mat crop_img = Utility::GetRotateCropImage(img, result.box); img_list.push_back(crop_img); } catch (const std::exception& e) { std::cerr << "[PPOCR] Error cropping region: " << e.what() << std::endl; img_list.push_back(cv::Mat()); // Push empty mat to preserve indexing } } // Run classifier if available if (this->classifier_) { try { this->classifier_->Run(img_list, ocr_result); for (size_t i = 0; i < img_list.size() && i < ocr_result.size(); ++i) { if (!img_list[i].empty() && (ocr_result[i].cls_label % 2 == 1) && (ocr_result[i].cls_score > _cls_thresh)) { cv::rotate(img_list[i], img_list[i], cv::ROTATE_180); // same as rotate(img, img, 1) } } } catch (const std::exception& e) { std::cerr << "[PPOCR] Classifier error: " << e.what() << std::endl; } } // Run recognizer try { this->recognizer_->Run(img_list, ocr_result); } catch (const std::exception& e) { std::cerr << "[PPOCR] Recognizer error: " << e.what() << std::endl; } } catch (const std::exception& e) { std::cerr << "[PPOCR] General exception: " << e.what() << std::endl; } catch (...) { std::cerr << "[PPOCR] Unknown exception occurred!" << std::endl; } return ocr_result; } PPOCR::~PPOCR() { if (detector_) detector_.reset(); if (classifier_) classifier_.reset(); if (recognizer_) recognizer_.reset(); } } // namespace PaddleOCR