#include "PaddleOCRV5RTEngine.h"

#include <cstddef>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

namespace ANSCENTER {
namespace rtocr {

/// @brief Builds the detector, the optional classifier and the recognizer.
///
/// Any stages left over from a previous call are released first. If the call
/// fails (a mandatory stage refuses to initialize, or an exception is thrown)
/// all three stage pointers are left null, so a later ocr() call returns an
/// empty result instead of running on a half-initialized pipeline.
///
/// @param detModelPath   Path handed to the detector's Initialize().
/// @param clsModelPath   Path handed to the classifier's Initialize(). When
///                       empty, no classifier is created. A classifier that
///                       fails to initialize is dropped with a warning and
///                       does not fail the whole call.
/// @param recModelPath   Path handed to the recognizer's Initialize().
/// @param dictPath       Dictionary path handed to the recognizer's Initialize().
/// @param gpuId          Stored in gpuId_ and forwarded to every stage.
/// @param engineCacheDir Stored in engineCacheDir_ when non-empty; otherwise
///                       the previously stored value is kept.
/// @return true when detector and recognizer initialized; false otherwise.
///
/// The whole call runs while holding _mutex.
bool PaddleOCRV5RTEngine::Initialize(const std::string& detModelPath,
                                     const std::string& clsModelPath,
                                     const std::string& recModelPath,
                                     const std::string& dictPath,
                                     int gpuId,
                                     const std::string& engineCacheDir) {
    std::lock_guard<decltype(_mutex)> lock(_mutex);

    // Stage types are taken from the member declarations so this file cannot
    // drift out of sync with the header.
    using Detector = decltype(detector_)::element_type;
    using Classifier = decltype(classifier_)::element_type;
    using Recognizer = decltype(recognizer_)::element_type;

    gpuId_ = gpuId;
    if (!engineCacheDir.empty()) {
        engineCacheDir_ = engineCacheDir;
    }

    const auto releaseStages = [this]() {
        detector_.reset();
        classifier_.reset();
        recognizer_.reset();
    };

    // Start from a clean slate: a re-initialization must not inherit a stale
    // classifier (or any other stage) from the previous configuration.
    releaseStages();

    try {
        // 1. Detector (mandatory)
        detector_ = std::make_unique<Detector>();
        if (!detector_->Initialize(detModelPath, gpuId_, engineCacheDir_, detMaxSideLen_)) {
            std::cerr << "[PaddleOCRV5RTEngine] Failed to initialize detector" << std::endl;
            releaseStages();
            return false;
        }

        // 2. Classifier (optional - only if a path is provided)
        if (!clsModelPath.empty()) {
            classifier_ = std::make_unique<Classifier>();
            if (!classifier_->Initialize(clsModelPath, gpuId_, engineCacheDir_)) {
                std::cerr << "[PaddleOCRV5RTEngine] Warning: Failed to initialize classifier, skipping" << std::endl;
                classifier_.reset();
            }
        }

        // 3. Recognizer (mandatory); input geometry is configured before Initialize()
        recognizer_ = std::make_unique<Recognizer>();
        recognizer_->SetRecImageHeight(recImgH_);
        recognizer_->SetRecImageMaxWidth(recImgMaxW_);
        if (!recognizer_->Initialize(recModelPath, dictPath, gpuId_, engineCacheDir_)) {
            std::cerr << "[PaddleOCRV5RTEngine] Failed to initialize recognizer" << std::endl;
            releaseStages();
            return false;
        }

        std::cout << "[PaddleOCRV5RTEngine] Initialized successfully"
                  << " (detector: yes, classifier: " << (classifier_ ? "yes" : "no")
                  << ", recognizer: yes)" << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5RTEngine] Initialize failed: " << e.what() << std::endl;
        // Do not leave a stage behind that was constructed but never initialized.
        releaseStages();
        return false;
    }
}

/// @brief Runs detection, optional orientation classification and recognition
///        on one image.
///
/// @param image Input image. An empty image yields an empty result.
/// @return One OCRPredictResult per detected box whose crop was non-empty, in
///         detector order. `box` holds the four corner points as [x, y]
///         entries. `text`/`score` are filled only when the recognizer
///         returned a line for that index. `cls_label`/`cls_score` are
///         0 / 0.0f when no classifier is loaded or it returned no entry for
///         that index. Returns an empty vector when the engine is not
///         initialized or nothing was detected. If a std::exception is thrown
///         it is logged and whatever has been collected so far is returned.
///
/// The whole call runs while holding _mutex.
std::vector<OCRPredictResult> PaddleOCRV5RTEngine::ocr(const cv::Mat& image) {
    std::lock_guard<decltype(_mutex)> lock(_mutex);

    std::vector<OCRPredictResult> results;
    if (!detector_ || !recognizer_ || image.empty()) {
        return results;
    }

    try {
        // 1. Detection: find text boxes
        auto detected = detector_->Detect(image, detMaxSideLen_, detDbThresh_,
                                          detBoxThresh_, detUnclipRatio_, useDilation_);
        if (detected.empty()) {
            return results;
        }

        // 2. Crop text regions. Boxes and crops are collected together in a
        //    single pass so index i always refers to the same region in both
        //    lists and no box is ever cropped twice.
        decltype(detected) textBoxes;
        std::vector<cv::Mat> croppedImages;
        textBoxes.reserve(detected.size());
        croppedImages.reserve(detected.size());
        for (auto& box : detected) {
            cv::Mat cropped = GetRotateCropImage(image, box);
            if (cropped.empty()) {
                continue;
            }
            textBoxes.push_back(std::move(box));
            croppedImages.push_back(std::move(cropped));
        }
        if (croppedImages.empty()) {
            // Every crop failed: nothing to classify or recognize.
            return results;
        }

        // 3. Classification (optional): check orientation and rotate if needed
        std::vector<int> clsLabels(croppedImages.size(), 0);
        std::vector<float> clsScores(croppedImages.size(), 0.0f);
        if (classifier_) {
            const auto clsResults = classifier_->Classify(croppedImages, clsThresh_);
            for (std::size_t i = 0; i < clsResults.size() && i < croppedImages.size(); ++i) {
                clsLabels[i] = clsResults[i].first;
                clsScores[i] = clsResults[i].second;

                // Rotate 180 degrees if the label is odd and confidence is high enough
                if (clsLabels[i] % 2 == 1 && clsScores[i] > clsThresh_) {
                    cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
                }
            }
        }

        // 4. Recognition: extract text from the (possibly rotated) crops
        const auto textLines = recognizer_->RecognizeBatch(croppedImages);

        // 5. Combine results
        // The coordinate type is deduced from OCRPredictResult::box so the
        // cast always matches the result struct.
        using BoxPoint = decltype(OCRPredictResult::box)::value_type;
        using Coord = BoxPoint::value_type;

        results.reserve(textBoxes.size());
        for (std::size_t i = 0; i < textBoxes.size(); ++i) {
            OCRPredictResult res;

            // Convert box to [[x,y], ...] format
            for (int j = 0; j < 4; ++j) {
                const auto& corner = textBoxes[i].points[j];
                res.box.push_back({ static_cast<Coord>(corner.x),
                                    static_cast<Coord>(corner.y) });
            }

            // The recognizer may return fewer lines than crops; leave the
            // struct's defaults in place for the remainder.
            if (i < textLines.size()) {
                res.text = textLines[i].text;
                res.score = textLines[i].score;
            }

            res.cls_label = clsLabels[i];
            res.cls_score = clsScores[i];
            results.push_back(std::move(res));
        }

        return results;
    }
    catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5RTEngine] OCR failed: " << e.what() << std::endl;
        return results;
    }
}

} // namespace rtocr
} // namespace ANSCENTER