#include "PaddleOCRV5Engine.h"
#include "EPLoader.h"

// NOTE(review): the names of the three angle-bracket includes were not
// recoverable from the text under review. The list below covers every
// standard/OpenCV facility this translation unit uses directly — confirm
// against the original include list before merging.
#include <cstddef>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/core.hpp>

namespace ANSCENTER {
namespace onnxocr {

/// @brief Loads the detector, optional classifier, recognizer and dictionary.
///
/// Holds `_mutex` and a `ModelLoadingGuard` on `_modelLoading` for the whole
/// call. On any failure (exception or dictionary load error) every stage is
/// released and `_initialized` is left `false`.
///
/// @param detModelPath Path handed to the detector's constructor.
/// @param clsModelPath Path handed to the classifier's constructor; when empty
///                     the classifier stage is skipped.
/// @param recModelPath Path handed to the recognizer's constructor.
/// @param dictPath     Path handed to the recognizer's `LoadDictionary`.
/// @return `true` when every requested stage loaded, `false` otherwise.
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
                                   const std::string& clsModelPath,
                                   const std::string& recModelPath,
                                   const std::string& dictPath) {
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);

    // A re-initialisation must not keep reporting the previous success while
    // the stages are being replaced (or if replacing them fails part-way).
    _initialized = false;

    // Pointee types are taken from the member declarations.
    // NOTE(review): assumes detector_/classifier_/recognizer_ are smart
    // pointers to the concrete stage classes — confirm against the header.
    using DetectorT = decltype(detector_)::element_type;
    using ClassifierT = decltype(classifier_)::element_type;
    using RecognizerT = decltype(recognizer_)::element_type;

    // Single teardown path shared by every failure exit.
    const auto releaseAll = [this]() {
        detector_.reset();
        classifier_.reset();
        recognizer_.reset();
        _initialized = false;
    };

    try {
        // Initialize detector (also triggers EPLoader init in BasicOrtHandler)
        detector_ = std::make_unique<DetectorT>(detModelPath);
        std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;

        // Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
        // BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own
        // inline-static copy. Without this, inference calls from ANSOCR.dll crash.
        if (Ort::Global<void>::api_ == nullptr) {
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        }

        // Initialize classifier (optional)
        if (!clsModelPath.empty()) {
            classifier_ = std::make_unique<ClassifierT>(clsModelPath);
            std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
        } else {
            classifier_.reset();
            std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;
        }

        // Initialize recognizer
        recognizer_ = std::make_unique<RecognizerT>(recModelPath);
        if (!recognizer_->LoadDictionary(dictPath)) {
            std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
            // Do not leave a half-built pipeline behind.
            releaseAll();
            return false;
        }
        std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;

        _initialized = true;
        std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
        return true;
    } catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;
        releaseAll();
        return false;
    }
}

/// @brief Runs detection, cropping, optional orientation classification and
///        recognition on one image.
///
/// Returns an empty vector when models are being loaded, the lock cannot be
/// acquired, the engine is not initialized, the image is empty, nothing is
/// detected, or no detected region yields a non-empty crop.
///
/// @param img Input image.
/// @return One `OCRPredictResult` per recognized crop, each carrying the
///         detection box the crop was cut from.
std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
    if (_modelLoading.load()) return {};

    std::vector<OCRPredictResult> results;
    {
        auto lk = TryLockWithTimeout("PaddleOCRV5Engine::ocr");
        if (!lk.owns_lock()) return results;
        if (!_initialized || !detector_ || !recognizer_ || img.empty()) return results;
    }
    // _mutex released — heavy pipeline runs lock-free

    // Step 1: Text Detection
    auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh,
                                   _detUnclipRatio, _useDilation);
    if (boxes.empty()) {
        return results;
    }

    // Step 2: Crop detected text regions.
    // Empty crops are dropped, so remember which box each kept crop came from;
    // otherwise later results would be paired with the wrong box.
    std::vector<cv::Mat> croppedImages;
    std::vector<std::size_t> sourceBox;
    croppedImages.reserve(boxes.size());
    sourceBox.reserve(boxes.size());
    for (std::size_t b = 0; b < boxes.size(); ++b) {
        cv::Mat cropped = GetRotateCropImage(img, boxes[b]);
        if (!cropped.empty()) {
            croppedImages.push_back(std::move(cropped));
            sourceBox.push_back(b);
        }
    }
    if (croppedImages.empty()) {
        return results;
    }

    // Step 3: Classification (optional)
    std::vector<int> cls_labels(croppedImages.size(), 0);
    std::vector<float> cls_scores(croppedImages.size(), 0.0f);
    if (classifier_) {
        classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);

        // Rotate images classified as upside-down (odd label and score > threshold)
        for (std::size_t i = 0; i < croppedImages.size(); ++i) {
            if (cls_labels[i] % 2 == 1 && cls_scores[i] > _clsThresh) {
                cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
            }
        }
    }

    // Step 4: Text Recognition
    auto textLines = recognizer_->RecognizeBatch(croppedImages);

    // Step 5: Combine results — index i addresses crop i, its text line and
    // its classifier output; sourceBox[i] maps back to the detection box.
    results.reserve(croppedImages.size());
    for (std::size_t i = 0; i < croppedImages.size() && i < textLines.size(); ++i) {
        const auto& box = boxes[sourceBox[i]];

        OCRPredictResult result;
        // Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]
        // NOTE(review): coordinate type assumed to be int — confirm against
        // the OCRPredictResult::box declaration.
        result.box.resize(4);
        for (int j = 0; j < 4; ++j) {
            result.box[j] = {
                static_cast<int>(box.points[j].x),
                static_cast<int>(box.points[j].y)
            };
        }

        result.text = textLines[i].text;
        result.score = textLines[i].score;
        result.cls_label = cls_labels[i];
        result.cls_score = cls_scores[i];

        results.push_back(std::move(result));
    }

    return results;
}

/// @brief Runs only the recognizer on an already-cropped text image.
///
/// @param croppedImage Image passed straight to the recognizer.
/// @return The recognizer's result, or `{ "", 0.0f }` when models are being
///         loaded, the lock cannot be acquired, the engine is not
///         initialized, no recognizer is loaded, or the image is empty.
TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) {
    if (_modelLoading.load()) return { "", 0.0f };
    {
        auto lk = TryLockWithTimeout("PaddleOCRV5Engine::recognizeOnly");
        if (!lk.owns_lock()) return { "", 0.0f };
        if (!_initialized || !recognizer_ || croppedImage.empty()) return { "", 0.0f };
    }
    // Lock released before inference, mirroring ocr().
    return recognizer_->Recognize(croppedImage);
}

} // namespace onnxocr
} // namespace ANSCENTER