Refactor project structure

2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions
--- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
+++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
@@ -0,0 +1,130 @@
+#include "PaddleOCRV5Engine.h"
+#include "EPLoader.h"
+
+#include <opencv2/imgproc.hpp>
+#include <iostream>
+#include <algorithm>
+
+namespace ANSCENTER {
+namespace onnxocr {
+
+// Builds the detection -> (optional) classification -> recognition pipeline.
+//
+//   detModelPath  path handed to the ONNXOCRDetector constructor
+//   clsModelPath  path handed to the ONNXOCRClassifier constructor; an empty
+//                 string disables the classification stage
+//   recModelPath  path handed to the ONNXOCRRecognizer constructor
+//   dictPath      dictionary passed to ONNXOCRRecognizer::LoadDictionary
+//
+// Returns true once every requested stage is ready. Returns false when the
+// dictionary cannot be loaded or when any step throws std::exception; in both
+// cases every stage is released and the engine is left uninitialized, so a
+// failed re-initialization can never leave ocr() running a half-built pipeline.
+// _mutex is held for the whole call.
+bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
+                                    const std::string& clsModelPath,
+                                    const std::string& recModelPath,
+                                    const std::string& dictPath) {
+    std::lock_guard<std::recursive_mutex> lock(_mutex);
+
+    // Drop the ready flag up front: the stages below are replaced one by one,
+    // and the engine must not count as initialized until all of them succeed.
+    _initialized = false;
+
+    // Shared cleanup for every failure path.
+    const auto releasePipeline = [this]() {
+        detector_.reset();
+        classifier_.reset();
+        recognizer_.reset();
+        _initialized = false;
+    };
+
+    try {
+        // Initialize detector (also triggers EPLoader init in BasicOrtHandler)
+        detector_ = std::make_unique<ONNXOCRDetector>(detModelPath);
+        std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;
+
+        // Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
+        // BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own
+        // inline-static copy. Without this, inference calls from ANSOCR.dll crash.
+        // This must stay after the detector is built, because that is what
+        // triggers the EPLoader initialization the API pointer comes from.
+        if (Ort::Global<void>::api_ == nullptr) {
+            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
+        }
+
+        // Initialize classifier (optional)
+        if (!clsModelPath.empty()) {
+            classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath);
+            std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
+        }
+        else {
+            // Also clears a classifier left over from an earlier Initialize().
+            classifier_.reset();
+            std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;
+        }
+
+        // Initialize recognizer
+        recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath);
+        if (!recognizer_->LoadDictionary(dictPath)) {
+            std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
+            // Without a dictionary the pipeline is unusable; release it just
+            // like the exception path does instead of keeping partial state.
+            releasePipeline();
+            return false;
+        }
+        std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;
+
+        _initialized = true;
+        std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
+        return true;
+    }
+    catch (const std::exception& e) {
+        std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;
+        releasePipeline();
+        return false;
+    }
+}
+
+// Runs the full OCR pipeline on one image: detect text boxes, crop each box,
+// optionally classify/rotate the crops, recognize the text, and merge
+// everything into one OCRPredictResult per recognized crop.
+//
+//   img  input image; an empty image yields an empty result
+//
+// Returns an empty vector when the engine is not initialized, when nothing is
+// detected, or when no detected box produces a usable crop. Each returned
+// entry carries the four box corners (truncated to int), the recognized text
+// and score, and the classifier label/score (0 / 0.0f when no classifier is
+// loaded). _mutex is held for the whole call.
+std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
+    std::lock_guard<std::recursive_mutex> lock(_mutex);
+
+    std::vector<OCRPredictResult> results;
+
+    if (!_initialized || img.empty()) {
+        return results;
+    }
+
+    // Step 1: Text Detection
+    auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);
+
+    if (boxes.empty()) {
+        return results;
+    }
+
+    // Step 2: Crop detected text regions.
+    // Boxes whose crop comes back empty are skipped, so the crop list can be
+    // shorter than the box list. cropToBox[k] remembers which box produced
+    // croppedImages[k]; without it every later stage would be paired with the
+    // wrong box as soon as one crop is dropped.
+    std::vector<cv::Mat> croppedImages;
+    std::vector<size_t> cropToBox;
+    croppedImages.reserve(boxes.size());
+    cropToBox.reserve(boxes.size());
+    for (size_t b = 0; b < boxes.size(); ++b) {
+        cv::Mat cropped = GetRotateCropImage(img, boxes[b]);
+        if (!cropped.empty()) {
+            croppedImages.push_back(cropped);
+            cropToBox.push_back(b);
+        }
+    }
+
+    // Nothing usable to classify or recognize.
+    if (croppedImages.empty()) {
+        return results;
+    }
+
+    // Step 3: Classification (optional)
+    // Defaults (label 0, score 0) are what gets reported when no classifier
+    // is loaded.
+    std::vector<int> cls_labels(croppedImages.size(), 0);
+    std::vector<float> cls_scores(croppedImages.size(), 0.0f);
+
+    if (classifier_) {
+        // NOTE(review): assumes Classify keeps one label/score per crop in
+        // these vectors - confirm against ONNXOCRClassifier.
+        classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);
+
+        // Rotate crops with an odd label whose score exceeds the threshold
+        // (presumably the "upside-down" classes - confirm label mapping).
+        for (size_t k = 0; k < croppedImages.size(); k++) {
+            if (cls_labels[k] % 2 == 1 && cls_scores[k] > _clsThresh) {
+                cv::rotate(croppedImages[k], croppedImages[k], cv::ROTATE_180);
+            }
+        }
+    }
+
+    // Step 4: Text Recognition
+    auto textLines = recognizer_->RecognizeBatch(croppedImages);
+
+    // Step 5: Combine results.
+    // Iterate over crops (not boxes): crop k, its classifier output and its
+    // text line all share index k, while the geometry comes from the box the
+    // crop was cut from.
+    const size_t lineCount = std::min<size_t>(croppedImages.size(), textLines.size());
+    results.reserve(lineCount);
+    for (size_t k = 0; k < lineCount; k++) {
+        const auto& sourceBox = boxes[cropToBox[k]];
+
+        OCRPredictResult result;
+
+        // Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]
+        result.box.resize(4);
+        for (int j = 0; j < 4; j++) {
+            result.box[j] = {
+                static_cast<int>(sourceBox.points[j].x),
+                static_cast<int>(sourceBox.points[j].y)
+            };
+        }
+
+        result.text      = textLines[k].text;
+        result.score     = textLines[k].score;
+        result.cls_label = cls_labels[k];
+        result.cls_score = cls_scores[k];
+
+        results.push_back(result);
+    }
+
+    return results;
+}
+
+} // namespace onnxocr
+} // namespace ANSCENTER