modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp

#include "PaddleOCRV5Engine.h"
#include "EPLoader.h"

#include <opencv2/imgproc.hpp>
#include <iostream>
#include <algorithm>

namespace ANSCENTER {
namespace onnxocr {

bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
                                    const std::string& clsModelPath,
                                    const std::string& recModelPath,
                                    const std::string& dictPath,
                                    bool preferTensorRT) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);

    // High-perf options.  The OCR sub-models split into two groups:
    //
    //   1. Detector — its input shape varies continuously with every
    //      plate-ROI aspect ratio.  TRT EP is a poor fit because it
    //      builds a fresh engine for each unique shape (minutes each).
    //      We keep it on CUDA EP with the largest cuDNN workspace and
    //      let cuDNN HEURISTIC handle the per-shape algo selection.
    //
    //   2. Classifier + Recognizer — fixed-bucket shapes (cls is
    //      [1,3,80,160], rec is [1,3,48,{320,480,640,960}]).  These
    //      benefit massively from TRT EP because the engine is built
    //      once per shape and reused forever.
    OrtHandlerOptions detectorOpts;
    // Detector uses CUDA EP with *conservative* cuDNN workspace.
    // Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT
    // engine + ORT arena in play) the max-workspace mode causes cuDNN
    // to pick Winograd/implicit-precomp-GEMM variants that silently
    // fall back to slow NO-WORKSPACE algorithms when the big workspace
    // can't be allocated. With "0" cuDNN picks algorithms that are
    // known to fit and runs ~10x faster in practice.
    detectorOpts.useMaxCudnnWorkspace = false;
    detectorOpts.preferTensorRT       = false;   // never TRT for the detector

    // Classifier (fixed [1,3,80,160]): TRT with no profile is fine.
    OrtHandlerOptions classifierOpts;
    classifierOpts.useMaxCudnnWorkspace = true;
    classifierOpts.preferTensorRT       = preferTensorRT;
    classifierOpts.trtFP16              = true;

    // Recognizer: needs a DYNAMIC profile so one TRT engine covers every
    // (batch, bucket_width) pair we generate at runtime. Without this,
    // each new shape triggers a ~80s engine rebuild mid-stream when a
    // new plate appears or the plate count changes.
    //
    // Profile range:
    //   batch  : 1 .. 16       (16 plates worth of crops is generous)
    //   H      : 48 (fixed)
    //   W      : 320 .. 960    (covers all 4 recognizer buckets)
    //
    // Query the actual input name from the .onnx file instead of
    // hardcoding — PaddleOCR usually exports it as "x" but the name can
    // vary across model versions.
    OrtHandlerOptions recognizerOpts;
    recognizerOpts.useMaxCudnnWorkspace = true;
    recognizerOpts.preferTensorRT       = preferTensorRT;
    recognizerOpts.trtFP16              = true;
    if (preferTensorRT) {
        std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);
        if (recInputName.empty()) {
            std::cerr << "[PaddleOCRV5Engine] Could not query recognizer "
                         "input name — defaulting to 'x'" << std::endl;
            recInputName = "x";
        }
        std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"
                  << recInputName << "' — building TRT dynamic profile "
                  << "[batch=1..16, W=320..960]" << std::endl;
        recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
        recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
        recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
    }

    try {
        // Initialize detector (also triggers EPLoader init in BasicOrtHandler)
        detector_ = std::make_unique<ONNXOCRDetector>(detModelPath, detectorOpts);
        std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;

        // Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
        // BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own
        // inline-static copy. Without this, inference calls from ANSOCR.dll crash.
        if (Ort::Global<void>::api_ == nullptr) {
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        }

        // Initialize classifier (optional)
        if (!clsModelPath.empty()) {
            classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath, classifierOpts);
            std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
        }
        else {
            classifier_.reset();
            std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;
        }

        // Initialize recognizer
        recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath, recognizerOpts);
        if (!recognizer_->LoadDictionary(dictPath)) {
            std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
            return false;
        }
        std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;

        // Pre-warm classifier (fixed [1,3,80,160]) and recognizer (4
        // bucket widths) so the first frame doesn't pay the cuDNN/TRT
        // algorithm-selection tax. The detector is intentionally NOT
        // warmed up: its input shape varies continuously with each
        // plate-ROI aspect ratio, so a warmup at any single canonical
        // shape would cost minutes (TRT) or be useless (CUDA cache miss
        // on the real frame anyway). Real frames will pay the per-shape
        // cuDNN HEURISTIC cost on first use.
        std::cout << "[PaddleOCRV5Engine] Warming up OCR pipeline..." << std::endl;
        if (classifier_) classifier_->Warmup();
        if (recognizer_) recognizer_->Warmup();
        std::cout << "[PaddleOCRV5Engine] Warmup complete" << std::endl;

        _initialized = true;
        std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;
        detector_.reset();
        classifier_.reset();
        recognizer_.reset();
        _initialized = false;
        return false;
    }
}

// Runs the full pipeline on one image: detect text boxes, crop each box,
// optionally classify/flip the crops, recognize the text, and merge
// everything into one OCRPredictResult per successfully cropped box.
//
// Returns an empty vector when a model load is in progress, the lock could
// not be acquired, the engine is not initialized, the image is empty, no
// boxes were detected, or no box produced a usable crop.
std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
    if (_modelLoading.load()) return {};

    std::vector<OCRPredictResult> results;

    {
        auto lk = TryLockWithTimeout("PaddleOCRV5Engine::ocr");
        if (!lk.owns_lock()) return results;
        // Stage pointers are checked here as well (as recognizeOnly /
        // recognizeMany do) because they are dereferenced below.
        if (!_initialized || !detector_ || !recognizer_ || img.empty()) return results;
    }
    // _mutex released — heavy pipeline runs lock-free
    // NOTE(review): the stage pointers are used after the lock is dropped;
    // this relies on Initialize() not running concurrently with inference.

    // Step 1: Text Detection
    auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);

    if (boxes.empty()) {
        return results;
    }

    // Step 2: Crop detected text regions.
    // Empty crops are skipped, so crop index != box index from that point
    // on; cropToBox remembers which box each kept crop came from so Step 5
    // pairs every recognized line with the box it was actually cut from.
    std::vector<cv::Mat> croppedImages;
    std::vector<size_t> cropToBox;
    croppedImages.reserve(boxes.size());
    cropToBox.reserve(boxes.size());
    for (size_t b = 0; b < boxes.size(); b++) {
        cv::Mat cropped = GetRotateCropImage(img, boxes[b]);
        if (!cropped.empty()) {
            croppedImages.push_back(cropped);
            cropToBox.push_back(b);
        }
    }

    // Nothing usable to classify/recognize — skip the model calls, matching
    // the empty-input guard in recognizeMany().
    if (croppedImages.empty()) {
        return results;
    }

    // Step 3: Classification (optional)
    std::vector<int> cls_labels(croppedImages.size(), 0);
    std::vector<float> cls_scores(croppedImages.size(), 0.0f);

    if (classifier_) {
        classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);

        // The output vectors are handed to Classify by name; clamp to their
        // current sizes in case it resized them.
        const size_t classified =
            std::min({ croppedImages.size(), cls_labels.size(), cls_scores.size() });

        // Rotate images classified as upside-down (label=1 and score > threshold)
        for (size_t i = 0; i < classified; i++) {
            if (cls_labels[i] % 2 == 1 && cls_scores[i] > _clsThresh) {
                cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
            }
        }
    }

    // Step 4: Text Recognition
    auto textLines = recognizer_->RecognizeBatch(croppedImages);

    // Step 5: Combine results (index i walks crops, not boxes)
    const size_t produced = std::min(croppedImages.size(), textLines.size());
    results.reserve(produced);
    for (size_t i = 0; i < produced; i++) {
        const auto& srcBox = boxes[cropToBox[i]];
        OCRPredictResult result;

        // Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]
        result.box.resize(4);
        for (int j = 0; j < 4; j++) {
            result.box[j] = {
                static_cast<int>(srcBox.points[j].x),
                static_cast<int>(srcBox.points[j].y)
            };
        }

        result.text      = textLines[i].text;
        result.score     = textLines[i].score;
        result.cls_label = (i < cls_labels.size()) ? cls_labels[i] : 0;
        result.cls_score = (i < cls_scores.size()) ? cls_scores[i] : 0.0f;

        results.push_back(std::move(result));
    }

    return results;
}

// Recognizes the text of a single, already-cropped image, bypassing the
// detector and classifier. Yields an empty line with score 0 when a model
// load is in progress, the lock cannot be taken, the engine or recognizer is
// not ready, or the input image is empty.
TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) {
    // Shared "nothing recognized" value for every bail-out path.
    const auto noText = []() -> TextLine { return { "", 0.0f }; };

    if (_modelLoading.load()) {
        return noText();
    }

    {
        auto guard = TryLockWithTimeout("PaddleOCRV5Engine::recognizeOnly");
        // Short-circuit order matters: engine state is only inspected once
        // the lock is actually held.
        const bool canRun = guard.owns_lock()
                         && _initialized
                         && recognizer_
                         && !croppedImage.empty();
        if (!canRun) {
            return noText();
        }
    }

    // Lock is released before inference runs.
    return recognizer_->Recognize(croppedImage);
}

// Recognizes a batch of already-cropped images, bypassing the detector and
// classifier. When the engine cannot run (model load in progress, lock not
// acquired, not initialized, no recognizer, or no inputs) the result is one
// default-constructed TextLine per input image.
std::vector<TextLine> PaddleOCRV5Engine::recognizeMany(const std::vector<cv::Mat>& croppedImages) {
    // Placeholder output sized to the input, used by every bail-out path.
    const auto blankLines = [&croppedImages]() {
        return std::vector<TextLine>(croppedImages.size());
    };

    if (_modelLoading.load()) {
        return blankLines();
    }

    {
        auto guard = TryLockWithTimeout("PaddleOCRV5Engine::recognizeMany");
        // Short-circuit order matters: engine state is only inspected once
        // the lock is actually held.
        const bool canRun = guard.owns_lock()
                         && _initialized
                         && recognizer_
                         && !croppedImages.empty();
        if (!canRun) {
            return blankLines();
        }
    }

    // Hand off to the bucketed, batched path in ONNXOCRRecognizer; the lock
    // is released before inference runs.
    return recognizer_->RecognizeBatch(croppedImages);
}

} // namespace onnxocr
} // namespace ANSCENTER
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`#include "PaddleOCRV5Engine.h"`
			`#include "EPLoader.h"`

			`#include <opencv2/imgproc.hpp>`
			`#include <iostream>`
			`#include <algorithm>`

			`namespace ANSCENTER {`
			`namespace onnxocr {`

			`bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,`
			`const std::string& clsModelPath,`
			`const std::string& recModelPath,`
Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`const std::string& dictPath,`
			`bool preferTensorRT) {`
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`std::lock_guard<std::recursive_mutex> lock(_mutex);`
Fix mutex lock issues (OCR and FR) 2026-04-13 20:38:40 +10:00			`ModelLoadingGuard mlg(_modelLoading);`
Initial setup for CLion 2026-03-28 16:54:11 +11:00
Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`// High-perf options. The OCR sub-models split into two groups:`
			`//`
			`// 1. Detector — its input shape varies continuously with every`
			`// plate-ROI aspect ratio. TRT EP is a poor fit because it`
			`// builds a fresh engine for each unique shape (minutes each).`
			`// We keep it on CUDA EP with the largest cuDNN workspace and`
			`// let cuDNN HEURISTIC handle the per-shape algo selection.`
			`//`
			`// 2. Classifier + Recognizer — fixed-bucket shapes (cls is`
			`// [1,3,80,160], rec is [1,3,48,{320,480,640,960}]). These`
			`// benefit massively from TRT EP because the engine is built`
			`// once per shape and reused forever.`
			`OrtHandlerOptions detectorOpts;`
			`// Detector uses CUDA EP with conservative cuDNN workspace.`
			`// Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT`
			`// engine + ORT arena in play) the max-workspace mode causes cuDNN`
			`// to pick Winograd/implicit-precomp-GEMM variants that silently`
			`// fall back to slow NO-WORKSPACE algorithms when the big workspace`
			`// can't be allocated. With "0" cuDNN picks algorithms that are`
			`// known to fit and runs ~10x faster in practice.`
			`detectorOpts.useMaxCudnnWorkspace = false;`
			`detectorOpts.preferTensorRT = false; // never TRT for the detector`

			`// Classifier (fixed [1,3,80,160]): TRT with no profile is fine.`
			`OrtHandlerOptions classifierOpts;`
			`classifierOpts.useMaxCudnnWorkspace = true;`
			`classifierOpts.preferTensorRT = preferTensorRT;`
			`classifierOpts.trtFP16 = true;`

			`// Recognizer: needs a DYNAMIC profile so one TRT engine covers every`
			`// (batch, bucket_width) pair we generate at runtime. Without this,`
			`// each new shape triggers a ~80s engine rebuild mid-stream when a`
			`// new plate appears or the plate count changes.`
			`//`
			`// Profile range:`
			`// batch : 1 .. 16 (16 plates worth of crops is generous)`
			`// H : 48 (fixed)`
			`// W : 320 .. 960 (covers all 4 recognizer buckets)`
			`//`
			`// Query the actual input name from the .onnx file instead of`
			`// hardcoding — PaddleOCR usually exports it as "x" but the name can`
			`// vary across model versions.`
			`OrtHandlerOptions recognizerOpts;`
			`recognizerOpts.useMaxCudnnWorkspace = true;`
			`recognizerOpts.preferTensorRT = preferTensorRT;`
			`recognizerOpts.trtFP16 = true;`
			`if (preferTensorRT) {`
			`std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);`
			`if (recInputName.empty()) {`
			`std::cerr << "[PaddleOCRV5Engine] Could not query recognizer "`
			`"input name — defaulting to 'x'" << std::endl;`
			`recInputName = "x";`
			`}`
			`std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"`
			`<< recInputName << "' — building TRT dynamic profile "`
			`<< "[batch=1..16, W=320..960]" << std::endl;`
			`recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";`
			`recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";`
			`recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";`
			`}`

Initial setup for CLion 2026-03-28 16:54:11 +11:00			`try {`
			`// Initialize detector (also triggers EPLoader init in BasicOrtHandler)`
Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`detector_ = std::make_unique<ONNXOCRDetector>(detModelPath, detectorOpts);`
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;`

			`// Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.`
			`// BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own`
			`// inline-static copy. Without this, inference calls from ANSOCR.dll crash.`
			`if (Ort::Global<void>::api_ == nullptr) {`
			`Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));`
			`}`

			`// Initialize classifier (optional)`
			`if (!clsModelPath.empty()) {`
Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath, classifierOpts);`
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;`
			`}`
			`else {`
			`classifier_.reset();`
			`std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;`
			`}`

			`// Initialize recognizer`
Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath, recognizerOpts);`
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`if (!recognizer_->LoadDictionary(dictPath)) {`
			`std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;`
			`return false;`
			`}`
			`std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;`

Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`// Pre-warm classifier (fixed [1,3,80,160]) and recognizer (4`
			`// bucket widths) so the first frame doesn't pay the cuDNN/TRT`
			`// algorithm-selection tax. The detector is intentionally NOT`
			`// warmed up: its input shape varies continuously with each`
			`// plate-ROI aspect ratio, so a warmup at any single canonical`
			`// shape would cost minutes (TRT) or be useless (CUDA cache miss`
			`// on the real frame anyway). Real frames will pay the per-shape`
			`// cuDNN HEURISTIC cost on first use.`
			`std::cout << "[PaddleOCRV5Engine] Warming up OCR pipeline..." << std::endl;`
			`if (classifier_) classifier_->Warmup();`
			`if (recognizer_) recognizer_->Warmup();`
			`std::cout << "[PaddleOCRV5Engine] Warmup complete" << std::endl;`

Initial setup for CLion 2026-03-28 16:54:11 +11:00			`_initialized = true;`
			`std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;`
			`return true;`
			`}`
			`catch (const std::exception& e) {`
			`std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;`
			`detector_.reset();`
			`classifier_.reset();`
			`recognizer_.reset();`
			`_initialized = false;`
			`return false;`
			`}`
			`}`

			`std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {`
Fix mutex lock issues (OCR and FR) 2026-04-13 20:38:40 +10:00			`if (_modelLoading.load()) return {};`
Initial setup for CLion 2026-03-28 16:54:11 +11:00
			`std::vector<OCRPredictResult> results;`

Fix mutex lock issues (OCR and FR) 2026-04-13 20:38:40 +10:00			`{`
			`auto lk = TryLockWithTimeout("PaddleOCRV5Engine::ocr");`
			`if (!lk.owns_lock()) return results;`
			`if (!_initialized \|\| img.empty()) return results;`
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`}`
Fix mutex lock issues (OCR and FR) 2026-04-13 20:38:40 +10:00			`// _mutex released — heavy pipeline runs lock-free`
Initial setup for CLion 2026-03-28 16:54:11 +11:00
			`// Step 1: Text Detection`
			`auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);`

			`if (boxes.empty()) {`
			`return results;`
			`}`

			`// Step 2: Crop detected text regions`
			`std::vector<cv::Mat> croppedImages;`
			`croppedImages.reserve(boxes.size());`
			`for (auto& box : boxes) {`
			`cv::Mat cropped = GetRotateCropImage(img, box);`
			`if (!cropped.empty()) {`
			`croppedImages.push_back(cropped);`
			`}`
			`}`

			`// Step 3: Classification (optional)`
			`std::vector<int> cls_labels(croppedImages.size(), 0);`
			`std::vector<float> cls_scores(croppedImages.size(), 0.0f);`

			`if (classifier_) {`
			`classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);`

			`// Rotate images classified as upside-down (label=1 and score > threshold)`
			`for (size_t i = 0; i < croppedImages.size(); i++) {`
			`if (cls_labels[i] % 2 == 1 && cls_scores[i] > _clsThresh) {`
			`cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);`
			`}`
			`}`
			`}`

			`// Step 4: Text Recognition`
			`auto textLines = recognizer_->RecognizeBatch(croppedImages);`

			`// Step 5: Combine results`
			`for (size_t i = 0; i < boxes.size() && i < textLines.size(); i++) {`
			`OCRPredictResult result;`

			`// Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]`
			`result.box.resize(4);`
			`for (int j = 0; j < 4; j++) {`
			`result.box[j] = {`
			`static_cast<int>(boxes[i].points[j].x),`
			`static_cast<int>(boxes[i].points[j].y)`
			`};`
			`}`

			`result.text = textLines[i].text;`
			`result.score = textLines[i].score;`
			`result.cls_label = cls_labels[i];`
			`result.cls_score = cls_scores[i];`

			`results.push_back(result);`
			`}`

			`return results;`
			`}`

Initial OCR to support ALPR mode with country support 2026-03-29 22:51:39 +11:00			`TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) {`
Fix mutex lock issues (OCR and FR) 2026-04-13 20:38:40 +10:00			`if (_modelLoading.load()) return { "", 0.0f };`
			`{`
			`auto lk = TryLockWithTimeout("PaddleOCRV5Engine::recognizeOnly");`
			`if (!lk.owns_lock()) return { "", 0.0f };`
			`if (!_initialized \|\| !recognizer_ \|\| croppedImage.empty()) return { "", 0.0f };`
			`}`
Initial OCR to support ALPR mode with country support 2026-03-29 22:51:39 +11:00			`return recognizer_->Recognize(croppedImage);`
			`}`

Improve ALPR_OCR peformance 2026-04-14 20:30:21 +10:00			`std::vector<TextLine> PaddleOCRV5Engine::recognizeMany(const std::vector<cv::Mat>& croppedImages) {`
			`if (_modelLoading.load()) return std::vector<TextLine>(croppedImages.size());`
			`{`
			`auto lk = TryLockWithTimeout("PaddleOCRV5Engine::recognizeMany");`
			`if (!lk.owns_lock()) return std::vector<TextLine>(croppedImages.size());`
			`if (!_initialized \|\| !recognizer_ \|\| croppedImages.empty()) {`
			`return std::vector<TextLine>(croppedImages.size());`
			`}`
			`}`
			`// Delegates to the bucketed, batched path in ONNXOCRRecognizer.`
			`return recognizer_->RecognizeBatch(croppedImages);`
			`}`

Initial setup for CLion 2026-03-28 16:54:11 +11:00			`} // namespace onnxocr`
			`} // namespace ANSCENTER`