Support tracker to improve ALPR_OCR

2026-04-14 21:18:10 +10:00
parent f9a0af8949
commit 5706615ed5
4 changed files with 435 additions and 62 deletions
--- a/engines/ONNXEngine/ONNXEngine.h
+++ b/engines/ONNXEngine/ONNXEngine.h
@@ -253,23 +253,50 @@ namespace ANSCENTER {
    // cuDNN workspace.  Default-constructed = identical to the legacy
    // behavior (CUDA EP only, minimal cuDNN workspace).
    // ====================================================================
    // ====================================================================
    //  OrtHandlerOptions
    //
    //  Per-session knobs for the ORT execution providers. Options are
    //  grouped by target backend. A field set for one backend is silently
    //  ignored by every other backend — e.g. `trtProfileMinShapes` only
    //  affects TensorRT EP (NVIDIA); DirectML and OpenVINO don't read it.
    //
    //  When adding a new backend optimization:
    //    - put the new field in the correct backend section below
    //    - NEVER reuse an NVIDIA field for AMD/Intel tuning
    //    - update the matching Build*OcrOptions() helper in
    //      PaddleOCRV5Engine.cpp to populate it
    //
    //  The NVIDIA section is considered locked — it's been tuned end-to-end
    //  for the ANSALPR pipeline and should not change unless fixing a
    //  specific NVIDIA-observable regression.
    // ====================================================================
    struct OrtHandlerOptions {
-        // Try to attach TensorRT EP before CUDA EP (NVIDIA only).
+        // ----------------------------------------------------------------
-        // Falls back to CUDA EP automatically if TRT EP creation or session
+        //  NVIDIA (CUDA EP + TensorRT EP) — LOCKED
-        // creation fails.  Engines are cached on disk for fast reload.
+        //
        //  These fields only have effect when the resolved execution
        //  provider is CUDA EP or TensorRT EP. DirectML (AMD), OpenVINO
        //  (Intel), and CPU EP silently ignore every field below. Do not
        //  repurpose them for other backends.
        // ----------------------------------------------------------------
        // Try to attach TensorRT EP before CUDA EP. Falls back to CUDA EP
        // automatically if TRT EP creation or session creation fails.
        // Engines are cached on disk for fast reload.
        bool preferTensorRT = false;
-        // Use the largest cuDNN conv workspace.  cuDNN can then pick fast
+        // Use the largest cuDNN conv workspace. cuDNN can then pick fast
        // algorithms (Winograd, implicit-precomp-GEMM with big workspaces).
        // Defaults off because some deployments share VRAM with TRT engines
        // and need the minimal-workspace mode to avoid OOM.
        bool useMaxCudnnWorkspace = false;
-        // Where to cache built TRT engines.  Empty → default
+        // Where to cache built TRT engines. Empty → default
-        // %TEMP%/ANSCENTER/TRTEngineCache.  Only used when preferTensorRT.
+        // %TEMP%/ANSCENTER/TRTEngineCache. Only used when preferTensorRT.
        std::string trtEngineCacheDir;
-        // FP16 builds for TRT EP.  Recommended for inference; ignored if
+        // FP16 builds for TRT EP. Recommended for inference; ignored if
        // preferTensorRT is false.
        bool trtFP16 = true;
@@ -286,6 +313,28 @@ namespace ANSCENTER {
        std::string trtProfileMinShapes;
        std::string trtProfileOptShapes;
        std::string trtProfileMaxShapes;
        // ----------------------------------------------------------------
        //  Intel (OpenVINO EP) — OPEN FOR OPTIMIZATION
        //
        //  Currently unused. Future Intel-specific tuning (cache_dir for
        //  kernel cache, explicit device selection, INT8 routing, etc.)
        //  should add fields here and wire them through the OpenVINO
        //  branch of initialize_handler(). Do NOT put Intel logic inside
        //  TryAppendCUDA or TryAppendTensorRT.
        // ----------------------------------------------------------------
        // (Intel fields go here — none yet)
        // ----------------------------------------------------------------
        //  AMD (DirectML EP / MIGraphX EP) — OPEN FOR OPTIMIZATION
        //
        //  Currently unused. Future AMD-specific tuning (graph optimization
        //  gate for RDNA3+, MIGraphX cache dir on Linux, etc.) should add
        //  fields here and wire them through the DirectML branch of
        //  initialize_handler(). Do NOT put AMD logic inside TryAppendCUDA
        //  or TryAppendTensorRT.
        // ----------------------------------------------------------------
        // (AMD fields go here — none yet)
    };
    // ====================================================================
--- a/modules/ANSLPR/ANSLPR_OCR.cpp
+++ b/modules/ANSLPR/ANSLPR_OCR.cpp
@@ -547,6 +547,181 @@ namespace ANSCENTER
 		return colour;
 	}
 	// ── Full-frame vs pipeline auto-detection ────────────────────────────
 	// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
 	// watches whether consecutive frames from a given camera have the exact
 	// same (width, height). Pre-cropped pipeline inputs vary by a few
 	// pixels per crop, so the exact-match check fails and we return false.
 	// Real video frames keep the same resolution from frame to frame, so
 	// after a few consistent frames we flip into FULL-FRAME mode and start
 	// running the ALPRChecker voting + ensureUniquePlateText dedup.
 	/// Decides whether the ALPRChecker voting layer should run for this frame.
 	///
 	/// @param imageSize  Dimensions of the frame passed to inference.
 	/// @param cameraId   Key selecting the per-camera size tracker; an entry is
 	///                   created on first use.
 	/// @return true once CONFIRM_THRESHOLD consecutive frames of identical
 	///         size have been seen for this camera; false when the checker is
 	///         disabled, the frame is narrower than MIN_FULLFRAME_WIDTH, or the
 	///         size streak has not been confirmed yet.
 	///
 	/// NOTE(review): _imageSizeTrackers is read and written here without a
 	/// lock — confirm that callers serialise access per engine instance.
 	bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
 	                                        const std::string& cameraId) {
 		// Two unconditional opt-outs: the checker was switched off, or the
 		// image is too narrow to be a full frame. Neither touches tracker state.
 		const bool tooNarrow = imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH;
 		if (!_enableALPRChecker || tooNarrow) {
 			return false;
 		}

 		ImageSizeTracker& state = _imageSizeTrackers[cameraId];
 		const bool previousMode = state.detectedFullFrame;

 		const bool sameSizeAsLast = (imageSize == state.lastSize);
 		if (!sameSizeAsLast) {
 			// Size changed: restart the streak from this frame and drop back
 			// to pipeline mode.
 			state.lastSize          = imageSize;
 			state.consistentCount   = 1;
 			state.detectedFullFrame = false;
 		} else {
 			++state.consistentCount;
 			if (state.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
 				state.detectedFullFrame = true;
 			}
 		}

 		// Log only on a mode transition so steady-state frames stay quiet.
 		if (previousMode != state.detectedFullFrame) {
 			ANS_DBG("ALPR_OCR_Checker",
 			        "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
 			        cameraId.c_str(),
 			        state.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
 			        imageSize.width, imageSize.height, state.consistentCount);
 		}
 		return state.detectedFullFrame;
 	}
 	// ── Spatial plate dedup with accumulated scoring ─────────────────────
 	// Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one
 	// detection in the same frame ends up with the same plate text (e.g.
 	// tracker occlusion or two cars in a single frame reading the same
 	// string), we resolve the ambiguity by accumulating confidence per
 	// spatial location across frames. The location with the higher running
 	// score keeps the plate text; the loser has its className cleared and
 	// is dropped from the output.
 	void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
 	                                        const std::string& cameraId) {
 		std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
 		auto& identities = _plateIdentities[cameraId];
 		// Auto-detect mode by detection count.
 		//   1 detection  → pipeline/single-crop mode → no dedup needed.
 		//   2+ detections → full-frame mode → apply accumulated scoring.
 		if (results.size() <= 1) {
 			// Still age out stale spatial identities from previous full-frame calls
 			if (!identities.empty()) {
 				constexpr int MAX_UNSEEN_FRAMES = 30;
 				for (auto& id : identities) id.framesSinceLastSeen++;
 				for (auto it = identities.begin(); it != identities.end(); ) {
 					if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
 						it = identities.erase(it);
 					} else {
 						++it;
 					}
 				}
 			}
 			return;
 		}
 		// Helper: IoU between two rects.
 		auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
 			int x1 = std::max(a.x, b.x);
 			int y1 = std::max(a.y, b.y);
 			int x2 = std::min(a.x + a.width,  b.x + b.width);
 			int y2 = std::min(a.y + a.height, b.y + b.height);
 			if (x2 <= x1 || y2 <= y1) return 0.0f;
 			float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
 			float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
 			return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
 		};
 		// Helper: find matching spatial identity by bounding-box overlap.
 		auto findSpatialMatch = [&](const cv::Rect& box,
 		                            const std::string& plateText) -> SpatialPlateIdentity* {
 			for (auto& id : identities) {
 				if (id.plateText == plateText) {
 					cv::Rect storedRect(
 						static_cast<int>(id.center.x - box.width  * 0.5f),
 						static_cast<int>(id.center.y - box.height * 0.5f),
 						box.width, box.height);
 					if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
 						return &id;
 					}
 				}
 			}
 			return nullptr;
 		};
 		// Step 1: Build map of plateText → candidate indices
 		std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
 		for (size_t i = 0; i < results.size(); ++i) {
 			if (results[i].className.empty()) continue;
 			plateCandidates[results[i].className].push_back(i);
 		}
 		// Step 2: Resolve duplicates using spatial accumulated scores
 		for (auto& [plateText, indices] : plateCandidates) {
 			if (indices.size() <= 1) continue;
 			size_t winner = indices[0];
 			float  bestScore = 0.0f;
 			for (size_t idx : indices) {
 				float score = results[idx].confidence;
 				auto* match = findSpatialMatch(results[idx].box, plateText);
 				if (match) {
 					score = match->accumulatedScore + results[idx].confidence;
 				}
 				if (score > bestScore) {
 					bestScore = score;
 					winner    = idx;
 				}
 			}
 			for (size_t idx : indices) {
 				if (idx != winner) {
 					results[idx].className.clear();
 				}
 			}
 		}
 		// Step 3: Update spatial identities — winners accumulate, losers decay
 		constexpr float DECAY_FACTOR     = 0.8f;
 		constexpr float MIN_SCORE        = 0.1f;
 		constexpr int   MAX_UNSEEN_FRAMES = 30;
 		for (auto& id : identities) id.framesSinceLastSeen++;
 		for (auto& r : results) {
 			if (r.className.empty()) continue;
 			cv::Point2f center(
 				r.box.x + r.box.width  * 0.5f,
 				r.box.y + r.box.height * 0.5f);
 			auto* match = findSpatialMatch(r.box, r.className);
 			if (match) {
 				match->accumulatedScore += r.confidence;
 				match->center            = center;
 				match->framesSinceLastSeen = 0;
 			} else {
 				identities.push_back({ center, r.className, r.confidence, 0 });
 			}
 		}
 		// Decay unseen identities and remove stale ones
 		for (auto it = identities.begin(); it != identities.end(); ) {
 			if (it->framesSinceLastSeen > 0) {
 				it->accumulatedScore *= DECAY_FACTOR;
 			}
 			if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
 				it = identities.erase(it);
 			} else {
 				++it;
 			}
 		}
 		// Step 4: Remove entries with cleared plate text
 		results.erase(
 			std::remove_if(results.begin(), results.end(),
 				[](const Object& o) { return o.className.empty(); }),
 			results.end());
 	}
 	// ── OCR on a single plate ROI ────────────────────────────────────────
 	// Returns the plate text via the out-parameter and populates alprExtraInfo
 	// with the structured ALPR JSON (zone parts) when ALPR mode is active.
@@ -712,6 +887,13 @@ namespace ANSCENTER
 			std::vector<Object> output;
 			output.reserve(plateInfos.size());
 			// Decide once per frame whether the tracker-based correction
 			// layer should run. We auto-detect full-frame vs pipeline mode
 			// by watching for consecutive frames of identical size, exactly
 			// the same way ANSALPR_OD does it.
 			const bool useChecker = shouldUseALPRChecker(
 				cv::Size(frameWidth, frameHeight), cameraId);
 			for (const auto& info : plateInfos) {
 				std::string combinedText;
 				for (size_t cropIdx : info.cropIndices) {
@@ -726,8 +908,9 @@ namespace ANSCENTER
 				Object lprObject = lprOutput[info.origIndex];
 				lprObject.cameraId = cameraId;
-				// Cross-frame stabilization (unchanged)
+				// Cross-frame stabilization: per-track majority vote in
-				if (_enableALPRChecker) {
+				// full-frame mode, raw OCR text in pipeline mode.
 				if (useChecker) {
 					lprObject.className = alprChecker.checkPlateByTrackId(
 						cameraId, combinedText, lprObject.trackId);
 				}
@@ -747,6 +930,14 @@ namespace ANSCENTER
 				output.push_back(std::move(lprObject));
 			}
 			// Spatial dedup: if two detections in the same frame ended up
 			// with the same plate text, keep only the one whose spatial
 			// history has the higher accumulated confidence. Skip this in
 			// pipeline mode because there's only ever one plate per call.
 			if (useChecker) {
 				ensureUniquePlateText(output, cameraId);
 			}
 			return output;
 		}
 		catch (const cv::Exception& e) {
--- a/modules/ANSLPR/ANSLPR_OCR.h
+++ b/modules/ANSLPR/ANSLPR_OCR.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <string>
 #include <mutex>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -45,6 +46,66 @@ namespace ANSCENTER
        ALPRChecker     alprChecker;
        // ----------------------------------------------------------------
        //  Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
        //
        //  When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each
        //  frame is a different small image), the tracker can't work — the
        //  LP detector sees a totally new image every call so trackIds mean
        //  nothing. In that "pipeline" mode we must skip the ALPRChecker
        //  voting layer entirely and return raw OCR results.
        //
        //  When the caller feeds full-frame video (same resolution every
        //  frame, plates moving through the scene), the tracker works
        //  normally and we run plate text through ALPRChecker majority
        //  voting + spatial dedup to stabilise readings.
        //
        //  Mode is auto-detected by watching whether consecutive frames
        //  share the exact same (width, height) for at least
        //  CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels;
        //  full-frame video keeps the same resolution on every frame.
        // ----------------------------------------------------------------
        // Per-camera state for the full-frame vs pipeline heuristic used by
        // shouldUseALPRChecker().
        struct ImageSizeTracker {
            // Consecutive same-size frames required before full-frame mode is declared.
            static constexpr int CONFIRM_THRESHOLD   = 5;
            // Frames narrower than this are treated as pipeline crops outright.
            static constexpr int MIN_FULLFRAME_WIDTH = 1000;

            // Size of the most recent frame that took part in detection.
            cv::Size lastSize = cv::Size(0, 0);
            // Length of the current run of frames whose size equals lastSize.
            int      consistentCount{ 0 };
            // True once the run reached CONFIRM_THRESHOLD; cleared on any size change.
            bool     detectedFullFrame{ false };
        };
        std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers;
        [[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,
                                                const std::string& cameraId);
        // ----------------------------------------------------------------
        //  Spatial plate identity persistence (ported from ANSALPR_OD)
        //
        //  Prevents the same plate string from appearing on two different
        //  vehicles in the same frame. The LP tracker may briefly assign
        //  the same trackId to two different plates when vehicles pass
        //  each other, or two different trackIds to the same plate when
        //  occlusion breaks a track. In either case, OCR can produce the
        //  same text for two spatial locations for a frame or two — which
        //  looks like "plate flicker" in the UI.
        //
        //  ensureUniquePlateText() resolves the ambiguity by accumulating
        //  confidence per spatial location. When two detections share a
        //  plate text, the one whose spatial history has the higher score
        //  wins and the other has its className cleared.
        // ----------------------------------------------------------------
        // One remembered (location, plate text) pairing used by
        // ensureUniquePlateText(). Field order matters: the struct is
        // brace-initialised positionally as { center, text, score, frames }.
        struct SpatialPlateIdentity {
            // Plate centre in frame coordinates, updated on every match.
            cv::Point2f center;
            // Plate string this location is associated with.
            std::string plateText;
            // Running sum of detection confidences; decays while unseen.
            float       accumulatedScore{ 0.0f };
            // Frames elapsed since this identity last matched a detection.
            int         framesSinceLastSeen{ 0 };
        };
        std::mutex _plateIdentitiesMutex;
        std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
        static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold
        void ensureUniquePlateText(std::vector<Object>& results,
                                   const std::string& cameraId);
        // --- Original model zip path (reused for ANSONNXOCR initialization) ---
        std::string     _modelZipFilePath;
--- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
+++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
@@ -8,60 +8,62 @@
 namespace ANSCENTER {
 namespace onnxocr {
-bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
+// ============================================================================
-                                    const std::string& clsModelPath,
+//  Per-backend OCR option builders
-                                    const std::string& recModelPath,
+//
-                                    const std::string& dictPath,
+//  Each backend (NVIDIA / AMD / Intel / CPU) has its own helper that returns
-                                    bool preferTensorRT) {
+//  a fully-populated set of OrtHandlerOptions for the detector, classifier,
-    std::lock_guard<std::recursive_mutex> lock(_mutex);
+//  and recognizer sub-models. PaddleOCRV5Engine::Initialize dispatches to the
-    ModelLoadingGuard mlg(_modelLoading);
+//  correct helper based on the engine type that EPLoader resolved at startup.
 //
 //  Adding a new backend optimization is a strictly contained change: touch
 //  only that backend's builder. The others — especially NVIDIA, which is
 //  hand-tuned and should not regress — stay untouched.
 // ============================================================================
-    // High-perf options.  The OCR sub-models split into two groups:
+namespace {
-    //
+
-    //   1. Detector — its input shape varies continuously with every
+struct PerModelOcrOptions {
    //      plate-ROI aspect ratio.  TRT EP is a poor fit because it
    //      builds a fresh engine for each unique shape (minutes each).
    //      We keep it on CUDA EP with the largest cuDNN workspace and
    //      let cuDNN HEURISTIC handle the per-shape algo selection.
    //
    //   2. Classifier + Recognizer — fixed-bucket shapes (cls is
    //      [1,3,80,160], rec is [1,3,48,{320,480,640,960}]).  These
    //      benefit massively from TRT EP because the engine is built
    //      once per shape and reused forever.
    OrtHandlerOptions detectorOpts;
    // Detector uses CUDA EP with *conservative* cuDNN workspace.
    // Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT
    // engine + ORT arena in play) the max-workspace mode causes cuDNN
    // to pick Winograd/implicit-precomp-GEMM variants that silently
    // fall back to slow NO-WORKSPACE algorithms when the big workspace
    // can't be allocated. With "0" cuDNN picks algorithms that are
    // known to fit and runs ~10x faster in practice.
    detectorOpts.useMaxCudnnWorkspace = false;
    detectorOpts.preferTensorRT       = false;   // never TRT for the detector
    // Classifier (fixed [1,3,80,160]): TRT with no profile is fine.
    OrtHandlerOptions classifierOpts;
    classifierOpts.useMaxCudnnWorkspace = true;
    classifierOpts.preferTensorRT       = preferTensorRT;
    classifierOpts.trtFP16              = true;
    // Recognizer: needs a DYNAMIC profile so one TRT engine covers every
    // (batch, bucket_width) pair we generate at runtime. Without this,
    // each new shape triggers a ~80s engine rebuild mid-stream when a
    // new plate appears or the plate count changes.
    //
    // Profile range:
    //   batch  : 1 .. 16       (16 plates worth of crops is generous)
    //   H      : 48 (fixed)
    //   W      : 320 .. 960    (covers all 4 recognizer buckets)
    //
    // Query the actual input name from the .onnx file instead of
    // hardcoding — PaddleOCR usually exports it as "x" but the name can
    // vary across model versions.
    OrtHandlerOptions recognizerOpts;
-    recognizerOpts.useMaxCudnnWorkspace = true;
+};
-    recognizerOpts.preferTensorRT       = preferTensorRT;
+
-    recognizerOpts.trtFP16              = true;
+// ----------------------------------------------------------------------------
 //  NVIDIA — LOCKED. Do NOT modify this helper unless fixing a specific
 //  NVIDIA-observable regression.
 //
 //  The OCR sub-models split into two groups:
 //    1. Detector — variable input shape per plate-ROI aspect. TRT EP is a
 //       poor fit (one engine build per unique shape, minutes each). Runs on
 //       CUDA EP with *conservative* cuDNN workspace: empirical measurements
 //       showed that max-workspace mode forces cuDNN to pick Winograd/
 //       implicit-precomp-GEMM variants that silently fall back to slow
 //       NO-WORKSPACE algorithms when the big workspace can't be allocated
 //       under VRAM pressure (LPD TRT engine + rec TRT engine + ORT arena).
 //    2. Classifier + Recognizer — TRT EP. Classifier has fixed shape so no
 //       profile is needed. Recognizer gets a dynamic profile
 //       [batch=1..16, W=320..960] so a single pre-built engine handles every
 //       runtime shape without mid-stream rebuilds (fixes 60–90 s hangs).
 // ----------------------------------------------------------------------------
 static PerModelOcrOptions BuildNvidiaOcrOptions(
        const std::string& recModelPath,
        bool preferTensorRT) {
    PerModelOcrOptions opts;
    // Detector: CUDA EP, conservative workspace, never TRT.
    opts.detectorOpts.useMaxCudnnWorkspace = false;
    opts.detectorOpts.preferTensorRT       = false;
    // Classifier: TRT EP, no profile (fixed [1,3,80,160]).
    opts.classifierOpts.useMaxCudnnWorkspace = true;
    opts.classifierOpts.preferTensorRT       = preferTensorRT;
    opts.classifierOpts.trtFP16              = true;
    // Recognizer: TRT EP with dynamic shape profile.
    opts.recognizerOpts.useMaxCudnnWorkspace = true;
    opts.recognizerOpts.preferTensorRT       = preferTensorRT;
    opts.recognizerOpts.trtFP16              = true;
    if (preferTensorRT) {
        std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);
        if (recInputName.empty()) {
@@ -72,10 +74,80 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
        std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"
                  << recInputName << "' — building TRT dynamic profile "
                  << "[batch=1..16, W=320..960]" << std::endl;
-        recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
+        opts.recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
-        recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
+        opts.recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
-        recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
+        opts.recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
    }
    return opts;
 }
 // ----------------------------------------------------------------------------
 //  Intel (OpenVINO EP) — placeholder.
 //
 //  Returns default-constructed options: no backend-specific tuning applied
 //  yet. When adding Intel optimizations (OpenVINO cache_dir, explicit device
 //  selection, INT8 paths, etc.), add the corresponding fields to the Intel
 //  section of OrtHandlerOptions and populate them here.
 // ----------------------------------------------------------------------------
 static PerModelOcrOptions BuildIntelOcrOptions() {
    // No Intel-specific tuning yet: all three sub-model option sets keep
    // their default-constructed values.
    PerModelOcrOptions intelOpts{};
    return intelOpts;
 }
 // ----------------------------------------------------------------------------
 //  AMD (DirectML EP / MIGraphX EP) — placeholder.
 //
 //  Returns default-constructed options: no backend-specific tuning applied
 //  yet. When adding AMD optimizations (graph opt gate for RDNA3+ desktop
 //  cards, MIGraphX cache on Linux, etc.), add the corresponding fields to
 //  the AMD section of OrtHandlerOptions and populate them here.
 // ----------------------------------------------------------------------------
 static PerModelOcrOptions BuildAmdOcrOptions() {
    // No AMD-specific tuning yet: all three sub-model option sets keep
    // their default-constructed values.
    PerModelOcrOptions amdOpts{};
    return amdOpts;
 }
 // ----------------------------------------------------------------------------
 //  CPU / unknown hardware — no tuning.
 // ----------------------------------------------------------------------------
 static PerModelOcrOptions BuildDefaultOcrOptions() {
    // CPU / unrecognised backend: hand back untouched defaults.
    PerModelOcrOptions fallbackOpts{};
    return fallbackOpts;
 }
 // Dispatch entry point used by Initialize().
 // Picks the per-backend option builder that matches the engine type reported
 // by EPLoader::Current(). Only the NVIDIA builder consumes the arguments;
 // every other backend (including CPU and any unlisted engine type) receives
 // default options.
 static PerModelOcrOptions BuildOcrOptionsForBackend(
        const std::string& recModelPath,
        bool preferTensorRT) {
    const EngineType resolved = EPLoader::Current().type;

    if (resolved == EngineType::NVIDIA_GPU) {
        return BuildNvidiaOcrOptions(recModelPath, preferTensorRT);
    }
    if (resolved == EngineType::AMD_GPU) {
        return BuildAmdOcrOptions();
    }
    if (resolved == EngineType::OPENVINO_GPU) {
        return BuildIntelOcrOptions();
    }
    // EngineType::CPU and anything unrecognised.
    return BuildDefaultOcrOptions();
 }
 } // namespace (anonymous)
 bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
                                    const std::string& clsModelPath,
                                    const std::string& recModelPath,
                                    const std::string& dictPath,
                                    bool preferTensorRT) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    // Dispatch to the correct per-backend option builder. The NVIDIA path
    // is fully locked-in; AMD/Intel/CPU paths currently return defaults
    // and are the place to add future backend-specific tuning.
    const PerModelOcrOptions opts =
        BuildOcrOptionsForBackend(recModelPath, preferTensorRT);
    const OrtHandlerOptions& detectorOpts   = opts.detectorOpts;
    const OrtHandlerOptions& classifierOpts = opts.classifierOpts;
    const OrtHandlerOptions& recognizerOpts = opts.recognizerOpts;
    try {
        // Initialize detector (also triggers EPLoader init in BasicOrtHandler)