diff --git a/engines/ONNXEngine/ONNXEngine.h b/engines/ONNXEngine/ONNXEngine.h
index 44f7418..6e6cb27 100644
--- a/engines/ONNXEngine/ONNXEngine.h
+++ b/engines/ONNXEngine/ONNXEngine.h
@@ -253,23 +253,50 @@ namespace ANSCENTER {
     // cuDNN workspace.  Default-constructed = identical to the legacy
     // behavior (CUDA EP only, minimal cuDNN workspace).
     // ====================================================================
+    // ====================================================================
+    //  OrtHandlerOptions
+    //
+    //  Per-session knobs for the ORT execution providers. Options are
+    //  grouped by target backend. A field set for one backend is silently
+    //  ignored by every other backend — e.g. `trtProfileMinShapes` only
+    //  affects TensorRT EP (NVIDIA); DirectML and OpenVINO don't read it.
+    //
+    //  When adding a new backend optimization:
+    //    - put the new field in the correct backend section below
+    //    - NEVER reuse an NVIDIA field for AMD/Intel tuning
+    //    - update the matching Build*OcrOptions() helper in
+    //      PaddleOCRV5Engine.cpp to populate it
+    //
+    //  The NVIDIA section is considered locked — it's been tuned end-to-end
+    //  for the ANSALPR pipeline and should not change unless fixing a
+    //  specific NVIDIA-observable regression.
+    // ====================================================================
     struct OrtHandlerOptions {
-        // Try to attach TensorRT EP before CUDA EP (NVIDIA only).
-        // Falls back to CUDA EP automatically if TRT EP creation or session
-        // creation fails.  Engines are cached on disk for fast reload.
+        // ----------------------------------------------------------------
+        //  NVIDIA (CUDA EP + TensorRT EP) — LOCKED
+        //
+        //  These fields only have effect when the resolved execution
+        //  provider is CUDA EP or TensorRT EP. DirectML (AMD), OpenVINO
+        //  (Intel), and CPU EP silently ignore every field below. Do not
+        //  repurpose them for other backends.
+        // ----------------------------------------------------------------
+
+        // Try to attach TensorRT EP before CUDA EP. Falls back to CUDA EP
+        // automatically if TRT EP creation or session creation fails.
+        // Engines are cached on disk for fast reload.
         bool preferTensorRT = false;
 
-        // Use the largest cuDNN conv workspace.  cuDNN can then pick fast
+        // Use the largest cuDNN conv workspace. cuDNN can then pick fast
         // algorithms (Winograd, implicit-precomp-GEMM with big workspaces).
         // Defaults off because some deployments share VRAM with TRT engines
         // and need the minimal-workspace mode to avoid OOM.
         bool useMaxCudnnWorkspace = false;
 
-        // Where to cache built TRT engines.  Empty → default
-        // %TEMP%/ANSCENTER/TRTEngineCache.  Only used when preferTensorRT.
+        // Where to cache built TRT engines. Empty → default
+        // %TEMP%/ANSCENTER/TRTEngineCache. Only used when preferTensorRT.
         std::string trtEngineCacheDir;
 
-        // FP16 builds for TRT EP.  Recommended for inference; ignored if
+        // FP16 builds for TRT EP. Recommended for inference; ignored if
         // preferTensorRT is false.
         bool trtFP16 = true;
 
@@ -286,6 +313,28 @@ namespace ANSCENTER {
         std::string trtProfileMinShapes;
         std::string trtProfileOptShapes;
         std::string trtProfileMaxShapes;
+
+        // ----------------------------------------------------------------
+        //  Intel (OpenVINO EP) — OPEN FOR OPTIMIZATION
+        //
+        //  Currently unused. Future Intel-specific tuning (cache_dir for
+        //  kernel cache, explicit device selection, INT8 routing, etc.)
+        //  should add fields here and wire them through the OpenVINO
+        //  branch of initialize_handler(). Do NOT put Intel logic inside
+        //  TryAppendCUDA or TryAppendTensorRT.
+        // ----------------------------------------------------------------
+        // (Intel fields go here — none yet)
+
+        // ----------------------------------------------------------------
+        //  AMD (DirectML EP / MIGraphX EP) — OPEN FOR OPTIMIZATION
+        //
+        //  Currently unused. Future AMD-specific tuning (graph optimization
+        //  gate for RDNA3+, MIGraphX cache dir on Linux, etc.) should add
+        //  fields here and wire them through the DirectML branch of
+        //  initialize_handler(). Do NOT put AMD logic inside TryAppendCUDA
+        //  or TryAppendTensorRT.
+        // ----------------------------------------------------------------
+        // (AMD fields go here — none yet)
     };
 
     // ====================================================================
diff --git a/modules/ANSLPR/ANSLPR_OCR.cpp b/modules/ANSLPR/ANSLPR_OCR.cpp
index a2f8383..2a73222 100644
--- a/modules/ANSLPR/ANSLPR_OCR.cpp
+++ b/modules/ANSLPR/ANSLPR_OCR.cpp
@@ -547,6 +547,181 @@ namespace ANSCENTER
 		return colour;
 	}
 
+	// ── Full-frame vs pipeline auto-detection ────────────────────────────
+	// Mirror of ANSALPR_OD::shouldUseALPRChecker. Per camera, compares each
+	// frame's (width, height) with the previous call's. Pre-cropped pipeline
+	// inputs vary by a few pixels per crop, so the streak keeps resetting and
+	// we return false. Real video keeps the same dimensions, so after
+	// CONFIRM_THRESHOLD consecutive same-size frames we flip into FULL-FRAME
+	// mode and run the ALPRChecker voting + ensureUniquePlateText dedup.
+	// NOTE(review): _imageSizeTrackers is not locked here — confirm callers serialise.
+	bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
+	                                        const std::string& cameraId) {
+		// Force disabled via SetALPRCheckerEnabled(false) → never use.
+		if (!_enableALPRChecker) return false;
+
+		// Narrow images are always pipeline crops — they bypass the tracker.
+		if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) return false;
+
+		auto& tracker = _imageSizeTrackers[cameraId];
+		const bool wasFullFrame = tracker.detectedFullFrame;
+		if (imageSize == tracker.lastSize) {
+			// Saturate at the threshold: nothing reads larger values, and an
+			// unbounded ++ would end in signed-overflow UB on a 24/7 stream.
+			if (tracker.consistentCount < ImageSizeTracker::CONFIRM_THRESHOLD) ++tracker.consistentCount;
+			if (tracker.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) tracker.detectedFullFrame = true;
+		} else {
+			tracker.lastSize          = imageSize;
+			tracker.consistentCount   = 1;
+			tracker.detectedFullFrame = false;
+		}
+		if (tracker.detectedFullFrame != wasFullFrame) {
+			ANS_DBG("ALPR_OCR_Checker",
+			        "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
+			        cameraId.c_str(),
+			        tracker.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
+			        imageSize.width, imageSize.height, tracker.consistentCount);
+		}
+		return tracker.detectedFullFrame;
+	}
+
+	// ── Spatial plate dedup with accumulated scoring ─────────────────────
+	// Mirror of ANSALPR_OD::ensureUniquePlateText. When two or more results
+	// in one frame carry the same plate text (className), only the one with
+	// the highest score keeps it, where score = this frame's confidence plus
+	// the accumulated score of the matching per-camera spatial identity (if
+	// any). Losers, and results whose className was already empty, are erased
+	// from `results` in place. Identity state is updated under
+	// _plateIdentitiesMutex; with 0 or 1 results it is only aged.
+	void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
+	                                        const std::string& cameraId) {
+		std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
+		auto& identities = _plateIdentities[cameraId];
+
+		// Auto-detect mode by detection count.
+		//   0–1 detections → pipeline/single-crop mode → no dedup needed.
+		//   2+ detections  → full-frame mode → apply accumulated scoring.
+		if (results.size() <= 1) {
+			// Still age out stale identities from earlier full-frame calls (no score decay on this path)
+			if (!identities.empty()) {
+				constexpr int MAX_UNSEEN_FRAMES = 30;
+				for (auto& id : identities) id.framesSinceLastSeen++;
+				for (auto it = identities.begin(); it != identities.end(); ) {
+					if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
+						it = identities.erase(it);
+					} else {
+						++it;
+					}
+				}
+			}
+			return;
+		}
+
+		// Helper: intersection-over-union of two rects; 0 when they do not overlap.
+		auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
+			int x1 = std::max(a.x, b.x);
+			int y1 = std::max(a.y, b.y);
+			int x2 = std::min(a.x + a.width,  b.x + b.width);
+			int y2 = std::min(a.y + a.height, b.y + b.height);
+			if (x2 <= x1 || y2 <= y1) return 0.0f;
+			float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
+			float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
+			return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
+		};
+
+		// Helper: first identity with the same text whose location overlaps `box`; nullptr if none.
+		auto findSpatialMatch = [&](const cv::Rect& box,
+		                            const std::string& plateText) -> SpatialPlateIdentity* {
+			for (auto& id : identities) {
+				if (id.plateText == plateText) {
+					cv::Rect storedRect(
+						static_cast<int>(id.center.x - box.width  * 0.5f),
+						static_cast<int>(id.center.y - box.height * 0.5f),
+						box.width, box.height);  // identities store only a centre, so reuse the query box's size
+					if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
+						return &id;
+					}
+				}
+			}
+			return nullptr;
+		};
+
+		// Step 1: Build map of plateText → candidate indices (empty texts are ignored)
+		std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
+		for (size_t i = 0; i < results.size(); ++i) {
+			if (results[i].className.empty()) continue;
+			plateCandidates[results[i].className].push_back(i);
+		}
+
+		// Step 2: Resolve duplicates using spatial accumulated scores
+		for (auto& [plateText, indices] : plateCandidates) {
+			if (indices.size() <= 1) continue;
+
+			size_t winner = indices[0];
+			float  bestScore = 0.0f;  // the first candidate wins unless some score exceeds 0
+
+			for (size_t idx : indices) {
+				float score = results[idx].confidence;
+				auto* match = findSpatialMatch(results[idx].box, plateText);
+				if (match) {
+					score = match->accumulatedScore + results[idx].confidence;
+				}
+				if (score > bestScore) {
+					bestScore = score;
+					winner    = idx;
+				}
+			}
+
+			for (size_t idx : indices) {
+				if (idx != winner) {
+					results[idx].className.clear();
+				}
+			}
+		}
+
+		// Step 3: Update spatial identities — winners accumulate, losers decay
+		constexpr float DECAY_FACTOR     = 0.8f;
+		constexpr float MIN_SCORE        = 0.1f;
+		constexpr int   MAX_UNSEEN_FRAMES = 30;
+
+		for (auto& id : identities) id.framesSinceLastSeen++;
+
+		for (auto& r : results) {
+			if (r.className.empty()) continue;
+
+			cv::Point2f center(
+				r.box.x + r.box.width  * 0.5f,
+				r.box.y + r.box.height * 0.5f);
+
+			auto* match = findSpatialMatch(r.box, r.className);
+			if (match) {
+				match->accumulatedScore += r.confidence;
+				match->center            = center;
+				match->framesSinceLastSeen = 0;
+			} else {
+				identities.push_back({ center, r.className, r.confidence, 0 });  // may reallocate: never keep `match` across this
+			}
+		}
+
+		// Decay identities not matched this frame; drop those that are too weak or unseen too long
+		for (auto it = identities.begin(); it != identities.end(); ) {
+			if (it->framesSinceLastSeen > 0) {
+				it->accumulatedScore *= DECAY_FACTOR;
+			}
+			if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
+				it = identities.erase(it);
+			} else {
+				++it;
+			}
+		}
+
+		// Step 4: Remove entries with cleared plate text
+		results.erase(
+			std::remove_if(results.begin(), results.end(),
+				[](const Object& o) { return o.className.empty(); }),
+			results.end());
+	}
+
 	// ── OCR on a single plate ROI ────────────────────────────────────────
 	// Returns the plate text via the out-parameter and populates alprExtraInfo
 	// with the structured ALPR JSON (zone parts) when ALPR mode is active.
@@ -712,6 +887,13 @@ namespace ANSCENTER
 			std::vector<Object> output;
 			output.reserve(plateInfos.size());
 
+			// Decide once per frame whether the tracker-based correction
+			// layer should run. We auto-detect full-frame vs pipeline mode
+			// by watching for consecutive frames of identical size, exactly
+			// the same way ANSALPR_OD does it.
+			const bool useChecker = shouldUseALPRChecker(
+				cv::Size(frameWidth, frameHeight), cameraId);
+
 			for (const auto& info : plateInfos) {
 				std::string combinedText;
 				for (size_t cropIdx : info.cropIndices) {
@@ -726,8 +908,9 @@ namespace ANSCENTER
 				Object lprObject = lprOutput[info.origIndex];
 				lprObject.cameraId = cameraId;
 
-				// Cross-frame stabilization (unchanged)
-				if (_enableALPRChecker) {
+				// Cross-frame stabilization: per-track majority vote in
+				// full-frame mode, raw OCR text in pipeline mode.
+				if (useChecker) {
 					lprObject.className = alprChecker.checkPlateByTrackId(
 						cameraId, combinedText, lprObject.trackId);
 				}
@@ -747,6 +930,14 @@ namespace ANSCENTER
 				output.push_back(std::move(lprObject));
 			}
 
+			// Spatial dedup: if two detections in the same frame ended up
+			// with the same plate text, keep only the one whose spatial
+			// history has the higher accumulated confidence. Skipped in pipeline
+			// mode (one plate per call) and when the checker is force-disabled.
+			if (useChecker) {
+				ensureUniquePlateText(output, cameraId);
+			}
+
 			return output;
 		}
 		catch (const cv::Exception& e) {
diff --git a/modules/ANSLPR/ANSLPR_OCR.h b/modules/ANSLPR/ANSLPR_OCR.h
index fa53597..f1e8f49 100644
--- a/modules/ANSLPR/ANSLPR_OCR.h
+++ b/modules/ANSLPR/ANSLPR_OCR.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <string>
 #include <mutex>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -45,6 +46,66 @@ namespace ANSCENTER
 
         ALPRChecker     alprChecker;
 
+        // ----------------------------------------------------------------
+        //  Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
+        //
+        //  When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each
+        //  frame is a different small image), the tracker can't work — the
+        //  LP detector sees a totally new image every call so trackIds mean
+        //  nothing. In that "pipeline" mode we must skip the ALPRChecker
+        //  voting layer entirely and return raw OCR results.
+        //
+        //  When the caller feeds full-frame video (same resolution every
+        //  frame, plates moving through the scene), the tracker works
+        //  normally and we run plate text through ALPRChecker majority
+        //  voting + spatial dedup to stabilise readings.
+        //
+        //  Mode is auto-detected by watching whether consecutive frames
+        //  share the exact same (width, height) for at least
+        //  CONFIRM_THRESHOLD frames. Pipeline crop sizes vary by a few pixels;
+        //  full-frame video keeps the same dimensions from frame to frame.
+        // ----------------------------------------------------------------
+        struct ImageSizeTracker {
+            cv::Size lastSize{ 0, 0 };             // size of the most recent frame that passed the width gate
+            int      consistentCount = 0;          // same-size streak counter (reset to 1 when the size changes)
+            bool     detectedFullFrame = false;    // true once the streak has reached CONFIRM_THRESHOLD
+            static constexpr int CONFIRM_THRESHOLD   = 5;     // consecutive same-size frames needed to confirm full-frame
+            static constexpr int MIN_FULLFRAME_WIDTH = 1000;  // narrower images are always treated as pipeline crops
+        };
+        std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers;
+
+        [[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,
+                                                const std::string& cameraId);
+
+        // ----------------------------------------------------------------
+        //  Spatial plate identity persistence (ported from ANSALPR_OD)
+        //
+        //  Prevents the same plate string from appearing on two different
+        //  vehicles in the same frame. The LP tracker may briefly assign
+        //  the same trackId to two different plates when vehicles pass
+        //  each other, or two different trackIds to the same plate when
+        //  occlusion breaks a track. In either case, OCR can produce the
+        //  same text for two spatial locations for a frame or two — which
+        //  looks like "plate flicker" in the UI.
+        //
+        //  ensureUniquePlateText() resolves the ambiguity by accumulating
+        //  confidence per spatial location. When two detections share a
+        //  plate text, the one whose spatial history has the higher score
+        //  wins and the other has its className cleared.
+        // ----------------------------------------------------------------
+        struct SpatialPlateIdentity {             // aggregate-initialised positionally — keep member order
+            cv::Point2f center;                   // plate center in frame coords
+            std::string plateText;                // plate string associated with this location
+            float       accumulatedScore    = 0.0f;  // running sum of confidences; decayed while unseen
+            int         framesSinceLastSeen = 0;     // dedup calls since last match (0 = matched this call)
+        };
+        std::mutex _plateIdentitiesMutex;
+        std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
+        static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold
+
+        void ensureUniquePlateText(std::vector<Object>& results,
+                                   const std::string& cameraId);
+
         // --- Original model zip path (reused for ANSONNXOCR initialization) ---
         std::string     _modelZipFilePath;
 
diff --git a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
index 79fb541..71406d4 100644
--- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
+++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
@@ -8,60 +8,62 @@
 namespace ANSCENTER {
 namespace onnxocr {
 
-bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
-                                    const std::string& clsModelPath,
-                                    const std::string& recModelPath,
-                                    const std::string& dictPath,
-                                    bool preferTensorRT) {
-    std::lock_guard<std::recursive_mutex> lock(_mutex);
-    ModelLoadingGuard mlg(_modelLoading);
+// ============================================================================
+//  Per-backend OCR option builders
+//
+//  Each backend (NVIDIA / AMD / Intel / CPU) has its own helper that returns
+//  a fully-populated set of OrtHandlerOptions for the detector, classifier,
+//  and recognizer sub-models. PaddleOCRV5Engine::Initialize dispatches to the
+//  correct helper based on the engine type that EPLoader resolved at startup.
+//
+//  Adding a new backend optimization is a strictly contained change: touch
+//  only that backend's builder. The others — especially NVIDIA, which is
+//  hand-tuned and should not regress — stay untouched.
+// ============================================================================
 
-    // High-perf options.  The OCR sub-models split into two groups:
-    //
-    //   1. Detector — its input shape varies continuously with every
-    //      plate-ROI aspect ratio.  TRT EP is a poor fit because it
-    //      builds a fresh engine for each unique shape (minutes each).
-    //      We keep it on CUDA EP with the largest cuDNN workspace and
-    //      let cuDNN HEURISTIC handle the per-shape algo selection.
-    //
-    //   2. Classifier + Recognizer — fixed-bucket shapes (cls is
-    //      [1,3,80,160], rec is [1,3,48,{320,480,640,960}]).  These
-    //      benefit massively from TRT EP because the engine is built
-    //      once per shape and reused forever.
+namespace {
+
+struct PerModelOcrOptions {
     OrtHandlerOptions detectorOpts;
-    // Detector uses CUDA EP with *conservative* cuDNN workspace.
-    // Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT
-    // engine + ORT arena in play) the max-workspace mode causes cuDNN
-    // to pick Winograd/implicit-precomp-GEMM variants that silently
-    // fall back to slow NO-WORKSPACE algorithms when the big workspace
-    // can't be allocated. With "0" cuDNN picks algorithms that are
-    // known to fit and runs ~10x faster in practice.
-    detectorOpts.useMaxCudnnWorkspace = false;
-    detectorOpts.preferTensorRT       = false;   // never TRT for the detector
-
-    // Classifier (fixed [1,3,80,160]): TRT with no profile is fine.
     OrtHandlerOptions classifierOpts;
-    classifierOpts.useMaxCudnnWorkspace = true;
-    classifierOpts.preferTensorRT       = preferTensorRT;
-    classifierOpts.trtFP16              = true;
-
-    // Recognizer: needs a DYNAMIC profile so one TRT engine covers every
-    // (batch, bucket_width) pair we generate at runtime. Without this,
-    // each new shape triggers a ~80s engine rebuild mid-stream when a
-    // new plate appears or the plate count changes.
-    //
-    // Profile range:
-    //   batch  : 1 .. 16       (16 plates worth of crops is generous)
-    //   H      : 48 (fixed)
-    //   W      : 320 .. 960    (covers all 4 recognizer buckets)
-    //
-    // Query the actual input name from the .onnx file instead of
-    // hardcoding — PaddleOCR usually exports it as "x" but the name can
-    // vary across model versions.
     OrtHandlerOptions recognizerOpts;
-    recognizerOpts.useMaxCudnnWorkspace = true;
-    recognizerOpts.preferTensorRT       = preferTensorRT;
-    recognizerOpts.trtFP16              = true;
+};
+
+// ----------------------------------------------------------------------------
+//  NVIDIA — LOCKED. Do NOT modify this helper unless fixing a specific
+//  NVIDIA-observable regression.
+//
+//  The OCR sub-models split into two groups:
+//    1. Detector — variable input shape per plate-ROI aspect. TRT EP is a
+//       poor fit (one engine build per unique shape, minutes each). Runs on
+//       CUDA EP with *conservative* cuDNN workspace: empirical measurements
+//       showed that max-workspace mode forces cuDNN to pick Winograd/
+//       implicit-precomp-GEMM variants that silently fall back to slow
+//       NO-WORKSPACE algorithms when the big workspace can't be allocated
+//       under VRAM pressure (LPD TRT engine + rec TRT engine + ORT arena).
+//    2. Classifier + Recognizer — TRT EP. Classifier has fixed shape so no
+//       profile is needed. Recognizer gets a dynamic profile
+//       [batch=1..16, W=320..960] so a single pre-built engine handles every
+//       runtime shape without mid-stream rebuilds (fixes 60–90 s hangs).
+// ----------------------------------------------------------------------------
+static PerModelOcrOptions BuildNvidiaOcrOptions(
+        const std::string& recModelPath,
+        bool preferTensorRT) {
+    PerModelOcrOptions opts;
+
+    // Detector: CUDA EP, conservative workspace, never TRT.
+    opts.detectorOpts.useMaxCudnnWorkspace = false;
+    opts.detectorOpts.preferTensorRT       = false;
+
+    // Classifier: TRT EP, no profile (fixed [1,3,80,160]).
+    opts.classifierOpts.useMaxCudnnWorkspace = true;
+    opts.classifierOpts.preferTensorRT       = preferTensorRT;
+    opts.classifierOpts.trtFP16              = true;
+
+    // Recognizer: TRT EP with dynamic shape profile.
+    opts.recognizerOpts.useMaxCudnnWorkspace = true;
+    opts.recognizerOpts.preferTensorRT       = preferTensorRT;
+    opts.recognizerOpts.trtFP16              = true;
     if (preferTensorRT) {
         std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);
         if (recInputName.empty()) {
@@ -72,10 +74,80 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
         std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"
                   << recInputName << "' — building TRT dynamic profile "
                   << "[batch=1..16, W=320..960]" << std::endl;
-        recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
-        recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
-        recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
+        opts.recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
+        opts.recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
+        opts.recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
     }
+    return opts;
+}
+
+// ----------------------------------------------------------------------------
+//  Intel (OpenVINO EP) — placeholder.
+//
+//  Returns default-constructed options: no backend-specific tuning applied
+//  yet. When adding Intel optimizations (OpenVINO cache_dir, explicit device
+//  selection, INT8 paths, etc.), add the corresponding fields to the Intel
+//  section of OrtHandlerOptions and populate them here.
+// ----------------------------------------------------------------------------
+static PerModelOcrOptions BuildIntelOcrOptions() {
+    return {};  // value-initialised: detector/classifier/recognizer keep their defaults
+}
+
+// ----------------------------------------------------------------------------
+//  AMD (DirectML EP / MIGraphX EP) — placeholder.
+//
+//  Returns default-constructed options: no backend-specific tuning applied
+//  yet. When adding AMD optimizations (graph opt gate for RDNA3+ desktop
+//  cards, MIGraphX cache on Linux, etc.), add the corresponding fields to
+//  the AMD section of OrtHandlerOptions and populate them here.
+// ----------------------------------------------------------------------------
+static PerModelOcrOptions BuildAmdOcrOptions() {
+    return {};  // value-initialised: detector/classifier/recognizer keep their defaults
+}
+
+// ----------------------------------------------------------------------------
+//  CPU / unknown hardware — no tuning.
+// ----------------------------------------------------------------------------
+static PerModelOcrOptions BuildDefaultOcrOptions() {
+    return {};  // value-initialised: detector/classifier/recognizer keep their defaults
+}
+
+// Picks the per-backend option builder for the engine type EPLoader reports; called by Initialize().
+static PerModelOcrOptions BuildOcrOptionsForBackend(const std::string& recModelPath,
+                                                    bool preferTensorRT) {
+    const EngineType resolved = EPLoader::Current().type;
+    switch (resolved) {
+        case EngineType::NVIDIA_GPU:
+            return BuildNvidiaOcrOptions(recModelPath, preferTensorRT);
+        case EngineType::OPENVINO_GPU:
+            return BuildIntelOcrOptions();
+        case EngineType::AMD_GPU:
+            return BuildAmdOcrOptions();
+        case EngineType::CPU:
+        default:
+            break;  // CPU and any unrecognised engine type: untuned defaults
+    }
+    return BuildDefaultOcrOptions();
+}
+
+} // namespace (anonymous)
+
+bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
+                                    const std::string& clsModelPath,
+                                    const std::string& recModelPath,
+                                    const std::string& dictPath,
+                                    bool preferTensorRT) {
+    std::lock_guard<std::recursive_mutex> lock(_mutex);
+    ModelLoadingGuard mlg(_modelLoading);
+
+    // Dispatch to the correct per-backend option builder. The NVIDIA path
+    // is fully locked-in; AMD/Intel/CPU paths currently return defaults
+    // and are the place to add future backend-specific tuning.
+    const PerModelOcrOptions opts =
+        BuildOcrOptionsForBackend(recModelPath, preferTensorRT);
+    const OrtHandlerOptions& detectorOpts   = opts.detectorOpts;
+    const OrtHandlerOptions& classifierOpts = opts.classifierOpts;
+    const OrtHandlerOptions& recognizerOpts = opts.recognizerOpts;
 
     try {
         // Initialize detector (also triggers EPLoader init in BasicOrtHandler)