Add tracker support to improve ALPR_OCR

This commit is contained in:
2026-04-14 21:18:10 +10:00
parent f9a0af8949
commit 5706615ed5
4 changed files with 435 additions and 62 deletions

View File

@@ -253,23 +253,50 @@ namespace ANSCENTER {
// cuDNN workspace. Default-constructed = identical to the legacy
// behavior (CUDA EP only, minimal cuDNN workspace).
// ====================================================================
// ====================================================================
// OrtHandlerOptions
//
// Per-session knobs for the ORT execution providers. Options are
// grouped by target backend. A field set for one backend is silently
// ignored by every other backend — e.g. `trtProfileMinShapes` only
// affects TensorRT EP (NVIDIA); DirectML and OpenVINO don't read it.
//
// When adding a new backend optimization:
// - put the new field in the correct backend section below
// - NEVER reuse an NVIDIA field for AMD/Intel tuning
// - update the matching Build*OcrOptions() helper in
// PaddleOCRV5Engine.cpp to populate it
//
// The NVIDIA section is considered locked — it's been tuned end-to-end
// for the ANSALPR pipeline and should not change unless fixing a
// specific NVIDIA-observable regression.
// ====================================================================
struct OrtHandlerOptions {
// Try to attach TensorRT EP before CUDA EP (NVIDIA only).
// Falls back to CUDA EP automatically if TRT EP creation or session
// creation fails. Engines are cached on disk for fast reload.
// ----------------------------------------------------------------
// NVIDIA (CUDA EP + TensorRT EP) — LOCKED
//
// These fields only have effect when the resolved execution
// provider is CUDA EP or TensorRT EP. DirectML (AMD), OpenVINO
// (Intel), and CPU EP silently ignore every field below. Do not
// repurpose them for other backends.
// ----------------------------------------------------------------
// Try to attach TensorRT EP before CUDA EP. Falls back to CUDA EP
// automatically if TRT EP creation or session creation fails.
// Engines are cached on disk for fast reload.
bool preferTensorRT = false;
// Use the largest cuDNN conv workspace. cuDNN can then pick fast
// algorithms (Winograd, implicit-precomp-GEMM with big workspaces).
// Defaults off because some deployments share VRAM with TRT engines
// and need the minimal-workspace mode to avoid OOM.
bool useMaxCudnnWorkspace = false;
// Where to cache built TRT engines. Empty → default
// %TEMP%/ANSCENTER/TRTEngineCache. Only used when preferTensorRT.
std::string trtEngineCacheDir;
// FP16 builds for TRT EP. Recommended for inference; ignored if
// preferTensorRT is false.
bool trtFP16 = true;
@@ -286,6 +313,28 @@ namespace ANSCENTER {
std::string trtProfileMinShapes;
std::string trtProfileOptShapes;
std::string trtProfileMaxShapes;
// ----------------------------------------------------------------
// Intel (OpenVINO EP) — OPEN FOR OPTIMIZATION
//
// Currently unused. Future Intel-specific tuning (cache_dir for
// kernel cache, explicit device selection, INT8 routing, etc.)
// should add fields here and wire them through the OpenVINO
// branch of initialize_handler(). Do NOT put Intel logic inside
// TryAppendCUDA or TryAppendTensorRT.
// ----------------------------------------------------------------
// (Intel fields go here — none yet)
// ----------------------------------------------------------------
// AMD (DirectML EP / MIGraphX EP) — OPEN FOR OPTIMIZATION
//
// Currently unused. Future AMD-specific tuning (graph optimization
// gate for RDNA3+, MIGraphX cache dir on Linux, etc.) should add
// fields here and wire them through the DirectML branch of
// initialize_handler(). Do NOT put AMD logic inside TryAppendCUDA
// or TryAppendTensorRT.
// ----------------------------------------------------------------
// (AMD fields go here — none yet)
};
// ====================================================================