Add CPU/GPU gate and support new ANSALPR using OCR

2026-04-12 17:16:16 +10:00
parent 27083a6530
commit 0a8aaed215
30 changed files with 1870 additions and 2166 deletions
--- a/modules/ANSCV/ANSCVVendorGate.h
+++ b/modules/ANSCV/ANSCVVendorGate.h
@@ -0,0 +1,59 @@
+#pragma once
+// ANSCVVendorGate.h — Cached NVIDIA hardware check for ANSCV.dll.
+//
+// ANSCV.dll links against CUDA::cudart_static + CUDA::cublasLt + CUDA::nvjpeg
+// because it hosts NVDEC hardware decode, NV12 GPU frame pool, and the RTSP /
+// SRT / RTMP / MJPEG / FLV players that feed NV12 frames into the downstream
+// inference DLLs (ANSLPR, ANSOCR, ANSFR).
+//
+// Several code paths in ANSCV call into the CUDA runtime unconditionally:
+//   • Post-NVDEC memory pool cleanup in Destroy/Reconnect
+//   • cudaGetDeviceCount() probes inside AutoConfigureHWDecoders
+//   • nvJPEG encoder helpers
+//
+// On NVIDIA hardware these are fine.  On AMD / Intel / pure-CPU machines:
+//   • cudart_static is linked, but calling it wakes up CUDA driver state
+//     that was never needed — wastes address space and (when combined with
+//     DirectML decode on AMD) has been observed to destabilise amdkmdag.
+//   • The post-NVDEC cleanup runs even though no NVDEC decoder was ever
+//     created, which is pure waste on AMD/Intel.
+//
+// Solution: gate every CUDA runtime call behind this cached predicate, which
+// runs CheckHardwareInformation() on first use and caches the answer.  If the
+// detected engine is not NVIDIA_GPU, all CUDA/NVDEC cleanup paths become
+// no-ops — decoders fall back to DXVA/D3D11VA/CPU automatically via the
+// existing AutoConfigureHWDecoders_Platform() fallback.
+//
+// Mirrors the ANSLPR_OD / ANSOCR / ANSFR vendor gates that were added to
+// ANSALPR_OD::LoadEngine, CreateANSOCRHandleEx, and CreateANSRFHandle.
+
+#include "ANSLicense.h"
+#include <atomic>
+
+namespace anscv_vendor_gate {
+
+// Returns true when ANSLicenseHelper::CheckHardwareInformation() reported
+// NVIDIA_GPU.  The answer is cached in a tri-state atomic, so once it is
+// published every call is a single atomic load (no std::call_once on the
+// hot decoder path).  Threads racing on the very first call may each run
+// the probe, but only the first result is published and all callers return
+// that value, so the gate can never flip once it has been decided.
+[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {
+    static std::atomic<int> s_state{0};   // 0 = unknown, 1 = NVIDIA, 2 = non-NVIDIA
+    int state = s_state.load(std::memory_order_acquire);
+    if (state != 0) return state == 1;
+    int detected = 2;   // fail safe: stays non-NVIDIA if the probe throws
+    try {
+        const ANSCENTER::EngineType engine =
+            ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
+        if (engine == ANSCENTER::EngineType::NVIDIA_GPU) detected = 1;
+    } catch (...) {
+        // Should not happen; never activate CUDA runtime on unknown hardware.
+    }
+    // Publish only if still unknown (state == 0 here).  On failure the CAS
+    // writes the value another thread already published into `state`.
+    if (!s_state.compare_exchange_strong(state, detected)) detected = state;
+    return detected == 1;
+}
+
+} // namespace anscv_vendor_gate