modules/ANSCV/ANSCVVendorGate.h

#pragma once
// ANSCVVendorGate.h — Cached NVIDIA hardware check for ANSCV.dll.
//
// ANSCV.dll links against CUDA::cudart_static + CUDA::cublasLt + CUDA::nvjpeg
// because it hosts NVDEC hardware decode, NV12 GPU frame pool, and the RTSP /
// SRT / RTMP / MJPEG / FLV players that feed NV12 frames into the downstream
// inference DLLs (ANSLPR, ANSOCR, ANSFR).
//
// Several code paths in ANSCV call into the CUDA runtime unconditionally:
//   • Post-NVDEC memory pool cleanup in Destroy/Reconnect
//   • cudaGetDeviceCount() probes inside AutoConfigureHWDecoders
//   • nvJPEG encoder helpers
//
// On NVIDIA hardware these are fine.  On AMD / Intel / pure-CPU machines:
//   • cudart_static is linked, but calling it wakes up CUDA driver state
//     that was never needed — wastes address space and (when combined with
//     DirectML decode on AMD) has been observed to destabilise amdkmdag.
//   • The post-NVDEC cleanup runs even though no NVDEC decoder was ever
//     created, which is pure waste on AMD/Intel.
//
// Solution: gate every CUDA runtime call behind this cached predicate, which
// evaluates CheckHardwareInformation() lazily on first use and caches the
// result (threads racing on that first use may each run the detection once).
// If the detected engine is not NVIDIA_GPU, all CUDA/NVDEC cleanup paths
// become no-ops — decoders fall back to DXVA/D3D11VA/CPU automatically via
// the existing AutoConfigureHWDecoders_Platform() fallback.
//
// Mirrors the ANSLPR_OD / ANSOCR / ANSFR vendor gates that were added to
// ANSALPR_OD::LoadEngine, CreateANSOCRHandleEx, and CreateANSRFHandle.

#include "ANSLicense.h"
#include <atomic>

namespace anscv_vendor_gate {

// Returns true when ANSLicenseHelper::CheckHardwareInformation() reports
// EngineType::NVIDIA_GPU, false otherwise.
//
// The answer is computed lazily and cached in a single tri-state
// std::atomic<int>, so the steady-state cost is one acquire load and a
// compare (the helper is on the hot decoder path, which is why std::call_once
// is deliberately not used).
//
// Thread-safety: threads that race on the very first call may each run the
// detection, but the result is published with a compare-exchange from the
// "unknown" state, so exactly one outcome is ever cached (first writer wins)
// and every caller — including the racing ones — returns that same outcome.
// In particular, a detection that throws on one thread can no longer
// overwrite a result already published by another thread.
//
// Never throws: if detection throws, the outcome is treated as non-NVIDIA so
// the CUDA runtime is never activated on unknown hardware.
[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {
    constexpr int kUnknown   = 0;
    constexpr int kNvidia    = 1;
    constexpr int kNonNvidia = 2;

    // Constant-initialised, so no dynamic-init guard is involved.
    static std::atomic<int> s_state{kUnknown};

    // Fast path: result already published.
    const int cached = s_state.load(std::memory_order_acquire);
    if (cached != kUnknown) {
        return cached == kNvidia;
    }

    // Slow path: run the detection; any failure is a fail-safe "non-NVIDIA".
    int outcome = kNonNvidia;
    try {
        const ANSCENTER::EngineType detected =
            ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
        if (detected == ANSCENTER::EngineType::NVIDIA_GPU) {
            outcome = kNvidia;
        }
    } catch (...) {
        outcome = kNonNvidia;
    }

    // Publish only if nobody else has; otherwise adopt the value that is
    // already cached so all callers agree for the lifetime of the process.
    int expected = kUnknown;
    if (s_state.compare_exchange_strong(expected, outcome,
                                        std::memory_order_acq_rel,
                                        std::memory_order_acquire)) {
        return outcome == kNvidia;
    }
    return expected == kNvidia;
}

} // namespace anscv_vendor_gate
Add CPU/GPU gate and support new ANSALPR using OCR 2026-04-12 17:16:16 +10:00			`#pragma once`
			`// ANSCVVendorGate.h — Cached NVIDIA hardware check for ANSCV.dll.`
			`//`
			`// ANSCV.dll links against CUDA::cudart_static + CUDA::cublasLt + CUDA::nvjpeg`
			`// because it hosts NVDEC hardware decode, NV12 GPU frame pool, and the RTSP /`
			`// SRT / RTMP / MJPEG / FLV players that feed NV12 frames into the downstream`
			`// inference DLLs (ANSLPR, ANSOCR, ANSFR).`
			`//`
			`// Several code paths in ANSCV call into the CUDA runtime unconditionally:`
			`// • Post-NVDEC memory pool cleanup in Destroy/Reconnect`
			`// • cudaGetDeviceCount() probes inside AutoConfigureHWDecoders`
			`// • nvJPEG encoder helpers`
			`//`
			`// On NVIDIA hardware these are fine. On AMD / Intel / pure-CPU machines:`
			`// • cudart_static is linked, but calling it wakes up CUDA driver state`
			`// that was never needed — wastes address space and (when combined with`
			`// DirectML decode on AMD) has been observed to destabilise amdkmdag.`
			`// • The post-NVDEC cleanup runs even though no NVDEC decoder was ever`
			`// created, which is pure waste on AMD/Intel.`
			`//`
			`// Solution: gate every CUDA runtime call behind this cached predicate, which`
			`// evaluates CheckHardwareInformation() exactly once per process. If the`
			`// detected engine is not NVIDIA_GPU, all CUDA/NVDEC cleanup paths become`
			`// no-ops — decoders fall back to DXVA/D3D11VA/CPU automatically via the`
			`// existing AutoConfigureHWDecoders_Platform() fallback.`
			`//`
			`// Mirrors the ANSLPR_OD / ANSOCR / ANSFR vendor gates that were added to`
			`// ANSALPR_OD::LoadEngine, CreateANSOCRHandleEx, and CreateANSRFHandle.`

			`#include "ANSLicense.h"`
			`#include <atomic>`

			`namespace anscv_vendor_gate {`

			`// Lazily evaluates ANSLicenseHelper::CheckHardwareInformation() once and`
			`// caches the result. Thread-safe: the first call on any thread performs`
			`// the detection, all subsequent calls return the cached bool. Using an`
			`// atomic bool + init-flag avoids pulling in std::call_once and its`
			`// exception-safety overhead (the helper is on the hot decoder path).`
			`[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {`
			`static std::atomic<int> s_state{0}; // 0 = unknown, 1 = NVIDIA, 2 = non-NVIDIA`
			`int cached = s_state.load(std::memory_order_acquire);`
			`if (cached != 0) return cached == 1;`
			`try {`
			`const ANSCENTER::EngineType detected =`
			`ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();`
			`const bool isNvidia = (detected == ANSCENTER::EngineType::NVIDIA_GPU);`
			`// Last-writer-wins is fine — CheckHardwareInformation is deterministic.`
			`s_state.store(isNvidia ? 1 : 2, std::memory_order_release);`
			`return isNvidia;`
			`} catch (...) {`
			`// If detection throws (should not happen), fail safe to non-NVIDIA so`
			`// we never activate CUDA runtime on unknown hardware.`
			`s_state.store(2, std::memory_order_release);`
			`return false;`
			`}`
			`}`

			`} // namespace anscv_vendor_gate`