Add CPU/GPU gate and support new ANSALPR using OCR
This commit is contained in:
59
modules/ANSCV/ANSCVVendorGate.h
Normal file
59
modules/ANSCV/ANSCVVendorGate.h
Normal file
@@ -0,0 +1,59 @@
|
||||
#pragma once
|
||||
// ANSCVVendorGate.h — Cached NVIDIA hardware check for ANSCV.dll.
|
||||
//
|
||||
// ANSCV.dll links against CUDA::cudart_static + CUDA::cublasLt + CUDA::nvjpeg
|
||||
// because it hosts NVDEC hardware decode, NV12 GPU frame pool, and the RTSP /
|
||||
// SRT / RTMP / MJPEG / FLV players that feed NV12 frames into the downstream
|
||||
// inference DLLs (ANSLPR, ANSOCR, ANSFR).
|
||||
//
|
||||
// Several code paths in ANSCV call into the CUDA runtime unconditionally:
|
||||
// • Post-NVDEC memory pool cleanup in Destroy/Reconnect
|
||||
// • cudaGetDeviceCount() probes inside AutoConfigureHWDecoders
|
||||
// • nvJPEG encoder helpers
|
||||
//
|
||||
// On NVIDIA hardware these are fine. On AMD / Intel / pure-CPU machines:
|
||||
// • cudart_static is linked, but calling it wakes up CUDA driver state
|
||||
// that was never needed — wastes address space and (when combined with
|
||||
// DirectML decode on AMD) has been observed to destabilise amdkmdag.
|
||||
// • The post-NVDEC cleanup runs even though no NVDEC decoder was ever
|
||||
// created, which is pure waste on AMD/Intel.
|
||||
//
|
||||
// Solution: gate every CUDA runtime call behind this cached predicate, which
|
||||
// caches the result of CheckHardwareInformation() after the first call
// (threads racing on that first call may each probe once).  If the
|
||||
// detected engine is not NVIDIA_GPU, all CUDA/NVDEC cleanup paths become
|
||||
// no-ops — decoders fall back to DXVA/D3D11VA/CPU automatically via the
|
||||
// existing AutoConfigureHWDecoders_Platform() fallback.
|
||||
//
|
||||
// Mirrors the ANSLPR_OD / ANSOCR / ANSFR vendor gates that were added to
|
||||
// ANSALPR_OD::LoadEngine, CreateANSOCRHandleEx, and CreateANSRFHandle.
|
||||
|
||||
#include "ANSLicense.h"
|
||||
#include <atomic>
|
||||
|
||||
namespace anscv_vendor_gate {
|
||||
|
||||
// Lazily evaluates ANSLicenseHelper::CheckHardwareInformation() once and
|
||||
// caches the result. Thread-safe: the first call on any thread performs
|
||||
// the detection, all subsequent calls return the cached bool. Using an
|
||||
// atomic bool + init-flag avoids pulling in std::call_once and its
|
||||
// exception-safety overhead (the helper is on the hot decoder path).
|
||||
[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {
|
||||
static std::atomic<int> s_state{0}; // 0 = unknown, 1 = NVIDIA, 2 = non-NVIDIA
|
||||
int cached = s_state.load(std::memory_order_acquire);
|
||||
if (cached != 0) return cached == 1;
|
||||
try {
|
||||
const ANSCENTER::EngineType detected =
|
||||
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
const bool isNvidia = (detected == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
// Last-writer-wins is fine — CheckHardwareInformation is deterministic.
|
||||
s_state.store(isNvidia ? 1 : 2, std::memory_order_release);
|
||||
return isNvidia;
|
||||
} catch (...) {
|
||||
// If detection throws (should not happen), fail safe to non-NVIDIA so
|
||||
// we never activate CUDA runtime on unknown hardware.
|
||||
s_state.store(2, std::memory_order_release);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace anscv_vendor_gate
|
||||
Reference in New Issue
Block a user