58 lines
2.5 KiB
C
58 lines
2.5 KiB
C
|
|
#pragma once
|
||
|
|
// ANSODVendorGate.h — Cached NVIDIA hardware check for ANSODEngine.dll.
|
||
|
|
//
|
||
|
|
// ANSODEngine.dll links against CUDA::cudart_static + CUDA::cublas +
|
||
|
|
// CUDA::cublasLt and hosts the TensorRT inference classes (ANSRTYOLO,
|
||
|
|
// TENSORRTOD, TENSORRTCL, TENSORRTSEG, TENSORRTPOSE, ANSSAM3, ANSYOLOV10RTOD,
|
||
|
|
// ANSYOLOV12RTOD, ANSTENSORRTPOSE) plus the NV12 preprocess helper and the
|
||
|
|
// TRT engine pool.
|
||
|
|
//
|
||
|
|
// The dllmain factory already hard-gates TRT class instantiation on
|
||
|
|
// NVIDIA_GPU and falls back to ONNX Runtime / OpenVINO on AMD/Intel/CPU.
|
||
|
|
// However, several support paths still call into the CUDA runtime
|
||
|
|
// unconditionally:
|
||
|
|
// • GetNumGPUs() / GetPoolMaxSlotsPerGpu() / CheckGPUVRAM() helpers
|
||
|
|
// (called from inside NVIDIA_GPU guards today, but safer to gate at
|
||
|
|
// source so a future refactor cannot accidentally wake up cudart on
|
||
|
|
// AMD/Intel).
|
||
|
|
// • A few case labels in the model-type switch instantiate TRT classes
|
||
|
|
// without an explicit NVIDIA_GPU check — they are currently unreachable
|
||
|
|
// due to upstream modelType rewriting, but leaving them unguarded
|
||
|
|
// creates a maintenance trap.
|
||
|
|
//
|
||
|
|
// Solution: a single process-wide cached predicate that evaluates
// CheckHardwareInformation() on first use and caches the verdict (threads
// racing on the very first call may each run the probe once). On
// AMD/Intel/CPU the predicate returns false and every gated site
// short-circuits before touching any CUDA API.
|
||
|
|
//
|
||
|
|
// Mirrors ANSCVVendorGate / ANSLPR_OD::isNvidiaEngine / ANSOCR factory gate
|
||
|
|
// / ANSFR CreateANSRFHandle vendor log. Keeps the four shipped DLLs on a
|
||
|
|
// single, auditable pattern.
|
||
|
|
|
||
|
|
#include "ANSLicense.h"
|
||
|
|
#include <atomic>
|
||
|
|
|
||
|
|
namespace ansod_vendor_gate {
|
||
|
|
|
||
|
|
// Lazily evaluates ANSLicenseHelper::CheckHardwareInformation() once and
|
||
|
|
// caches the result. Thread-safe via std::atomic<int> (0 = unknown,
|
||
|
|
// 1 = NVIDIA, 2 = non-NVIDIA). No std::call_once overhead on the hot
|
||
|
|
// inference path. Fails safe to non-NVIDIA on exception.
|
||
|
|
[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {
|
||
|
|
static std::atomic<int> s_state{0};
|
||
|
|
int cached = s_state.load(std::memory_order_acquire);
|
||
|
|
if (cached != 0) return cached == 1;
|
||
|
|
try {
|
||
|
|
const ANSCENTER::EngineType detected =
|
||
|
|
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||
|
|
const bool isNvidia = (detected == ANSCENTER::EngineType::NVIDIA_GPU);
|
||
|
|
s_state.store(isNvidia ? 1 : 2, std::memory_order_release);
|
||
|
|
return isNvidia;
|
||
|
|
} catch (...) {
|
||
|
|
s_state.store(2, std::memory_order_release);
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace ansod_vendor_gate
|