Add CPU/GPU gate and support new ANSALPR using OCR

This commit is contained in:
2026-04-12 17:16:16 +10:00
parent 27083a6530
commit 0a8aaed215
30 changed files with 1870 additions and 2166 deletions

View File

@@ -680,6 +680,19 @@ namespace ANSCENTER {
std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
std::vector<float> embedding;
// Defense-in-depth: this function uses m_gpuStream / cv::cuda::GpuMat
// upload path, which is only valid on NVIDIA hardware. Callers in
// Feature() and ExtractEmbeddings() already gate on engineType, but
// the method is public — refuse to run on AMD/Intel/CPU so we never
// touch m_gpuStream (lazy-initialized, nullptr on non-NVIDIA) or
// m_gpuRgb.upload() which would activate the CUDA runtime.
if (engineType != EngineType::NVIDIA_GPU) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"RunArcFace is NVIDIA-only; called on engineType="
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
return embedding;
}
// Early validation before locking
if (inputImage.empty()) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
@@ -701,6 +714,13 @@ namespace ANSCENTER {
return embedding;
}
if (!m_gpuStream || !m_trtEngine) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"GPU stream or TRT engine not available (engineType="
+ std::to_string(static_cast<int>(engineType)) + ")", __FILE__, __LINE__);
return embedding;
}
try {
// CPU preprocessing: resize + BGR→RGB before GPU upload
// Reduces PCIe transfer and eliminates GPU cvtColor/resize overhead
@@ -761,6 +781,17 @@ namespace ANSCENTER {
{
std::vector<std::vector<float>> embeddings;
// Defense-in-depth: TensorRT + cv::cuda::GpuMat batch path is NVIDIA-only.
// Callers in ExtractEmbeddings() already gate on engineType, but this is a
// public method — refuse to run on AMD/Intel/CPU so we never touch the
// TRT engine or cv::cuda primitives on non-NVIDIA hardware.
if (engineType != EngineType::NVIDIA_GPU) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"RunArcFaceBatch is NVIDIA-only; called on engineType="
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
return embeddings;
}
try {
// Early validation checks
if (!_isInitialized) {
@@ -775,6 +806,12 @@ namespace ANSCENTER {
return embeddings;
}
if (!m_gpuStream) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"GPU stream not initialized", __FILE__, __LINE__);
return embeddings;
}
if (faceROIs.empty()) {
return embeddings;
}

View File

@@ -97,14 +97,33 @@ public:
};
// Determine maxSlotsPerGpu based on GPU topology:
// 1 GPU → 1 (single slot, no round-robin needed)
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
// non-NVIDIA (AMD/Intel/CPU) → 1 (no TensorRT pool, never grows)
// 1 NVIDIA GPU → 1 (single slot, no round-robin needed)
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
//
// IMPORTANT: Must be gated on CheckHardwareInformation() first — calling
// cudaGetDeviceCount/cudaSetDevice/cudaMemGetInfo on non-NVIDIA hardware
// wakes up the CUDA runtime unnecessarily and, combined with DirectML on
// AMD, has been observed to trigger amdkmdag instability. Return 1 early
// on anything that isn't a detected NVIDIA GPU so the TRT pool is never
// exercised on those machines.
static int GetPoolMaxSlotsPerGpu() {
static int s_result = INT_MIN;
static std::mutex s_mutex;
std::lock_guard<std::mutex> lk(s_mutex);
if (s_result != INT_MIN) return s_result;
const ANSCENTER::EngineType detected =
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
if (detected != ANSCENTER::EngineType::NVIDIA_GPU) {
s_result = 1;
std::cout << "Info [FR GPU]: engineType=" << static_cast<int>(detected)
<< " — not NVIDIA, TRT pool disabled (slot=1), skipping CUDA probe"
<< std::endl;
return s_result;
}
int gpuCount = 0;
cudaGetDeviceCount(&gpuCount);
if (gpuCount <= 1) {
@@ -211,6 +230,26 @@ extern "C" ANSFR_API int CreateANSRFHandle(ANSCENTER::ANSFacialRecognition**
if (!Handle || !licenseKey || !configFilePath || !databaseFilePath || !recogniserFilePath) return -1;
// Log the detected vendor path so field triage between NVIDIA / AMD /
// Intel / CPU machines is trivial from the debug log. Mirrors the
// vendorTag logging already in ANSLPR_OD::LoadEngine and ANSOCR
// CreateANSOCRHandleEx.
{
ANSCENTER::EngineType detected =
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
const char* vendorTag =
detected == ANSCENTER::EngineType::NVIDIA_GPU ? "NVIDIA_GPU (TensorRT + CUDA preproc, SCRFD face detector)" :
detected == ANSCENTER::EngineType::AMD_GPU ? "AMD_GPU (ONNX Runtime / DirectML, OV face detector, NV12/CUDA DISABLED)" :
detected == ANSCENTER::EngineType::OPENVINO_GPU ? "OPENVINO_GPU (OpenVINO, OV face detector, NV12/CUDA DISABLED)" :
"CPU (ONNX Runtime / OpenVINO CPU, NV12/CUDA DISABLED)";
char buf[224];
snprintf(buf, sizeof(buf),
"[ANSFR] CreateANSRFHandle: detected engineType=%d [%s]\n",
static_cast<int>(detected), vendorTag);
OutputDebugStringA(buf);
std::cout << buf;
}
// Release existing handle if called twice (prevents leak from LabVIEW)
if (*Handle) {
if (UnregisterFRHandle(*Handle)) {