Add CPU/GPU gate and support new ANSALPR using OCR

2026-04-12 17:16:16 +10:00
parent 27083a6530
commit 0a8aaed215
30 changed files with 1870 additions and 2166 deletions
--- a/modules/ANSFR/ANSFaceRecognizer.cpp
+++ b/modules/ANSFR/ANSFaceRecognizer.cpp
@@ -680,6 +680,19 @@ namespace ANSCENTER {
 	std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
 		std::vector<float> embedding;

+		// Defense-in-depth: this function uses the m_gpuStream / cv::cuda::GpuMat
+		// upload path, which is only valid on NVIDIA hardware.  Callers in
+		// Feature() and ExtractEmbeddings() already gate on engineType, but
+		// the method is public — refuse to run on AMD/Intel/CPU so we never
+		// touch m_gpuStream (lazy-initialized, nullptr on non-NVIDIA) or call
+		// m_gpuRgb.upload(), which would activate the CUDA runtime.
+		if (engineType != EngineType::NVIDIA_GPU) {
+			_logger.LogError("ANSFaceRecognizer::RunArcFace",
+				"RunArcFace is NVIDIA-only; called on engineType="
+				+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
+			return embedding;
+		}
+
 		// Early validation before locking
 		if (inputImage.empty()) {
 			_logger.LogError("ANSFaceRecognizer::RunArcFace",
@@ -701,6 +714,13 @@ namespace ANSCENTER {
 			return embedding;
 		}

+		if (!m_gpuStream || !m_trtEngine) {
+			_logger.LogError("ANSFaceRecognizer::RunArcFace",
+				"GPU stream or TRT engine not available (engineType="
+				+ std::to_string(static_cast<int>(engineType)) + ")", __FILE__, __LINE__);
+			return embedding;
+		}
+
 		try {
 			// CPU preprocessing: resize + BGR→RGB before GPU upload
 			// Reduces PCIe transfer and eliminates GPU cvtColor/resize overhead
@@ -761,6 +781,17 @@ namespace ANSCENTER {
 	{
 		std::vector<std::vector<float>> embeddings;

+		// Defense-in-depth: TensorRT + cv::cuda::GpuMat batch path is NVIDIA-only.
+		// Callers in ExtractEmbeddings() already gate on engineType, but this is a
+		// public method — refuse to run on AMD/Intel/CPU so we never touch the
+		// TRT engine or cv::cuda primitives on non-NVIDIA hardware.
+		if (engineType != EngineType::NVIDIA_GPU) {
+			_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
+				"RunArcFaceBatch is NVIDIA-only; called on engineType="
+				+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
+			return embeddings;
+		}
+
 		try {
 			// Early validation checks
 			if (!_isInitialized) {
@@ -775,6 +806,12 @@ namespace ANSCENTER {
 				return embeddings;
 			}

+			if (!m_gpuStream) {
+				_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
+					"GPU stream not initialized", __FILE__, __LINE__);
+				return embeddings;
+			}
+
 			if (faceROIs.empty()) {
 				return embeddings;
 			}
--- a/modules/ANSFR/dllmain.cpp
+++ b/modules/ANSFR/dllmain.cpp
@@ -97,14 +97,33 @@ public:
 };

 // Determine maxSlotsPerGpu based on GPU topology:
-//   1 GPU            → 1  (single slot, no round-robin needed)
-//   >1 GPU, VRAM<24GB → 1  (round-robin: 1 slot per GPU)
-//   >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
+//   non-NVIDIA (AMD/Intel/CPU) → 1 (no TensorRT pool, never grows)
+//   1 NVIDIA GPU               → 1 (single slot, no round-robin needed)
+//   >1 NVIDIA GPU, VRAM<24GB   → 1 (round-robin: 1 slot per GPU)
+//   >1 NVIDIA GPU, VRAM≥24GB   → -1 (elastic: on-demand slot growth)
+//
+// IMPORTANT: The CUDA probe below must be gated on CheckHardwareInformation()
+// first — calling cudaGetDeviceCount/cudaSetDevice/cudaMemGetInfo on
+// non-NVIDIA hardware wakes up the CUDA runtime unnecessarily and, combined
+// with DirectML on AMD, has been observed to trigger amdkmdag instability.
+// Return 1 early on anything that isn't a detected NVIDIA GPU so the TRT pool
+// is never exercised on those machines.
 static int GetPoolMaxSlotsPerGpu() {
    static int s_result = INT_MIN;
    static std::mutex s_mutex;
    std::lock_guard<std::mutex> lk(s_mutex);
    if (s_result != INT_MIN) return s_result;
+
+    const ANSCENTER::EngineType detected =
+        ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
+    if (detected != ANSCENTER::EngineType::NVIDIA_GPU) {
+        s_result = 1;
+        std::cout << "Info [FR GPU]: engineType=" << static_cast<int>(detected)
+                  << " — not NVIDIA, TRT pool disabled (slot=1), skipping CUDA probe"
+                  << std::endl;
+        return s_result;
+    }
+
    int gpuCount = 0;
    cudaGetDeviceCount(&gpuCount);
    if (gpuCount <= 1) {
@@ -211,6 +230,26 @@ extern "C" ANSFR_API int		   CreateANSRFHandle(ANSCENTER::ANSFacialRecognition**

        if (!Handle || !licenseKey || !configFilePath || !databaseFilePath || !recogniserFilePath) return -1;

+        // Log the detected vendor path so field triage between NVIDIA / AMD /
+        // Intel / CPU machines is trivial from the debug log.  Mirrors the
+        // vendorTag logging already in ANSLPR_OD::LoadEngine and ANSOCR
+        // CreateANSOCRHandleEx.
+        {
+            ANSCENTER::EngineType detected =
+                ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
+            const char* vendorTag =
+                detected == ANSCENTER::EngineType::NVIDIA_GPU   ? "NVIDIA_GPU (TensorRT + CUDA preproc, SCRFD face detector)" :
+                detected == ANSCENTER::EngineType::AMD_GPU      ? "AMD_GPU (ONNX Runtime / DirectML, OV face detector, NV12/CUDA DISABLED)" :
+                detected == ANSCENTER::EngineType::OPENVINO_GPU ? "OPENVINO_GPU (OpenVINO, OV face detector, NV12/CUDA DISABLED)" :
+                                                                  "CPU (ONNX Runtime / OpenVINO CPU, NV12/CUDA DISABLED)";
+            char buf[224];
+            snprintf(buf, sizeof(buf),
+                "[ANSFR] CreateANSRFHandle: detected engineType=%d [%s]\n",
+                static_cast<int>(detected), vendorTag);
+            OutputDebugStringA(buf);
+            std::cout << buf;
+        }
+
        // Release existing handle if called twice (prevents leak from LabVIEW)
        if (*Handle) {
            if (UnregisterFRHandle(*Handle)) {