Add CPU/GPU gate and support new ANSALPR using OCR
This commit is contained in:
@@ -680,6 +680,19 @@ namespace ANSCENTER {
|
||||
std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
|
||||
std::vector<float> embedding;
|
||||
|
||||
// Defense-in-depth: this function uses the m_gpuStream / cv::cuda::GpuMat
|
||||
// upload path, which is only valid on NVIDIA hardware. Callers in
|
||||
// Feature() and ExtractEmbeddings() already gate on engineType, but
|
||||
// the method is public — refuse to run on AMD/Intel/CPU so we never
|
||||
// touch m_gpuStream (lazy-initialized, nullptr on non-NVIDIA) or
|
||||
// m_gpuRgb.upload(), which would activate the CUDA runtime.
|
||||
if (engineType != EngineType::NVIDIA_GPU) {
|
||||
_logger.LogError("ANSFaceRecognizer::RunArcFace",
|
||||
"RunArcFace is NVIDIA-only; called on engineType="
|
||||
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
|
||||
return embedding;
|
||||
}
|
||||
|
||||
// Early validation before locking
|
||||
if (inputImage.empty()) {
|
||||
_logger.LogError("ANSFaceRecognizer::RunArcFace",
|
||||
@@ -701,6 +714,13 @@ namespace ANSCENTER {
|
||||
return embedding;
|
||||
}
|
||||
|
||||
if (!m_gpuStream || !m_trtEngine) {
|
||||
_logger.LogError("ANSFaceRecognizer::RunArcFace",
|
||||
"GPU stream or TRT engine not available (engineType="
|
||||
+ std::to_string(static_cast<int>(engineType)) + ")", __FILE__, __LINE__);
|
||||
return embedding;
|
||||
}
|
||||
|
||||
try {
|
||||
// CPU preprocessing: resize + BGR→RGB before GPU upload
|
||||
// Reduces PCIe transfer and eliminates GPU cvtColor/resize overhead
|
||||
@@ -761,6 +781,17 @@ namespace ANSCENTER {
|
||||
{
|
||||
std::vector<std::vector<float>> embeddings;
|
||||
|
||||
// Defense-in-depth: TensorRT + cv::cuda::GpuMat batch path is NVIDIA-only.
|
||||
// Callers in ExtractEmbeddings() already gate on engineType, but this is a
|
||||
// public method — refuse to run on AMD/Intel/CPU so we never touch the
|
||||
// TRT engine or cv::cuda primitives on non-NVIDIA hardware.
|
||||
if (engineType != EngineType::NVIDIA_GPU) {
|
||||
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
|
||||
"RunArcFaceBatch is NVIDIA-only; called on engineType="
|
||||
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
try {
|
||||
// Early validation checks
|
||||
if (!_isInitialized) {
|
||||
@@ -775,6 +806,12 @@ namespace ANSCENTER {
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
if (!m_gpuStream) {
|
||||
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
|
||||
"GPU stream not initialized", __FILE__, __LINE__);
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
if (faceROIs.empty()) {
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
@@ -97,14 +97,33 @@ public:
|
||||
};
|
||||
|
||||
// Determine maxSlotsPerGpu based on the detected hardware vendor and GPU topology:
|
||||
// 1 GPU → 1 (single slot, no round-robin needed)
|
||||
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
|
||||
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
|
||||
// non-NVIDIA (AMD/Intel/CPU) → 1 (no TensorRT pool, never grows)
|
||||
// 1 NVIDIA GPU → 1 (single slot, no round-robin needed)
|
||||
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
|
||||
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
|
||||
//
|
||||
// IMPORTANT: Must be gated on CheckHardwareInformation() first — calling
|
||||
// cudaGetDeviceCount/cudaSetDevice/cudaMemGetInfo on non-NVIDIA hardware
|
||||
// wakes up the CUDA runtime unnecessarily and, combined with DirectML on
|
||||
// AMD, has been observed to trigger amdkmdag instability. Return 1 early
|
||||
// on anything that isn't a detected NVIDIA GPU so the TRT pool is never
|
||||
// exercised on those machines.
|
||||
static int GetPoolMaxSlotsPerGpu() {
|
||||
static int s_result = INT_MIN;
|
||||
static std::mutex s_mutex;
|
||||
std::lock_guard<std::mutex> lk(s_mutex);
|
||||
if (s_result != INT_MIN) return s_result;
|
||||
|
||||
const ANSCENTER::EngineType detected =
|
||||
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
if (detected != ANSCENTER::EngineType::NVIDIA_GPU) {
|
||||
s_result = 1;
|
||||
std::cout << "Info [FR GPU]: engineType=" << static_cast<int>(detected)
|
||||
<< " — not NVIDIA, TRT pool disabled (slot=1), skipping CUDA probe"
|
||||
<< std::endl;
|
||||
return s_result;
|
||||
}
|
||||
|
||||
int gpuCount = 0;
|
||||
cudaGetDeviceCount(&gpuCount);
|
||||
if (gpuCount <= 1) {
|
||||
@@ -211,6 +230,26 @@ extern "C" ANSFR_API int CreateANSRFHandle(ANSCENTER::ANSFacialRecognition**
|
||||
|
||||
if (!Handle || !licenseKey || !configFilePath || !databaseFilePath || !recogniserFilePath) return -1;
|
||||
|
||||
// Log the detected vendor path so field triage between NVIDIA / AMD /
|
||||
// Intel / CPU machines is trivial from the debug log. Mirrors the
|
||||
// vendorTag logging already in ANSLPR_OD::LoadEngine and ANSOCR
|
||||
// CreateANSOCRHandleEx.
|
||||
{
|
||||
ANSCENTER::EngineType detected =
|
||||
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
const char* vendorTag =
|
||||
detected == ANSCENTER::EngineType::NVIDIA_GPU ? "NVIDIA_GPU (TensorRT + CUDA preproc, SCRFD face detector)" :
|
||||
detected == ANSCENTER::EngineType::AMD_GPU ? "AMD_GPU (ONNX Runtime / DirectML, OV face detector, NV12/CUDA DISABLED)" :
|
||||
detected == ANSCENTER::EngineType::OPENVINO_GPU ? "OPENVINO_GPU (OpenVINO, OV face detector, NV12/CUDA DISABLED)" :
|
||||
"CPU (ONNX Runtime / OpenVINO CPU, NV12/CUDA DISABLED)";
|
||||
char buf[224];
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[ANSFR] CreateANSRFHandle: detected engineType=%d [%s]\n",
|
||||
static_cast<int>(detected), vendorTag);
|
||||
OutputDebugStringA(buf);
|
||||
std::cout << buf;
|
||||
}
|
||||
|
||||
// Release existing handle if called twice (prevents leak from LabVIEW)
|
||||
if (*Handle) {
|
||||
if (UnregisterFRHandle(*Handle)) {
|
||||
|
||||
Reference in New Issue
Block a user