#include "SCRFDFaceDetector.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
#include "Utility.h"
// NOTE(review): the original include target on this line was lost in extraction.
// <filesystem> is required by the std::filesystem::exists calls below — confirm against VCS.
#include <filesystem>
//#define FNS_DEBUG

namespace ANSCENTER {

// Initialization: (re)configures the SCRFD model, resolves the ONNX weight file and
// acquires a pooled TensorRT engine. Safe to call repeatedly — an already-loaded
// engine is kept; otherwise existing resources are torn down first.
// Returns false on any failure (missing weights, engine build failure, exception).
bool ANSSCRFDFD::Initialize(std::string licenseKey, ModelConfig modelConfig,
                            const std::string& modelZipFilePath,
                            const std::string& modelZipPassword,
                            std::string& labelMap)
{
    // Clean up existing resources before reinitialization, unless the engine is
    // already up (avoids releasing and re-acquiring the shared pool slot).
    const bool engineAlreadyLoaded = _isInitialized && m_trtEngine != nullptr;
    if (!engineAlreadyLoaded) Destroy();

    // Base class handles license validation and model-archive extraction.
    bool result = ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath,
                                        modelZipPassword, labelMap);
    if (!result) return false;

    labelMap = "Face"; // single-class detector
    _licenseValid = true;
    try {
        _modelConfig = modelConfig;
        _modelConfig.inpHeight = 640; // SCRFD fixed 640x640 network input
        _modelConfig.inpWidth = 640;
        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;

        std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");
        if (!std::filesystem::exists(onnxfile)) {
            this->_logger.LogError("ANSSCRFDFD::Initialize. Model scrfdface.onnx file does not exist",
                                   onnxfile, __FILE__, __LINE__);
            return false;
        }
        _modelFilePath = onnxfile;

        // Initialize TensorRT via the shared engine pool.
        if (!m_trtEngine) {
            m_options.precision = ANSCENTER::Precision::FP16;
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.calibrationBatchSize = 1;
            // Pool key identifies an engine by weights + precision + batch capacity.
            m_poolKey = { _modelFilePath, static_cast<int>(m_options.precision),
                          m_options.maxBatchSize };
            m_trtEngine = EnginePoolManager::instance().acquire(
                m_poolKey, m_options, _modelFilePath,
                SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            m_usingSharedPool = (m_trtEngine != nullptr);
            if (!m_trtEngine) {
                this->_logger.LogError("ANSSCRFDFD::Initialize. Unable to build or load TensorRT engine.",
                                       _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }

        // SCRFD head layout: 3 FPN levels (strides 8/16/32), 2 anchors per cell,
        // with 5-point facial landmarks.
        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        _isInitialized = true;
        return true;
    } catch (const std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Loads model weights from an encrypted archive and acquires a pooled TensorRT
// engine. Mirrors Initialize() except that license/labelMap handling is done by
// the base class only.
bool ANSSCRFDFD::LoadModel(const std::string& modelZipFilePath,
                           const std::string& modelZipPassword)
{
    try {
        // We need the base class to resolve _modelFolder from the archive.
        bool result = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;

        const bool engineAlreadyLoaded = _isInitialized && m_trtEngine != nullptr;
        (void)engineAlreadyLoaded; // computed for parity with Initialize(); engine reuse is via the null check below

        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _movementObjects.clear();
        _retainDetectedFaces = 0;

        std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");
        if (!std::filesystem::exists(onnxfile)) {
            this->_logger.LogError("ANSSCRFDFD::Initialize. Model scrfdface.onnx file does not exist",
                                   onnxfile, __FILE__, __LINE__);
            return false;
        }
        _modelFilePath = onnxfile;

        if (!m_trtEngine) {
            m_options.precision = ANSCENTER::Precision::FP16;
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            // Consistency fix: Initialize() sets this before acquiring; the
            // original LoadModel() left it at whatever value m_options carried.
            m_options.calibrationBatchSize = 1;
            m_poolKey = { _modelFilePath, static_cast<int>(m_options.precision),
                          m_options.maxBatchSize };
            m_trtEngine = EnginePoolManager::instance().acquire(
                m_poolKey, m_options, _modelFilePath,
                SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            m_usingSharedPool = (m_trtEngine != nullptr);
            if (!m_trtEngine) {
                this->_logger.LogError("ANSSCRFDFD::LoadModel. Unable to build or load TensorRT engine.",
                                       _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }

        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        _isInitialized = true;
        return true;
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Resolves the ONNX weight path from an already-extracted model folder.
// Falls back to the default model name "scrfdface" when modelName is empty.
// Note: unlike Initialize()/LoadModel(), this does NOT acquire a TensorRT engine.
bool ANSSCRFDFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
                                     std::string modelName, std::string className,
                                     const std::string& modelFolder, std::string& labelMap)
{
    try {
        // We need the base class to resolve _modelFolder.
        bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig, modelName,
                                                     className, modelFolder, labelMap);
        if (!result) return false;

        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "scrfdface";
        }
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _movementObjects.clear();
        _retainDetectedFaces = 0;

        std::string modelFullName = _modelName + ".onnx";
        std::string onnxfile = CreateFilePath(_modelFolder, modelFullName);
        if (std::filesystem::exists(onnxfile)) {
            _modelFilePath = onnxfile;
            this->_logger.LogDebug("ANSSCRFDFD::LoadModel. Loading scrfdface weight",
                                   _modelFilePath, __FILE__, __LINE__);
        } else {
            this->_logger.LogError("ANSSCRFDFD::LoadModel. Model scrfdface.onnx file is not exist",
                                   _modelFilePath, __FILE__, __LINE__);
            return false;
        }
        return true;
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Builds (or rebuilds) the serialized TensorRT engine next to the ONNX weights.
// On success, optimizedModelFolder receives the engine output directory.
// NOTE(review): the fp16 parameter is ignored — precision is forced to FP16
// regardless; confirm whether FP32 support was intended.
bool ANSSCRFDFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder)
{
    std::lock_guard<std::mutex> lock(_mutex);
    if (!FileExist(_modelFilePath)) {
        optimizedModelFolder = "";
        return false;
    }
    optimizedModelFolder = GetParentFolder(_modelFilePath);

    // Check if the engine already exists to avoid reinitializing.
    if (!m_trtEngine) {
        m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
        m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.maxInputHeight = _modelConfig.maxInputHeight;
        m_options.minInputHeight = _modelConfig.minInputHeight;
        m_options.optInputHeight = _modelConfig.optInputHeight;
        m_options.maxInputWidth = _modelConfig.maxInputWidth;
        m_options.minInputWidth = _modelConfig.minInputWidth;
        m_options.optInputWidth = _modelConfig.optInputWidth;
        m_options.engineFileDir = optimizedModelFolder;
        m_options.precision = Precision::FP16;
        // Create the TensorRT inference engine. The concrete engine type is taken
        // from the member's declared type (its template argument was lost in the
        // extracted source).
        using EngineT = typename std::decay_t<decltype(m_trtEngine)>::element_type;
        m_trtEngine = std::make_shared<EngineT>(m_options);
    }

    // Build the TensorRT engine (with retry on transient failures).
    auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
    if (!succ) {
        const std::string errMsg = "Error: Unable to build the TensorRT engine. "
                                   "Try increasing TensorRT log severity to kVERBOSE.";
        this->_logger.LogError("ANSSCRFDFD::OptimizeModel", errMsg, __FILE__, __LINE__);
        return false;
    }
    std::string optimizedFaceAttributeModelFolder;
    bool result = ANSFDBase::OptimizeModel(fp16, optimizedFaceAttributeModelFolder);
    return result;
}

// Convenience overload: runs detection with the default camera id "CustomCam".
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, bool useDynamicImage,
                                             bool validateFace, bool facelivenessCheck)
{
    // Delegate to the camera-id overload to keep the liveness branching in one place.
    return RunInference(input, "CustomCam", useDynamicImage, validateFace, facelivenessCheck);
}

// Runs face detection, optionally followed by a liveness validation pass over
// the detected faces.
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, const std::string& camera_id,
                                             bool useDynamicImage, bool validateFace,
                                             bool facelivenessCheck)
{
    std::vector<Object> rawFaceResults = Inference(input, camera_id, useDynamicImage, validateFace);
    if (!facelivenessCheck) {
        return rawFaceResults;
    }
    return ValidateLivenessFaces(input, rawFaceResults, camera_id);
}

// Main detection entry point.
// Phase 1 validates state and prepares the working image under the mutex;
// phase 2 runs Detect() with the mutex released (the engine pool serializes GPU
// access itself); phase 3 post-processes detections on per-call local data.
// When useDynamicImage is false the input is treated as a cropped face: a
// replicated 200px border is added and tall images are downscaled to 1280 rows.
std::vector<Object> ANSSCRFDFD::Inference(const cv::Mat& input, const std::string& camera_id,
                                          bool useDynamicImage, bool validateFace)
{
    // Phase 1: Validation + image preprocessing (brief lock)
    cv::Mat im;
    bool croppedFace;
    float scoreThreshold;
    {
        std::lock_guard<std::mutex> lock(_mutex);
        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return {};
        }
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return {};
        }
        croppedFace = !useDynamicImage;
        scoreThreshold = _modelConfig.detectionScoreThreshold;
        if (croppedFace) {
            constexpr int border = 200;
            cv::copyMakeBorder(input, im, border, border, border, border, cv::BORDER_REPLICATE);
            if (im.rows > 1280) {
                const float aspectRatio = static_cast<float>(im.cols) / static_cast<float>(im.rows);
                constexpr int newHeight = 1280;
                const int newWidth = static_cast<int>(newHeight * aspectRatio);
                cv::resize(im, im, cv::Size(newWidth, newHeight));
            }
        } else {
            im = input; // shallow reference — input is not modified below
        }
    }

    // Phase 2: Detect faces (mutex released — Detect manages its own brief locks
    // around GPU inference)
    std::vector<Object> detectedFaces;
    try {
        detectedFaces = Detect(im);
    } catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::Inference", e.what(), __FILE__, __LINE__);
        return {};
    } catch (...) {
        _logger.LogFatal("ANSSCRFDFD::Inference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
    if (detectedFaces.empty()) {
        return {};
    }

    // Phase 3: Process detected faces (operates on per-call local data — no shared state)
    const int originalWidth = croppedFace ? input.cols : 0;
    const int originalHeight = croppedFace ? input.rows : 0;
    constexpr int border = 200;
    constexpr float borderF = 200.0f;
    // NOTE(review): when the bordered image was downscaled to 1280 rows above,
    // the fixed 200px offset subtracted below is no longer in the same scale as
    // the detections — confirm cropped inputs can never exceed 1280 rows.

    // NV12 affine warp: precompute scale factors (display-res -> full-res NV12)
    float nv12ScaleX = 1.f, nv12ScaleY = 1.f;
    int nv12FullW = 0, nv12FullH = 0;
    bool nv12AffineAvailable = false;
    const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
    if (!croppedFace && m_nv12Helper.isCudaContextHealthy(_logger, "SCRFD")) {
        auto* gpuData = tl_currentGpuFrame();
        // pixelFormat 23 — presumably AV_PIX_FMT_NV12 (FFmpeg numbering); confirm
        if (gpuData && gpuData->pixelFormat == 23 && gpuData->width > 0 && gpuData->height > 0) {
            nv12ScaleX = static_cast<float>(gpuData->width) / im.cols;
            nv12ScaleY = static_cast<float>(gpuData->height) / im.rows;
            nv12FullW = gpuData->width;
            nv12FullH = gpuData->height;
            nv12AffineAvailable = true;
        }
    }

    std::vector<Object> output;
    output.reserve(detectedFaces.size());
    for (auto& face : detectedFaces) {
        if (face.confidence <= scoreThreshold) {
            continue;
        }
        if (validateFace && !isValidFace(face.polygon, face.box, 27)) {
            continue;
        }

        // Get face mask — try NV12 affine warp first, fall back to CPU warpAffine
        cv::Mat mask;
        cv::cuda::GpuMat gpuMask;
        if (nv12AffineAvailable && face.polygon.size() == 5) {
            // Canonical 5-point landmark template scaled to the 112x112 aligned
            // face crop; computed once per process.
            static const std::vector<cv::Point2f> kTemplate112 = []() {
                const std::vector<cv::Point2f> face_template = {
                    {0.34191607f, 0.46157411f},
                    {0.65653393f, 0.45983393f},
                    {0.50022500f, 0.64050536f},
                    {0.37097589f, 0.82469196f},
                    {0.63151696f, 0.82325089f}
                };
                std::vector<cv::Point2f> tpl;
                tpl.reserve(5);
                for (const auto& pt : face_template)
                    tpl.emplace_back(pt.x * 112.0f, pt.y * 112.0f);
                return tpl;
            }();
            // Compute affine matrix on CPU (fast ~0.01ms)
            cv::Mat affineMatrix = cv::estimateAffinePartial2D(face.polygon, kTemplate112);
            if (!affineMatrix.empty()) {
                auto nv12Face = m_nv12Helper.tryNV12AffineWarp(
                    im, inferenceGpu, affineMatrix, 112, 112,
                    nv12ScaleX, nv12ScaleY, _logger, "SCRFD");
                if (nv12Face.succeeded) {
                    // Log the first successful NV12 affine warp (once per process —
                    // the flag is a function-local static shared by all instances).
                    static bool s_nv12AffineLogged = false;
                    if (!s_nv12AffineLogged) {
                        s_nv12AffineLogged = true;
                        _logger.LogInfo("ANSSCRFDFD::Inference",
                            "NV12 affine warp ACTIVE: face aligned from " +
                            std::to_string(nv12FullW) + "x" + std::to_string(nv12FullH) +
                            " NV12 -> 112x112 BGR (display=" +
                            std::to_string(im.cols) + "x" + std::to_string(im.rows) +
                            " scaleX=" + std::to_string(nv12ScaleX) +
                            " scaleY=" + std::to_string(nv12ScaleY) + ")",
                            __FILE__, __LINE__);
                    }
                    mask = std::move(nv12Face.alignedFaceBGR);
                    gpuMask = std::move(nv12Face.gpuAlignedFace);
                }
            }
        }
        // CPU fallback
        if (mask.empty()) {
            mask = Preprocess(im, face.polygon, im);
        }
        if (mask.empty()) {
            _logger.LogError("ANSSCRFDFD::Inference", "Cannot get mask image", __FILE__, __LINE__);
            continue;
        }

        // Build result object
        Object result;
        result.classId = 0;
        result.className = "Face";
        result.confidence = face.confidence;
        result.cameraId = camera_id;
        result.polygon = std::move(face.polygon);
        result.mask = std::move(mask);
        result.gpuMask = std::move(gpuMask);
        if (croppedFace) {
            // Adjust coordinates back into the original (un-bordered) frame.
            const int x1_new = std::max(0, face.box.x - border);
            const int y1_new = std::max(0, face.box.y - border);
            const int x2_new = std::min(originalWidth, face.box.x + face.box.width - border);
            const int y2_new = std::min(originalHeight, face.box.y + face.box.height - border);
            result.box = cv::Rect(x1_new, y1_new,
                                  std::max(0, x2_new - x1_new),
                                  std::max(0, y2_new - y1_new));
            result.kps.reserve(face.kps.size());
            for (const auto& pt : face.kps) {
                result.kps.emplace_back(pt - borderF);
            }
        } else {
            result.box = face.box;
            result.kps = std::move(face.kps);
        }
        output.push_back(std::move(result));
    }
    return output;
}

// Detection over a single prioritized sub-region per call ("slide screen"
// scanning): each invocation advances _currentPriority, detects inside the
// corresponding region, and returns square face crops scaled to 112px.
// Small inputs (<=300px on a side) are treated as cropped faces and padded.
std::vector<Object> ANSSCRFDFD::InferenceDynamic(const cv::Mat& input, const std::string& camera_id)
{
    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<Object> output;
    try {
        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return output;
        }
        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return output;
        }
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return output;
        }

        bool croppedFace = (input.cols <= 300 || input.rows <= 300);
        cv::Mat im;
        try {
            if (croppedFace) {
                cv::copyMakeBorder(input, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
            } else {
                im = input.clone();
            }
        } catch (const std::exception& e) {
            _logger.LogError("ANSSCRFDFD::Inference",
                             std::string("copyMakeBorder failed: ") + e.what(), __FILE__, __LINE__);
            return output;
        }

        const int originalWidth = input.cols;
        const int originalHeight = input.rows;

        // Presumably populates the region-priority state consumed below — the
        // returned sections are not used directly here; confirm side effects.
        auto sections = createSlideScreens(im);
        (void)sections;

        // Advance the scan priority, wrapping back to the highest-priority region.
        int lowestPriority = getLowestPriorityRegion();
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        } else {
            _currentPriority++;
        }
        cv::Rect regionByPriority = getRegionByPriority(_currentPriority);
        _detectedArea = regionByPriority;
#ifdef FNS_DEBUG
        cv::Mat draw = input.clone();
        cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2);
#endif

        std::vector<Object> filteredFaceObjects;
        if (_detectedArea.width > 50 && _detectedArea.height > 50) {
            try {
                cv::Mat activeFrame = im(_detectedArea).clone();
                auto rawDetections = Detect(activeFrame);
                // Map region-local boxes back into full-image coordinates.
                filteredFaceObjects = AdjustDetectedBoundingBoxes(rawDetections, _detectedArea,
                                                                  im.size(), 0.9);
#ifdef FNS_DEBUG
                cv::imshow("Active Area", activeFrame);
                cv::waitKey(1);
#endif
            } catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference",
                                 std::string("Detect() failed: ") + e.what(), __FILE__, __LINE__);
                return output;
            }
        }

        for (const auto& face : filteredFaceObjects) {
            try {
                if (face.confidence < _modelConfig.detectionScoreThreshold) continue;
#ifdef FNS_DEBUG
                // draw landmarks
                for (cv::Point2f point : face.polygon) {
                    cv::circle(draw, cv::Point(point.x + _detectedArea.x, point.y + _detectedArea.y),
                               2, cv::Scalar(0, 255, 0), -1);
                }
#endif
                if (!isValidFace(face.polygon, face.box, 27, _detectedArea.x, _detectedArea.y))
                    continue;

                Object result;
                int x_min = face.box.x;
                int y_min = face.box.y;
                int x_max = x_min + face.box.width;
                int y_max = y_min + face.box.height;
                if (croppedFace) {
                    // Remove the 200px replicated border added above.
                    x_min = std::max(0, x_min - 200);
                    y_min = std::max(0, y_min - 200);
                    x_max = std::min(originalWidth, x_max - 200);
                    y_max = std::min(originalHeight, y_max - 200);
                }
                // Re-center into a square crop of side 2*c clamped to the image.
                int width_half = std::abs((x_max - x_min) / 2);
                int height_half = std::abs((y_max - y_min) / 2);
                int xc = x_min + width_half;
                int yc = y_min + height_half;
                int c = std::max(width_half, height_half);
                int x1_new = std::max(0, xc - c);
                int y1_new = std::max(0, yc - c);
                int x2_new = std::min(originalWidth, xc + c);
                int y2_new = std::min(originalHeight, yc + c);

                result.classId = 0;
                result.className = "Face";
                result.confidence = face.confidence;
                result.box = cv::Rect(x1_new, y1_new, x2_new - x1_new, y2_new - y1_new);
                result.kps = face.kps;
                result.cameraId = camera_id;
#ifdef FNS_DEBUG
                cv::rectangle(draw, result.box, cv::Scalar(0, 0, 255), 2);
#endif
                try {
                    result.mask = GetCroppedFaceScale(im, x1_new, y1_new, x2_new, y2_new, 112);
                } catch (const std::exception& e) {
                    _logger.LogError("ANSSCRFDFD::Inference",
                                     std::string("GetCroppedFaceScale failed: ") + e.what(),
                                     __FILE__, __LINE__);
                    continue;
                }
                if (!result.mask.empty()) {
                    output.push_back(result);
                }
            } catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference",
                                 std::string("Processing one face failed: ") + e.what(),
                                 __FILE__, __LINE__);
                continue;
            }
        }
#ifdef FNS_DEBUG
        cv::resize(draw, draw, cv::Size(1920, 1080));
        cv::imshow("Detected Areas", draw);
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    } catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", e.what(), __FILE__, __LINE__);
    } catch (...) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", "Unknown exception occurred",
                         __FILE__, __LINE__);
    }
    return output;
}

// Core SCRFD forward pass: letterboxes the input to the network size (NV12 GPU
// fast path when available, CPU fallback otherwise), runs TensorRT inference,
// then decodes + NMS-filters the raw head outputs into face Objects.
// The mutex is held only for validation and for postprocessing (which touches
// the shared center_points cache).
std::vector<Object> ANSSCRFDFD::Detect(const cv::Mat& input)
{
    // Phase 1: Validation + engine dims (brief lock)
    int net_h, net_w;
    float imgHeight, imgWidth;
    {
        std::lock_guard<std::mutex> lock(_mutex);
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            this->_logger.LogError("ANSSCRFDFD::Detect", "Invalid input image", __FILE__, __LINE__);
            return {};
        }
        if (!m_trtEngine) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "TensorRT engine not initialized",
                                   __FILE__, __LINE__);
            return {};
        }
        imgHeight = static_cast<float>(input.rows);
        imgWidth = static_cast<float>(input.cols);

        // Get and validate expected input dims
        auto inputDims = m_trtEngine->getInputDims();
        if (inputDims.empty() || inputDims[0].nbDims < 3) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Invalid input dimensions",
                                   __FILE__, __LINE__);
            return {};
        }
        net_h = inputDims[0].d[1];
        net_w = inputDims[0].d[2];
        // Sanity check against configured INPUT_H/INPUT_W
        if (net_h != INPUT_H || net_w != INPUT_W) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect",
                                   "Engine input dims mismatch with configured INPUT_H/INPUT_W",
                                   __FILE__, __LINE__);
            return {};
        }
    }

    // Compute scale and padding (fully local math — no lock needed)
    const float w_r = static_cast<float>(net_w) / imgWidth;
    const float h_r = static_cast<float>(net_h) / imgHeight;
    const float r = std::min(w_r, h_r);
    const int new_unpad_w = static_cast<int>(imgWidth * r);
    const int new_unpad_h = static_cast<int>(imgHeight * r);
    const int pad_w = net_w - new_unpad_w; // >= 0
    const int pad_h = net_h - new_unpad_h; // >= 0
    const int dw = pad_w / 2;
    const int dh = pad_h / 2;

    SCRFDScaleParams scale_params;
    scale_params.ratio = r;
    scale_params.dw = dw;
    scale_params.dh = dh;
    scale_params.flag = true;

    // Phase 2: CUDA preprocessing + inference (mutex released — pool dispatches to
    // an idle GPU slot)
    std::vector<std::vector<cv::cuda::GpuMat>> inputs;
    bool usedNV12 = false;
    try {
        // Clear any sticky CUDA error from transient graph-capture failures
        cudaError_t priorErr = cudaGetLastError();
        if (priorErr != cudaSuccess) {
            this->_logger.LogWarn("ANSSCRFDFD::Detect",
                std::string("Cleared prior CUDA error before SCRFD preprocessing: ") +
                cudaGetErrorString(priorErr), __FILE__, __LINE__);
        }

        // Try NV12 fast path first (fused NV12->RGB + center-padded letterbox)
        const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
        auto nv12 = m_nv12Helper.tryNV12(input, inferenceGpu, net_w, net_h,
                                         NV12PreprocessHelper::scrfdCenterLetterboxLauncher(dw, dh),
                                         _logger, "SCRFD");
        if (nv12.succeeded) {
            inputs = {{ std::move(nv12.gpuRGB) }};
            usedNV12 = true;
        }
        // else if nv12.useBgrFullRes: BGR full-res path — intentionally falls
        // through to the standard CPU path below using the original input.

        if (!usedNV12) {
            // CPU center-padded letterbox + BGR->RGB, then upload the small image
            cv::Mat srcImg;
            if (input.channels() == 1) {
                cv::cvtColor(input, srcImg, cv::COLOR_GRAY2BGR);
            } else if (input.channels() == 3) {
                srcImg = input;
            } else {
                this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count",
                                       __FILE__, __LINE__);
                return {};
            }
            // CPU resize to unpadded size
            cv::Mat cpuResized;
            if (srcImg.rows != new_unpad_h || srcImg.cols != new_unpad_w) {
                cv::resize(srcImg, cpuResized, cv::Size(new_unpad_w, new_unpad_h), 0, 0,
                           cv::INTER_LINEAR);
            } else {
                cpuResized = srcImg;
            }
            // CPU center-pad to net_w x net_h
            cv::Mat cpuPadded(net_h, net_w, CV_8UC3, cv::Scalar(0, 0, 0));
            cpuResized.copyTo(cpuPadded(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
            // CPU BGR -> RGB
            cv::Mat cpuRGB;
            cv::cvtColor(cpuPadded, cpuRGB, cv::COLOR_BGR2RGB);
            // Upload small padded image to GPU
            cv::cuda::Stream stream;
            cv::cuda::GpuMat d_padded;
            d_padded.upload(cpuRGB, stream);
            stream.waitForCompletion();
            std::vector<cv::cuda::GpuMat> inputVec;
            inputVec.emplace_back(std::move(d_padded));
            inputs.emplace_back(std::move(inputVec));
        }
        m_nv12Helper.tickInference();
    } catch (const std::exception& e) {
        this->_logger.LogError("ANSSCRFDFD::Detect",
                               std::string("CUDA preprocessing failed: ") + e.what(),
                               __FILE__, __LINE__);
        return {};
    }

    std::vector<std::vector<std::vector<float>>> featureVectors;
    try {
        if (!m_trtEngine->runInference(inputs, featureVectors)) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Inference failed", __FILE__, __LINE__);
            return {};
        }
    } catch (const std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Detect",
                               std::string("runInference exception: ") + e.what(),
                               __FILE__, __LINE__);
        return {};
    }

    // Phase 3: Postprocessing (brief lock — generate_bboxes_kps uses center_points)
    std::vector<Object> filteredFaceObjects;
    {
        std::lock_guard<std::mutex> lock(_mutex);
        try {
            std::vector<Object> proposedFaceObjects;
            this->generate_bboxes_kps(scale_params, proposedFaceObjects, featureVectors[0],
                                      _modelConfig.detectionScoreThreshold, imgHeight, imgWidth);
            this->nms_bboxes_kps(proposedFaceObjects, filteredFaceObjects,
                                 _modelConfig.modelMNSThreshold, 400);
        } catch (const std::exception& e) {
            this->_logger.LogError("ANSSCRFDFD::Detect",
                                   std::string("Post-processing failed: ") + e.what(),
                                   __FILE__, __LINE__);
            return {};
        }
    }
    return filteredFaceObjects;
}

// Legacy full-frame inference path with optional movement-guided ROIs.
// (Method name retains its historical misspelling — it is part of the public
// interface.)
std::vector<Object> ANSSCRFDFD::TensorRTInferene(const cv::Mat& inputImage,
                                                 const std::string& camera_id,
                                                 bool useDynamicImage)
{
    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<Object> output;
    if (!_licenseValid) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Invalid license", __FILE__, __LINE__);
        return output;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Model is not initialized",
                               __FILE__, __LINE__);
        return output;
    }
    try {
        // 0. Validate / resize input
        if (inputImage.empty()) return output;
        if ((inputImage.cols < 10) || (inputImage.rows < 10)) return output;

        bool croppedFace = false; // true when the input is a small cropped-face image
        cv::Mat im = inputImage.clone();
        int orginalHeight = im.rows;
        int orginalWidth = im.cols;
        if ((inputImage.size[0] <= 300) || (inputImage.size[1] <= 300)) croppedFace = true;
        if (croppedFace)
            cv::copyMakeBorder(inputImage, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);

        // Build the list of regions to scan: movement-derived ROIs when dynamic,
        // otherwise the whole frame.
        std::vector<cv::Rect> activeROIs;
        if (useDynamicImage) {
            auto movementResults = DetectMovement(im, camera_id);
            std::vector<Object> movementObjects;
            if ((!movementResults.empty()) && (movementResults.size() < 12)) {
                movementObjects.insert(movementObjects.end(),
                                       movementResults.begin(), movementResults.end());
            }
            // Always carry over previously tracked movement objects.
            if (!_movementObjects.empty()) {
                movementObjects.insert(movementObjects.end(),
                                       _movementObjects.begin(), _movementObjects.end());
            }
            activeROIs.clear();
            if (!movementObjects.empty()) {
                auto localActiveROIs = GenerateFixedROIs(movementObjects,
                                                         _modelConfig.inpHeight,
                                                         _modelConfig.inpWidth,
                                                         im.cols, im.rows);
                activeROIs.insert(activeROIs.end(), localActiveROIs.begin(), localActiveROIs.end());
            } else {
                activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // use the original image
            }
            if (activeROIs.empty()) {
                return output;
            }
            UpdateAndFilterDetectionObjects(_movementObjects, 80);
        } else {
            activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // use the original image
        }
#ifdef FACEDEBUG
        cv::Mat draw = im.clone();
        for (int i = 0; i < activeROIs.size(); i++) {
            cv::rectangle(draw, activeROIs[i], cv::Scalar(0, 0, 255), 2);
        }
#endif
        for (size_t j = 0; j < activeROIs.size(); j++) {
            cv::Rect activeROI = activeROIs[j];
            activeROI.x = std::max(0, activeROI.x);
            activeROI.y = std::max(0, activeROI.y);
            activeROI.width = std::min(im.cols, activeROI.width);
            activeROI.height = std::min(im.rows, activeROI.height);
            cv::Mat frame = im(activeROI).clone();
            std::vector<Object> filteredFaceObjects = Detect(frame);

            // Return the detected objects above the score threshold.
            for (size_t i = 0; i < filteredFaceObjects.size(); i++) {
                if (filteredFaceObjects[i].confidence <= _modelConfig.detectionScoreThreshold)
                    continue;
#ifdef FACEDEBUG
                cv::Rect faceRect;
                faceRect.x = filteredFaceObjects[i].box.x + activeROI.x;
                faceRect.y = filteredFaceObjects[i].box.y + activeROI.y;
                faceRect.width = filteredFaceObjects[i].box.width;
                faceRect.height = filteredFaceObjects[i].box.height;
                cv::rectangle(draw, faceRect, cv::Scalar(225, 255, 0), 2);
#endif
                // BUG FIX(review): the original wrote
                //   if (isValidFace(polygon, box), 27)
                // — a comma expression whose value is the constant 27 (always
                // true), so the face-validity check never rejected anything.
                // The 27 belongs inside the call, as in the other call sites.
                if (!isValidFace(filteredFaceObjects[i].polygon, filteredFaceObjects[i].box, 27))
                    continue;

                Object result;
                // 0. Face bounding box in full-image coordinates
                int x_min = filteredFaceObjects[i].box.x + activeROI.x;
                int y_min = filteredFaceObjects[i].box.y + activeROI.y;
                int x_max = filteredFaceObjects[i].box.width + filteredFaceObjects[i].box.x + activeROI.x;
                int y_max = filteredFaceObjects[i].box.height + filteredFaceObjects[i].box.y + activeROI.y;
#ifdef FACEDEBUG
                for (cv::Point2f point : filteredFaceObjects[i].polygon) {
                    cv::circle(draw, cv::Point(point.x + activeROI.x, point.y + activeROI.y),
                               2, cv::Scalar(0, 255, 0), -1);
                }
#endif
                if (croppedFace) {
                    // Remove the 200px replicated border added above.
                    x_min = std::max(0, x_min - 200);
                    y_min = std::max(0, y_min - 200);
                    x_max = std::min(orginalWidth, x_max - 200);
                    y_max = std::min(orginalHeight, y_max - 200);
                }
                // 1. Centered coordinates and half-dimensions
                int width_half = std::abs((x_max - x_min) / 2);
                int height_half = std::abs((y_max - y_min) / 2);
                int xc = x_min + width_half;
                int yc = y_min + height_half;
                int c = std::max(width_half, height_half);
                // 2. Square box centered at (xc, yc), clamped to the image
                int x1_new = std::max(0, xc - c);
                int y1_new = std::max(0, yc - c);
                int x2_new = std::min(orginalWidth, xc + c);
                int y2_new = std::min(orginalHeight, yc + c);
                // 3. Populate the result
                result.classId = 0;
                result.className = "Face";
                result.confidence = filteredFaceObjects[i].confidence;
                result.box.x = x1_new;
                result.box.y = y1_new;
                result.box.width = x2_new - x1_new;
                result.box.height = y2_new - y1_new;
                result.mask = GetCroppedFaceScale(inputImage, x1_new, y1_new, x2_new, y2_new, 112);
                result.kps = filteredFaceObjects[i].kps; // landmarks as x,y pairs
                result.cameraId = camera_id;
                if (result.mask.empty())
                    continue;
                output.push_back(result);

                if (useDynamicImage) {
                    result.extraInfo = "0";
                    // BUG FIX(review): the original used std::find_if with a
                    // predicate that ignored its element argument and evaluated
                    // ContainsIntersectingObject(_movementObjects, result) for
                    // every element — i.e. it replaced the FIRST tracked object
                    // whenever ANY intersection existed. The equivalent explicit
                    // form below preserves that observable behavior.
                    // NOTE(review): a per-element intersection test is probably
                    // what was intended — needs a pairwise helper to implement.
                    if (!_movementObjects.empty() &&
                        ContainsIntersectingObject(_movementObjects, result)) {
                        _movementObjects.front() = result;
                    } else {
                        _movementObjects.push_back(result);
                    }
                }
            }
            frame.release();
        }
        im.release();
#ifdef FACEDEBUG
        cv::imshow("Combined Detected Areas", draw); // Debugging: display combined detected areas
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::TensorRTInferene", e.what(), __FILE__, __LINE__);
        return output;
    }
}

// Destructor: releases the engine (pool slot or owned instance) and helper state.
ANSSCRFDFD::~ANSSCRFDFD()
{
    try {
        Destroy();
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Destroy", e.what(), __FILE__, __LINE__);
    }
}

// Tears down all owned resources. A pooled engine is released back to the
// EnginePoolManager; a privately built engine is simply dropped. Returns false
// only if teardown itself throws.
bool ANSSCRFDFD::Destroy()
{
    try {
        _isInitialized = false;
        _licenseValid = false;
        _modelFilePath.clear();
        m_nv12Helper.destroy();
        if (m_usingSharedPool) {
            EnginePoolManager::instance().release(m_poolKey);
            m_usingSharedPool = false;
        }
        m_trtEngine.reset(); // safe no-op when already null
        return true;
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// SCRFD implementation (private)

// Letterbox resize: scales `mat` to fit (target_width x target_height) while
// preserving aspect ratio, centers it on a black canvas, and records the scale
// ratio and padding offsets in scale_params for later box un-mapping.
void ANSSCRFDFD::resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs, int target_height,
                                int target_width, SCRFDScaleParams& scale_params)
{
    if (mat.empty()) return;
    int img_height = static_cast<int>(mat.rows);
    int img_width = static_cast<int>(mat.cols);

    mat_rs = cv::Mat(target_height, target_width, CV_8UC3, cv::Scalar(0, 0, 0));
    // scale ratio (new / old), new_shape(h,w)
    float w_r = (float)target_width / (float)img_width;
    float h_r = (float)target_height / (float)img_height;
    float r = std::min(w_r, h_r);
    // compute padding
    int new_unpad_w = static_cast<int>((float)img_width * r);  // floor
    int new_unpad_h = static_cast<int>((float)img_height * r); // floor
    int pad_w = target_width - new_unpad_w;  // >= 0
    int pad_h = target_height - new_unpad_h; // >= 0
    int dw = pad_w / 2;
    int dh = pad_h / 2;
    // resize without distortion, then paste centered
    cv::Mat new_unpad_mat;
    cv::resize(mat, new_unpad_mat, cv::Size(new_unpad_w, new_unpad_h));
    new_unpad_mat.copyTo(mat_rs(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
    // record scale params
    scale_params.ratio = r;
    scale_params.dw = dw;
    scale_params.dh = dh;
    scale_params.flag = true;
}

// Lazily builds the per-stride anchor-center grid (num_anchors points per cell)
// for strides 8/16/32. Cached in center_points; rebuilt only when the
// center_points_is_update flag is cleared.
void ANSSCRFDFD::generate_points(const int target_height, const int target_width)
{
    if (center_points_is_update) return;
    // strides 8, 16, 32
    for (auto stride : feat_stride_fpn) {
        unsigned int num_grid_w = target_width / stride;
        unsigned int num_grid_h = target_height / stride;
        for (unsigned int i = 0; i < num_grid_h; ++i) {       // y
            for (unsigned int j = 0; j < num_grid_w; ++j) {   // x
                for (unsigned int k = 0; k < num_anchors; ++k) { // anchors, col major
                    SCRFDPoint point;
                    point.cx = (float)j;
                    point.cy = (float)i;
                    point.stride = (float)stride;
                    center_points[stride].push_back(point);
                }
            }
        }
    }
    center_points_is_update = true;
}

// Decodes raw SCRFD head tensors into face candidates across all FPN levels.
// output_tensors layout: score_8,score_16,score_32,bbox_8,bbox_16,bbox_32
// and, when use_kps, kps_8,kps_16,kps_32.
void ANSSCRFDFD::generate_bboxes_kps(const SCRFDScaleParams& scale_params,
                                     std::vector<Object>& bbox_kps_collection,
                                     std::vector<std::vector<float>>& output_tensors,
                                     float score_threshold, float img_height, float img_width)
{
    // Bind by reference — the original copied each (large) tensor vector.
    std::vector<float>& score_8 = output_tensors.at(0);  // e.g. [1,12800,1]
    std::vector<float>& score_16 = output_tensors.at(1); // e.g. [1,3200,1]
    std::vector<float>& score_32 = output_tensors.at(2); // e.g. [1,800,1]
    std::vector<float>& bbox_8 = output_tensors.at(3);   // e.g. [1,12800,4]
    std::vector<float>& bbox_16 = output_tensors.at(4);  // e.g. [1,3200,4]
    std::vector<float>& bbox_32 = output_tensors.at(5);  // e.g. [1,800,4]

    // generate center points for the fixed network input size
    const float input_height = INPUT_H; // e.g. 640
    const float input_width = INPUT_W;  // e.g. 640
    this->generate_points(input_height, input_width);

    bbox_kps_collection.clear();
    if (use_kps) {
        std::vector<float>& kps_8 = output_tensors.at(6);  // e.g. [1,12800,10]
        std::vector<float>& kps_16 = output_tensors.at(7); // e.g. [1,3200,10]
        std::vector<float>& kps_32 = output_tensors.at(8); // e.g. [1,800,10]
        // levels 8 & 16 & 32 with landmarks
        this->generate_bboxes_kps_single_stride(scale_params, score_8, bbox_8, kps_8, 8,
                                                score_threshold, img_height, img_width,
                                                bbox_kps_collection);
        this->generate_bboxes_kps_single_stride(scale_params, score_16, bbox_16, kps_16, 16,
                                                score_threshold, img_height, img_width,
                                                bbox_kps_collection);
        this->generate_bboxes_kps_single_stride(scale_params, score_32, bbox_32, kps_32, 32,
                                                score_threshold, img_height, img_width,
                                                bbox_kps_collection);
    } else {
        // levels 8 & 16 & 32, boxes only
        this->generate_bboxes_single_stride(scale_params, score_8, bbox_8, 8, score_threshold,
                                            img_height, img_width, bbox_kps_collection);
        this->generate_bboxes_single_stride(scale_params, score_16, bbox_16, 16, score_threshold,
                                            img_height, img_width, bbox_kps_collection);
        this->generate_bboxes_single_stride(scale_params, score_32, bbox_32, 32, score_threshold,
                                            img_height, img_width, bbox_kps_collection);
    }
}

void ANSSCRFDFD::generate_bboxes_single_stride(
    const SCRFDScaleParams& scale_params, std::vector<float>& score_pred,
    std::vector<float>& bbox_pred, unsigned int stride, float score_threshold,
    float img_height, float img_width, std::vector<Object>& bbox_kps_collection)
{
    unsigned int nms_pre_ = (stride / 8) * nms_pre; // 1*1000, 2*1000, ...
    nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre;
    const unsigned int num_points = score_pred.size(); // e.g. 12800
    const float* score_ptr = score_pred.data(); // [1,12800,1]
    const float* bbox_ptr = bbox_pred.data();   // [1,12800,4]
    float ratio = scale_params.ratio;
    int dw = scale_params.dw;
    int dh = scale_params.dh;
    unsigned int count = 0;
    auto& stride_points = center_points[stride];
    for (unsigned int i = 0; i < num_points; ++i) {
        const float cls_conf = score_ptr[i];
        if (cls_conf < score_threshold) continue; // filter
        auto& point = stride_points.at(i);
        const float cx = point.cx;     // cx
        const float cy = point.cy;     // cy
        const float s = point.stride;  // stride
        // bbox offsets: left, top, right, bottom
        const float* offsets = bbox_ptr + i * 4;
        float l = offsets[0];
        float t = offsets[1];
        float r = offsets[2];
        float b = offsets[3];
        Object box_kps;
        float x1 = ((cx - l) * s - (float)dw) / ratio;
        float y1 = ((cy - t) * s - (float)dh) / ratio;
        float x2 = ((cx + r) * s - (float)dw) / ratio;
        float y2 = ((cy + b) * s - (float)dh) / ratio;
        box_kps.box.x = std::max(0.f, x1);
        box_kps.box.y = std::max(0.f, y1);
        box_kps.box.width = std::min(img_width - 1.f, x2 - x1);
        box_kps.box.height = std::min(img_height - 1.f, y2 - y1);
        box_kps.confidence = cls_conf;
        box_kps.classId = 0;
        box_kps.className = "face";
        bbox_kps_collection.push_back(box_kps);
        count += 1;
        // limit boxes for nms.
if (count > max_nms) break; } if (bbox_kps_collection.size() > nms_pre_) { std::sort( bbox_kps_collection.begin(), bbox_kps_collection.end(), [](const Object& a, const Object& b) { return a.confidence > b.confidence; } ); // sort inplace // trunc bbox_kps_collection.resize(nms_pre_); } } void ANSSCRFDFD::generate_bboxes_kps_single_stride( const SCRFDScaleParams& scale_params, std::vector& score_pred, std::vector& bbox_pred, std::vector& kps_pred, unsigned int stride, float score_threshold, float img_height, float img_width, std::vector& bbox_kps_collection) { unsigned int nms_pre_ = (stride / 8) * nms_pre; // 1 * 1000,2*1000,... nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre; const unsigned int num_points = score_pred.size(); // 12800 const float* score_ptr = score_pred.data(); // [1,12800,1] const float* bbox_ptr = bbox_pred.data(); // [1,12800,4] const float* kps_ptr = kps_pred.data(); // [1,12800,10] float ratio = scale_params.ratio; int dw = scale_params.dw; int dh = scale_params.dh; unsigned int count = 0; auto& stride_points = center_points[stride]; for (unsigned int i = 0; i < num_points; ++i) { const float cls_conf = score_ptr[i]; if (cls_conf < score_threshold) continue; // filter auto& point = stride_points.at(i); const float cx = point.cx; // cx const float cy = point.cy; // cy const float s = point.stride; // stride // bbox const float* offsets = bbox_ptr + i * 4; float l = offsets[0]; // left float t = offsets[1]; // top float r = offsets[2]; // right float b = offsets[3]; // bottom Object box_kps; float x1 = ((cx - l) * s - (float)dw) / ratio; // cx - l x1 float y1 = ((cy - t) * s - (float)dh) / ratio; // cy - t y1 float x2 = ((cx + r) * s - (float)dw) / ratio; // cx + r x2 float y2 = ((cy + b) * s - (float)dh) / ratio; // cy + b y2 box_kps.box.x = (int)std::max(0.f, x1); box_kps.box.y = (int)std::max(0.f, y1); box_kps.box.width = (int)std::min(img_width - 1.f, x2 - x1); box_kps.box.height = (int)std::min(img_height - 1.f, y2 - y1); 
box_kps.confidence = cls_conf; box_kps.classId = 0; box_kps.className = "face"; // landmarks const float* kps_offsets = kps_ptr + i * 10; for (unsigned int j = 0; j < 10; j += 2) { cv::Point2f kps; float kps_l = kps_offsets[j]; float kps_t = kps_offsets[j + 1]; float kps_x = ((cx + kps_l) * s - (float)dw) / ratio; // cx + l x float kps_y = ((cy + kps_t) * s - (float)dh) / ratio; // cy + t y kps.x = std::min(std::max(0.f, kps_x), img_width - 1.f); kps.y = std::min(std::max(0.f, kps_y), img_height - 1.f); box_kps.kps.push_back(kps.x); box_kps.kps.push_back(kps.y); box_kps.polygon.push_back(kps); // landmarks as polygon } bbox_kps_collection.push_back(box_kps); count += 1; // limit boxes for nms. if (count > max_nms) break; } if (bbox_kps_collection.size() > nms_pre_) { std::sort( bbox_kps_collection.begin(), bbox_kps_collection.end(), [](const Object& a, const Object& b) { return a.confidence > b.confidence; } ); // sort inplace // trunc bbox_kps_collection.resize(nms_pre_); } } float ANSSCRFDFD::getIouOfObjects(const Object& a, const Object& b) { // Retrieve the bounding boxes const cv::Rect& boxA = a.box; const cv::Rect& boxB = b.box; // Compute the coordinates of the intersection rectangle int inner_x1 = std::max(boxA.x, boxB.x); int inner_y1 = std::max(boxA.y, boxB.y); int inner_x2 = std::min(boxA.x + boxA.width, boxB.x + boxB.width); int inner_y2 = std::min(boxA.y + boxA.height, boxB.y + boxB.height); // Calculate width and height of the intersection int inner_w = inner_x2 - inner_x1; int inner_h = inner_y2 - inner_y1; // If there's no overlap, return 0 if (inner_w <= 0 || inner_h <= 0) { return 0.0f; } // Calculate the area of the intersection float inner_area = static_cast(inner_w * inner_h); // Calculate the areas of the two boxes float areaA = static_cast(boxA.width * boxA.height); float areaB = static_cast(boxB.width * boxB.height); // Calculate the union area float union_area = areaA + areaB - inner_area; // Avoid division by zero and return IoU if 
(union_area <= 0.0f) { return 0.0f; } return inner_area / union_area; } void ANSSCRFDFD::nms_bboxes_kps(std::vector& input, std::vector& output, float iou_threshold, unsigned int topk) { if (input.empty()) return; std::sort( input.begin(), input.end(), [](const Object& a, const Object& b) { return a.confidence > b.confidence; } ); const unsigned int box_num = input.size(); std::vector merged(box_num, 0); unsigned int count = 0; for (unsigned int i = 0; i < box_num; ++i) { if (merged[i]) continue; std::vector buf; buf.push_back(input[i]); merged[i] = 1; for (unsigned int j = i + 1; j < box_num; ++j) { if (merged[j]) continue; float iou = getIouOfObjects(input[i], input[j]); //static_cast(input[i].box.iou_of(input[j].box)); if (iou > iou_threshold) { merged[j] = 1; buf.push_back(input[j]); } } output.push_back(buf[0]); // keep top k count += 1; if (count >= topk) break; } } }