#include "ANSOVFaceDetector.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
// NOTE(review): the original four '#include' targets were stripped by the
// angle-bracket-eating extraction. Reconstructed from usage in this file
// (std::make_unique/unique_ptr, std::lock_guard/std::mutex, std::filesystem,
// std::vector/std::string, std::max/std::min, std::floor) — confirm against VCS.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <exception>
#include <filesystem>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "Utility.h"

//#define FNS_DEBUG

namespace ANSCENTER {

    /// Initializes the SCRFD-based face detector from a packaged model zip.
    /// Forces the config to FACEDETECT/FACEDETECTOR with a 640x640 input and
    /// clamps the NMS/confidence thresholds to sane minimums.
    /// Returns true on success, false on any exception.
    bool ANSOVFD::Initialize(std::string licenseKey, ModelConfig modelConfig,
                             const std::string& modelZipFilePath,
                             const std::string& modelZipPassword,
                             std::string& labelMap)
    {
        // NOTE(review): the base-class result is ignored and _licenseValid is
        // forced true immediately before it is tested, so the early-return below
        // can never fire. Preserved as-is; confirm whether `result` should gate
        // initialization.
        bool result = ANSFDBase::Initialize(licenseKey, modelConfig,
                                            modelZipFilePath, modelZipPassword, labelMap);
        labelMap = "Face";
        _licenseValid = true;
        std::vector<std::string> labels{ labelMap };
        if (!_licenseValid) return false;

        try {
            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::FACEDETECT;
            _modelConfig.detectionType = DetectionType::FACEDETECTOR;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
            // Clamp obviously-too-low thresholds to a usable default.
            if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

            if (_isInitialized) {
                _face_detector.reset(); // Releases previously allocated memory for face detection
                _isInitialized = false; // Reset initialization flag
            }

            std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
            _scrfdModelPath = onnxModel;
            // The template argument was stripped in the original text; recover the
            // detector type from the smart-pointer member instead of guessing a name.
            this->_face_detector =
                std::make_unique<decltype(_face_detector)::element_type>(onnxModel);
            _isInitialized = true;
            _movementObjects.clear();
            _retainDetectedFaces = 0;
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Extracts the model package via the base class (which populates
    /// _modelFolder). The detector itself is not created here — only the
    /// initialized flag is set.
    bool ANSOVFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)
    {
        try {
            // We need to get the _modelFolder
            bool result = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
            if (!result) return false;
            std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
            //this->_face_detector = std::make_unique<decltype(_face_detector)::element_type>(onnxModel);
            _isInitialized = true;
            return _isInitialized;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Loads an SCRFD model from an already-extracted folder. Falls back to the
    /// default model name "scrfd" when none is supplied, then (re)creates the
    /// detector instance.
    bool ANSOVFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
                                      std::string modelName, std::string className,
                                      const std::string& modelFolder, std::string& labelMap)
    {
        try {
            bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig, modelName,
                                                         className, modelFolder, labelMap);
            if (!result) return false;

            std::string _modelName = modelName;
            if (_modelName.empty()) {
                _modelName = "scrfd";
            }
            std::string modelFullName = _modelName + ".onnx";
            // NOTE(review): modelFullName is a bare file name, not joined with
            // modelFolder (unlike Initialize/LoadModel, which use CreateFilePath).
            // Preserved as-is — confirm whether the detector resolves it relative
            // to the working directory.

            // We need to get the modelfolder from here
            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::FACEDETECT;
            _modelConfig.detectionType = DetectionType::FACEDETECTOR;
            _modelConfig.inpHeight = 640; // Only for Yolo model
            _modelConfig.inpWidth = 640;
            if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

            if (_isInitialized) {
                _face_detector.reset(); // Releases previously allocated memory for face detection
                _isInitialized = false; // Reset initialization flag
            }

            _scrfdModelPath = modelFullName;
            this->_face_detector =
                std::make_unique<decltype(_face_detector)::element_type>(modelFullName);
            _isInitialized = true;
            return _isInitialized;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Optimizes the loaded model (optionally to FP16) via the base class.
    /// Clears the output folder name and fails fast when the model file is missing.
    bool ANSOVFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder)
    {
        if (!FileExist(_modelFilePath)) {
            optimizedModelFolder = "";
            return false;
        }
        return ANSFDBase::OptimizeModel(fp16, optimizedModelFolder);
    }

    /// Public inference entry point (per-camera). Handles one-shot recovery from
    /// a lost DirectML device by recreating the detector on CPU, then runs
    /// detection and, optionally, liveness validation.
    std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, const std::string& camera_id,
                                              bool useDynamicImage, bool validateFace,
                                              bool facelivenessCheck)
    {
        // ── DML device-lost recovery (outside mutex) ──────────────
        if (_dmlDeviceLost && _face_detector) {
            // The DML session is broken — recreate on CPU
            try {
                auto cpuDetector = std::make_unique<decltype(_face_detector)::element_type>(
                    _scrfdModelPath, ANSCENTER::EngineType::CPU);
                {
                    // Swap in the new session under the lock; construction above
                    // happens outside the lock to keep the critical section short.
                    std::lock_guard<std::mutex> guard(_mtx);
                    _face_detector = std::move(cpuDetector);
                }
                _logger.LogInfo("ANSOVFD::RunInference",
                                "CPU fallback session created successfully", __FILE__, __LINE__);
            }
            catch (const std::exception& re) {
                _logger.LogFatal("ANSOVFD::RunInference",
                                 std::string("CPU fallback exception: ") + re.what(),
                                 __FILE__, __LINE__);
                std::lock_guard<std::mutex> guard(_mtx);
                _face_detector.reset();
                _isInitialized = false;
                return {};
            }
            _dmlDeviceLost = false; // Recovery complete
        }

        if (facelivenessCheck) {
            std::vector<Object> rawFaceResults =
                Inference(input, camera_id, useDynamicImage, validateFace);
            std::vector<Object> facesWithLivenessResults =
                ValidateLivenessFaces(input, rawFaceResults, camera_id);
            return facesWithLivenessResults;
        }
        else {
            return Inference(input, camera_id, useDynamicImage, validateFace);
        }
    }

    /// Convenience overload that uses the fixed camera id "CustomCam".
    /// Mirrors the per-camera overload, including the DML recovery path.
    std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, bool useDynamicImage,
                                              bool validateFace, bool facelivenessCheck)
    {
        // ── DML device-lost recovery (outside mutex) ──────────────
        if (_dmlDeviceLost && _face_detector) {
            try {
                auto cpuDetector = std::make_unique<decltype(_face_detector)::element_type>(
                    _scrfdModelPath, ANSCENTER::EngineType::CPU);
                {
                    std::lock_guard<std::mutex> guard(_mtx);
                    _face_detector = std::move(cpuDetector);
                }
                _logger.LogInfo("ANSOVFD::RunInference",
                                "CPU fallback session created successfully", __FILE__, __LINE__);
            }
            catch (const std::exception& re) {
                _logger.LogFatal("ANSOVFD::RunInference",
                                 std::string("CPU fallback exception: ") + re.what(),
                                 __FILE__, __LINE__);
                std::lock_guard<std::mutex> guard(_mtx);
                _face_detector.reset();
                _isInitialized = false;
                return {};
            }
            _dmlDeviceLost = false;
        }

        if (facelivenessCheck) {
            std::vector<Object> rawFaceResults =
                Inference(input, "CustomCam", useDynamicImage, validateFace);
            std::vector<Object> facesWithLivenessResults =
                ValidateLivenessFaces(input, rawFaceResults, "CustomCam");
            return facesWithLivenessResults;
        }
        else {
            return Inference(input, "CustomCam", useDynamicImage, validateFace);
        }
    }

    /// Core detection routine. Validates state and input, normalizes the image to
    /// BGR, pads/resizes cropped-face inputs, runs the SCRFD detector, aligns each
    /// face (preferring a full-resolution frame from the GPU-frame registry when
    /// available), and maps boxes/landmarks back to original image coordinates.
    std::vector<Object> ANSOVFD::Inference(const cv::Mat& input, const std::string& camera_id,
                                           bool useDynamicImage, bool validateFace)
    {
        std::lock_guard<std::mutex> guard(_mtx);
        try {
            // Step 1: Validate license and initialization
            if (!_licenseValid) {
                _logger.LogError("ANSOVFD::RunInference", "Invalid license", __FILE__, __LINE__);
                return {};
            }
            if (!_isInitialized || !_face_detector) {
                _logger.LogError("ANSOVFD::RunInference", "Model or face detector not initialized",
                                 __FILE__, __LINE__);
                return {};
            }

            // Step 2: Validate input image
            if (input.empty() || input.cols < 5 || input.rows < 5) {
                _logger.LogError("ANSOVFD::RunInference", "Invalid input image", __FILE__, __LINE__);
                return {};
            }

            // Step 3: Convert to BGR if needed
            cv::Mat processedImage;
            if (input.channels() == 1) {
                cv::cvtColor(input, processedImage, cv::COLOR_GRAY2BGR);
            }
            else if (input.channels() == 3) {
                processedImage = input; // Shallow copy - safe
            }
            else {
                _logger.LogError("ANSOVFD::RunInference", "Unsupported number of image channels",
                                 __FILE__, __LINE__);
                return {};
            }

            // Step 4: Prepare image for detection
            const bool croppedFace = !useDynamicImage;
            const int originalWidth = input.cols;
            const int originalHeight = input.rows;
            cv::Mat im;
            int offsetX = 0, offsetY = 0; // Track padding offset
            if (croppedFace) {
                // Add padding so a tightly-cropped face has context for the detector.
                cv::copyMakeBorder(processedImage, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
                offsetX = 200;
                offsetY = 200;
                // Resize if too large
                if (im.rows > 1280) {
                    const float aspectRatio = static_cast<float>(im.cols) / im.rows;
                    const int newHeight = 1280;
                    const int newWidth = static_cast<int>(newHeight * aspectRatio);
                    // Update offsets for resize
                    const float scale = static_cast<float>(newHeight) / im.rows;
                    offsetX = static_cast<int>(offsetX * scale);
                    offsetY = static_cast<int>(offsetY * scale);
                    cv::resize(im, im, cv::Size(newWidth, newHeight));
                }
            }
            else {
                im = processedImage; // Shallow copy
            }

            // Step 5: Run face detection
            // TODO(review): the element type below was stripped by the extraction;
            // "FaceObject" is a placeholder — restore the real detection type from
            // ANSOVFaceDetector.h (it must expose .box.score, .box.rect() and
            // .landmarks.points).
            std::vector<FaceObject> detectionResults;
            _face_detector->detect(im, detectionResults);

            // Step 6: Process detections
            std::vector<Object> output;
            output.reserve(detectionResults.size()); // Pre-allocate

            // Try to get full-res image from registry for higher quality face alignment.
            // On Intel-only systems (no NVIDIA GPU), NV12 may be in CPU memory from
            // D3D11VA decode. Convert to BGR once and use for all face alignments.
            cv::Mat fullResImg;
            float landmarkScaleX = 1.f, landmarkScaleY = 1.f;
            if (!croppedFace && !detectionResults.empty()) {
                auto* gpuData = tl_currentGpuFrame();
                if (gpuData && gpuData->width > im.cols && gpuData->height > im.rows) {
                    if (gpuData->pixelFormat == 1000 && gpuData->yPlane) {
                        // BGR full-res from ANSVideoPlayer/ANSFilePlayer
                        fullResImg = cv::Mat(gpuData->height, gpuData->width, CV_8UC3,
                                             gpuData->yPlane, gpuData->yLinesize).clone();
                        landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                        landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                    }
                    else if (gpuData->pixelFormat == 23 && !gpuData->isCudaDevicePtr &&
                             gpuData->yPlane && gpuData->uvPlane &&
                             (gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
                        // CPU NV12 — convert to BGR for face alignment
                        try {
                            cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
                                           gpuData->yPlane, gpuData->yLinesize);
                            cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
                                            gpuData->uvPlane, gpuData->uvLinesize);
                            cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
                            landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                            landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                        }
                        catch (...) { /* NV12 conversion failed — skip full-res */ }
                    }
                    else if (gpuData->pixelFormat == 23 && gpuData->isCudaDevicePtr &&
                             gpuData->cpuYPlane && gpuData->cpuUvPlane &&
                             (gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
                        // CUDA NV12 with CPU fallback planes
                        try {
                            cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
                                           gpuData->cpuYPlane, gpuData->cpuYLinesize);
                            cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
                                            gpuData->cpuUvPlane, gpuData->cpuUvLinesize);
                            cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
                            landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                            landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                        }
                        catch (...) { /* NV12 conversion failed — skip full-res */ }
                    }
                }
            }

            for (const auto& obj : detectionResults) {
                // Check confidence threshold
                const float confidence = obj.box.score;
                if (confidence < _modelConfig.detectionScoreThreshold) {
                    continue;
                }
                // Validate face landmarks
                if (validateFace && !isValidFace(obj.landmarks.points, obj.box.rect(), 27)) {
                    continue;
                }

                Object result;
                result.classId = 0;
                result.className = "Face";
                result.confidence = confidence;
                result.cameraId = camera_id;

                // Get bounding box
                cv::Rect bbox = obj.box.rect();

                // Process face for mask — use full-res image when available
                // for higher quality 112x112 face alignment
                cv::Mat alignImage;
                std::vector<cv::Point2f> face_landmark_5 = obj.landmarks.points;
                // NOTE(review): landmark container element type reconstructed as
                // cv::Point2f (members .x/.y are used below) — confirm.
                if (!fullResImg.empty()) {
                    // Scale landmarks from display-res to full-res
                    for (auto& pt : face_landmark_5) {
                        pt.x *= landmarkScaleX;
                        pt.y *= landmarkScaleY;
                    }
                    alignImage = fullResImg;
                }
                else {
                    alignImage = im;
                }
                // NOTE(review): alignImage is passed as both source and destination
                // — presumably intentional in-place alignment; confirm Preprocess's
                // contract.
                result.mask = Preprocess(alignImage, face_landmark_5, alignImage);
                if (result.mask.empty()) {
                    _logger.LogError("ANSOVFD::RunInference", "Cannot get mask image",
                                     __FILE__, __LINE__);
                    continue;
                }

                // Adjust coordinates back to original image space
                if (croppedFace) {
                    // Remove padding offset
                    bbox.x = std::max(0, bbox.x - offsetX);
                    bbox.y = std::max(0, bbox.y - offsetY);
                    // Clamp to original image bounds
                    bbox.x = std::min(bbox.x, originalWidth);
                    bbox.y = std::min(bbox.y, originalHeight);
                    bbox.width = std::min(bbox.width, originalWidth - bbox.x);
                    bbox.height = std::min(bbox.height, originalHeight - bbox.y);
                    result.box = bbox;
                    // Adjust landmarks
                    result.kps.reserve(obj.landmarks.points.size() * 2);
                    for (const auto& pt : obj.landmarks.points) {
                        result.kps.push_back(std::max(0.0f, pt.x - offsetX));
                        result.kps.push_back(std::max(0.0f, pt.y - offsetY));
                    }
                }
                else {
                    result.box = bbox;
                    // Copy landmarks directly
                    result.kps.reserve(obj.landmarks.points.size() * 2);
                    for (const auto& pt : obj.landmarks.points) {
                        result.kps.push_back(pt.x);
                        result.kps.push_back(pt.y);
                    }
                }
                output.push_back(std::move(result));
            }
            return output;
        }
        catch (const std::exception& e) {
            const std::string msg = e.what();
            // DML device-removal detection (see ANSONNXYOLO.cpp for details)
            if (msg.find("887A0005") != std::string::npos) {
                if (!_dmlDeviceLost) {
                    _dmlDeviceLost = true;
                    _logger.LogFatal("ANSOVFD::RunInference",
                                     "DirectML GPU device lost (887A0005) — will attempt CPU fallback on next call",
                                     __FILE__, __LINE__);
                }
                return {};
            }
            _logger.LogFatal("ANSOVFD::RunInference", "Exception: " + msg, __FILE__, __LINE__);
            return {};
        }
        catch (...) {
            _logger.LogFatal("ANSOVFD::RunInference", "Unknown fatal exception", __FILE__, __LINE__);
            return {};
        }
    }

    /// Destructor — releases all detector resources; never lets an exception escape.
    ANSOVFD::~ANSOVFD()
    {
        try {
            Destroy();
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
        }
    }

    /// Releases the detector (and landmark model when built with USEOVFACEDETECTOR)
    /// and deletes the extracted model folder. Returns false on exception.
    bool ANSOVFD::Destroy()
    {
        try {
#ifdef USEOVFACEDETECTOR
            _faceLandmark.reset(); // Releases previously allocated memory for face landmark
#endif
            _face_detector.reset(); // Releases previously allocated memory for face detection
            _isInitialized = false; // Reset initialization flag
            if (FolderExist(_modelFolder)) {
                if (!DeleteFolder(_modelFolder)) {
                    this->_logger.LogError("ANSOVFD::Destroy", "Failed to release ANSOVFD Models",
                                           __FILE__, __LINE__);
                }
            }
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

#ifdef USEOVFACEDETECTOR
    /// Reads an OpenVINO face-detection model, validates its output layout
    /// (single DetectionOutput or boxes+labels pair), and attaches u8/NHWC input
    /// preprocessing. Returns the (possibly rebuilt) model.
    std::shared_ptr<ov::Model> ANSOVFD::read(const ov::Core& core, std::string pathToModel)
    {
        std::shared_ptr<ov::Model> model = core.read_model(pathToModel);
        // NOTE(review): indices 3/2 imply dim3 = height, dim2 = width, which looks
        // swapped for an NCHW input — preserved as-is; confirm against the model.
        model_input_height = model->input().get_shape()[3];
        model_input_width = model->input().get_shape()[2];

        ov::OutputVector outputs = model->outputs();
        if (outputs.size() == 1) {
            output = outputs.front().get_any_name();
            const auto& outShape = outputs.front().get_shape();
            if (outShape.size() != 4) {
                this->_logger.LogFatal("ANSOVFD::read.",
                                       "Face Detection model output should have 4 dimentions",
                                       __FILE__, __LINE__);
                return model;
            }
            objectSize = outputs.front().get_shape()[3];
            if (objectSize != 7) {
                this->_logger.LogFatal("ANSOVFD::read. ",
                                       "Face Detection model output layer should have 7 as a last dimension",
                                       __FILE__, __LINE__);
                return model;
            }
            if (outShape[2] != ndetections) {
                this->_logger.LogFatal("ANSOVFD::read. ",
                                       "Face Detection model output must contain 200 detections",
                                       __FILE__, __LINE__);
                return model;
            }
        }
        else {
            for (const auto& out : outputs) {
                const auto& outShape = out.get_shape();
                if (outShape.size() == 2 && outShape.back() == 5) {
                    output = out.get_any_name();
                    if (outShape[0] != ndetections) {
                        this->_logger.LogFatal("ANSOVFD::read. ",
                                               "Face Detection model output must contain 200 detections",
                                               __FILE__, __LINE__);
                        return model;
                    }
                    objectSize = outShape.back();
                }
                else if (outShape.size() == 1 && out.get_element_type() == ov::element::i32) {
                    labels_output = out.get_any_name();
                }
            }
            if (output.empty() || labels_output.empty()) {
                this->_logger.LogFatal("ANSOVFD::read. ",
                                       "Face Detection model must contain either single DetectionOutput",
                                       __FILE__, __LINE__);
                return model;
            }
        }

        ov::preprocess::PrePostProcessor ppp(model);
        ppp.input().tensor()
            .set_element_type(ov::element::u8)
            .set_layout("NHWC");
        ppp.input().preprocess().convert_layout("NCHW");
        ppp.output(output).tensor().set_element_type(ov::element::f32);
        model = ppp.build();
        ov::set_batch(model, 1);
        inShape = model->input().get_shape();
        return model;
    }

    /// Chooses between the YOLO ("ansylfd.xml") and SSD ("ansovfd.xml") face
    /// detectors, loads the optional landmark regressor, and compiles the chosen
    /// model on the selected OpenVINO device.
    bool ANSOVFD::InitFaceDetector()
    {
        if (_isInitialized) {
            _faceLandmark.reset(); // Releases previously allocated memory for face landmark
            _isInitialized = false; // Reset initialization flag
        }

        // Check if the Yolo model is available
        std::string yoloModel = CreateFilePath(_modelFolder, "ansylfd.xml");
        if (std::filesystem::exists(yoloModel)) {
            _modelFilePath = yoloModel;
            _useYoloFaceDetector = true;
        }
        else {
            _useYoloFaceDetector = false;
            std::string xmlfile = CreateFilePath(_modelFolder, "ansovfd.xml");
            if (std::filesystem::exists(xmlfile)) {
                _modelFilePath = xmlfile;
            }
            else {
                this->_logger.LogError("ANSOVFD::Initialize. Face detector model file is not exist",
                                       _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }

        std::string deviceName = GetOpenVINODevice();
        std::string landmarkModel = CreateFilePath(_modelFolder, "landmarks.xml");
        if (std::filesystem::exists(landmarkModel)) {
            _landmarkModelFilePath = landmarkModel;
            this->_logger.LogDebug("ANSOVFD::Initialize. Loading landmarks weight",
                                   _landmarkModelFilePath, __FILE__, __LINE__);
            ov::Core lmcore;
            CnnConfig landmarks_config(_landmarkModelFilePath, "Facial Landmarks Regression");
            if (deviceName == "NPU") {
                lmcore.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                lmcore.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                landmarks_config.m_deviceName = "AUTO:NPU,GPU";
            }
            else {
                //lmcore.set_property(deviceName, ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                landmarks_config.m_deviceName = deviceName;
            }
            landmarks_config.m_max_batch_size = 1;
            landmarks_config.m_core = lmcore;
            this->_faceLandmark =
                std::make_unique<decltype(_faceLandmark)::element_type>(landmarks_config);
            if (!_faceLandmark) {
                this->_logger.LogFatal("ANSOVFD::Initialize",
                                       "Failed to initialize face recognizer model",
                                       __FILE__, __LINE__);
            }
        }
        else {
            this->_logger.LogError("ANSOVFD::LoadModel. Model landmarks.xml file is not exist",
                                   _landmarkModelFilePath, __FILE__, __LINE__);
        }

        if (_useYoloFaceDetector)
            InitialYoloModel(deviceName);
        else
            InitialSSDModel(deviceName);
        _isInitialized = true;
        return _isInitialized;
    }

    /// Compiles the SSD face-detection model for the given device (or the
    /// AUTO:NPU,GPU pipeline with latency hints) and prepares the input tensor.
    void ANSOVFD::InitialSSDModel(const std::string& deviceName)
    {
        try {
            bb_enlarge_coefficient = 1.0;
            bb_dx_coefficient = 1;
            bb_dy_coefficient = 1;
            ov::Core core;
            // Both branches differed only in the target string; compile once.
            std::string target = deviceName;
            if (deviceName == "NPU") {
                core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                target = "AUTO:NPU,GPU";
            }
            //core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            //core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            ov::CompiledModel cml = core.compile_model(read(core, _modelFilePath), target);
            request = cml.create_infer_request();
            inTensor = request.get_input_tensor();
            inTensor.set_shape(inShape);
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::InitialSSDModel", e.what(), __FILE__, __LINE__);
        }
    }

    /// Compiles the YOLO face-detection model for the given device (or the
    /// AUTO:NPU,GPU pipeline with latency hints) and creates the infer request.
    void ANSOVFD::InitialYoloModel(const std::string& deviceName)
    {
        try {
            ov::Core core;
            std::string target = deviceName;
            if (deviceName == "NPU") {
                core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
                target = "AUTO:NPU,GPU";
            }
            // Configure and compile for individual device
            //core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            //core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            compiled_model_ = core.compile_model(_modelFilePath, target);
            request = compiled_model_.create_infer_request();
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("OPENVINOOD::InitialModel", e.what(), __FILE__, __LINE__);
        }
    }

    /// Runs the OpenVINO (SSD-style) face detector on one frame and parses either
    /// the two-output (boxes + labels) layout or the single 7-element
    /// DetectionOutput layout into Object results.
    std::vector<Object> ANSOVFD::runOVInference(const cv::Mat& frame, const std::string& cameraId)
    {
        std::vector<Object> detectionOutputs;
        if (frame.empty()) {
            this->_logger.LogError("ANSOVFD::runOVInference", "Input image is empty",
                                   __FILE__, __LINE__);
            return detectionOutputs;
        }
        if ((frame.cols < 10) || (frame.rows < 10)) return detectionOutputs;

        try {
            width = static_cast<float>(frame.cols);
            height = static_cast<float>(frame.rows);
            resize2tensor(frame, inTensor);
            request.infer();
            float* detections = request.get_tensor(output).data<float>();

            if (!labels_output.empty()) {
                // Two-output layout: boxes [N,5] (x1,y1,x2,y2,conf) + labels [N] i32.
                const int32_t* labels = request.get_tensor(labels_output).data<int32_t>();
                for (size_t i = 0; i < ndetections; i++) {
                    Object r;
                    // NOTE(review): the original assigned the raw int label to
                    // className; reconstructed via std::to_string — confirm the
                    // field type and intended mapping.
                    r.className = std::to_string(labels[i]);
                    r.confidence = detections[i * objectSize + 4];
                    if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                        continue;
                    }
                    r.box.x = static_cast<int>(detections[i * objectSize] / model_input_width * width);
                    r.box.y = static_cast<int>(detections[i * objectSize + 1] / model_input_height * height);
                    r.box.width = static_cast<int>(detections[i * objectSize + 2] / model_input_width * width - r.box.x);
                    r.box.height = static_cast<int>(detections[i * objectSize + 3] / model_input_height * height - r.box.y);
                    _squareEnlargeAndClamp(r, frame);
                    r.cameraId = cameraId;
                    if (r.confidence > _modelConfig.detectionScoreThreshold) {
                        detectionOutputs.push_back(r);
                    }
                }
            }
            else {
                // Single DetectionOutput layout: [image_id, label, conf, x1, y1, x2, y2].
                // FIX(review): in the original this loop ran even after the
                // labelled branch above, re-reading `detections` with the wrong
                // stride (objectSize == 5) and appending garbage boxes. It must
                // only run for the single-output model.
                for (size_t i = 0; i < ndetections; i++) {
                    float image_id = detections[i * objectSize];
                    if (image_id < 0) {
                        break; // End-of-detections sentinel
                    }
                    Object r;
                    // NOTE(review): see className note above — reconstructed.
                    r.className = std::to_string(static_cast<int>(detections[i * objectSize + 1]));
                    r.confidence = detections[i * objectSize + 2];
                    if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                        continue;
                    }
                    r.box.x = static_cast<int>(detections[i * objectSize + 3] * width);
                    r.box.y = static_cast<int>(detections[i * objectSize + 4] * height);
                    r.box.width = static_cast<int>(detections[i * objectSize + 5] * width - r.box.x);
                    r.box.height = static_cast<int>(detections[i * objectSize + 6] * height - r.box.y);
                    _squareEnlargeAndClamp(r, frame);
                    r.cameraId = cameraId;
                    if (r.confidence > _modelConfig.detectionScoreThreshold) {
                        detectionOutputs.push_back(r);
                    }
                }
            }
            return detectionOutputs;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::runOVInference", e.what(), __FILE__, __LINE__);
            return detectionOutputs;
        }
    }

    /// Make square and enlarge face bounding box for more robust operation of
    /// face analytics networks, then clamp it to the frame. Extracted from the
    /// two identical copies in runOVInference.
    // NOTE(review): file-local helper — declare it as a private member or move it
    // into an anonymous namespace taking the coefficients explicitly if the
    // header cannot be changed.
    void ANSOVFD::_squareEnlargeAndClamp(Object& r, const cv::Mat& frame)
    {
        int bb_width = r.box.width;
        int bb_height = r.box.height;
        int bb_center_x = r.box.x + bb_width / 2;
        int bb_center_y = r.box.y + bb_height / 2;
        int max_of_sizes = std::max(bb_width, bb_height);
        int bb_new_width = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
        int bb_new_height = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
        r.box.x = bb_center_x - static_cast<int>(std::floor(bb_dx_coefficient * bb_new_width / 2));
        r.box.y = bb_center_y - static_cast<int>(std::floor(bb_dy_coefficient * bb_new_height / 2));
        r.box.width = bb_new_width;
        r.box.height = bb_new_height;
        r.box.y = std::max(0, r.box.y);
        r.box.x = std::max(0, r.box.x);
        r.box.width = std::min(r.box.width, frame.cols - r.box.x);
        r.box.height = std::min(r.box.height, frame.rows - r.box.y);
    }

    /// Pads the source image to a square (top-left anchored, zero fill) so the
    /// YOLO letterbox scale is uniform. Returns the source unchanged on error.
    cv::Mat ANSOVFD::PreProcessing(const cv::Mat& source)
    {
        try {
            int col = source.cols;
            int row = source.rows;
            int _max = std::max(col, row);
            cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
            if ((col > 0) && (row > 0)) source.copyTo(result(cv::Rect(0, 0, col, row)));
            return result;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVFD::PreProcessing", e.what(), __FILE__, __LINE__);
            return source;
        }
    }

    /// Runs the YOLO face detector: letterboxes to 640x640, infers, decodes the
    /// [84,8400]-style output, applies NMS, and returns clamped Object boxes.
    std::vector<Object> ANSOVFD::runYoloInference(const cv::Mat& input, const std::string& cameraId)
    {
        std::vector<Object> outputs;
        try {
            if (input.empty()) {
                this->_logger.LogError("ANSOVFD::RunInference", "Input image is empty",
                                       __FILE__, __LINE__);
                return outputs;
            }
            if ((input.cols < 10) || (input.rows < 10)) return outputs;

            // Step 0: Prepare input (no clone needed — PreProcessing copies).
            const cv::Mat& img = input;
            cv::Mat letterbox_img = PreProcessing(img);
            const float scale = static_cast<float>(letterbox_img.size[0]) / 640.0f;
            cv::Mat blob = cv::dnn::blobFromImage(letterbox_img, 1.0 / 255.0,
                                                  cv::Size(640, 640), cv::Scalar(), true);

            // Step 1: Feed blob to the network.
            // Get input port for model with one input
            auto input_port = compiled_model_.input();
            // Create tensor from external memory
            ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(),
                                    blob.ptr(0));
            // Set input tensor for model with one input
            request.set_input_tensor(input_tensor);

            // Step 3. Start inference
            request.infer();

            // Step 4. Get output
            auto output_tensor = request.get_output_tensor(0);
            auto output_shape = output_tensor.get_shape(); // [1, 84, 8400]-style

            // Step 5. Post-processing
            float* data = output_tensor.data<float>();
            cv::Mat output_buffer(static_cast<int>(output_shape[1]),
                                  static_cast<int>(output_shape[2]), CV_32F, data);
            cv::transpose(output_buffer, output_buffer); //[8400,84]

            std::vector<int> class_ids;
            std::vector<float> class_scores;
            std::vector<cv::Rect> boxes;
            // Figure out the bbox, class_id and class_score
            for (int i = 0; i < output_buffer.rows; i++) {
                cv::Mat classes_scores = output_buffer.row(i).colRange(4, static_cast<int>(output_shape[1]));
                cv::Point class_id;
                double maxClassScore;
                cv::minMaxLoc(classes_scores, nullptr, &maxClassScore, nullptr, &class_id);
                if (maxClassScore > _modelConfig.detectionScoreThreshold) {
                    class_scores.push_back(static_cast<float>(maxClassScore));
                    class_ids.push_back(class_id.x);
                    float cx = output_buffer.at<float>(i, 0);
                    float cy = output_buffer.at<float>(i, 1);
                    float w = output_buffer.at<float>(i, 2);
                    float h = output_buffer.at<float>(i, 3);
                    // Map letterbox coordinates back to the original image.
                    int left = int((cx - 0.5 * w) * scale);
                    int top = int((cy - 0.5 * h) * scale);
                    int width = int(w * scale);
                    int height = int(h * scale);
                    left = std::max(0, left);
                    top = std::max(0, top);
                    width = std::min(width, img.cols - left);
                    height = std::min(height, img.rows - top);
                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }

            //NMS
            std::vector<int> indices;
            cv::dnn::NMSBoxes(boxes, class_scores, _modelConfig.modelConfThreshold,
                              _modelConfig.modelMNSThreshold, indices);
            for (int id : indices) {
                if (class_scores[id] > _modelConfig.detectionScoreThreshold) {
                    Object result;
                    result.classId = class_ids[id];
                    result.className = "Face";
                    result.confidence = class_scores[id];
                    result.box = boxes[id];
                    outputs.push_back(result);
                }
            }
            return outputs;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("OPENVINOOD::runYoloInference", e.what(), __FILE__, __LINE__);
            return outputs;
        }
    }
#endif
}