// ANSOVFaceDetector.cpp — ANSCENTER face detector implementation (SCRFD ONNX path,
// with an optional OpenVINO SSD/YOLO path under USEOVFACEDETECTOR).
#include "ANSOVFaceDetector.h"
|
|
#include "ANSGpuFrameRegistry.h"
|
|
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
|
#include <models/detection_model_ssd.h>
|
|
#include <pipelines/metadata.h>
|
|
#include <models/input_data.h>
|
|
#include <models/results.h>
|
|
#include "Utility.h"
|
|
//#define FNS_DEBUG
|
|
|
|
namespace ANSCENTER {
|
|
bool ANSOVFD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
|
|
bool result = ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
|
|
labelMap = "Face";
|
|
_licenseValid = true;
|
|
std::vector<std::string> labels{ labelMap };
|
|
if (!_licenseValid) return false;
|
|
try {
|
|
_modelConfig = modelConfig;
|
|
_modelConfig.modelType = ModelType::FACEDETECT;
|
|
_modelConfig.detectionType = DetectionType::FACEDETECTOR;
|
|
_modelConfig.inpHeight = 640;
|
|
_modelConfig.inpWidth = 640;
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|
if (_isInitialized) {
|
|
_face_detector.reset(); // Releases previously allocated memory for face detection
|
|
_isInitialized = false; // Reset initialization flag
|
|
}
|
|
std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
|
|
this->_face_detector = std::make_unique<SCRFD>(onnxModel);
|
|
_isInitialized = true;
|
|
_movementObjects.clear();
|
|
_retainDetectedFaces = 0;
|
|
return true;
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("ANSOVFD::Initialize", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
bool ANSOVFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
|
try {
|
|
// We need to get the _modelFolder
|
|
bool result = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
|
|
if (!result) return false;
|
|
std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
|
|
//this->_face_detector = std::make_unique<SCRFD>(onnxModel);
|
|
_isInitialized = true;
|
|
return _isInitialized;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
|
|
}
|
|
bool ANSOVFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName,std::string className, const std::string& modelFolder, std::string& labelMap) {
|
|
try {
|
|
bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig,modelName, className, modelFolder, labelMap);
|
|
if (!result) return false;
|
|
std::string _modelName = modelName;
|
|
if (_modelName.empty()) {
|
|
_modelName = "scrfd";
|
|
}
|
|
std::string modelFullName = _modelName +".onnx";
|
|
// We need to get the modelfolder from here
|
|
_modelConfig = modelConfig;
|
|
_modelConfig.modelType = ModelType::FACEDETECT;
|
|
_modelConfig.detectionType = DetectionType::FACEDETECTOR;
|
|
_modelConfig.inpHeight = 640; // Only for Yolo model
|
|
_modelConfig.inpWidth = 640;
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|
if (_isInitialized) {
|
|
_face_detector.reset(); // Releases previously allocated memory for face detection
|
|
_isInitialized = false; // Reset initialization flag
|
|
}
|
|
this->_face_detector = std::make_unique<SCRFD>(modelFullName);
|
|
_isInitialized = true;
|
|
return _isInitialized;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
bool ANSOVFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|
if (!FileExist(_modelFilePath)) {
|
|
optimizedModelFolder = "";
|
|
return false;
|
|
}
|
|
bool result = ANSFDBase::OptimizeModel(fp16, optimizedModelFolder);
|
|
return result;
|
|
}
|
|
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    // Detect faces first; optionally run a liveness validation pass over the
    // raw detections before returning them.
    std::vector<Object> faces = Inference(input, camera_id, useDynamicImage, validateFace);
    if (!facelivenessCheck) {
        return faces;
    }
    return ValidateLivenessFaces(input, faces, camera_id);
}
|
|
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    // Convenience overload: identical to the camera-aware overload with a
    // fixed "CustomCam" camera id. Delegating removes the previously
    // duplicated detection/liveness logic.
    return RunInference(input, "CustomCam", useDynamicImage, validateFace, facelivenessCheck);
}
|
|
|
|
// Runs SCRFD face detection on `input` and returns detections (box, 5-point
// landmarks, aligned 112x112 face mask) in original-image coordinates.
// @param input           BGR or grayscale image (converted to BGR internally).
// @param camera_id       copied into each detection's cameraId field.
// @param useDynamicImage when false, the image is treated as a cropped face and
//                        padded/resized before detection (see Step 4).
// @param validateFace    when true, detections failing isValidFace() are dropped.
// @return vector of Object detections; empty on any validation/error path.
std::vector<Object> ANSOVFD::Inference(const cv::Mat& input,
    const std::string& camera_id,
    bool useDynamicImage,
    bool validateFace)
{
    // Serialize all inference through one mutex; the detector and the
    // thread-local GPU-frame lookup below are used without other locking.
    std::lock_guard<std::mutex> guard(_mtx);

    try {
        // Step 1: Validate license and initialization
        if (!_licenseValid) {
            _logger.LogError("ANSOVFD::RunInference", "Invalid license",
                __FILE__, __LINE__);
            return {};
        }

        if (!_isInitialized || !_face_detector) {
            _logger.LogError("ANSOVFD::RunInference",
                "Model or face detector not initialized",
                __FILE__, __LINE__);
            return {};
        }

        // Step 2: Validate input image (reject degenerate sizes)
        if (input.empty() || input.cols < 5 || input.rows < 5) {
            _logger.LogError("ANSOVFD::RunInference", "Invalid input image",
                __FILE__, __LINE__);
            return {};
        }

        // Step 3: Convert to BGR if needed
        cv::Mat processedImage;
        if (input.channels() == 1) {
            cv::cvtColor(input, processedImage, cv::COLOR_GRAY2BGR);
        }
        else if (input.channels() == 3) {
            processedImage = input; // Shallow copy - safe
        }
        else {
            _logger.LogError("ANSOVFD::RunInference",
                "Unsupported number of image channels",
                __FILE__, __LINE__);
            return {};
        }

        // Step 4: Prepare image for detection
        const bool croppedFace = !useDynamicImage;
        const int originalWidth = input.cols;
        const int originalHeight = input.rows;

        cv::Mat im;
        int offsetX = 0, offsetY = 0; // Track padding offset

        if (croppedFace) {
            // Add padding (200px replicated border on every side) so a tightly
            // cropped face has enough context for the detector.
            cv::copyMakeBorder(processedImage, im, 200, 200, 200, 200,
                cv::BORDER_REPLICATE);
            offsetX = 200;
            offsetY = 200;

            // Resize if too large (cap height at 1280, keep aspect ratio)
            if (im.rows > 1280) {
                const float aspectRatio = static_cast<float>(im.cols) / im.rows;
                const int newHeight = 1280;
                const int newWidth = static_cast<int>(newHeight * aspectRatio);

                // Update offsets for resize so they can still be subtracted
                // from detections later.
                const float scale = static_cast<float>(newHeight) / im.rows;
                offsetX = static_cast<int>(offsetX * scale);
                offsetY = static_cast<int>(offsetY * scale);

                cv::resize(im, im, cv::Size(newWidth, newHeight));
            }
        }
        else {
            im = processedImage; // Shallow copy
        }

        // Step 5: Run face detection
        std::vector<ANSCENTER::types::BoxfWithLandmarks> detectionResults;
        _face_detector->detect(im, detectionResults);

        // Step 6: Process detections
        std::vector<Object> output;
        output.reserve(detectionResults.size()); // Pre-allocate

        // Try to get full-res image from registry for higher quality face alignment.
        // On Intel-only systems (no NVIDIA GPU), NV12 may be in CPU memory from
        // D3D11VA decode. Convert to BGR once and use for all face alignments.
        cv::Mat fullResImg;
        float landmarkScaleX = 1.f, landmarkScaleY = 1.f;
        if (!croppedFace && !detectionResults.empty()) {
            // Thread-local full-resolution frame registered by the decoder, if any.
            auto* gpuData = tl_currentGpuFrame();
            // Only worth using when strictly larger than the detection input.
            if (gpuData && gpuData->width > im.cols && gpuData->height > im.rows) {
                // pixelFormat 1000 appears to be a project-specific "BGR" tag;
                // 23 is NV12 — TODO confirm against ANSGpuFrameRegistry.h.
                if (gpuData->pixelFormat == 1000 && gpuData->yPlane) {
                    // BGR full-res from ANSVideoPlayer/ANSFilePlayer
                    fullResImg = cv::Mat(gpuData->height, gpuData->width, CV_8UC3,
                        gpuData->yPlane, gpuData->yLinesize).clone();
                    landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                    landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                }
                else if (gpuData->pixelFormat == 23 && !gpuData->isCudaDevicePtr &&
                    gpuData->yPlane && gpuData->uvPlane &&
                    (gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
                    // CPU NV12 — convert to BGR for face alignment
                    try {
                        cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
                            gpuData->yPlane, gpuData->yLinesize);
                        cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
                            gpuData->uvPlane, gpuData->uvLinesize);
                        cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
                        landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                        landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                    } catch (...) { /* NV12 conversion failed — skip full-res */ }
                }
                else if (gpuData->pixelFormat == 23 && gpuData->isCudaDevicePtr &&
                    gpuData->cpuYPlane && gpuData->cpuUvPlane &&
                    (gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
                    // CUDA NV12 with CPU fallback planes
                    try {
                        cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
                            gpuData->cpuYPlane, gpuData->cpuYLinesize);
                        cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
                            gpuData->cpuUvPlane, gpuData->cpuUvLinesize);
                        cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
                        landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
                        landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
                    } catch (...) { /* NV12 conversion failed — skip full-res */ }
                }
            }
        }

        for (const auto& obj : detectionResults) {
            // Check confidence threshold
            const float confidence = obj.box.score;
            if (confidence < _modelConfig.detectionScoreThreshold) {
                continue;
            }

            // Validate face landmarks (27 appears to be a geometry threshold
            // for isValidFace — see Utility.h for its meaning).
            if (validateFace && !isValidFace(obj.landmarks.points,obj.box.rect(), 27)) {
                continue;
            }

            Object result;
            result.classId = 0;
            result.className = "Face";
            result.confidence = confidence;
            result.cameraId = camera_id;

            // Get bounding box
            cv::Rect bbox = obj.box.rect();

            // Process face for mask — use full-res image when available
            // for higher quality 112x112 face alignment
            cv::Mat alignImage;
            std::vector<cv::Point2f> face_landmark_5 = obj.landmarks.points;
            if (!fullResImg.empty()) {
                // Scale landmarks from display-res to full-res
                for (auto& pt : face_landmark_5) {
                    pt.x *= landmarkScaleX;
                    pt.y *= landmarkScaleY;
                }
                alignImage = fullResImg;
            } else {
                alignImage = im;
            }
            // Preprocess performs the landmark-based face alignment and
            // returns the aligned crop used as the detection's mask.
            result.mask = Preprocess(alignImage, face_landmark_5, alignImage);

            if (result.mask.empty()) {
                _logger.LogError("ANSOVFD::RunInference", "Cannot get mask image",
                    __FILE__, __LINE__);
                continue;
            }

            // Adjust coordinates back to original image space
            if (croppedFace) {
                // Remove padding offset
                bbox.x = std::max(0, bbox.x - offsetX);
                bbox.y = std::max(0, bbox.y - offsetY);

                // Clamp to original image bounds
                // NOTE(review): clamping x/y to width/height (not width-1/height-1)
                // can yield zero-area boxes at the edge — confirm intended.
                bbox.x = std::min(bbox.x, originalWidth);
                bbox.y = std::min(bbox.y, originalHeight);
                bbox.width = std::min(bbox.width, originalWidth - bbox.x);
                bbox.height = std::min(bbox.height, originalHeight - bbox.y);

                result.box = bbox;

                // Adjust landmarks (kps is a flat [x0,y0,x1,y1,...] list)
                result.kps.reserve(obj.landmarks.points.size() * 2);
                for (const auto& pt : obj.landmarks.points) {
                    result.kps.push_back(std::max(0.0f, pt.x - offsetX));
                    result.kps.push_back(std::max(0.0f, pt.y - offsetY));
                }
            }
            else {
                result.box = bbox;

                // Copy landmarks directly
                result.kps.reserve(obj.landmarks.points.size() * 2);
                for (const auto& pt : obj.landmarks.points) {
                    result.kps.push_back(pt.x);
                    result.kps.push_back(pt.y);
                }
            }

            output.push_back(std::move(result));
        }

        return output;

    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSOVFD::RunInference",
            "Exception: " + std::string(e.what()),
            __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        _logger.LogFatal("ANSOVFD::RunInference", "Unknown fatal exception",
            __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
ANSOVFD::~ANSOVFD() {
    // Destructors must not throw: swallow and log everything Destroy() raises.
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // BUG FIX: non-std exceptions previously escaped the destructor.
        this->_logger.LogFatal("ANSOVFD::Destroy", "Unknown exception", __FILE__, __LINE__);
    }
}
|
|
bool ANSOVFD::Destroy() {
|
|
try {
|
|
#ifdef USEOVFACEDETECTOR
|
|
_faceLandmark.reset(); // Releases previously allocated memory for face landmark
|
|
#endif
|
|
_face_detector.reset(); // Releases previously allocated memory for face detection
|
|
_isInitialized = false; // Reset initialization flag
|
|
if (FolderExist(_modelFolder)) {
|
|
if (!DeleteFolder(_modelFolder)) {
|
|
this->_logger.LogError("ANSOVFD::Destroy", "Failed to release ANSOVFD Models", __FILE__, __LINE__);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef USEOVFACEDETECTOR
|
|
// Loads an OpenVINO face-detection model, validates its output layout
// (single SSD DetectionOutput, or box+labels output pair), records the
// input/output names and sizes in member state, and attaches u8/NHWC
// preprocessing. Always returns the (possibly pre/post-processed) model,
// even after a fatal log — callers rely on a non-null model.
std::shared_ptr<ov::Model> ANSOVFD::read(const ov::Core& core, std::string pathToModel) {
    std::shared_ptr<ov::Model> model = core.read_model(pathToModel);
    // NOTE(review): for an NCHW input shape [N,C,H,W], index 3 is width and
    // index 2 is height — these assignments look swapped. Harmless only if
    // the model input is square; confirm against the actual model layout.
    model_input_height = model->input().get_shape()[3];
    model_input_width = model->input().get_shape()[2];
    ov::OutputVector outputs = model->outputs();
    if (outputs.size() == 1) {
        // Single-output SSD DetectionOutput: shape [1,1,ndetections,7].
        output = outputs.front().get_any_name();
        const auto& outShape = outputs.front().get_shape();
        if (outShape.size() != 4) {
            this->_logger.LogFatal("ANSOVFD::read.", "Face Detection model output should have 4 dimentions", __FILE__, __LINE__);
            return model;
        }
        // Each detection row: [image_id, label, conf, x_min, y_min, x_max, y_max].
        objectSize = outputs.front().get_shape()[3];
        if (objectSize != 7) {
            this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output layer should have 7 as a last dimension", __FILE__, __LINE__);
            return model;
        }
        if (outShape[2] != ndetections) {
            this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output must contain 200 detections", __FILE__, __LINE__);
            return model;
        }
    }
    else {
        // Two-output variant: a [ndetections,5] box tensor plus an i32 label
        // tensor, matched by shape/element type rather than by name.
        for (const auto& out : outputs) {
            const auto& outShape = out.get_shape();
            if (outShape.size() == 2 && outShape.back() == 5) {
                output = out.get_any_name();
                if (outShape[0] != ndetections) {
                    this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output must contain 200 detections", __FILE__, __LINE__);
                    return model;
                }
                objectSize = outShape.back();
            }
            else if (outShape.size() == 1 && out.get_element_type() == ov::element::i32) {
                labels_output = out.get_any_name();
            }
        }
        if (output.empty() || labels_output.empty()) {
            this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model must contain either single DetectionOutput", __FILE__, __LINE__);
            return model;
        }
    }

    // Accept u8 NHWC frames directly and convert to NCHW inside the graph;
    // force f32 on the detection output.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().
        set_element_type(ov::element::u8).
        set_layout("NHWC");
    ppp.input().preprocess().convert_layout("NCHW");
    ppp.output(output).tensor().set_element_type(ov::element::f32);
    model = ppp.build();
    ov::set_batch(model, 1);
    // Cache the final input shape for later tensor allocation.
    inShape = model->input().get_shape();
    return model;
}
|
|
// Initializes the OpenVINO face-detection pipeline: picks the YOLO model
// (ansylfd.xml) when present, otherwise the SSD model (ansovfd.xml); loads
// the optional landmarks network; then compiles the chosen detector for the
// selected device. Returns false only when no detector model file exists.
bool ANSOVFD::InitFaceDetector() {
    if (_isInitialized) {
        _faceLandmark.reset(); // Releases previously allocated memory for face landmark
        _isInitialized = false; // Reset initialization flag
    }
    // Check if the Yolo model is available; it takes priority over SSD.
    std::string yoloModel = CreateFilePath(_modelFolder, "ansylfd.xml");
    if (std::filesystem::exists(yoloModel)) {
        _modelFilePath = yoloModel;
        _useYoloFaceDetector = true;
    }
    else {
        _useYoloFaceDetector = false;
        std::string xmlfile = CreateFilePath(_modelFolder, "ansovfd.xml");
        if (std::filesystem::exists(xmlfile)) {
            _modelFilePath = xmlfile;
        }
        else {
            // Neither detector model is present — cannot continue.
            this->_logger.LogError("ANSOVFD::Initialize. Face detector model file is not exist", _modelFilePath, __FILE__, __LINE__);
            return false;
        }
    }
    std::string deviceName = GetOpenVINODevice();
    // Landmarks network is optional: its absence is logged but initialization
    // proceeds with the detector only.
    std::string landmarkModel = CreateFilePath(_modelFolder, "landmarks.xml");
    if (std::filesystem::exists(landmarkModel)) {
        _landmarkModelFilePath = landmarkModel;
        this->_logger.LogDebug("ANSOVFD::Initialize. Loading landmarks weight", _landmarkModelFilePath, __FILE__, __LINE__);
        ov::Core lmcore;
        CnnConfig landmarks_config(_landmarkModelFilePath, "Facial Landmarks Regression");
        if (deviceName == "NPU") {
            // Prefer NPU with GPU fallback, both tuned for latency.
            lmcore.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            lmcore.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            landmarks_config.m_deviceName = "AUTO:NPU,GPU";
        }
        else {
            //lmcore.set_property(deviceName, ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            landmarks_config.m_deviceName = deviceName;
        }
        landmarks_config.m_max_batch_size = 1;
        landmarks_config.m_core = lmcore;
        this->_faceLandmark = std::make_unique<VectorCNN>(landmarks_config);
        if (!_faceLandmark) {
            // NOTE(review): make_unique never returns null — this branch is
            // effectively dead; VectorCNN failures surface as exceptions.
            this->_logger.LogFatal("ANSOVFD::Initialize", "Failed to initialize face recognizer model", __FILE__, __LINE__);
        }
    }
    else {
        this->_logger.LogError("ANSOVFD::LoadModel. Model landmarks.xml file is not exist", _landmarkModelFilePath, __FILE__, __LINE__);
    }
    // Compile whichever detector was selected above.
    if (_useYoloFaceDetector) InitialYoloModel(deviceName);
    else InitialSSDModel(deviceName);

    _isInitialized = true;
    return _isInitialized;
}
|
|
void ANSOVFD::InitialSSDModel(const std::string& deviceName) {
|
|
try {
|
|
bb_enlarge_coefficient = 1.0;
|
|
bb_dx_coefficient = 1;
|
|
bb_dy_coefficient = 1;
|
|
ov::Core core;
|
|
if (deviceName == "NPU") {
|
|
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
ov::CompiledModel cml = core.compile_model(read(core, _modelFilePath), "AUTO:NPU,GPU");
|
|
request = cml.create_infer_request();
|
|
inTensor = request.get_input_tensor();
|
|
inTensor.set_shape(inShape);
|
|
}
|
|
else {
|
|
//ore.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
//core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
ov::CompiledModel cml = core.compile_model(read(core, _modelFilePath), deviceName);
|
|
request = cml.create_infer_request();
|
|
inTensor = request.get_input_tensor();
|
|
inTensor.set_shape(inShape);
|
|
}
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("ANSOVFD::InitialSSDModel", e.what(), __FILE__, __LINE__);
|
|
}
|
|
}
|
|
void ANSOVFD::InitialYoloModel(const std::string& deviceName) {
|
|
try {
|
|
ov::Core core;
|
|
if (deviceName == "NPU") {
|
|
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
compiled_model_ = core.compile_model(_modelFilePath, "AUTO:NPU,GPU");
|
|
}
|
|
else {
|
|
// Configure and compile for individual device
|
|
//core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
//core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
|
compiled_model_ = core.compile_model(_modelFilePath, deviceName);
|
|
}
|
|
request = compiled_model_.create_infer_request();
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("OPENVINOOD::InitialModel", e.what(), __FILE__, __LINE__);
|
|
}
|
|
}
|
|
// Runs the compiled SSD face detector on `frame` and converts the raw
// detection tensor into Object results (thresholded, squared, enlarged and
// clamped to the frame). Returns an empty vector for empty/tiny frames or
// on exception.
std::vector<Object> ANSOVFD::runOVInference(const cv::Mat& frame, const std::string& cameraId) {
    std::vector<Object> detectionOutputs;
    if (frame.empty()) {
        this->_logger.LogError("ANSOVFD::runOVInference", "Input image is empty", __FILE__, __LINE__);
        return detectionOutputs;
    }
    // Reject degenerate frames that would produce meaningless detections.
    if ((frame.cols < 10) || (frame.rows < 10)) return detectionOutputs;
    try {
        // Cache frame size for scaling normalized model coordinates back.
        width = static_cast<float>(frame.cols);
        height = static_cast<float>(frame.rows);
        resize2tensor(frame, inTensor);
        request.infer();
        float* detections = request.get_tensor(output).data<float>();
        // Two-output model path: separate box tensor + i32 label tensor
        // (layout per row: [x_min, y_min, x_max, y_max, conf], pixel coords
        // in model-input space).
        if (!labels_output.empty()) {
            const int32_t* labels = request.get_tensor(labels_output).data<int32_t>();
            for (size_t i = 0; i < ndetections; i++) {
                Object r;
                // NOTE(review): assigns an int32 to className — if className
                // is std::string this goes through a char conversion; confirm
                // Object::className's type and the intended label mapping.
                r.className = labels[i];
                r.confidence = detections[i * objectSize + 4];

                if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                    continue;
                }
                // Scale from model-input pixel coordinates to frame pixels.
                r.box.x = static_cast<int>(detections[i * objectSize] / model_input_width * width);
                r.box.y = static_cast<int>(detections[i * objectSize + 1] / model_input_height * height);
                r.box.width = static_cast<int>(detections[i * objectSize + 2] / model_input_width * width - r.box.x);
                r.box.height = static_cast<int>(detections[i * objectSize + 3] / model_input_height * height - r.box.y);

                // Make square and enlarge face bounding box for more robust operation of face analytics networks
                int bb_width = r.box.width;
                int bb_height = r.box.height;
                int bb_center_x = r.box.x + bb_width / 2;
                int bb_center_y = r.box.y + bb_height / 2;
                int max_of_sizes = std::max(bb_width, bb_height);
                int bb_new_width = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
                int bb_new_height = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
                r.box.x = bb_center_x - static_cast<int>(std::floor(bb_dx_coefficient * bb_new_width / 2));
                r.box.y = bb_center_y - static_cast<int>(std::floor(bb_dy_coefficient * bb_new_height / 2));
                r.box.width = bb_new_width;
                r.box.height = bb_new_height;

                // Clamp to the frame.
                r.box.y = std::max(0, r.box.y);
                r.box.x = std::max(0, r.box.x);
                r.box.width = std::min(r.box.width, frame.cols - r.box.x);
                r.box.height = std::min(r.box.height, frame.rows - r.box.y);
                r.cameraId = cameraId;

                if (r.confidence > _modelConfig.detectionScoreThreshold) {
                    detectionOutputs.push_back(r);
                }
            }
        }

        // Single-output SSD DetectionOutput path (row layout:
        // [image_id, label, conf, x_min, y_min, x_max, y_max], normalized).
        // NOTE(review): this loop is NOT in an else-branch — when
        // labels_output is non-empty the same buffer is re-read here under
        // the 7-element layout, potentially adding bogus boxes; confirm
        // whether this should be `else`.
        for (size_t i = 0; i < ndetections; i++) {
            // image_id < 0 marks the end of valid detections.
            float image_id = detections[i * objectSize];
            if (image_id < 0) {
                break;
            }
            Object r;
            r.className = static_cast<int>(detections[i * objectSize + 1]);
            r.confidence = detections[i * objectSize + 2];

            if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                continue;
            }
            // Normalized [0,1] coordinates scaled to frame pixels.
            r.box.x = static_cast<int>(detections[i * objectSize + 3] * width);
            r.box.y = static_cast<int>(detections[i * objectSize + 4] * height);
            r.box.width = static_cast<int>(detections[i * objectSize + 5] * width - r.box.x);
            r.box.height = static_cast<int>(detections[i * objectSize + 6] * height - r.box.y);

            // Make square and enlarge face bounding box for more robust operation of face analytics networks
            int bb_width = r.box.width;
            int bb_height = r.box.height;
            int bb_center_x = r.box.x + bb_width / 2;
            int bb_center_y = r.box.y + bb_height / 2;
            int max_of_sizes = std::max(bb_width, bb_height);
            int bb_new_width = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
            int bb_new_height = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
            r.box.x = bb_center_x - static_cast<int>(std::floor(bb_dx_coefficient * bb_new_width / 2));
            r.box.y = bb_center_y - static_cast<int>(std::floor(bb_dy_coefficient * bb_new_height / 2));
            r.box.width = bb_new_width;
            r.box.height = bb_new_height;

            // Clamp to the frame.
            r.box.y = std::max(0, r.box.y);
            r.box.x = std::max(0, r.box.x);
            r.box.width = std::min(r.box.width, frame.cols - r.box.x);
            r.box.height = std::min(r.box.height, frame.rows - r.box.y);
            r.cameraId = cameraId;

            if (r.confidence > _modelConfig.detectionScoreThreshold) {
                detectionOutputs.push_back(r);
            }
        }
        return detectionOutputs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::runOVInference", e.what(), __FILE__, __LINE__);
        return detectionOutputs;
    }

}
|
|
cv::Mat ANSOVFD::PreProcessing(const cv::Mat& source) {
    // Letterbox helper: embed the image in the top-left corner of a square
    // black canvas whose side equals the larger input dimension. Returns the
    // original image unchanged if an exception occurs.
    try {
        const int w = source.cols;
        const int h = source.rows;
        const int side = MAX(w, h);
        cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
        if (w > 0 && h > 0) {
            source.copyTo(squared(cv::Rect(0, 0, w, h)));
        }
        return squared;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::PreProcessing", e.what(), __FILE__, __LINE__);
        return source;
    }
}
|
|
// Runs the compiled YOLO face detector on `input`: letterboxes to a square,
// builds a 640x640 blob, infers, decodes the [84 x 8400]-style output,
// applies NMS, and returns the surviving boxes scaled back to input pixels.
// Returns an empty vector for empty/tiny frames or on exception.
std::vector<Object> ANSOVFD::runYoloInference(const cv::Mat& input, const std::string& cameraId) {
    std::vector<Object> outputs;
    outputs.clear();
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSOVFD::RunInference", "Input image is empty", __FILE__, __LINE__);
            return outputs;
        }
        // Reject degenerate frames.
        if ((input.cols < 10) || (input.rows < 10)) return outputs;

        // Step 0: Prepare input — pad to a square then normalize to 640x640.
        cv::Mat img = input.clone();
        cv::Mat letterbox_img = PreProcessing(img);
        // Scale factor to map 640-space coordinates back to input pixels
        // (letterbox is square, so one factor covers both axes).
        float scale = letterbox_img.size[0] / 640.0;
        cv::Mat blob = cv::dnn::blobFromImage(letterbox_img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true);

        // Step 1: Feed blob to the network
        // Get input port for model with one input
        auto input_port = compiled_model_.input();
        // Create tensor from external memory (no copy; blob must outlive infer)
        ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0));
        // Set input tensor for model with one input
        request.set_input_tensor(input_tensor);

        // Step 3. Start inference
        request.infer();

        // Step 4. Get output
        auto output = request.get_output_tensor(0);
        auto output_shape = output.get_shape();
        int rows = output_shape[2]; //8400
        int dimensions = output_shape[1]; //84: box[cx, cy, w, h]+80 classes scores

        // Step 5. Post-processing — transpose to one detection per row.
        float* data = output.data<float>();
        cv::Mat output_buffer(output_shape[1], output_shape[2], CV_32F, data);
        transpose(output_buffer, output_buffer); //[8400,84]

        std::vector<int> class_ids;
        std::vector<float> class_scores;
        std::vector<cv::Rect> boxes;

        // Figure out the bbox, class_id and class_score
        for (int i = 0; i < output_buffer.rows; i++) {
            // Columns 4..end are per-class scores; take the best class.
            cv::Mat classes_scores = output_buffer.row(i).colRange(4, output_shape[1]);
            cv::Point class_id;
            double maxClassScore;
            minMaxLoc(classes_scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > _modelConfig.detectionScoreThreshold) {
                class_scores.push_back(maxClassScore);
                class_ids.push_back(class_id.x);
                // Decode center-format box and scale back to input pixels.
                float cx = output_buffer.at<float>(i, 0);
                float cy = output_buffer.at<float>(i, 1);
                float w = output_buffer.at<float>(i, 2);
                float h = output_buffer.at<float>(i, 3);
                int left = int((cx - 0.5 * w) * scale);
                int top = int((cy - 0.5 * h) * scale);
                int width = int(w * scale);
                int height = int(h * scale);
                // Clamp to the original (un-letterboxed) image.
                left = std::max(0, left);
                top = std::max(0, top);
                width = std::min(width, img.cols - left);
                height = std::min(height, img.rows - top);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }

        //NMS — suppress overlapping candidates.
        std::vector<int> indices;
        cv::dnn::NMSBoxes(boxes, class_scores, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, indices);
        for (int i = 0; i < indices.size(); i++)
        {
            Object result;
            int id = indices[i];
            if (class_scores[id] > _modelConfig.detectionScoreThreshold) {
                int classId = class_ids[id];
                result.classId = classId;
                result.className = "Face";
                result.confidence = class_scores[id];
                result.box = boxes[id];
                outputs.push_back(result);
            }
        }
        img.release();
        letterbox_img.release();
        return outputs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("OPENVINOOD::runYoloInference", e.what(), __FILE__, __LINE__);
        return outputs;
    }
}
|
|
#endif
|
|
}
|