// ANSCORE/ANSODEngine/ANSOVFaceDetector.cpp
// OpenVINO / SCRFD based face detector implementation (741 lines, ~34 KiB).
// Last recorded revision: 2026-03-28 16:54:11 +11:00
#include "ANSOVFaceDetector.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()

#include <algorithm>
#include <cmath>
#include <string>

#include <models/detection_model_ssd.h>
#include <models/input_data.h>
#include <models/results.h>
#include <pipelines/metadata.h>

#include "Utility.h"
//#define FNS_DEBUG
namespace ANSCENTER {
/// Initializes the SCRFD face detector from a zipped model package.
/// Forces face-detector model settings, sanitises thresholds, and
/// (re)creates the SCRFD network from "<modelFolder>/scrfd.onnx".
/// @param labelMap  [out] always set to "Face" (single-class detector).
/// @return true on success, false on any exception during setup.
bool ANSOVFD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    // Base-class initialisation extracts the package and fills _modelFolder.
    // NOTE(review): its result is deliberately ignored and the licence flag is
    // forced on below, which makes the licence check dead code — confirm this
    // is intentional before tightening it.
    ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    labelMap = "Face"; // this detector only reports one class
    _licenseValid = true;
    if (!_licenseValid) return false;
    try {
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;
        // SCRFD is exported with a fixed 640x640 input.
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Guard against missing/implausible thresholds in the supplied config.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        if (_isInitialized) {
            _face_detector.reset(); // release a previously loaded detector
            _isInitialized = false;
        }
        std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
        this->_face_detector = std::make_unique<SCRFD>(onnxModel);
        _isInitialized = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Extracts the model package via the base class (which populates
/// _modelFolder) and marks the detector ready. The SCRFD network itself
/// is not constructed here.
/// @return true when the base extraction succeeded.
bool ANSOVFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    try {
        // Delegate extraction so _modelFolder gets populated.
        if (!ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword)) {
            return false;
        }
        // Resolve the expected network location inside the extracted folder;
        // the detector instance is created elsewhere.
        const std::string onnxModel = CreateFilePath(_modelFolder, "scrfd.onnx");
        //this->_face_detector = std::make_unique<SCRFD>(onnxModel);
        _isInitialized = true;
        return _isInitialized;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSOVFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName,std::string className, const std::string& modelFolder, std::string& labelMap) {
try {
bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig,modelName, className, modelFolder, labelMap);
if (!result) return false;
std::string _modelName = modelName;
if (_modelName.empty()) {
_modelName = "scrfd";
}
std::string modelFullName = _modelName +".onnx";
// We need to get the modelfolder from here
_modelConfig = modelConfig;
_modelConfig.modelType = ModelType::FACEDETECT;
_modelConfig.detectionType = DetectionType::FACEDETECTOR;
_modelConfig.inpHeight = 640; // Only for Yolo model
_modelConfig.inpWidth = 640;
if (_modelConfig.modelMNSThreshold < 0.2)
_modelConfig.modelMNSThreshold = 0.5;
if (_modelConfig.modelConfThreshold < 0.2)
_modelConfig.modelConfThreshold = 0.5;
if (_isInitialized) {
_face_detector.reset(); // Releases previously allocated memory for face detection
_isInitialized = false; // Reset initialization flag
}
this->_face_detector = std::make_unique<SCRFD>(modelFullName);
_isInitialized = true;
return _isInitialized;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSOVFD::LoadModel", e.what(), __FILE__, __LINE__);
return false;
}
}
/// Optimizes the loaded model (optionally to FP16) via the base class.
/// @param optimizedModelFolder  [out] cleared when no source model exists.
/// @return false when the model file is missing; otherwise the base result.
bool ANSOVFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    // Without a source model on disk there is nothing to optimise.
    if (!FileExist(_modelFilePath)) {
        optimizedModelFolder = "";
        return false;
    }
    return ANSFDBase::OptimizeModel(fp16, optimizedModelFolder);
}
/// Runs face detection on one frame; when facelivenessCheck is set the raw
/// detections are additionally filtered/annotated by the liveness validator.
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    // Always run the plain detector first.
    std::vector<Object> faces = Inference(input, camera_id, useDynamicImage, validateFace);
    if (!facelivenessCheck) {
        return faces;
    }
    // Optional post-pass: liveness validation over the raw detections.
    return ValidateLivenessFaces(input, faces, camera_id);
}
/// Convenience overload: identical behaviour with the fixed camera id
/// "CustomCam".
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    return RunInference(input, "CustomCam", useDynamicImage, validateFace, facelivenessCheck);
}
// Core SCRFD inference pass for one frame.
// - input:          BGR (3ch) or grayscale (1ch) image, at least 5x5 pixels;
//                   other channel counts are rejected.
// - camera_id:      tag copied onto every returned Object.
// - useDynamicImage: when false the frame is treated as an already-cropped
//                   face and is padded (and possibly downscaled) before
//                   detection — the "croppedFace" path below.
// - validateFace:   when true, detections failing isValidFace() are dropped.
// Returns detected faces (box, 5-point landmarks, aligned face "mask" image),
// or an empty vector on any validation/initialisation failure or exception.
// Thread-safety: the entire pass is serialised on _mtx.
std::vector<Object> ANSOVFD::Inference(const cv::Mat& input,
const std::string& camera_id,
bool useDynamicImage,
bool validateFace)
{
std::lock_guard<std::mutex> guard(_mtx);
try {
// Step 1: Validate license and initialization
if (!_licenseValid) {
_logger.LogError("ANSOVFD::RunInference", "Invalid license",
__FILE__, __LINE__);
return {};
}
if (!_isInitialized || !_face_detector) {
_logger.LogError("ANSOVFD::RunInference",
"Model or face detector not initialized",
__FILE__, __LINE__);
return {};
}
// Step 2: Validate input image
if (input.empty() || input.cols < 5 || input.rows < 5) {
_logger.LogError("ANSOVFD::RunInference", "Invalid input image",
__FILE__, __LINE__);
return {};
}
// Step 3: Convert to BGR if needed
cv::Mat processedImage;
if (input.channels() == 1) {
cv::cvtColor(input, processedImage, cv::COLOR_GRAY2BGR);
}
else if (input.channels() == 3) {
processedImage = input; // Shallow copy - safe
}
else {
_logger.LogError("ANSOVFD::RunInference",
"Unsupported number of image channels",
__FILE__, __LINE__);
return {};
}
// Step 4: Prepare image for detection
const bool croppedFace = !useDynamicImage;
const int originalWidth = input.cols;
const int originalHeight = input.rows;
cv::Mat im;
int offsetX = 0, offsetY = 0; // Track padding offset
if (croppedFace) {
// Add padding so the detector has context around a tight face crop.
cv::copyMakeBorder(processedImage, im, 200, 200, 200, 200,
cv::BORDER_REPLICATE);
offsetX = 200;
offsetY = 200;
// Resize if too large
if (im.rows > 1280) {
const float aspectRatio = static_cast<float>(im.cols) / im.rows;
const int newHeight = 1280;
const int newWidth = static_cast<int>(newHeight * aspectRatio);
// Update offsets for resize
// NOTE(review): only the padding offsets are rescaled here. The boxes
// and landmarks detected on the resized image are later shifted by
// these offsets but never divided by `scale`, so when this resize
// triggers they may stay in resized coordinates — verify the
// cropped-face coordinate mapping.
const float scale = static_cast<float>(newHeight) / im.rows;
offsetX = static_cast<int>(offsetX * scale);
offsetY = static_cast<int>(offsetY * scale);
cv::resize(im, im, cv::Size(newWidth, newHeight));
}
}
else {
im = processedImage; // Shallow copy
}
// Step 5: Run face detection
std::vector<ANSCENTER::types::BoxfWithLandmarks> detectionResults;
_face_detector->detect(im, detectionResults);
// Step 6: Process detections
std::vector<Object> output;
output.reserve(detectionResults.size()); // Pre-allocate
// Try to get full-res image from registry for higher quality face alignment.
// On Intel-only systems (no NVIDIA GPU), NV12 may be in CPU memory from
// D3D11VA decode. Convert to BGR once and use for all face alignments.
cv::Mat fullResImg;
float landmarkScaleX = 1.f, landmarkScaleY = 1.f;
if (!croppedFace && !detectionResults.empty()) {
// Thread-local frame registered by the preprocessing stage; may be null.
auto* gpuData = tl_currentGpuFrame();
if (gpuData && gpuData->width > im.cols && gpuData->height > im.rows) {
// pixelFormat 1000: presumably an internal "packed BGR" marker — confirm.
if (gpuData->pixelFormat == 1000 && gpuData->yPlane) {
// BGR full-res from ANSVideoPlayer/ANSFilePlayer
fullResImg = cv::Mat(gpuData->height, gpuData->width, CV_8UC3,
gpuData->yPlane, gpuData->yLinesize).clone();
landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
}
// pixelFormat 23: presumably FFmpeg AV_PIX_FMT_NV12 — confirm.
else if (gpuData->pixelFormat == 23 && !gpuData->isCudaDevicePtr &&
gpuData->yPlane && gpuData->uvPlane &&
(gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
// CPU NV12 — convert to BGR for face alignment
try {
cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
gpuData->yPlane, gpuData->yLinesize);
cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
gpuData->uvPlane, gpuData->uvLinesize);
cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
} catch (...) { /* NV12 conversion failed — skip full-res */ }
}
else if (gpuData->pixelFormat == 23 && gpuData->isCudaDevicePtr &&
gpuData->cpuYPlane && gpuData->cpuUvPlane &&
(gpuData->width % 2) == 0 && (gpuData->height % 2) == 0) {
// CUDA NV12 with CPU fallback planes
try {
cv::Mat yPlane(gpuData->height, gpuData->width, CV_8UC1,
gpuData->cpuYPlane, gpuData->cpuYLinesize);
cv::Mat uvPlane(gpuData->height / 2, gpuData->width / 2, CV_8UC2,
gpuData->cpuUvPlane, gpuData->cpuUvLinesize);
cv::cvtColorTwoPlane(yPlane, uvPlane, fullResImg, cv::COLOR_YUV2BGR_NV12);
landmarkScaleX = static_cast<float>(gpuData->width) / im.cols;
landmarkScaleY = static_cast<float>(gpuData->height) / im.rows;
} catch (...) { /* NV12 conversion failed — skip full-res */ }
}
}
}
for (const auto& obj : detectionResults) {
// Check confidence threshold
const float confidence = obj.box.score;
if (confidence < _modelConfig.detectionScoreThreshold) {
continue;
}
// Validate face landmarks (27 is a geometry tolerance passed through to
// isValidFace — semantics defined there).
if (validateFace && !isValidFace(obj.landmarks.points,obj.box.rect(), 27)) {
continue;
}
Object result;
result.classId = 0;
result.className = "Face";
result.confidence = confidence;
result.cameraId = camera_id;
// Get bounding box
cv::Rect bbox = obj.box.rect();
// Process face for mask — use full-res image when available
// for higher quality 112x112 face alignment
cv::Mat alignImage;
std::vector<cv::Point2f> face_landmark_5 = obj.landmarks.points;
if (!fullResImg.empty()) {
// Scale landmarks from display-res to full-res
for (auto& pt : face_landmark_5) {
pt.x *= landmarkScaleX;
pt.y *= landmarkScaleY;
}
alignImage = fullResImg;
} else {
alignImage = im;
}
// NOTE(review): alignImage is passed as both source and destination —
// confirm Preprocess() supports in-place/aliased use.
result.mask = Preprocess(alignImage, face_landmark_5, alignImage);
if (result.mask.empty()) {
_logger.LogError("ANSOVFD::RunInference", "Cannot get mask image",
__FILE__, __LINE__);
continue;
}
// Adjust coordinates back to original image space
if (croppedFace) {
// Remove padding offset
bbox.x = std::max(0, bbox.x - offsetX);
bbox.y = std::max(0, bbox.y - offsetY);
// Clamp to original image bounds
// NOTE(review): clamping x/y to width/height (not width-1/height-1)
// can leave a zero-area box exactly on the border.
bbox.x = std::min(bbox.x, originalWidth);
bbox.y = std::min(bbox.y, originalHeight);
bbox.width = std::min(bbox.width, originalWidth - bbox.x);
bbox.height = std::min(bbox.height, originalHeight - bbox.y);
result.box = bbox;
// Adjust landmarks (kps is a flat [x0,y0,x1,y1,...] list)
result.kps.reserve(obj.landmarks.points.size() * 2);
for (const auto& pt : obj.landmarks.points) {
result.kps.push_back(std::max(0.0f, pt.x - offsetX));
result.kps.push_back(std::max(0.0f, pt.y - offsetY));
}
}
else {
result.box = bbox;
// Copy landmarks directly
result.kps.reserve(obj.landmarks.points.size() * 2);
for (const auto& pt : obj.landmarks.points) {
result.kps.push_back(pt.x);
result.kps.push_back(pt.y);
}
}
output.push_back(std::move(result));
}
return output;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSOVFD::RunInference",
"Exception: " + std::string(e.what()),
__FILE__, __LINE__);
return {};
}
catch (...) {
_logger.LogFatal("ANSOVFD::RunInference", "Unknown fatal exception",
__FILE__, __LINE__);
return {};
}
}
/// Destructor: releases all detector resources. Destruction must never
/// throw, so any cleanup failure is routed to the logger instead.
ANSOVFD::~ANSOVFD() {
    try {
        Destroy();
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
    }
}
bool ANSOVFD::Destroy() {
try {
#ifdef USEOVFACEDETECTOR
_faceLandmark.reset(); // Releases previously allocated memory for face landmark
#endif
_face_detector.reset(); // Releases previously allocated memory for face detection
_isInitialized = false; // Reset initialization flag
if (FolderExist(_modelFolder)) {
if (!DeleteFolder(_modelFolder)) {
this->_logger.LogError("ANSOVFD::Destroy", "Failed to release ANSOVFD Models", __FILE__, __LINE__);
}
}
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSOVFD::Destroy", e.what(), __FILE__, __LINE__);
return false;
}
}
#ifdef USEOVFACEDETECTOR
// Reads and validates the face-detection IR model, records its input size and
// output tensor names, and attaches a u8/NHWC -> NCHW preprocessing graph.
// Two output layouts are accepted:
//   (a) a single 4-D DetectionOutput [1,1,ndetections,7]  -> `output` set,
//       `labels_output` left empty;
//   (b) a box tensor [ndetections,5] plus an i32 labels tensor
//       -> both `output` and `labels_output` set.
// Validation failures are logged as fatal but the (possibly unusable) model
// is still returned.
std::shared_ptr<ov::Model> ANSOVFD::read(const ov::Core& core, std::string pathToModel) {
std::shared_ptr<ov::Model> model = core.read_model(pathToModel);
// NOTE(review): for an NCHW input, shape index 2 is height and index 3 is
// width — these two assignments look swapped. The PrePostProcessor below
// declares the *tensor* as NHWC, but get_shape() here reflects the original
// model layout; confirm which layout the deployed IR actually uses.
model_input_height = model->input().get_shape()[3];
model_input_width = model->input().get_shape()[2];
ov::OutputVector outputs = model->outputs();
if (outputs.size() == 1) {
// Layout (a): classic SSD DetectionOutput.
output = outputs.front().get_any_name();
const auto& outShape = outputs.front().get_shape();
if (outShape.size() != 4) {
this->_logger.LogFatal("ANSOVFD::read.", "Face Detection model output should have 4 dimentions", __FILE__, __LINE__);
return model;
}
// Last dimension must be the 7-float detection record
// [image_id, label, conf, x1, y1, x2, y2].
objectSize = outputs.front().get_shape()[3];
if (objectSize != 7) {
this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output layer should have 7 as a last dimension", __FILE__, __LINE__);
return model;
}
if (outShape[2] != ndetections) {
this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output must contain 200 detections", __FILE__, __LINE__);
return model;
}
}
else {
// Layout (b): separate boxes + labels outputs.
for (const auto& out : outputs) {
const auto& outShape = out.get_shape();
if (outShape.size() == 2 && outShape.back() == 5) {
output = out.get_any_name();
if (outShape[0] != ndetections) {
this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model output must contain 200 detections", __FILE__, __LINE__);
return model;
}
objectSize = outShape.back();
}
else if (outShape.size() == 1 && out.get_element_type() == ov::element::i32) {
labels_output = out.get_any_name();
}
}
if (output.empty() || labels_output.empty()) {
this->_logger.LogFatal("ANSOVFD::read. ", "Face Detection model must contain either single DetectionOutput", __FILE__, __LINE__);
return model;
}
}
// Declare the runtime input as u8 NHWC (matching cv::Mat) and let OpenVINO
// convert to the model's NCHW layout; force f32 detection output.
ov::preprocess::PrePostProcessor ppp(model);
ppp.input().tensor().
set_element_type(ov::element::u8).
set_layout("NHWC");
ppp.input().preprocess().convert_layout("NCHW");
ppp.output(output).tensor().set_element_type(ov::element::f32);
model = ppp.build();
ov::set_batch(model, 1);
// Cache the final input shape for sizing the inference tensor later.
inShape = model->input().get_shape();
return model;
}
// Initializes the OpenVINO face-detection pipeline from _modelFolder.
// Prefers a YOLO-style model ("ansylfd.xml") and falls back to the SSD model
// ("ansovfd.xml"); also loads the optional facial-landmark regression network
// ("landmarks.xml") when present. The target device comes from
// GetOpenVINODevice(); "NPU" is mapped to an AUTO:NPU,GPU latency setup.
// @return true once a detector model was compiled; false when no detector
//         model file exists on disk.
bool ANSOVFD::InitFaceDetector() {
if (_isInitialized) {
_faceLandmark.reset(); // Releases previously allocated memory for face landmark
_isInitialized = false; // Reset initialization flag
}
// Check if the Yolo model is available
std::string yoloModel = CreateFilePath(_modelFolder, "ansylfd.xml");
if (std::filesystem::exists(yoloModel)) {
_modelFilePath = yoloModel;
_useYoloFaceDetector = true;
}
else {
// Fall back to the SSD-style detector.
_useYoloFaceDetector = false;
std::string xmlfile = CreateFilePath(_modelFolder, "ansovfd.xml");
if (std::filesystem::exists(xmlfile)) {
_modelFilePath = xmlfile;
}
else {
this->_logger.LogError("ANSOVFD::Initialize. Face detector model file is not exist", _modelFilePath, __FILE__, __LINE__);
return false;
}
}
std::string deviceName = GetOpenVINODevice();
// Landmark model is optional: failure to find it only logs an error.
std::string landmarkModel = CreateFilePath(_modelFolder, "landmarks.xml");
if (std::filesystem::exists(landmarkModel)) {
_landmarkModelFilePath = landmarkModel;
this->_logger.LogDebug("ANSOVFD::Initialize. Loading landmarks weight", _landmarkModelFilePath, __FILE__, __LINE__);
ov::Core lmcore;
CnnConfig landmarks_config(_landmarkModelFilePath, "Facial Landmarks Regression");
if (deviceName == "NPU") {
// Latency hint on both candidate devices; AUTO arbitrates at runtime.
lmcore.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
lmcore.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
landmarks_config.m_deviceName = "AUTO:NPU,GPU";
}
else {
//lmcore.set_property(deviceName, ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
landmarks_config.m_deviceName = deviceName;
}
landmarks_config.m_max_batch_size = 1;
landmarks_config.m_core = lmcore;
this->_faceLandmark = std::make_unique<VectorCNN>(landmarks_config);
if (!_faceLandmark) {
this->_logger.LogFatal("ANSOVFD::Initialize", "Failed to initialize face recognizer model", __FILE__, __LINE__);
}
}
else {
this->_logger.LogError("ANSOVFD::LoadModel. Model landmarks.xml file is not exist", _landmarkModelFilePath, __FILE__, __LINE__);
}
// Compile whichever detector variant was found above.
if (_useYoloFaceDetector) InitialYoloModel(deviceName);
else InitialSSDModel(deviceName);
_isInitialized = true;
return _isInitialized;
}
/// Compiles the SSD-style face model for the requested device and prepares
/// the shared inference request and input tensor. Exceptions are logged,
/// not rethrown.
void ANSOVFD::InitialSSDModel(const std::string& deviceName) {
    try {
        // Default bounding-box post-processing coefficients (no enlargement).
        bb_enlarge_coefficient = 1.0;
        bb_dx_coefficient = 1;
        bb_dy_coefficient = 1;
        ov::Core core;
        std::string target = deviceName;
        if (deviceName == "NPU") {
            // Latency hint on both candidates; AUTO arbitrates between them.
            core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            target = "AUTO:NPU,GPU";
        }
        ov::CompiledModel cml = core.compile_model(read(core, _modelFilePath), target);
        request = cml.create_infer_request();
        inTensor = request.get_input_tensor();
        inTensor.set_shape(inShape);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::InitialSSDModel", e.what(), __FILE__, __LINE__);
    }
}
/// Compiles the YOLO-style face model for the requested device and creates
/// the shared inference request. Exceptions are logged, not rethrown.
void ANSOVFD::InitialYoloModel(const std::string& deviceName) {
    try {
        ov::Core core;
        std::string target = deviceName;
        if (deviceName == "NPU") {
            // Latency hint on both candidates; AUTO arbitrates between them.
            core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
            target = "AUTO:NPU,GPU";
        }
        compiled_model_ = core.compile_model(_modelFilePath, target);
        request = compiled_model_.create_infer_request();
    }
    catch (const std::exception& e) {
        // BUGFIX: log under this class/method (was stale "OPENVINOOD::InitialModel").
        this->_logger.LogFatal("ANSOVFD::InitialYoloModel", e.what(), __FILE__, __LINE__);
    }
}
/// Runs the compiled SSD-style OpenVINO model on one frame and converts its
/// raw output into Object detections. Two mutually exclusive output layouts
/// are supported (detected in read()):
///   - labels_output non-empty: boxes tensor [N,5] + separate i32 labels;
///   - labels_output empty:     single DetectionOutput records
///                              [image_id, label, conf, x1, y1, x2, y2].
/// Boxes are squared, enlarged by the bb_* coefficients, and clamped to the
/// frame. Detections at or below detectionScoreThreshold are discarded.
std::vector<Object> ANSOVFD::runOVInference(const cv::Mat& frame, const std::string& cameraId) {
    std::vector<Object> detectionOutputs;
    if (frame.empty()) {
        this->_logger.LogError("ANSOVFD::runOVInference", "Input image is empty", __FILE__, __LINE__);
        return detectionOutputs;
    }
    if ((frame.cols < 10) || (frame.rows < 10)) return detectionOutputs;
    try {
        width = static_cast<float>(frame.cols);
        height = static_cast<float>(frame.rows);
        resize2tensor(frame, inTensor);
        request.infer();
        float* detections = request.get_tensor(output).data<float>();
        // Square-up, enlarge, and clamp a raw face box; shared by both
        // output-layout branches below.
        auto refineBox = [&](Object& r) {
            const int bb_width = r.box.width;
            const int bb_height = r.box.height;
            const int bb_center_x = r.box.x + bb_width / 2;
            const int bb_center_y = r.box.y + bb_height / 2;
            const int max_of_sizes = std::max(bb_width, bb_height);
            const int bb_new_width = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
            const int bb_new_height = static_cast<int>(bb_enlarge_coefficient * max_of_sizes);
            r.box.x = bb_center_x - static_cast<int>(std::floor(bb_dx_coefficient * bb_new_width / 2));
            r.box.y = bb_center_y - static_cast<int>(std::floor(bb_dy_coefficient * bb_new_height / 2));
            r.box.width = bb_new_width;
            r.box.height = bb_new_height;
            r.box.y = std::max(0, r.box.y);
            r.box.x = std::max(0, r.box.x);
            r.box.width = std::min(r.box.width, frame.cols - r.box.x);
            r.box.height = std::min(r.box.height, frame.rows - r.box.y);
        };
        if (!labels_output.empty()) {
            // Layout (b): boxes in `output` ([N,5]), class ids in `labels_output`.
            const int32_t* labels = request.get_tensor(labels_output).data<int32_t>();
            for (size_t i = 0; i < ndetections; i++) {
                Object r;
                // BUGFIX: the int32 label used to be assigned directly to the
                // string className, silently narrowing it to a single char.
                r.className = std::to_string(labels[i]);
                r.confidence = detections[i * objectSize + 4];
                if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                    continue;
                }
                // Box is in model-input coordinates; rescale to the frame.
                r.box.x = static_cast<int>(detections[i * objectSize] / model_input_width * width);
                r.box.y = static_cast<int>(detections[i * objectSize + 1] / model_input_height * height);
                r.box.width = static_cast<int>(detections[i * objectSize + 2] / model_input_width * width - r.box.x);
                r.box.height = static_cast<int>(detections[i * objectSize + 3] / model_input_height * height - r.box.y);
                refineBox(r);
                r.cameraId = cameraId;
                detectionOutputs.push_back(r);
            }
        }
        else {
            // Layout (a): classic DetectionOutput with normalised coordinates.
            // BUGFIX: this loop used to run even when the two-output layout
            // above had already been parsed, misreading the 5-float records
            // and emitting garbage detections.
            for (size_t i = 0; i < ndetections; i++) {
                float image_id = detections[i * objectSize];
                if (image_id < 0) {
                    break; // negative image_id marks the end of valid records
                }
                Object r;
                // BUGFIX: same char-narrowing issue as the branch above.
                r.className = std::to_string(static_cast<int>(detections[i * objectSize + 1]));
                r.confidence = detections[i * objectSize + 2];
                if (r.confidence <= _modelConfig.detectionScoreThreshold) {
                    continue;
                }
                r.box.x = static_cast<int>(detections[i * objectSize + 3] * width);
                r.box.y = static_cast<int>(detections[i * objectSize + 4] * height);
                r.box.width = static_cast<int>(detections[i * objectSize + 5] * width - r.box.x);
                r.box.height = static_cast<int>(detections[i * objectSize + 6] * height - r.box.y);
                refineBox(r);
                r.cameraId = cameraId;
                detectionOutputs.push_back(r);
            }
        }
        return detectionOutputs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::runOVInference", e.what(), __FILE__, __LINE__);
        return detectionOutputs;
    }
}
/// Letterboxes the image into a square canvas (top-left anchored, zero
/// padded) so the aspect ratio is preserved for the fixed-size model input.
/// Returns the source unchanged if an exception occurs.
cv::Mat ANSOVFD::PreProcessing(const cv::Mat& source) {
    try {
        const int cols = source.cols;
        const int rows = source.rows;
        const int side = std::max(cols, rows);
        cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
        if (cols > 0 && rows > 0) {
            source.copyTo(squared(cv::Rect(0, 0, cols, rows)));
        }
        return squared;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOVFD::PreProcessing", e.what(), __FILE__, __LINE__);
        return source;
    }
}
/// Runs the YOLOv8-style compiled model on one frame: letterbox to square,
/// build a normalised 640x640 blob, infer, decode [anchors, 4+classes]
/// output, apply NMS, and return the surviving faces.
/// @return detections with classId, className "Face", confidence, box, and
///         camera id; empty on invalid input or exception.
std::vector<Object> ANSOVFD::runYoloInference(const cv::Mat& input, const std::string& cameraId) {
    std::vector<Object> outputs;
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSOVFD::RunInference", "Input image is empty", __FILE__, __LINE__);
            return outputs;
        }
        if ((input.cols < 10) || (input.rows < 10)) return outputs;
        // Step 0: letterbox to a square; no clone of the input is needed since
        // PreProcessing() copies into a fresh canvas.
        cv::Mat letterbox_img = PreProcessing(input);
        // Scale factor from 640x640 model space back to letterboxed space.
        float scale = letterbox_img.size[0] / 640.0;
        cv::Mat blob = cv::dnn::blobFromImage(letterbox_img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true);
        // Step 1: wrap the blob in a tensor (zero-copy) and feed the network.
        auto input_port = compiled_model_.input();
        ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0));
        request.set_input_tensor(input_tensor);
        request.infer();
        // Step 2: output is [1, 4+classes, anchors] (e.g. [1,84,8400]);
        // transpose to [anchors, 4+classes] for row-wise decoding.
        auto output = request.get_output_tensor(0);
        auto output_shape = output.get_shape();
        float* data = output.data<float>();
        cv::Mat output_buffer(output_shape[1], output_shape[2], CV_32F, data);
        cv::transpose(output_buffer, output_buffer);
        std::vector<int> class_ids;
        std::vector<float> class_scores;
        std::vector<cv::Rect> boxes;
        // Step 3: per-anchor best class and box decode (cx,cy,w,h -> ltwh).
        for (int i = 0; i < output_buffer.rows; i++) {
            cv::Mat classes_scores = output_buffer.row(i).colRange(4, output_shape[1]);
            cv::Point class_id;
            double maxClassScore;
            cv::minMaxLoc(classes_scores, 0, &maxClassScore, 0, &class_id);
            if (maxClassScore > _modelConfig.detectionScoreThreshold) {
                class_scores.push_back(static_cast<float>(maxClassScore));
                class_ids.push_back(class_id.x);
                float cx = output_buffer.at<float>(i, 0);
                float cy = output_buffer.at<float>(i, 1);
                float w = output_buffer.at<float>(i, 2);
                float h = output_buffer.at<float>(i, 3);
                int left = std::max(0, int((cx - 0.5 * w) * scale));
                int top = std::max(0, int((cy - 0.5 * h) * scale));
                int boxWidth = std::min(int(w * scale), input.cols - left);
                int boxHeight = std::min(int(h * scale), input.rows - top);
                boxes.push_back(cv::Rect(left, top, boxWidth, boxHeight));
            }
        }
        // Step 4: non-maximum suppression, then package surviving boxes.
        std::vector<int> indices;
        cv::dnn::NMSBoxes(boxes, class_scores, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, indices);
        for (size_t i = 0; i < indices.size(); i++) {
            const int id = indices[i];
            // Every retained score already exceeds detectionScoreThreshold
            // (filtered above), so no re-check is needed here.
            Object result;
            result.classId = class_ids[id];
            result.className = "Face";
            result.confidence = class_scores[id];
            result.box = boxes[id];
            // BUGFIX: the cameraId parameter was never copied onto the
            // results (unlike runOVInference); tag each detection now.
            result.cameraId = cameraId;
            outputs.push_back(result);
        }
        return outputs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("OPENVINOOD::runYoloInference", e.what(), __FILE__, __LINE__);
        return outputs;
    }
}
#endif
}