// ANSCORE/modules/ANSODEngine/SCRFDOVFaceDetector.cpp

#include "SCRFDOVFaceDetector.h"
#include "Utility.h"

// NOTE: This OpenVINO SCRFD face detector is still not working:
// RunInference() does not return any detections yet.
namespace ANSCENTER {
    bool ANSOVSCRFDFD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
        bool result = ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        labelMap = "Face";
        // We do not need to check for the license
        _licenseValid = true;
        if (!_licenseValid) return false;
        try {
            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::FACEDETECT;
            _modelConfig.detectionType = DetectionType::FACEDETECTOR;

            // We need to get the modelfolder from here
            std::string xmlfile = CreateFilePath(_modelFolder, "scrfdface.xml");
            if (std::filesystem::exists(xmlfile)) {
                _modelFilePath = xmlfile;
                this->_logger.LogDebug("ANSOVSCRFDFD::Initialize.  Loading SCRFD weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                this->_logger.LogError("ANSOVSCRFDFD::Initialize.  Model scrfdface.xml file is not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }

            ov::Core core;
            std::shared_ptr<ov::Model> model = core.read_model(_modelFilePath);
            //ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
            const ov::Shape& inputShape = model->input().get_shape();

            ov::Layout layout = ov::layout::get_layout(model->input());
            if (layout.empty()) {
                layout = getLayoutFromShape(model->input().get_partial_shape());
                slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '"
                    << model->input().get_any_name() << "' will be used." << slog::endl;
            }


            const ov::Layout& inputLayout = layout;

            //ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
            //ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255, 255, 255 });
            //ppp.input().model().set_layout("NCHW");
            ////ppp.output().tensor().set_element_type(ov::element::f32);

            ov::preprocess::PrePostProcessor ppp(model);
            inputTransform.setPrecision(ppp, model->input().get_any_name());
            ppp.input().tensor().set_layout({ "NHWC" });

            ppp.input().model().set_layout(inputLayout);

            // --------------------------- Reading image input parameters -------------------------------------------
            inputsNames.push_back(model->input().get_any_name());
            netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
            netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

            // --------------------------- Prepare output  -----------------------------------------------------

            int outputSize = model->outputs().size();
            std::cout << "output Size:" << outputSize << std::endl;


            const ov::Layout outputLayout{ "CHW" };
            maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)];
            for (const auto& output : model->outputs()) {
                const auto outTensorName = output.get_any_name();
                outputsNames.push_back(outTensorName);
                ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
            }
            std::sort(outputsNames.begin(), outputsNames.end());
            model = ppp.build();
            compiled_model_ = core.compile_model(model, "GPU");
                    inference_request_ = compiled_model_.create_infer_request();
            const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
            const ov::Shape input_shape = inputs[0].get_shape();

            short height = input_shape[1];
            short width = input_shape[2];
            model_input_shape_ = cv::Size2f(width, height);

            const std::vector<ov::Output<ov::Node>> outputs = model->outputs();
            const ov::Shape output_shape = outputs[0].get_shape();

            height = output_shape[1];
            width = output_shape[2];
            model_output_shape_ = cv::Size(width, height);
            _isInitialized  = true;
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSOVSCRFDFD::Initialize", e.what(), __FILE__, __LINE__);
            std::cout << "Error:" << e.what();
            return false;
        }
    }

    /// Loads the model archive through ANSFDBase::LoadModel and resolves the
    /// path of "scrfdface.xml" inside _modelFolder.
    ///
    /// @param modelZipFilePath  Forwarded to ANSFDBase::LoadModel.
    /// @param modelZipPassword  Forwarded to ANSFDBase::LoadModel.
    /// @return true when the base load succeeded and scrfdface.xml exists
    ///         (_modelFilePath is then updated); false otherwise or when a
    ///         std::exception is thrown.
    bool ANSOVSCRFDFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
        try {
            // NOTE(review): presumably the base call is what populates _modelFolder - confirm.
            const bool baseLoaded = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
            if (!baseLoaded) return false;

            const std::string xmlfile = CreateFilePath(_modelFolder, "scrfdface.xml");
            if (!std::filesystem::exists(xmlfile)) {
                // Report the path that was probed; _modelFilePath has not been updated at this point.
                this->_logger.LogError("ANSOVSCRFDFD::LoadModel.  Model scrfdface.xml file is not exist", xmlfile, __FILE__, __LINE__);
                return false;
            }

            _modelFilePath = xmlfile;
            this->_logger.LogDebug("ANSOVSCRFDFD::LoadModel.  Loading scrfdface weight", _modelFilePath, __FILE__, __LINE__);
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSOVSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    /// Reports whether the model file is available; no optimization work is done here.
    ///
    /// @param fp16                  Not used by this implementation.
    /// @param optimizedModelFolder  [out] Set to an empty string when the model
    ///                              file is missing; left untouched otherwise.
    /// @return true when FileExist(_modelFilePath) succeeds, false otherwise.
    bool ANSOVSCRFDFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
        const bool modelPresent = FileExist(_modelFilePath);
        if (modelPresent) {
            return true;
        }
        optimizedModelFolder.clear();
        return false;
    }
    /// Maps a box from model-input coordinates to frame coordinates using factor_.
    ///
    /// The origin is shifted back by half the box size before scaling, i.e.
    /// src.x / src.y are treated as the box centre. All results are truncated to int.
    ///
    /// @param src  Box as produced by post-processing.
    /// @return The shifted and scaled box.
    cv::Rect ANSOVSCRFDFD::GetBoundingBox(const cv::Rect& src) {
        // Offsets use the unscaled width/height, exactly as read from src.
        const double left = src.x - 0.5 * src.width;
        const double top = src.y - 0.5 * src.height;

        cv::Rect scaled;
        scaled.x = static_cast<int>(left * factor_.x);
        scaled.y = static_cast<int>(top * factor_.y);
        scaled.width = static_cast<int>(src.width * factor_.x);
        scaled.height = static_cast<int>(src.height * factor_.y);
        return scaled;
    }

    std::vector<Object>ANSOVSCRFDFD::PostProcessing() {
        try {
            std::vector<int> class_list;
            std::vector<float> confidence_list;
            std::vector<cv::Rect> box_list;

            float* detections = inference_request_.get_output_tensor().data<float>();
            const cv::Mat detection_outputs(model_output_shape_, CV_32F, (float*)detections);

            for (int i = 0; i < detection_outputs.cols; ++i) {
                const cv::Mat classes_scores = detection_outputs.col(i).rowRange(4, detection_outputs.rows);

                cv::Point class_id;
                double score;

                cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id);

                if (score > 0.4) {
                    class_list.push_back(class_id.y);
                    confidence_list.push_back(score);

                    const float x = detection_outputs.at<float>(0, i);
                    const float y = detection_outputs.at<float>(1, i);
                    const float w = detection_outputs.at<float>(2, i);
                    const float h = detection_outputs.at<float>(3, i);

                    cv::Rect box;

                    box.x = static_cast<int>(x);
                    box.y = static_cast<int>(y);
                    box.width = static_cast<int>(w);
                    box.height = static_cast<int>(h);

                    box_list.push_back(box);
                }
            }

            std::vector<int> NMS_result;
            cv::dnn::NMSBoxes(box_list, confidence_list, 0.5, 0.5, NMS_result);

            std::vector<Object> output;
            for (int i = 0; i < NMS_result.size(); i++)
            {
                Object result;
                int id = NMS_result[i];
                result.classId = class_list[id];
                result.confidence = confidence_list[id];
                result.box = GetBoundingBox(box_list[id]);
                output.push_back(result);
            }
            return output;
        }

        catch (const std::exception& e) {
            std::vector<Object> result;
            result.clear();
            std::cout << "ANSOVDetector::PostprocessImage. " << e.what() << std::endl;
            return result;
        }

    }
    /// Resizes the frame to the model input size and binds it as the input tensor.
    ///
    /// Side effects: fills resized_frame_, stores the frame-to-model size ratios
    /// in factor_, rebuilds input_tensor_ on top of resized_frame_'s pixel buffer
    /// and hands it to inference_request_. Exceptions are printed to stdout and
    /// swallowed.
    ///
    /// @param frame  Image to feed to the network.
    void ANSOVSCRFDFD::PreprocessImage(cv::Mat& frame) {
        try {
            cv::resize(frame, resized_frame_, model_input_shape_, 0, 0, cv::INTER_AREA);

            // Ratios used later to map boxes back to the original frame.
            const auto ratioX = frame.cols / model_input_shape_.width;
            const auto ratioY = frame.rows / model_input_shape_.height;
            factor_.x = static_cast<float>(ratioX);
            factor_.y = static_cast<float>(ratioY);

            // The tensor wraps the resized frame's buffer; element type and shape
            // come from the compiled model's input port.
            float* hostPixels = reinterpret_cast<float*>(resized_frame_.data);
            const auto modelInput = compiled_model_.input();
            input_tensor_ = ov::Tensor(modelInput.get_element_type(), modelInput.get_shape(), hostPixels);
            inference_request_.set_input_tensor(input_tensor_);
        }
        catch (const std::exception& e) {
            std::cout << "ANSOVSCRFDFD::PreprocessImage. " << e.what() << std::endl;
        }
        catch (...) {
            std::cout << "ANSOVSCRFDFD::PreprocessImage. " << "unknown exception" << std::endl;
        }
    }
    /// Runs the SCRFD network on one image.
    ///
    /// Work in progress: the network is executed and the scores of output
    /// tensor 1 are counted against a fixed threshold, but no Object is built
    /// yet, so the returned vector is always empty.
    ///
    /// Images with a side of at most 300 px are treated as pre-cropped faces and
    /// are padded with a 200 px replicated border on every side before inference.
    ///
    /// @param input  Image to analyse; an empty image is rejected.
    /// @return Detected faces (currently always empty, see above).
    std::vector<Object> ANSOVSCRFDFD::RunInference(const cv::Mat& input) {
        std::vector<Object> outputs;
        if (!_licenseValid) {
            this->_logger.LogError("ANSOVSCRFDFD::RunInference", "Invalid license", __FILE__, __LINE__);
            return outputs;
        }
        if (!_isInitialized) {
            this->_logger.LogError("ANSOVSCRFDFD::RunInference", "Model is not initialized", __FILE__, __LINE__);
            return outputs;
        }
        if (input.empty()) {
            // Padding or resizing an empty image cannot produce a valid network input.
            this->_logger.LogError("ANSOVSCRFDFD::RunInference", "Input image is empty", __FILE__, __LINE__);
            return outputs;
        }
        try {
            constexpr int kCroppedFaceMaxSide = 300; // a side <= this marks a likely cropped face image
            constexpr int kCroppedFaceBorder = 200;  // padding added around cropped faces
            constexpr float kConfidenceThreshold = 0.1f;

            const bool croppedFace = (input.rows <= kCroppedFaceMaxSide) || (input.cols <= kCroppedFaceMaxSide);

            // Build the working image once: padded copy for cropped faces, plain deep copy otherwise.
            cv::Mat im;
            if (croppedFace) {
                cv::copyMakeBorder(input, im, kCroppedFaceBorder, kCroppedFaceBorder, kCroppedFaceBorder, kCroppedFaceBorder, cv::BORDER_REPLICATE);
            }
            else {
                im = input.clone();
            }

            this->PreprocessImage(im);

            // Perform inference
            inference_request_.infer();

            // Diagnostic dump of the available outputs.
            std::cout << "Model outputs:" << std::endl;
            const auto ovOutputs = compiled_model_.outputs();
            for (std::size_t i = 0; i < ovOutputs.size(); ++i) {
                auto output_tensor = inference_request_.get_output_tensor(i);
                (void)output_tensor;
                std::cout << "Output Tensor for " << ovOutputs[i].get_any_name() << " obtained." << std::endl;
            }

            auto scores_tensor = inference_request_.get_output_tensor(1);
            const ov::Shape scores_shape = scores_tensor.get_shape();
            if (scores_shape.size() < 3) {
                // The detection count is read from dimension 2; refuse shapes that do not have it.
                this->_logger.LogError("ANSOVSCRFDFD::RunInference", "Unexpected rank of the score tensor", __FILE__, __LINE__);
                return outputs;
            }

            // Access the data from the tensor
            const float* scores_data = scores_tensor.data<float>();
            const std::size_t num_detections = scores_shape[2]; // Should be 800

            // Collecting indices of valid detections
            std::vector<std::size_t> valid_detections;
            for (std::size_t i = 0; i < num_detections; ++i) {
                if (scores_data[i] > kConfidenceThreshold) {
                    valid_detections.push_back(i);
                }
            }
            std::cout << "Total valid detections: " << valid_detections.size() << std::endl;

            // TODO: decode boxes and landmarks for valid_detections, align the faces
            // (ANSUtilityHelper::AlignFaceWithFivePoints), keep results whose score
            // exceeds _modelConfig.detectionScoreThreshold and fill `outputs` with
            // classId 0 / className "Face". For cropped faces, clamp x_min / y_min
            // to the border size and subtract it so boxes refer to the original image.
            return outputs;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSOVSCRFDFD::RunInference", e.what(), __FILE__, __LINE__);
            return outputs;
        }
    }

    /// Destructor. No explicit cleanup is performed here; the previous empty
    /// try block could never reach its catch handler, so it was removed.
    ANSOVSCRFDFD::~ANSOVSCRFDFD() = default;
    /// Tear-down hook. This implementation releases nothing explicitly.
    /// @return Always true.
    bool ANSOVSCRFDFD::Destroy() {
        const bool destroyed = true;
        return destroyed;
    }
}