#ifndef SCRFDFaceDetector_H
#define SCRFDFaceDetector_H
#pragma once
#include <string.h>
#include <vector>
#include <iostream>
#include <typeinfo>
#include "ANSEngineCommon.h"
#include "engine.h"
#include "engine/EnginePoolManager.h"
#include "NV12PreprocessHelper.h"
//#define FACEDEBUG 

namespace ANSCENTER {
 
    // SCRFD-based face detector built on top of ANSFDBase.
    //
    // The public surface is the model-loading overrides plus two RunInference
    // overloads; everything else is internal plumbing (engine handles, anchor
    // bookkeeping, box decoding and NMS helpers). Implementations live outside
    // this header.
    class ANSENGINE_API ANSSCRFDFD : public ANSFDBase {
    public:
        // ---- Model loading (overrides of ANSFDBase) -------------------------

        // Set up the detector from a (password protected) model archive.
        // `labelMap` is an in/out reference filled by the implementation.
        bool Initialize(std::string licenseKey,
                        ModelConfig modelConfig,
                        const std::string& modelZipFilePath,
                        const std::string& modelZipPassword,
                        std::string& labelMap) override;

        // Load the model from the given archive path / password.
        bool LoadModel(const std::string& modelZipFilePath,
                       const std::string& modelZipPassword) override;

        // Same purpose as Initialize, but the model is read from a folder.
        bool LoadModelFromFolder(std::string licenseKey,
                                 ModelConfig modelConfig,
                                 std::string modelName,
                                 std::string className,
                                 const std::string& modelFolder,
                                 std::string& labelMap) override;

        // ---- Inference -------------------------------------------------------

        // Detect faces in `input` and return them as Object entries.
        // NOTE(review): the exact effect of useDynamicImage / validateFace /
        // facelivenessCheck is defined in the .cpp - confirm there.
        std::vector<Object> RunInference(const cv::Mat& input,
                                         bool useDynamicImage = true,
                                         bool validateFace = false,
                                         bool facelivenessCheck = true);

        // Overload that additionally carries the id of the originating camera.
        std::vector<Object> RunInference(const cv::Mat& input,
                                         const std::string& camera_id,
                                         bool useDynamicImage = true,
                                         bool validateFace = false,
                                         bool facelivenessCheck = true);

        // Build an optimized model; `fp16` selects half precision and the
        // resulting folder is reported through `optimizedModelFolder`
        // (presumably - verify against the implementation).
        bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);

        // Explicit teardown entry point; returns a success flag.
        bool Destroy();

        ~ANSSCRFDFD();

    private:
        // ---- State -----------------------------------------------------------
        // Member order is significant (construction / destruction order), so
        // keep it stable when editing.
        std::string          _modelFilePath;
        std::recursive_mutex _mutex;
        std::vector<Object>  _movementObjects;
        int                  _retainDetectedFaces = 0;
        ANSCENTER::Options   m_options;
        NV12PreprocessHelper m_nv12Helper;
        std::shared_ptr<Engine<float>> m_trtEngine{ nullptr };
        EnginePoolManager<float>::PoolKey m_poolKey;
        bool m_usingSharedPool{ false };

        // 1 = one slot per GPU, multi-GPU round-robin (no elastic).
        int m_maxSlotsPerGpu = 1;
        void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }

        // Per-channel subtract / divide values and the normalize switch.
        // Presumably these feed the input preprocessing step - TODO confirm.
        const std::array<float, 3> SUB_VALS = { 0.5f, 0.5f, 0.5f };
        const std::array<float, 3> DIV_VALS = { 0.5f, 0.5f, 0.5f };
        const bool NORMALIZE{ true };

        // ---- Internal inference paths ----------------------------------------
        // NOTE(review): "Inferene" is a misspelling of "Inference"; the name is
        // kept as-is because its definition lives outside this header.
        std::vector<Object> TensorRTInferene(const cv::Mat& input,
                                             const std::string& camera_id,
                                             bool useDynamicImage = false);
        std::vector<Object> Detect(const cv::Mat& input);
        std::vector<Object> Inference(const cv::Mat& input,
                                      const std::string& camera_id,
                                      bool DynamicImage = false,
                                      bool validateFace = false);
        std::vector<Object> InferenceDynamic(const cv::Mat& input,
                                             const std::string& camera_id);

        // ---- TensorRT-specific settings --------------------------------------
        const int INPUT_H{ 640 };
        const int INPUT_W{ 640 };

        // One anchor centre together with the stride it belongs to.
        struct SCRFDPoint
        {
            float cx;
            float cy;
            float stride;
        };

        // Values recorded by resize_unscale so results can be related back to
        // the source image (scale ratio, dw/dh offsets, and a flag).
        struct SCRFDScaleParams
        {
            float ratio;
            int dw;
            int dh;
            bool flag;
        };

        unsigned int fmc{ 3 };          // feature map count
        bool use_kps{ false };
        unsigned int num_anchors{ 2 };
        // Strides (steps) of the feature maps; may also be [8, 16, 32, 64, 128].
        std::vector<int> feat_stride_fpn{ 8, 16, 32 };
        // Anchor centres, keyed by an int (presumably the stride - verify).
        std::unordered_map<int, std::vector<SCRFDPoint>> center_points;
        bool center_points_is_update{ false };
        static constexpr unsigned int nms_pre = 1000;
        static constexpr unsigned int max_nms = 30000;

        // ---- Pre/post-processing helpers -------------------------------------

        // Resize `mat` into `mat_rs` for the given target size and record the
        // applied scaling in `scale_params`.
        void resize_unscale(const cv::Mat& mat,
                            cv::Mat& mat_rs,
                            int target_height,
                            int target_width,
                            SCRFDScaleParams& scale_params);

        // Populate center_points for the given input size; meant to run once.
        void generate_points(const int target_height, const int target_width);

        // Decode one stride's score / bbox predictions into Object entries
        // appended to `bbox_kps_collection` (no keypoints).
        void generate_bboxes_single_stride(const SCRFDScaleParams& scale_params,
                                           std::vector<float>& score_pred,
                                           std::vector<float>& bbox_pred,
                                           unsigned int stride,
                                           float score_threshold,
                                           float img_height,
                                           float img_width,
                                           std::vector<Object>& bbox_kps_collection);

        // Keypoint-aware variant of generate_bboxes_single_stride; takes an
        // extra `kps_pred` buffer.
        void generate_bboxes_kps_single_stride(const SCRFDScaleParams& scale_params,
                                               std::vector<float>& score_pred,
                                               std::vector<float>& bbox_pred,
                                               std::vector<float>& kps_pred,
                                               unsigned int stride,
                                               float score_threshold,
                                               float img_height,
                                               float img_width,
                                               std::vector<Object>& bbox_kps_collection);

        // Turn the raw output tensors into candidate detections
        // (rescale & exclude).
        void generate_bboxes_kps(const SCRFDScaleParams& scale_params,
                                 std::vector<Object>& bbox_kps_collection,
                                 std::vector<std::vector<float>>& output_tensors,
                                 float score_threshold,
                                 float img_height,
                                 float img_width);

        // Non-maximum suppression from `input` into `output`, controlled by an
        // IoU threshold and a `topk` limit.
        void nms_bboxes_kps(std::vector<Object>& input,
                            std::vector<Object>& output,
                            float iou_threshold,
                            unsigned int topk);

        // IoU between two detections.
        float getIouOfObjects(const Object& a, const Object& b);
    };
}
#endif