#ifndef ANSTENSORRTPOSE_H
#define ANSTENSORRTPOSE_H
#pragma once
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
namespace ANSCENTER {
    /// Pose-estimation detector backed by an `Engine<float>` instance (TensorRT).
    ///
    /// Derives from ANSODBase and overrides its model-loading entry points
    /// (Initialize / LoadModel / LoadModelFromFolder), its batch inference entry
    /// point (RunInferencesBatch) and SetMaxSlotsPerGpu.
    ///
    /// The class holds a `std::unique_ptr` and `const` data members, so it is
    /// neither copy-constructible nor assignable.
    class ANSENGINE_API ANSTENSORRTPOSE :public ANSODBase {
    public:
        // --- Model lifecycle -------------------------------------------------
        virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) override;
        virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)override;
        virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap)override;
        virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);

        // --- Inference -------------------------------------------------------
        // Single-image inference; the second overload additionally takes a camera id.
        std::vector<Object> RunInference(const cv::Mat& input);
        std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
        // Batch inference: returns one vector of objects per input image.
        virtual std::vector<std::vector<Object>> RunInferencesBatch(
            const std::vector<cv::Mat>& inputs, const std::string& camera_id) override;

        bool Destroy();
        ~ANSTENSORRTPOSE();
    private:
        std::string     _modelFilePath;
        // Initialised to false so the flag is never read with an indeterminate
        // value before a load has been attempted.
        bool            _modelLoadValid{ false };
        bool            _fp16{ false };
        int             m_maxSlotsPerGpu{ 1 };   // 1 = one slot per GPU, multi-GPU round-robin (no elastic)
        void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }

        // NV12 fast-path helper
        NV12PreprocessHelper m_nv12Helper;

        // Per-call batch metadata — passed by out-param to avoid shared state
        struct BatchMetadata {
            std::vector<int>   imgHeights;
            std::vector<int>   imgWidths;
            std::vector<float> ratios;
        };
        // Per-call single-image metadata
        struct ImageMetadata {
            float ratio     = 1.f;
            float imgWidth  = 0.f;
            float imgHeight = 0.f;
        };

        // Preprocess the input; fills outMeta with per-call image geometry
        std::vector<std::vector<cv::cuda::GpuMat>> Preprocess(const cv::Mat& inputImage,
                                                               ImageMetadata& outMeta);
        // Postprocess the output for pose model
        std::vector<Object> PostProcessPose(std::vector<float>& featureVector,
                                            const std::string& camera_id,
                                            const ImageMetadata& meta);
        std::vector<Object> DetectObjects(const cv::Mat& inputImage, const std::string& camera_id);

        // Batch counterparts of the single-image pipeline above. The batch
        // post-processing step takes the index of the image within the batch
        // together with the metadata filled in by PreprocessBatch.
        std::vector<std::vector<Object>> DetectObjectsBatch(const std::vector<cv::Mat>& inputImages,
                                                            const std::string& camera_id);
        std::vector<std::vector<cv::cuda::GpuMat>> PreprocessBatch(const std::vector<cv::Mat>& inputImages,
                                                                    BatchMetadata& outMetadata);
        std::vector<Object> PostProcessPoseBatch(std::vector<float>& featureVector,
                                                 const std::string& camera_id,
                                                 size_t batchIdx,
                                                 const BatchMetadata& metadata);

        std::unique_ptr<Engine<float>> m_trtEngine = nullptr;
        // Used for image preprocessing
        // YoloV8 model expects values between [0.f, 1.f] so we use the following params
        const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };
        const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };
        const bool NORMALIZE = true;
        ANSCENTER::Options m_options;

        // The tunables below have no constructor to set them, so each gets a
        // zero in-class initializer; this keeps a default-constructed object in
        // a deterministic state. NOTE(review): the effective values are
        // presumably assigned while the model is initialised/loaded — confirm
        // in the implementation file.

        // Filter thresholds
        float PROBABILITY_THRESHOLD{ 0.f };
        float NMS_THRESHOLD{ 0.f };
        int TOP_K{ 0 };

        // Segmentation constants
        // NOTE(review): their use by this pose class is not visible from this
        // header; kept to preserve the object layout.
        int SEG_CHANNELS{ 0 };
        int SEG_H{ 0 };
        int SEG_W{ 0 };
        float SEGMENTATION_THRESHOLD{ 0.f };
        // Pose estimation constant
        int NUM_KPS{ 0 };
        float KPS_THRESHOLD{ 0.f };

    };
}
#endif