#ifndef ANSTENSORRTOD_H
#define ANSTENSORRTOD_H
#pragma once
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
namespace ANSCENTER {
    // TensorRT class
    // Object detector built on the project's Engine<float> wrapper (engine.h).
    // Declares single-image and batched inference entry points together with
    // separate post-processing paths for detection, segmentation and pose outputs.
    class ANSENGINE_API TENSORRTOD :public ANSODBase {
    public:
        // ---- Model lifecycle -------------------------------------------------
        // Overrides of the ANSODBase loading/initialisation interface.
        // `labelMap` is a non-const reference, i.e. an in/out parameter.
        virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) override;
        virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)override;
        virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className,const std::string& modelFolder, std::string& labelMap)override;
        // `optimizedModelFolder` is a non-const reference, i.e. an in/out parameter.
        // NOTE(review): not marked `override` here; confirm whether ANSODBase
        // declares a matching virtual.
        virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);

        // ---- Inference -------------------------------------------------------
        // Single-image inference, with and without an explicit camera id.
        std::vector<Object> RunInference(const cv::Mat& input);
        std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
        // Batched inference; returns a vector of per-image detection lists.
        virtual std::vector<std::vector<Object>> RunInferencesBatch(const std::vector<cv::Mat>& inputs, const std::string& camera_id) override;

        // ---- Teardown --------------------------------------------------------
        bool Destroy();
        ~TENSORRTOD();

    private:
        // ---- State -----------------------------------------------------------
        std::string     _modelFilePath;
        // Default-initialised so the flag is never read with an indeterminate
        // value (the class declares no constructor).
        bool            _modelLoadValid{ false };
        bool            _fp16{ false };
        bool            _isFixedBatch{ false };
        int             m_maxSlotsPerGpu{ 1 };   // 1 = one slot per GPU, multi-GPU round-robin (no elastic)
        void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }

        // NV12 fast-path helper (shared with ANSRTYOLO, ANSYOLOV10RTOD, ANSYOLOV12RTOD)
        NV12PreprocessHelper m_nv12Helper;

        // Per-batch image geometry: parallel vectors filled by PreprocessBatch
        // and consumed by PostprocessBatch (indexed via its `batchIdx` argument).
        struct BatchMetadata {
            std::vector<int> imgHeights;
            std::vector<int> imgWidths;
            std::vector<float> ratios;
        };

        // Per-call image geometry — passed between Preprocess and Postprocess
        // instead of shared member variables, enabling concurrent inference.
        struct ImageMetadata {
            float ratio     = 1.f;
            float imgWidth  = 0.f;
            float imgHeight = 0.f;
        };

        // ---- Single-image pipeline -------------------------------------------
        // Preprocess the input; fills outMeta with per-call image geometry.
        std::vector<std::vector<cv::cuda::GpuMat>> Preprocess(const cv::Mat& inputImage,
                                                               ImageMetadata& outMeta);

        // Postprocess the output of a detection model.
        std::vector<Object> Postprocess(std::vector<float>& featureVector,
                                        const std::string& camera_id,
                                        const ImageMetadata& meta);

        // Postprocess the output of a segmentation model.
        std::vector<Object> PostProcessSegmentation(std::vector<std::vector<float>>& featureVectors,
                                                    const std::string& camera_id,
                                                    const ImageMetadata& meta);

        // Postprocess the output of a pose model.
        std::vector<Object> PostProcessPose(std::vector<float>& featureVector,
                                            const std::string& camera_id,
                                            const ImageMetadata& meta);

        std::vector<Object> DetectObjects(const cv::Mat& inputImage, const std::string& camera_id);

        // ---- Batch pipeline --------------------------------------------------
        // Batched counterparts of the functions above. Image geometry travels in
        // a BatchMetadata value: PreprocessBatch writes it through `outMetadata`,
        // PostprocessBatch reads it for the image selected by `batchIdx`.
        std::vector<std::vector<Object>> DetectObjectsBatch(const std::vector<cv::Mat>& inputImages,const std::string& camera_id);
        std::vector<std::vector<cv::cuda::GpuMat>> PreprocessBatch(const std::vector<cv::Mat>& inputImages,BatchMetadata& outMetadata);
        std::vector<Object> PostprocessBatch(std::vector<float>& featureVector,const std::string& camera_id,size_t batchIdx,const BatchMetadata& metadata);

        // ---- Engine and preprocessing parameters -----------------------------
        std::unique_ptr<Engine<float>> m_trtEngine = nullptr;

        // Used for image preprocessing
        // YoloV8 model expects values between [0.f, 1.f] so we use the following params
        const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };
        const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };
        const bool NORMALIZE = true;

        ANSCENTER::Options m_options;

        // ---- Tunables --------------------------------------------------------
        // All values below are zero-initialised so they never hold indeterminate
        // data. NOTE(review): the effective values are presumably assigned by the
        // implementation during initialisation/model load — confirm in the .cpp.

        // Filter thresholds
        float PROBABILITY_THRESHOLD{ 0.f };
        float NMS_THRESHOLD{ 0.f };
        int TOP_K{ 0 };

        // Segmentation constants
        int SEG_CHANNELS{ 0 };
        int SEG_H{ 0 };
        int SEG_W{ 0 };
        float SEGMENTATION_THRESHOLD{ 0.f };

        // Pose estimation constants
        int NUM_KPS{ 0 };
        float KPS_THRESHOLD{ 0.f };

    };
}
#endif