ANSCORE/ANSODEngine/ANSYOLOV10RTOD.cpp

#include "ANSYOLOV10RTOD.h"
#include "Utility.h"
#include <opencv2/cudaimgproc.hpp>
#include <future>
#include <NvOnnxParser.h>
namespace ANSCENTER
{
    // Builds the TensorRT engine for the ONNX model referenced by _modelFilePath.
    //
    // fp16                 - true selects FP16 precision, false selects FP32.
    // optimizedModelFolder - out-parameter; set to the parent folder of the model
    //                        file, or cleared when an exception is caught.
    // Returns true when the engine build succeeds. Returns false when the base
    // class step fails, the model file is missing, the build fails, or a
    // std::exception is caught.
    bool ANSYOLOV10RTOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
            return false;
        }
        if (!FileExist(_modelFilePath)) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::OptimizeModel", "Raw model file path is not exist", __FILE__, __LINE__);
            return false;
        }
        try {
            _fp16 = fp16;
            optimizedModelFolder = GetParentFolder(_modelFilePath);

            // Only create the engine object once.
            // NOTE(review): when an engine already exists, m_options (including
            // the precision) is left as previously configured, so a different
            // fp16 value is not applied to the options here -- confirm intended.
            if (!m_trtEngine) {
                // Batch sizes and input extents are taken from the model configuration.
                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
                m_options.maxInputHeight = _modelConfig.maxInputHeight;
                m_options.minInputHeight = _modelConfig.minInputHeight;
                m_options.optInputHeight = _modelConfig.optInputHeight;
                m_options.maxInputWidth = _modelConfig.maxInputWidth;
                m_options.minInputWidth = _modelConfig.minInputWidth;
                m_options.optInputWidth = _modelConfig.optInputWidth;
                m_options.engineFileDir = optimizedModelFolder;
                // Use FP16 or FP32 precision based on the input flag
                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
                // Create the TensorRT inference engine
                m_trtEngine = std::make_unique<Engine<float>>(m_options);
            }

            // Build the TensorRT engine
            const auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
            if (!succ) {
                const std::string errMsg =
                    "Error: Unable to build the TensorRT engine. "
                    "Try increasing TensorRT log severity to kVERBOSE.";
                this->_logger.LogError("ANSYOLOV10RTOD::OptimizeModel", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            _modelLoadValid = true;
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::OptimizeModel", e.what(), __FILE__, __LINE__);
            optimizedModelFolder = "";
            return false;
        }
    }
    bool ANSYOLOV10RTOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try {
            bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
            if (!result) return false;
            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.modelType = ModelType::TENSORRT;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
            if (_modelConfig.modelMNSThreshold < 0.2)
                _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2)
                _modelConfig.modelConfThreshold = 0.5;
            if (_modelConfig.modelMNSThreshold < 0.2)
                _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2)
                _modelConfig.modelConfThreshold = 0.5;
            if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)  // 133 = COCO wholebody max
                _modelConfig.numKPS = 17;
            if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
            _fp16 = true; // Load Model from Here
            // Load Model from Here
            TOP_K = 100;
            SEG_CHANNELS = 32;
            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
            SEGMENTATION_THRESHOLD = 0.5f;
            SEG_H = 160;
            SEG_W = 160;
            NUM_KPS = _modelConfig.numKPS;
            KPS_THRESHOLD = _modelConfig.kpsThreshold;
            SEG_CHANNELS = 32;      // For segmentation
            if (!m_trtEngine) {
                // Fixed batch size of 1 for this model
                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
                m_options.maxInputHeight = _modelConfig.maxInputHeight;
                m_options.minInputHeight = _modelConfig.minInputHeight;
                m_options.optInputHeight = _modelConfig.optInputHeight;
                m_options.maxInputWidth = _modelConfig.maxInputWidth;
                m_options.minInputWidth = _modelConfig.minInputWidth;
                m_options.optInputWidth = _modelConfig.optInputWidth;
                m_options.engineFileDir = _modelFolder;
                // Use FP16 or FP32 precision based on the input flag
                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
                // Create the TensorRT inference engine
                m_trtEngine = std::make_unique<Engine<float>>(m_options);
            }

            // 0. Check if the configuration file exist
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
                }
            }
            else
            {// This is old version of model zip file
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                std::ifstream isValidFileName(_classFilePath);
                if (!isValidFileName)
                {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from string", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
                else {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from file", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
            }

            // 2. Load the TensorRT engine file
            if (this->_loadEngineOnCreation) {
                auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                if (!succ) {
                    const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory.";
                    this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                    _modelLoadValid = false;
                    return false;
                }

            }
            _modelLoadValid = true;
            _isInitialized = true;
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }

    }
    bool ANSYOLOV10RTOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try {
            bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig,modelName, className, modelFolder, labelMap);
            if (!result) return false;
            std::string _modelName = modelName;
            if (_modelName.empty()) {
                _modelName = "train_last";
            }
            std::string modelFullName = _modelName + ".onnx";
            // Parsing for YOLO only here
            _modelConfig = modelConfig;
            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.modelType = ModelType::TENSORRT;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
            if (_modelConfig.modelMNSThreshold < 0.2)
                _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2)
                _modelConfig.modelConfThreshold = 0.5;
            if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)  // 133 = COCO wholebody max
                _modelConfig.numKPS = 17;
            if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
            _fp16 = true; // Load Model from Here
            // Load Model from Here
            TOP_K = 100;
            SEG_CHANNELS = 32;
            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
            SEGMENTATION_THRESHOLD = 0.5f;
            SEG_H = 160;
            SEG_W = 160;
            NUM_KPS = _modelConfig.numKPS;
            KPS_THRESHOLD = _modelConfig.kpsThreshold;
            SEG_CHANNELS = 32;      // For segmentation
            if (!m_trtEngine) {
                // Fixed batch size of 1 for this model
                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
                m_options.maxInputHeight = _modelConfig.maxInputHeight;
                m_options.minInputHeight = _modelConfig.minInputHeight;
                m_options.optInputHeight = _modelConfig.optInputHeight;
                m_options.maxInputWidth = _modelConfig.maxInputWidth;
                m_options.minInputWidth = _modelConfig.minInputWidth;
                m_options.optInputWidth = _modelConfig.optInputWidth;
                m_options.engineFileDir = _modelFolder;
                // Use FP16 or FP32 precision based on the input flag
                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
                // Create the TensorRT inference engine
                m_trtEngine = std::make_unique<Engine<float>>(m_options);
            }

            // 0. Check if the configuration file exist
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
                }
            }
            else
            {// This is old version of model zip file
                _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
                _classFilePath = CreateFilePath(_modelFolder, className);
                std::ifstream isValidFileName(_classFilePath);
                if (!isValidFileName)
                {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from string", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
                else {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from file", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
            }

            // 1. Load labelMap and engine
            labelMap.clear();
            if (!_classes.empty())
                labelMap = VectorToCommaSeparatedString(_classes);


            // 2. Load the TensorRT engine file
            if (this->_loadEngineOnCreation) {
                auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                if (!succ) {
                    const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory.";
                    this->_logger.LogError("ANSYOLOV10RTOD::Initialize", errMsg, __FILE__, __LINE__);
                    _modelLoadValid = false;
                    return false;
                }
            }
            _modelLoadValid = true;
            _isInitialized = true;
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    bool ANSYOLOV10RTOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap)
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try {
            const bool engineAlreadyLoaded = _modelLoadValid && _isInitialized && m_trtEngine != nullptr;
            _modelLoadValid = false;
            bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
            if (!result) return false;
            // Parsing for YOLO only here
            _modelConfig = modelConfig;
            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.modelType = ModelType::TENSORRT;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
			if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)  // 133 = COCO wholebody max
                _modelConfig.numKPS = 17;
            if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
            _fp16 = true; // Load Model from Here
            // Load Model from Here
            TOP_K = 100;
            SEG_CHANNELS = 32;
            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
            SEGMENTATION_THRESHOLD = 0.5f;
            SEG_H = 160;
            SEG_W = 160;
            NUM_KPS = _modelConfig.numKPS;
            KPS_THRESHOLD = _modelConfig.kpsThreshold;
            SEG_CHANNELS = 32;      // For segmentation
            if (!m_trtEngine) {
                // Fixed batch size of 1 for this model
                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
                m_options.maxInputHeight = _modelConfig.maxInputHeight;
                m_options.minInputHeight = _modelConfig.minInputHeight;
                m_options.optInputHeight = _modelConfig.optInputHeight;
                m_options.maxInputWidth = _modelConfig.maxInputWidth;
                m_options.minInputWidth = _modelConfig.minInputWidth;
                m_options.optInputWidth = _modelConfig.optInputWidth;
                m_options.engineFileDir = _modelFolder;
                // Use FP16 or FP32 precision based on the input flag
                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
                // Create the TensorRT inference engine
                m_trtEngine = std::make_unique<Engine<float>>(m_options);
            }

            // 0. Check if the configuration file exist
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
                }
            }
            else
            {// This is old version of model zip file
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                std::ifstream isValidFileName(_classFilePath);
                if (!isValidFileName)
                {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from string", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
                else {
                    this->_logger.LogDebug("ANSYOLOV10RTOD::Initialize.  Load classes from file", _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
            }

            // 1. Load labelMap and engine
            labelMap.clear();
            if (!_classes.empty())
                labelMap = VectorToCommaSeparatedString(_classes);


       	 // 2. Load the TensorRT engine file
            if (this->_loadEngineOnCreation && !engineAlreadyLoaded) {
                auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                if (!succ) {
                    const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory.";
                    this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                    _modelLoadValid = false;
                    return false;
                }
            }
            _modelLoadValid = true;
            _isInitialized = true;
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
    }
    // Convenience overload: runs inference using the fixed camera id
    // "TensorRT10Cam" and returns whatever the two-argument overload returns.
    std::vector<Object> ANSYOLOV10RTOD::RunInference(const cv::Mat& inputImgBGR) {
        const std::string defaultCameraId("TensorRT10Cam");
        return RunInference(inputImgBGR, defaultCameraId);
    }

    // Public inference entry point.
    // Checks, while holding _mutex, that the model is loaded, the license is
    // valid, the detector is initialized and the image is at least 10x10 pixels;
    // then delegates to DetectObjects with the mutex released.
    // Returns an empty vector when any check fails or DetectObjects throws a
    // std::exception.
    std::vector<Object> ANSYOLOV10RTOD::RunInference(const cv::Mat& inputImgBGR, const std::string& camera_id) {
        // The lock lives only for the duration of this immediately-invoked lambda.
        const bool readyToRun = [&]() -> bool {
            std::lock_guard<std::recursive_mutex> guard(_mutex);
            if (!_modelLoadValid) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::RunInference",
                    "Cannot load the TensorRT model. Please check if it is exist", __FILE__, __LINE__);
                return false;
            }
            if (!_licenseValid) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::RunInference",
                    "Runtime license is not valid or expired. Please contact ANSCENTER", __FILE__, __LINE__);
                return false;
            }
            if (!_isInitialized) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::RunInference",
                    "Initialisation is not valid or expired. Please contact ANSCENTER", __FILE__, __LINE__);
                return false;
            }
            // Tiny or empty frames are rejected silently.
            return !inputImgBGR.empty() && inputImgBGR.cols >= 10 && inputImgBGR.rows >= 10;
        }();

        if (!readyToRun) {
            return {};
        }

        try {
            return DetectObjects(inputImgBGR, camera_id);
        }
        catch (const std::exception& ex) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::RunInference", ex.what(), __FILE__, __LINE__);
            return {};
        }
    }


    // Releases the engine and helper resources via Destroy().
    // No exception is allowed to leave the destructor: an escaping exception
    // would terminate the program.
    ANSYOLOV10RTOD::~ANSYOLOV10RTOD() {
        try {
            Destroy();
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSYOLOV10RTOD::~ANSYOLOV10RTOD()", e.what(), __FILE__, __LINE__);
        }
        catch (...) {
            // Non-std exceptions are swallowed as well; there is nothing left to
            // recover at this point.
        }
    }
    bool ANSYOLOV10RTOD::Destroy() {
        try {
            m_trtEngine.reset();
            m_nv12Helper.destroy();
            return true;
        }
        catch (std::exception& e) {
            this->_logger.LogError("ANSYOLOV10RTOD::~ANSYOLOV10RTOD()", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // private
    // Runs the full detection pipeline on one frame:
    //   1. preprocessing under _mutex (NV12 helper first, regular BGR path as
    //      fallback),
    //   2. TensorRT inference with the mutex released,
    //   3. post-processing under _mutex, selected by the engine's output layout,
    //   4. optional rescaling of coordinates, tracking and stabilization.
    // Returns the detections, or an empty vector when the engine is not ready,
    // preprocessing yields no input, or inference fails.
    std::vector<Object> ANSYOLOV10RTOD::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {
        // Phase 1: Preprocess under brief lock -- try NV12 fast path first
        ImageMetadata meta;
        std::vector<std::vector<cv::cuda::GpuMat>> input;
        float bgrFullResScaleX = 1.0f, bgrFullResScaleY = 1.0f;
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            if (!m_trtEngine) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::DetectObjects", "TensorRT engine is not created", __FILE__, __LINE__);
                return {};
            }
            const int inferenceGpu = m_trtEngine->getPreferredDeviceIndex();
            const auto& inputDims = m_trtEngine->getInputDims();
            if (inputDims.empty()) {
                // Guards inputDims[0] below when no input dimensions are available.
                this->_logger.LogFatal("ANSYOLOV10RTOD::DetectObjects", "TensorRT engine has no input dimensions", __FILE__, __LINE__);
                return {};
            }
            const int inputW = inputDims[0].d[2];
            const int inputH = inputDims[0].d[1];

            auto nv12 = m_nv12Helper.tryNV12(inputImage, inferenceGpu, inputW, inputH,
                                              NV12PreprocessHelper::defaultYOLOLauncher(),
                                              _logger, "ANSYOLOV10RTOD");
            if (nv12.succeeded) {
                // The helper already produced the GPU tensor and its metadata.
                meta.imgWidth  = nv12.metaWidth;
                meta.imgHeight = nv12.metaHeight;
                meta.ratio     = nv12.ratio;
                input = {{ std::move(nv12.gpuRGB) }};
            }
            else if (nv12.useBgrFullRes) {
                // The helper supplied a BGR image instead; detections are scaled
                // by the helper's factors after post-processing.
                input = Preprocess(nv12.bgrFullResImg, meta);
                bgrFullResScaleX = nv12.bgrFullResScaleX;
                bgrFullResScaleY = nv12.bgrFullResScaleY;
            }

            // Fallback: regular preprocessing of the caller's image.
            if (input.empty()) {
                input = Preprocess(inputImage, meta);
            }
            m_nv12Helper.tickInference();
        }
        if (input.empty()) return {};

        // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot
        std::vector<std::vector<std::vector<float>>> featureVectors;
        const auto succ = m_trtEngine->runInference(input, featureVectors);
        if (!succ) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::DetectObjects", "Error running inference", __FILE__, __LINE__);
            return {};
        }

        // Phase 3: Postprocess under lock
        std::vector<Object> ret;
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            const auto& outputDims = m_trtEngine->getOutputDims();
            if (outputDims.empty()) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::DetectObjects", "TensorRT engine has no output dimensions", __FILE__, __LINE__);
                return {};
            }
            if (outputDims.size() == 1) {
                std::vector<float> featureVector;
                Engine<float>::transformOutput(featureVectors, featureVector);
                const int numChannels = outputDims[0].d[1];
                // A single output with 56 channels is routed to the pose
                // post-processor; any other single output is plain detection.
                if (numChannels == 56) {
                    ret = PostProcessPose(featureVector, camera_id, meta);
                }
                else {
                    ret = Postprocess(featureVector, camera_id, meta);
                }
            }
            else {
                // More than one output tensor is handled as segmentation.
                std::vector<std::vector<float>> featureVector;
                Engine<float>::transformOutput(featureVectors, featureVector);
                ret = PostProcessSegmentation(featureVector, camera_id, meta);
            }
        }

        // Rescale coords from full-res to display-res (BGR full-res path)
        if (bgrFullResScaleX != 1.0f || bgrFullResScaleY != 1.0f) {
            for (auto& obj : ret) {
                obj.box.x      = static_cast<int>(obj.box.x      * bgrFullResScaleX);
                obj.box.y      = static_cast<int>(obj.box.y      * bgrFullResScaleY);
                obj.box.width  = static_cast<int>(obj.box.width  * bgrFullResScaleX);
                obj.box.height = static_cast<int>(obj.box.height * bgrFullResScaleY);
                for (auto& pt : obj.polygon) {
                    pt.x *= bgrFullResScaleX;
                    pt.y *= bgrFullResScaleY;
                }
                // kps is walked in strides of three; only the first two values of
                // each triple are scaled.
                for (size_t k = 0; k + 2 < obj.kps.size(); k += 3) {
                    obj.kps[k]     *= bgrFullResScaleX;
                    obj.kps[k + 1] *= bgrFullResScaleY;
                }
            }
        }

        if (_trackerEnabled) {
            ret = ApplyTracking(ret, camera_id);
            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
        }
        return ret;
    }
    // Uploads a BGR (or single-channel) image to the GPU, converts it to RGB and
    // resizes it to the engine's input size with
    // Engine<float>::resizeKeepAspectRatioPadRightBottom.
    //
    // inputImage - source image on the host.
    // outMeta    - out-parameter; receives the source width/height and the
    //              scale factor (ratio) from network coordinates back to source
    //              coordinates.
    // Returns a 1x1 nested vector holding the prepared GpuMat, or an empty vector
    // when the license is invalid, the image is empty, the engine is not ready,
    // or a std::exception is caught.
    std::vector<std::vector<cv::cuda::GpuMat>> ANSYOLOV10RTOD::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {
        try {
            if (!_licenseValid) {
                _logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "Invalid license", __FILE__, __LINE__);
                return {};
            }
            // Reject empty input before touching the engine or the GPU.
            if (inputImage.empty()) {
                _logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "Empty input image", __FILE__, __LINE__);
                return {};
            }
            if (!m_trtEngine) {
                _logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "TensorRT engine is not created", __FILE__, __LINE__);
                return {};
            }

            const auto& inputDims = m_trtEngine->getInputDims();
            if (inputDims.empty()) {
                // Guards inputDims[0] below when no input dimensions are available.
                _logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "TensorRT engine has no input dimensions", __FILE__, __LINE__);
                return {};
            }
            const int inputH = inputDims[0].d[1];
            const int inputW = inputDims[0].d[2];

            // Upload input image to GPU
            cv::cuda::Stream stream;
            cv::cuda::GpuMat img;

            // Convert grayscale to BGR if needed
            if (inputImage.channels() == 1) {
                cv::Mat img3Channel;
                cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
                img.upload(img3Channel, stream);
            }
            else {
                img.upload(inputImage, stream);
            }

            // Convert to RGB
            cv::cuda::GpuMat imgRGB;
            cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
            // The resize below is not issued on `stream`, so wait for the
            // upload and colour conversion to finish first.
            stream.waitForCompletion();

            outMeta.imgHeight = imgRGB.rows;
            outMeta.imgWidth = imgRGB.cols;

            if (!(outMeta.imgHeight > 0 && outMeta.imgWidth > 0)) {
                this->_logger.LogFatal("ANSYOLOV10RTOD::Preprocess",
                    "Image height or width is zero after processing (Width: " + std::to_string(outMeta.imgWidth) +
                    ", Height: " + std::to_string(outMeta.imgHeight) + ")",
                    __FILE__, __LINE__);
                return {};
            }

            // Inverse of the aspect-preserving scale: multiplies network-space
            // coordinates back into source-image pixels.
            outMeta.ratio = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
                inputH / static_cast<float>(imgRGB.rows));

            cv::cuda::GpuMat resized = imgRGB;

            // Resize to the model's expected input size while maintaining aspect ratio with padding
            if (resized.rows != inputH || resized.cols != inputW) {
                resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
            }

            // Convert to format expected by our inference engine
            std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
            std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
            return inputs;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ANSYOLOV10RTOD::Preprocess", e.what(), __FILE__, __LINE__);
            return {};
        }
    }
    // Converts the flat detection output into Object instances.
    //
    // featureVector is read as consecutive groups of six floats. This code uses
    // values 0-1 as the top-left corner and values 2-3 as the bottom-right corner
    // (in network-input pixels), value 4 as the score and value 5 as the class id.
    // camera_id is copied into every returned object; meta supplies the source
    // image size and the scale factor back to source pixels.
    //
    // Detections whose score is not above _modelConfig.detectionScoreThreshold
    // are skipped, as are boxes that become empty after clipping to the image.
    // Returns an empty vector when the engine is not ready or a std::exception
    // is caught.
    std::vector<Object> ANSYOLOV10RTOD::Postprocess(std::vector<float>& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
        try {
            if (!m_trtEngine) {
                return {};
            }
            const auto& outputDims = m_trtEngine->getOutputDims();
            if (outputDims.empty()) {
                return {};
            }

            constexpr size_t kValuesPerDetection = 6;
            // Never read more rows than the buffer actually holds, even if the
            // engine reports a larger output dimension.
            const auto declaredRows = outputDims[0].d[1];
            const size_t rowsInBuffer = featureVector.size() / kValuesPerDetection;
            const size_t rowCount = declaredRows > 0
                ? std::min(static_cast<size_t>(declaredRows), rowsInBuffer)
                : 0;

            const int classCount = static_cast<int>(_classes.size());
            const int imgW = static_cast<int>(meta.imgWidth);
            const int imgH = static_cast<int>(meta.imgHeight);

            std::vector<Object> objects;
            for (size_t i = 0; i < rowCount; ++i) {
                const float* det = featureVector.data() + i * kValuesPerDetection;
                const float score = det[4];
                if (!(score > this->_modelConfig.detectionScoreThreshold)) {
                    continue;
                }

                // Scale from network-input pixels to source-image pixels.
                int x = static_cast<int>(det[0] * meta.ratio);
                int y = static_cast<int>(det[1] * meta.ratio);
                int width = static_cast<int>((det[2] - det[0]) * meta.ratio);
                int height = static_cast<int>((det[3] - det[1]) * meta.ratio);

                // Clip to the image. When the top-left corner is moved to 0 the
                // extent shrinks by the same amount so the far edge stays put.
                if (x < 0) { width += x; x = 0; }
                if (y < 0) { height += y; y = 0; }
                width = std::min(width, imgW - x);
                height = std::min(height, imgH - y);
                if (width <= 0 || height <= 0) {
                    continue; // nothing of the box lies inside the image
                }

                Object obj;
                obj.box = cv::Rect(x, y, width, height);
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                obj.confidence = score;
                obj.classId = static_cast<int>(det[5]);
                if (_classes.empty() || obj.classId < 0) {
                    obj.className = "Unknown"; // no class list, or an invalid negative id
                }
                else if (obj.classId < classCount) {
                    obj.className = _classes[obj.classId];
                }
                else {
                    obj.className = _classes[classCount - 1]; // Use last valid class name if out of range
                }
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }

            return objects;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSYOLOV10RTOD::Postprocess", e.what(), __FILE__, __LINE__);
            return {};
        }
    }
    std::vector<Object> ANSYOLOV10RTOD::PostProcessSegmentation(std::vector<std::vector<float>>& featureVectors, const std::string& camera_id, const ImageMetadata& meta) {
        try {
            if (!_licenseValid) {
                this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", "Invalid license", __FILE__, __LINE__);
                std::vector<Object> result;
                result.clear();
                return result;
            }
            const auto& outputDims = m_trtEngine->getOutputDims();

            int numChannels = outputDims[0].d[1];
            int numAnchors = outputDims[0].d[2];

            const auto numClasses = numChannels - SEG_CHANNELS - 4;

            // Ensure the output lengths are correct
            if (featureVectors[0].size() != static_cast<size_t>(numChannels) * numAnchors) {
                std::vector<Object>result;
                result.clear();
                return result;
            }

            if (featureVectors[1].size() != static_cast<size_t>(SEG_CHANNELS) * SEG_H * SEG_W) {
                std::vector<Object>result;
                result.clear();
                return result;
            }

            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVectors[0].data());
            output = output.t();

            cv::Mat protos = cv::Mat(SEG_CHANNELS, SEG_H * SEG_W, CV_32F, featureVectors[1].data());

            std::vector<int> labels;
            std::vector<float> scores;
            std::vector<cv::Rect> bboxes;
            std::vector<cv::Mat> maskConfs;
            std::vector<int> indices;

            // Object the bounding boxes and class labels
            for (int i = 0; i < numAnchors; i++) {
                auto rowPtr = output.row(i).ptr<float>();
                auto bboxesPtr = rowPtr;
                auto scoresPtr = rowPtr + 4;
                auto maskConfsPtr = rowPtr + 4 + numClasses;
                auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
                float score = *maxSPtr;
                if (score > this->_modelConfig.detectionScoreThreshold) {
                    float x = *bboxesPtr++;
                    float y = *bboxesPtr++;
                    float w = *bboxesPtr++;
                    float h = *bboxesPtr;

                    float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                    float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                    float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                    float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);

                    int label = maxSPtr - scoresPtr;
                    cv::Rect_<float> bbox;
                    bbox.x = x0;
                    bbox.y = y0;
                    bbox.width = x1 - x0;
                    bbox.height = y1 - y0;
                    bbox.x = std::clamp(bbox.x, 0.f, meta.imgWidth);
                    bbox.y = std::clamp(bbox.y, 0.f, meta.imgHeight);
                    bbox.width = std::clamp(bbox.width, 0.f, meta.imgWidth - bbox.x);
                    bbox.height = std::clamp(bbox.height, 0.f, meta.imgHeight - bbox.y);
                    cv::Mat maskConf = cv::Mat(1, SEG_CHANNELS, CV_32F, maskConfsPtr);
                    bboxes.push_back(bbox);
                    labels.push_back(label);
                    scores.push_back(score);
                    maskConfs.push_back(maskConf);
                }
            }

            // Require OpenCV 4.7 for this function
            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
			int classNameSize = static_cast<int>(_classes.size());
            // Obtain the segmentation masks
            cv::Mat masks;
            std::vector<Object> objs;
            for (auto& i : indices) {
                if (scores[i] > PROBABILITY_THRESHOLD) {
                    cv::Rect tmp = bboxes[i];
                    Object obj;
                    obj.classId = labels[i];
                    if (!_classes.empty()) {
                        if (obj.classId < classNameSize) {
                            obj.className = _classes[obj.classId];
                        }
                        else {
                            obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                        }
                    }
                    else {
                        obj.className = "Unknown"; // Fallback if _classes is empty
                    }
                    obj.box = tmp;
                    obj.confidence = scores[i];
                    masks.push_back(maskConfs[i]);
                    objs.push_back(obj);
                }
            }

            // Convert segmentation mask to original frame
            if (!masks.empty()) {
                cv::Mat matmulRes = (masks * protos).t();
                cv::Mat maskMat = matmulRes.reshape(indices.size(), { _modelConfig.inpWidth, _modelConfig.inpHeight });

                std::vector<cv::Mat> maskChannels;
                cv::split(maskMat, maskChannels);
                const auto inputDims = m_trtEngine->getInputDims();

                cv::Rect roi;
                if (meta.imgHeight > meta.imgWidth) {
                    roi = cv::Rect(0, 0, _modelConfig.inpWidth * meta.imgWidth / meta.imgHeight, _modelConfig.inpHeight);
                }
                else {
                    roi = cv::Rect(0, 0, _modelConfig.inpWidth, _modelConfig.inpHeight * meta.imgHeight / meta.imgWidth);
                }


                for (size_t i = 0; i < indices.size(); i++)
                {
                    cv::Mat dest, mask;
                    cv::exp(-maskChannels[i], dest);
                    dest = 1.0 / (1.0 + dest);
                    dest = dest(roi);
                    objs[i].cameraId = camera_id;
                    cv::resize(
                        dest,
                        mask,
                        cv::Size(static_cast<int>(meta.imgWidth), static_cast<int>(meta.imgHeight)),
                        cv::INTER_LINEAR
                    );
                    objs[i].mask = mask(objs[i].box) > _modelConfig.modelConfThreshold;// Need to check segmentation
                }
            }
            //EnqueueDetection(objs, camera_id);
            return objs;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", e.what(), __FILE__, __LINE__);
            std::vector<Object>result;
            result.clear();
            return result;
        }
    }
    /// @brief Decode the raw output of a pose model into objects with keypoints.
    ///
    /// The buffer is interpreted as a [numChannels x numAnchors] float tensor. After transposition each
    /// row holds 4 box values (centre x/y, width, height), one score and NUM_KPS (x, y, score) triples.
    ///
    /// @param featureVector Flat output tensor for a single image.
    /// @param camera_id     Identifier copied into every returned object.
    /// @param meta          Original image size and the ratio used to scale coordinates back to it.
    /// @return Objects that survive score filtering and NMS, all with class id 0. Empty when the buffer is
    ///         too small for the engine's output dimensions or an exception is caught.
    std::vector<Object> ANSYOLOV10RTOD::PostProcessPose(std::vector<float>& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
        try {
            const auto& outputDims = m_trtEngine->getOutputDims();
            if (outputDims.empty()) {
                this->_logger.LogError("ANSYOLOV10RTOD::PostProcessPose", "Engine reports no output tensors", __FILE__, __LINE__);
                return {};
            }
            const int numChannels = static_cast<int>(outputDims[0].d[1]);
            const int numAnchors = static_cast<int>(outputDims[0].d[2]);

            // Every row is read up to index 4 + 3 * NUM_KPS, and the whole buffer is wrapped in a cv::Mat
            // without copying, so an undersized buffer or row would be read out of bounds.
            const int minChannels = static_cast<int>(5 + 3 * NUM_KPS);
            if (numChannels < minChannels || numAnchors <= 0 ||
                featureVector.size() < static_cast<size_t>(numChannels) * static_cast<size_t>(numAnchors)) {
                this->_logger.LogError("ANSYOLOV10RTOD::PostProcessPose",
                    "Output tensor size does not match the engine output dimensions", __FILE__, __LINE__);
                return {};
            }

            std::vector<cv::Rect> bboxes;
            std::vector<float> scores;
            std::vector<int> labels;
            std::vector<int> indices;
            std::vector<std::vector<float>> kpss;

            // Transpose so that every row describes one anchor.
            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
            output = output.t();

            // Get all the YOLO proposals
            for (int i = 0; i < numAnchors; i++) {
                const float* rowPtr = output.ptr<float>(i);
                const float* kps_ptr = rowPtr + 5;
                const float score = rowPtr[4];
                if (score > this->_modelConfig.detectionScoreThreshold)
                {
                    const float x = rowPtr[0];
                    const float y = rowPtr[1];
                    const float w = rowPtr[2];
                    const float h = rowPtr[3];

                    // Centre/size -> corners, scaled back to the original image and clamped to it.
                    const float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                    const float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                    const float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                    const float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);

                    cv::Rect_<float> bbox;
                    bbox.x = x0;
                    bbox.y = y0;
                    // The clamp also turns a negative extent (w or h < 0) into an empty box.
                    bbox.width = std::clamp(x1 - x0, 0.f, meta.imgWidth - x0);
                    bbox.height = std::clamp(y1 - y0, 0.f, meta.imgHeight - y0);

                    // Keypoints are stored flat as x, y, score triples.
                    std::vector<float> kps;
                    kps.reserve(static_cast<size_t>(3 * NUM_KPS));
                    for (int k = 0; k < NUM_KPS; k++) {
                        const float kpsX = kps_ptr[3 * k] * meta.ratio;
                        const float kpsY = kps_ptr[3 * k + 1] * meta.ratio;
                        const float kpsS = kps_ptr[3 * k + 2];
                        kps.push_back(std::clamp(kpsX, 0.f, meta.imgWidth));
                        kps.push_back(std::clamp(kpsY, 0.f, meta.imgHeight));
                        kps.push_back(kpsS);
                    }

                    bboxes.push_back(bbox);
                    labels.push_back(0); // All detected objects are people
                    scores.push_back(score);
                    kpss.push_back(std::move(kps));
                }
            }

            // Run NMS
            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
            std::vector<Object> objects;
            const int classNameSize = static_cast<int>(_classes.size());
            for (const int chosenIdx : indices) {
                if (scores[chosenIdx] > PROBABILITY_THRESHOLD) {
                    Object obj{};
                    obj.confidence = scores[chosenIdx];
                    obj.classId = labels[chosenIdx];
                    if (!_classes.empty()) {
                        if (obj.classId < classNameSize) {
                            obj.className = _classes[obj.classId];
                        }
                        else {
                            obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                        }
                    }
                    else {
                        obj.className = "Unknown"; // Fallback if _classes is empty
                    }
                    obj.box = bboxes[chosenIdx];
                    obj.kps = kpss[chosenIdx];
                    obj.cameraId = camera_id;
                    objects.push_back(obj);
                }
            }
            //EnqueueDetection(objects, camera_id);
            return objects;
        }
        catch (const std::exception& e) {
            // Same policy as the other post-processing routines: log and report no detections.
            this->_logger.LogFatal("ANSYOLOV10RTOD::PostProcessPose", e.what(), __FILE__, __LINE__);
            return {};
        }
    }


    /// @brief Run detection on a batch of images and return one result list per image.
    ///
    /// Batches larger than m_options.maxBatchSize are split into chunks that are processed one after
    /// another through recursive calls. A batch that fits is preprocessed under the mutex, run through
    /// the engine without holding it, and post-processed with one asynchronous task per image. The
    /// post-processing routine is chosen from the engine's output layout: a single output with 56
    /// channels goes to PostProcessPose, any other single output to PostprocessBatch, and multiple
    /// outputs to PostProcessSegmentation.
    ///
    /// @param inputImages Images to process.
    /// @param camera_id   Identifier forwarded to the post-processing routines.
    /// @return One vector of objects per image, in input order; empty when the input is empty or when
    ///         preprocessing, inference or result bookkeeping fails.
    /// @note Exceptions thrown by a post-processing task are rethrown here by std::future::get().
    std::vector<std::vector<Object>> ANSYOLOV10RTOD::DetectObjectsBatch(
        const std::vector<cv::Mat>& inputImages, const std::string& camera_id)
    {
        // Validate under brief lock
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            if (inputImages.empty()) {
                _logger.LogFatal("ANSYOLOV10RTOD::DetectObjectsBatch",
                    "Empty input images vector", __FILE__, __LINE__);
                return {};
            }
        }

        // Auto-split if batch exceeds engine capacity
        const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;
        if (static_cast<int>(inputImages.size()) > maxBatch) {
            const size_t numImages = inputImages.size();
            const size_t chunkSize = static_cast<size_t>(maxBatch);
            std::vector<std::vector<Object>> allResults;
            allResults.reserve(numImages);
            // Process chunks sequentially to avoid GPU contention on the same engine
            for (size_t start = 0; start < numImages; start += chunkSize) {
                const size_t end = std::min(start + chunkSize, numImages);
                std::vector<cv::Mat> chunk(inputImages.begin() + start, inputImages.begin() + end);
                auto chunkResults = DetectObjectsBatch(chunk, camera_id);
                if (chunkResults.size() != chunk.size()) {
                    _logger.LogError("ANSYOLOV10RTOD::DetectObjectsBatch",
                        "Chunk returned " + std::to_string(chunkResults.size()) +
                        " results, expected " + std::to_string(chunk.size()) +
                        ". Padding with empty results.", __FILE__, __LINE__);
                }
                // Keep exactly one entry per image so result i always belongs to input image i.
                chunkResults.resize(chunk.size());
                for (auto& r : chunkResults) allResults.push_back(std::move(r));
            }
            return allResults;
        }

        _logger.LogDebug("ANSYOLOV10RTOD::DetectObjectsBatch",
            "Processing batch of " + std::to_string(inputImages.size()) + " images",
            __FILE__, __LINE__);

        // Phase 1: Preprocess under brief lock
        BatchMetadata metadata;
        std::vector<std::vector<cv::cuda::GpuMat>> inputs;
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            inputs = PreprocessBatch(inputImages, metadata);
        }
        if (inputs.empty() || inputs[0].empty()) {
            _logger.LogFatal("ANSYOLOV10RTOD::DetectObjectsBatch",
                "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }

        // Phase 2: Inference -- mutex released; pool dispatches to idle GPU slot
        std::vector<std::vector<std::vector<float>>> featureVectors;
        auto succ = m_trtEngine->runInference(inputs, featureVectors);
        if (!succ) {
            _logger.LogFatal("ANSYOLOV10RTOD::DetectObjectsBatch",
                "Error running inference", __FILE__, __LINE__);
            return {};
        }

        // Phase 3: Parallel postprocessing with model-type dispatch
        const size_t numBatch = featureVectors.size();
        const auto& outputDims = m_trtEngine->getOutputDims();
        const size_t numOutputs = outputDims.size();
        if (numOutputs == 0) {
            // Every post-processing routine reads outputDims[0].
            _logger.LogFatal("ANSYOLOV10RTOD::DetectObjectsBatch",
                "Engine reports no output tensors", __FILE__, __LINE__);
            return {};
        }
        if (numBatch > metadata.ratios.size() ||
            numBatch > metadata.imgWidths.size() ||
            numBatch > metadata.imgHeights.size()) {
            // The per-image metadata is indexed by batch position below.
            _logger.LogFatal("ANSYOLOV10RTOD::DetectObjectsBatch",
                "Inference returned " + std::to_string(numBatch) +
                " results but metadata exists for only " + std::to_string(metadata.ratios.size()) +
                " images", __FILE__, __LINE__);
            return {};
        }

        // NOTE(review): 56 channels presumably means 4 box values + 1 score + 17 keypoints * 3 -- confirm.
        const bool isPoseModel = (numOutputs == 1) && (outputDims[0].d[1] == 56);

        std::vector<std::vector<Object>> batchDetections(numBatch);
        std::vector<std::future<std::vector<Object>>> postFutures;
        postFutures.reserve(numBatch);

        for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {
            // featureVectors is not read again after this loop, so each task takes ownership of its
            // tensors instead of deep-copying them.
            auto& batchOutput = featureVectors[batchIdx];
            ImageMetadata imgMeta;
            imgMeta.ratio     = metadata.ratios[batchIdx];
            imgMeta.imgWidth  = static_cast<float>(metadata.imgWidths[batchIdx]);
            imgMeta.imgHeight = static_cast<float>(metadata.imgHeights[batchIdx]);

            if (numOutputs == 1) {
                std::vector<float> fv;
                if (!batchOutput.empty()) fv = std::move(batchOutput[0]);
                if (isPoseModel) {
                    postFutures.push_back(std::async(std::launch::async,
                        [this, fv = std::move(fv), cid = camera_id, im = imgMeta]() mutable {
                            return PostProcessPose(fv, cid, im);
                        }));
                } else {
                    postFutures.push_back(std::async(std::launch::async,
                        [this, fv = std::move(fv), cid = camera_id,
                         idx = batchIdx, &metadata]() mutable {
                            return PostprocessBatch(fv, cid, idx, metadata);
                        }));
                }
            } else {
                postFutures.push_back(std::async(std::launch::async,
                    [this, fv2d = std::move(batchOutput), cid = camera_id, im = imgMeta]() mutable {
                        return PostProcessSegmentation(fv2d, cid, im);
                    }));
            }
        }
        // Gather results in original order; metadata stays alive until all futures joined
        // (postFutures is declared after metadata, and futures from std::async block in their
        // destructor, so this also holds if get() throws).
        for (size_t i = 0; i < numBatch; ++i)
            batchDetections[i] = postFutures[i].get();

        _logger.LogDebug("ANSYOLOV10RTOD::DetectObjectsBatch",
            "Batch processing complete. Images: " + std::to_string(numBatch),
            __FILE__, __LINE__);
        return batchDetections;
    }

    /// @brief Guarded entry point for batch inference.
    ///
    /// Checks, while holding the mutex, that the model is loaded, the license is valid and the engine
    /// is initialized (in that order; the first failing check is logged). An empty input is accepted
    /// silently. Otherwise the work is delegated to DetectObjectsBatch.
    ///
    /// @param inputs    Images to process.
    /// @param camera_id Identifier forwarded to DetectObjectsBatch.
    /// @return The result of DetectObjectsBatch, or an empty vector when a precondition fails, the
    ///         input is empty, or DetectObjectsBatch throws a std::exception.
    std::vector<std::vector<Object>> ANSYOLOV10RTOD::RunInferencesBatch(
        const std::vector<cv::Mat>& inputs, const std::string& camera_id)
    {
        {
            std::lock_guard<std::recursive_mutex> guard(_mutex);

            // First unmet precondition, if any; evaluated in the same order the checks are reported.
            const char* failure = nullptr;
            if (!_modelLoadValid) {
                failure = "Model not loaded";
            }
            else if (!_licenseValid) {
                failure = "Invalid license";
            }
            else if (!_isInitialized) {
                failure = "Engine not initialized";
            }

            if (failure != nullptr) {
                _logger.LogError("ANSYOLOV10RTOD::RunInferencesBatch",
                    failure, __FILE__, __LINE__);
                return {};
            }

            // Nothing to do for an empty batch; this is not treated as an error.
            if (inputs.empty()) {
                return {};
            }
        }

        try {
            return DetectObjectsBatch(inputs, camera_id);
        }
        catch (const std::exception& ex) {
            _logger.LogFatal("ANSYOLOV10RTOD::RunInferencesBatch", ex.what(), __FILE__, __LINE__);
            return {};
        }
    }
    /// @brief Upload a batch of host images to the GPU and bring them to the engine's input size.
    ///
    /// Each image is uploaded (single-channel images are first expanded to BGR on the host), converted
    /// from BGR to RGB, and resized with Engine<float>::resizeKeepAspectRatioPadRightBottom unless it
    /// already matches the engine input height and width. The original size of every image and the
    /// ratio 1 / min(inputW / width, inputH / height) are recorded in outMetadata.
    ///
    /// @param inputImages Host images to preprocess.
    /// @param outMetadata Receives per-image original heights, widths and ratios; its vectors are
    ///                    resized to the batch size before any image is processed.
    /// @return A vector with a single entry holding all preprocessed images, or an empty vector when
    ///         the license is invalid, an image is empty or has invalid dimensions, or an exception
    ///         is caught.
    std::vector<std::vector<cv::cuda::GpuMat>> ANSYOLOV10RTOD::PreprocessBatch(const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata) {
        try {
            if (!_licenseValid) {
                _logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch", "Invalid license", __FILE__, __LINE__);
                return {};
            }

            const auto& engineInputDims = m_trtEngine->getInputDims();
            const int targetH = engineInputDims[0].d[1];
            const int targetW = engineInputDims[0].d[2];

            // One metadata slot per image in the batch
            const size_t imageCount = inputImages.size();
            outMetadata.imgHeights.resize(imageCount);
            outMetadata.imgWidths.resize(imageCount);
            outMetadata.ratios.resize(imageCount);

            std::vector<cv::cuda::GpuMat> prepared;
            prepared.reserve(imageCount);

            cv::cuda::Stream stream;

            for (size_t idx = 0; idx < imageCount; ++idx) {
                const cv::Mat& frame = inputImages[idx];

                if (frame.empty()) {
                    _logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch",
                        "Empty input image at index " + std::to_string(idx), __FILE__, __LINE__);
                    return {};
                }

                // Grayscale frames are expanded to three channels on the host before the upload
                const bool isGray = (frame.channels() == 1);
                cv::Mat expandedGray;
                if (isGray) {
                    cv::cvtColor(frame, expandedGray, cv::COLOR_GRAY2BGR);
                }

                cv::cuda::GpuMat gpuBgr;
                gpuBgr.upload(isGray ? expandedGray : frame, stream);

                // BGR -> RGB on the GPU
                cv::cuda::GpuMat gpuRgb;
                cv::cuda::cvtColor(gpuBgr, gpuRgb, cv::COLOR_BGR2RGB, 0, stream);

                // Remember the original size of this image
                outMetadata.imgHeights[idx] = gpuRgb.rows;
                outMetadata.imgWidths[idx] = gpuRgb.cols;

                const bool badSize = (outMetadata.imgHeights[idx] <= 0) || (outMetadata.imgWidths[idx] <= 0);
                if (badSize) {
                    _logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch",
                        "Image " + std::to_string(idx) + " has invalid dimensions (Width: " +
                        std::to_string(outMetadata.imgWidths[idx]) + ", Height: " +
                        std::to_string(outMetadata.imgHeights[idx]) + ")",
                        __FILE__, __LINE__);
                    return {};
                }

                // Inverse of the scale that fits the image inside the engine input
                const float scaleX = targetW / static_cast<float>(gpuRgb.cols);
                const float scaleY = targetH / static_cast<float>(gpuRgb.rows);
                outMetadata.ratios[idx] = 1.f / std::min(scaleX, scaleY);

                // Images that already have the engine input size are passed through unchanged
                const bool alreadySized = (gpuRgb.rows == targetH) && (gpuRgb.cols == targetW);
                if (alreadySized) {
                    prepared.push_back(gpuRgb);
                }
                else {
                    prepared.push_back(Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuRgb, targetH, targetW));
                }
            }

            stream.waitForCompletion();

            // Single-input model: one input tensor entry that holds every image of the batch
            std::vector<std::vector<cv::cuda::GpuMat>> engineInputs;
            engineInputs.emplace_back(std::move(prepared));
            return engineInputs;
        }
        catch (const std::exception& ex) {
            _logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch", ex.what(), __FILE__, __LINE__);
            return {};
        }
    }
    /// @brief Decode the detection output of one image of a batch into objects.
    ///
    /// The buffer is read as consecutive groups of six floats: two corner points (x1, y1, x2, y2),
    /// a confidence and a class id. Groups whose confidence exceeds
    /// _modelConfig.detectionScoreThreshold are scaled by the image's ratio, clamped to the image
    /// and returned.
    ///
    /// @param featureVector Flat output tensor for the image.
    /// @param camera_id     Identifier copied into every returned object.
    /// @param batchIdx      Position of the image inside @p metadata.
    /// @param metadata      Per-image ratios and original sizes recorded during preprocessing.
    /// @return Detected objects; empty when @p batchIdx has no metadata, the engine reports no
    ///         outputs, or an exception is caught.
    std::vector<Object> ANSYOLOV10RTOD::PostprocessBatch(std::vector<float>& featureVector,
        const std::string& camera_id,
        size_t batchIdx,
        const BatchMetadata& metadata) {
        try {
            const auto& outputDims = m_trtEngine->getOutputDims();
            if (outputDims.empty()) {
                return {};
            }
            if (batchIdx >= metadata.ratios.size() ||
                batchIdx >= metadata.imgWidths.size() ||
                batchIdx >= metadata.imgHeights.size()) {
                _logger.LogError("ANSYOLOV10RTOD::PostprocessBatch",
                    "No metadata for batch index " + std::to_string(batchIdx), __FILE__, __LINE__);
                return {};
            }

            std::vector<Object> objects;

            constexpr size_t kValuesPerDetection = 6;
            const int outputLength = static_cast<int>(outputDims[0].d[1]);
            const int classNameSize = static_cast<int>(_classes.size());

            // Never read past the end of the buffer, even if it is shorter than the engine
            // dimensions suggest (for example an empty vector for a missing output).
            const size_t numDetections = std::min(
                static_cast<size_t>(std::max(outputLength, 0)),
                featureVector.size() / kValuesPerDetection);

            // Get the ratio and dimensions for this specific image in the batch
            const float ratio = metadata.ratios[batchIdx];
            const int imgWidth = metadata.imgWidths[batchIdx];
            const int imgHeight = metadata.imgHeights[batchIdx];

            for (size_t i = 0; i < numDetections; i++) {
                const float* det = featureVector.data() + i * kValuesPerDetection;
                const float confidence = det[4];

                // Check confidence threshold
                if (confidence > _modelConfig.detectionScoreThreshold) {
                    // The four leading values are used as two corners: the box size is x2 - x1, y2 - y1
                    const float x1 = det[0];
                    const float y1 = det[1];
                    const float x2 = det[2];
                    const float y2 = det[3];

                    // Convert to pixel values using this image's ratio
                    int x = static_cast<int>(x1 * ratio);
                    int y = static_cast<int>(y1 * ratio);
                    int width = static_cast<int>((x2 - x1) * ratio);
                    int height = static_cast<int>((y2 - y1) * ratio);

                    // Clamp to image boundaries; the lower bound keeps the size from going
                    // negative when the box starts beyond the image or the corners are inverted.
                    x = std::max(x, 0);
                    y = std::max(y, 0);
                    width = std::max(0, std::min(width, imgWidth - x));
                    height = std::max(0, std::min(height, imgHeight - y));

                    // Create object
                    Object obj;
                    obj.box = cv::Rect(x, y, width, height);
                    obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, imgWidth, imgHeight);
                    obj.confidence = confidence;
                    obj.classId = static_cast<int>(det[5]);

                    if (!_classes.empty()) {
                        if (obj.classId >= 0 && obj.classId < classNameSize) {
                            obj.className = _classes[obj.classId];
                        }
                        else {
                            // Out-of-range ids (including negative ones) fall back to the last class name
                            obj.className = _classes[classNameSize - 1];
                        }
                    }
                    else {
                        obj.className = "Unknown";
                    }

                    obj.cameraId = camera_id;
                    objects.push_back(std::move(obj));
                }
            }

            return objects;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ANSYOLOV10RTOD::PostprocessBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }
}