Refactor project structure

2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions
--- a/modules/ANSODEngine/ANSYOLOV12RTOD.cpp
+++ b/modules/ANSODEngine/ANSYOLOV12RTOD.cpp
@@ -0,0 +1,935 @@
+#include "ANSYOLOV12RTOD.h"
+#include "Utility.h"
+#include <opencv2/cudaimgproc.hpp>
+#include <future>
+namespace ANSCENTER
+{
+    /// Builds the TensorRT engine for the raw model file.
+    ///
+    /// The precision flag only influences the engine options when no engine
+    /// object exists yet; an already-created engine is rebuilt with the
+    /// options it was created with.
+    ///
+    /// @param fp16                  true selects FP16, false selects FP32.
+    /// @param optimizedModelFolder  Out: parent folder of the raw model file.
+    ///                              Cleared when an exception is caught.
+    /// @return true when the engine was built, false otherwise.
+    bool ANSYOLOV12RTOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
+        std::lock_guard<std::recursive_mutex> guard(_mutex);
+
+        // The base class gets the first chance to reject the request.
+        const bool baseAccepted = ANSODBase::OptimizeModel(fp16, optimizedModelFolder);
+        if (!baseAccepted) {
+            return false;
+        }
+
+        const bool rawModelPresent = FileExist(_modelFilePath);
+        if (!rawModelPresent) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::OptimizeModel", "Raw model file path does not exist", __FILE__, __LINE__);
+            return false;
+        }
+
+        try {
+            _fp16 = fp16;
+            optimizedModelFolder = GetParentFolder(_modelFilePath);
+
+            // Lazily create the engine wrapper; reuse it if one is already alive.
+            if (m_trtEngine == nullptr) {
+                // Batch limits and target device
+                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
+                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
+                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
+                // Input height range
+                m_options.maxInputHeight = _modelConfig.maxInputHeight;
+                m_options.minInputHeight = _modelConfig.minInputHeight;
+                m_options.optInputHeight = _modelConfig.optInputHeight;
+                // Input width range
+                m_options.maxInputWidth = _modelConfig.maxInputWidth;
+                m_options.minInputWidth = _modelConfig.minInputWidth;
+                m_options.optInputWidth = _modelConfig.optInputWidth;
+                // Engine files are written next to the raw model
+                m_options.engineFileDir = optimizedModelFolder;
+                if (_fp16) {
+                    m_options.precision = Precision::FP16;
+                }
+                else {
+                    m_options.precision = Precision::FP32;
+                }
+                m_trtEngine = std::make_unique<Engine<float>>(m_options);
+            }
+
+            const auto built = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
+            if (built) {
+                _modelLoadValid = true;
+                return true;
+            }
+
+            // Build failed: report it and mark the model as unusable.
+            const std::string buildFailure =
+                "Error: Unable to build the TensorRT engine. "
+                "Try increasing TensorRT log severity to kVERBOSE.";
+            this->_logger.LogError("ANSYOLOV12RTOD::OptimizeModel", buildFailure, __FILE__, __LINE__);
+            _modelLoadValid = false;
+            return false;
+        }
+        catch (const std::exception& ex) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::OptimizeModel", ex.what(), __FILE__, __LINE__);
+            optimizedModelFolder.clear();
+            return false;
+        }
+    }
+    /// Loads a model archive via the base class, applies the detector
+    /// defaults, and (optionally) loads the TensorRT engine.
+    ///
+    /// @param modelZipFilePath  Path of the model archive, forwarded to ANSODBase::LoadModel.
+    /// @param modelZipPassword  Archive password, forwarded to ANSODBase::LoadModel.
+    /// @return true on success; false when the base load fails, the engine
+    ///         cannot be loaded, or a std::exception is caught.
+    bool ANSYOLOV12RTOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
+        std::lock_guard<std::recursive_mutex> lock(_mutex);
+        try {
+            if (!ANSODBase::LoadModel(modelZipFilePath, modelZipPassword)) return false;
+
+            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
+            _modelConfig.modelType = ModelType::TENSORRT;
+            // Default input size; may be overridden by the configuration file below.
+            _modelConfig.inpHeight = 640;
+            _modelConfig.inpWidth = 640;
+            // Thresholds below 0.2 are replaced by 0.5.
+            if (_modelConfig.modelMNSThreshold < 0.2)
+                _modelConfig.modelMNSThreshold = 0.5;
+            if (_modelConfig.modelConfThreshold < 0.2)
+                _modelConfig.modelConfThreshold = 0.5;
+            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)  // 133 = COCO wholebody max
+                _modelConfig.numKPS = 17;
+            if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // not defined in the config
+            // Precision is forced to FP16 here; the configured precisionType is not consulted.
+            _fp16 = true;
+
+            TOP_K = 100;
+            SEG_CHANNELS = 32;      // For segmentation
+            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
+            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
+            SEGMENTATION_THRESHOLD = 0.5f;
+            SEG_H = 160;
+            SEG_W = 160;
+            NUM_KPS = _modelConfig.numKPS;
+            KPS_THRESHOLD = _modelConfig.kpsThreshold;
+
+            if (!m_trtEngine) {
+                // Batch sizes, device and input size ranges come from the model configuration.
+                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
+                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
+                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
+                m_options.maxInputHeight = _modelConfig.maxInputHeight;
+                m_options.minInputHeight = _modelConfig.minInputHeight;
+                m_options.optInputHeight = _modelConfig.optInputHeight;
+                m_options.maxInputWidth = _modelConfig.maxInputWidth;
+                m_options.minInputWidth = _modelConfig.minInputWidth;
+                m_options.optInputWidth = _modelConfig.optInputWidth;
+                m_options.engineFileDir = _modelFolder;
+                // Use FP16 or FP32 precision based on the flag set above
+                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
+                // Create the TensorRT inference engine
+                m_trtEngine = std::make_unique<Engine<float>>(m_options);
+            }
+
+            // Class names and input shape come from the configuration file when it exists.
+            if (FileExist(_modelConfigFile)) {
+                ModelType modelType;
+                std::vector<int> inputShape;
+                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
+                if (inputShape.size() == 2) {
+                    if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
+                    if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
+                }
+            }
+            else { // Old model archive layout: fixed file names inside the model folder
+                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
+                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
+                std::ifstream isValidFileName(_classFilePath);
+                if (!isValidFileName)
+                {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::LoadModel.  Load classes from string", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromString();
+                }
+                else {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::LoadModel.  Load classes from file", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromFile();
+                }
+            }
+
+            // Load the TensorRT engine file
+            if (this->_loadEngineOnCreation) {
+                auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
+                if (!succ) {
+                    const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
+                    this->_logger.LogError("ANSYOLOV12RTOD::LoadModel", errMsg, __FILE__, __LINE__);
+                    _modelLoadValid = false;
+                    return false;
+                }
+            }
+            _modelLoadValid = true;
+            _isInitialized = true;
+            return true;
+        }
+        catch (const std::exception& e) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::LoadModel", e.what(), __FILE__, __LINE__);
+            return false;
+        }
+    }
+    /// Loads a model from an already-extracted folder, applies the detector
+    /// defaults, fills the label map and (optionally) loads the TensorRT engine.
+    ///
+    /// @param licenseKey   Forwarded to ANSODBase::LoadModelFromFolder.
+    /// @param modelConfig  Configuration copied into this detector before defaults are applied.
+    /// @param modelName    Model base name without extension; "train_last" is used when empty.
+    /// @param className    Class-list file name, used only when no configuration file exists.
+    /// @param modelFolder  Forwarded to ANSODBase::LoadModelFromFolder.
+    /// @param labelMap     Out: comma-separated class names, or empty when no classes were loaded.
+    /// @return true on success; false when the base load fails, the engine
+    ///         cannot be loaded, or a std::exception is caught.
+    bool ANSYOLOV12RTOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName,std::string className, const std::string& modelFolder, std::string& labelMap) {
+        std::lock_guard<std::recursive_mutex> lock(_mutex);
+        try {
+            if (!ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap)) return false;
+
+            const std::string baseName = modelName.empty() ? std::string("train_last") : modelName;
+            const std::string modelFullName = baseName + ".onnx";
+
+            _modelConfig = modelConfig;
+            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
+            _modelConfig.modelType = ModelType::TENSORRT;
+            // Default input size; may be overridden by the configuration file below.
+            _modelConfig.inpHeight = 640;
+            _modelConfig.inpWidth = 640;
+            // Thresholds below 0.2 are replaced by 0.5.
+            if (_modelConfig.modelMNSThreshold < 0.2)
+                _modelConfig.modelMNSThreshold = 0.5;
+            if (_modelConfig.modelConfThreshold < 0.2)
+                _modelConfig.modelConfThreshold = 0.5;
+            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)  // 133 = COCO wholebody max
+                _modelConfig.numKPS = 17;
+            if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // not defined in the config
+            // Precision is forced to FP16 here; the configured precisionType is not consulted.
+            _fp16 = true;
+
+            TOP_K = 100;
+            SEG_CHANNELS = 32;      // For segmentation
+            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
+            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
+            SEGMENTATION_THRESHOLD = 0.5f;
+            SEG_H = 160;
+            SEG_W = 160;
+            NUM_KPS = _modelConfig.numKPS;
+            KPS_THRESHOLD = _modelConfig.kpsThreshold;
+
+            if (!m_trtEngine) {
+                // Batch sizes, device and input size ranges come from the model configuration.
+                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
+                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
+                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
+                m_options.maxInputHeight = _modelConfig.maxInputHeight;
+                m_options.minInputHeight = _modelConfig.minInputHeight;
+                m_options.optInputHeight = _modelConfig.optInputHeight;
+                m_options.maxInputWidth = _modelConfig.maxInputWidth;
+                m_options.minInputWidth = _modelConfig.minInputWidth;
+                m_options.optInputWidth = _modelConfig.optInputWidth;
+                m_options.engineFileDir = _modelFolder;
+                // Use FP16 or FP32 precision based on the flag set above
+                m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
+                // Create the TensorRT inference engine
+                m_trtEngine = std::make_unique<Engine<float>>(m_options);
+            }
+
+            // Class names and input shape come from the configuration file when it exists.
+            if (FileExist(_modelConfigFile)) {
+                ModelType modelType;
+                std::vector<int> inputShape;
+                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
+                if (inputShape.size() == 2) {
+                    if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
+                    if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
+                }
+            }
+            else { // Old layout: model and class files are addressed by the caller-supplied names
+                _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
+                _classFilePath = CreateFilePath(_modelFolder, className);
+                std::ifstream isValidFileName(_classFilePath);
+                if (!isValidFileName)
+                {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::LoadModelFromFolder.  Load classes from string", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromString();
+                }
+                else {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::LoadModelFromFolder.  Load classes from file", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromFile();
+                }
+            }
+
+            // Publish the label map to the caller
+            labelMap.clear();
+            if (!_classes.empty())
+                labelMap = VectorToCommaSeparatedString(_classes);
+
+            // Load the TensorRT engine file
+            if (this->_loadEngineOnCreation) {
+                auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
+                if (!succ) {
+                    const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
+                    this->_logger.LogError("ANSYOLOV12RTOD::LoadModelFromFolder", errMsg, __FILE__, __LINE__);
+                    _modelLoadValid = false;
+                    return false;
+                }
+            }
+            _modelLoadValid = true;
+            _isInitialized = true;
+            return true;
+        }
+        catch (const std::exception& e) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
+            return false;
+        }
+    }
+    /// Initializes the detector from a model archive and returns its label map.
+    ///
+    /// When a valid, initialized engine already exists on entry, the engine
+    /// is not loaded again; everything else is re-applied.
+    ///
+    /// @param licenseKey        Forwarded to ANSODBase::Initialize.
+    /// @param modelConfig       Configuration copied into this detector before defaults are applied.
+    /// @param modelZipFilePath  Forwarded to ANSODBase::Initialize.
+    /// @param modelZipPassword  Forwarded to ANSODBase::Initialize.
+    /// @param labelMap          Out: comma-separated class names, or empty when no classes were loaded.
+    /// @return true on success; false when the base initialization fails, the
+    ///         engine cannot be loaded, or a std::exception is caught.
+    bool ANSYOLOV12RTOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
+        std::lock_guard<std::recursive_mutex> guard(_mutex);
+        try {
+            // Snapshot the state before it is reset, so a repeated call can skip the engine load.
+            const bool engineAlreadyLoaded = (m_trtEngine != nullptr) && _isInitialized && _modelLoadValid;
+            _modelLoadValid = false;
+
+            const bool baseReady = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
+            if (!baseReady) {
+                return false;
+            }
+
+            // Detector-specific configuration
+            _modelConfig = modelConfig;
+            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
+            _modelConfig.modelType = ModelType::TENSORRT;
+            _modelConfig.inpHeight = 640;
+            _modelConfig.inpWidth = 640;
+            if (_modelConfig.modelMNSThreshold < 0.2) {
+                _modelConfig.modelMNSThreshold = 0.5;
+            }
+            if (_modelConfig.modelConfThreshold < 0.2) {
+                _modelConfig.modelConfThreshold = 0.5;
+            }
+            // 133 = COCO wholebody max
+            if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) {
+                _modelConfig.numKPS = 17;
+            }
+            // A zero keypoint threshold means "not defined"
+            if (_modelConfig.kpsThreshold == 0) {
+                _modelConfig.kpsThreshold = 0.5;
+            }
+            // Precision is forced to FP16; the configured precisionType is not consulted.
+            _fp16 = true;
+
+            // Post-processing parameters
+            TOP_K = 100;
+            PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
+            NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
+            SEGMENTATION_THRESHOLD = 0.5f;
+            SEG_H = 160;
+            SEG_W = 160;
+            NUM_KPS = _modelConfig.numKPS;
+            KPS_THRESHOLD = _modelConfig.kpsThreshold;
+            SEG_CHANNELS = 32;      // For segmentation
+
+            if (m_trtEngine == nullptr) {
+                // NOTE(review): LoadModel and LoadModelFromFolder also copy the
+                // min/opt/max input sizes into m_options; this path does not.
+                // Confirm whether that difference is intended.
+                m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
+                m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
+                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
+                m_options.engineFileDir = _modelFolder;
+                if (_fp16) {
+                    m_options.precision = Precision::FP16;
+                }
+                else {
+                    m_options.precision = Precision::FP32;
+                }
+                m_trtEngine = std::make_unique<Engine<float>>(m_options);
+            }
+
+            if (!FileExist(_modelConfigFile)) {
+                // Old archive layout: fixed file names inside the model folder
+                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
+                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
+                std::ifstream classFileProbe(_classFilePath);
+                if (classFileProbe) {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::Initialize.  Load classes from file", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromFile();
+                }
+                else {
+                    this->_logger.LogDebug("ANSYOLOV12RTOD::Initialize.  Load classes from string", _classFilePath, __FILE__, __LINE__);
+                    LoadClassesFromString();
+                }
+            }
+            else {
+                // Configuration file supplies the class names and, optionally, the input shape
+                ModelType parsedType;
+                std::vector<int> shape;
+                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, parsedType, shape);
+                if (shape.size() == 2) {
+                    if (shape[0] > 0) {
+                        _modelConfig.inpHeight = shape[0];
+                    }
+                    if (shape[1] > 0) {
+                        _modelConfig.inpWidth = shape[1];
+                    }
+                }
+            }
+
+            // Publish the label map to the caller
+            labelMap.clear();
+            if (!_classes.empty()) {
+                labelMap = VectorToCommaSeparatedString(_classes);
+            }
+
+            // Load the engine unless one was already usable on entry
+            const bool mustLoadEngine = this->_loadEngineOnCreation && !engineAlreadyLoaded;
+            if (mustLoadEngine) {
+                const auto loaded = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
+                if (!loaded) {
+                    const std::string loadFailure = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
+                    this->_logger.LogError("ANSYOLOV12RTOD::Initialize", loadFailure, __FILE__, __LINE__);
+                    _modelLoadValid = false;
+                    return false;
+                }
+            }
+
+            _modelLoadValid = true;
+            _isInitialized = true;
+            return true;
+        }
+        catch (std::exception& ex) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::Initialize", ex.what(), __FILE__, __LINE__);
+            return false;
+        }
+    }
+    /// Convenience overload: runs detection with the default camera id "TensorRT12Cam".
+    std::vector<Object> ANSYOLOV12RTOD::RunInference(const cv::Mat& inputImgBGR) {
+        const std::string defaultCameraId = "TensorRT12Cam";
+        return RunInference(inputImgBGR, defaultCameraId);
+    }
+    /// Validates detector state and input, then runs detection on one frame.
+    ///
+    /// The instance mutex is held for the whole call.
+    ///
+    /// @param inputImgBGR  Frame to analyse; frames that are empty or smaller
+    ///                     than 10 pixels in either dimension are rejected silently.
+    /// @param camera_id    Identifier forwarded to DetectObjects.
+    /// @return Detected objects, or an empty vector when a precondition fails
+    ///         or a std::exception is caught.
+    std::vector<Object> ANSYOLOV12RTOD::RunInference(const cv::Mat& inputImgBGR, const std::string& camera_id) {
+        std::lock_guard<std::recursive_mutex> guard(_mutex);
+
+        // --- State preconditions (each failure is logged) ---
+        if (!_modelLoadValid) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::RunInference",
+                "Cannot load the TensorRT model. Please check if it is exist", __FILE__, __LINE__);
+            return {};
+        }
+        if (!_licenseValid) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::RunInference",
+                "Runtime license is not valid or expired. Please contact ANSCENTER", __FILE__, __LINE__);
+            return {};
+        }
+        if (!_isInitialized) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::RunInference",
+                "Model is not initialized", __FILE__, __LINE__);
+            return {};
+        }
+
+        // --- Input precondition (not logged) ---
+        const bool frameUnusable = inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10;
+        if (frameUnusable) {
+            return {};
+        }
+
+        try {
+            return DetectObjects(inputImgBGR, camera_id);
+        }
+        catch (const std::exception& ex) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::RunInference", ex.what(), __FILE__, __LINE__);
+            return {};
+        }
+    }
+    
+    /// Releases detector resources through Destroy(); a std::exception
+    /// raised on the way is logged instead of leaving the destructor.
+    ANSYOLOV12RTOD::~ANSYOLOV12RTOD() {
+        try {
+            Destroy();
+        }
+        catch (const std::exception& ex) {
+            this->_logger.LogError("ANSYOLOV12RTOD::~ANSYOLOV12RTOD()", ex.what(), __FILE__, __LINE__);
+        }
+    }
+    /// Releases the TensorRT engine and the NV12 preprocessing helper.
+    ///
+    /// The model is marked as not loaded first, so that a later RunInference
+    /// call is rejected by its state check instead of dereferencing the
+    /// released engine.
+    ///
+    /// @return true when cleanup completed, false when a std::exception was caught.
+    bool ANSYOLOV12RTOD::Destroy() {
+        try {
+            _modelLoadValid = false;
+            m_trtEngine.reset();
+            m_nv12Helper.destroy();
+            return true;
+        }
+        catch (const std::exception& e) {
+            this->_logger.LogError("ANSYOLOV12RTOD::Destroy", e.what(), __FILE__, __LINE__);
+            return false;
+        }
+    }
+
+    // private
+    /// Runs the full single-image pipeline: preprocess, inference, postprocess,
+    /// optional coordinate rescaling and optional tracking/stabilization.
+    ///
+    /// Locking: the instance mutex is taken for preprocessing and again for
+    /// postprocessing; the inference call itself is made without this function
+    /// holding it. Note that RunInference already holds the same recursive
+    /// mutex for the whole call when it is the caller.
+    ///
+    /// @param inputImage  Frame to analyse.
+    /// @param camera_id   Identifier attached to results and passed to the tracker.
+    /// @return Detected objects; empty when the engine is unavailable,
+    ///         preprocessing yields nothing, or inference fails.
+    std::vector<Object> ANSYOLOV12RTOD::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) {
+        // Phase 1: Preprocess under brief lock — try NV12 fast path first
+        ImageMetadata meta;
+        std::vector<std::vector<cv::cuda::GpuMat>> input;
+        float bgrFullResScaleX = 1.0f, bgrFullResScaleY = 1.0f;
+        {
+            std::lock_guard<std::recursive_mutex> lock(_mutex);
+
+            // The engine may have been released (Destroy) or never had a network loaded.
+            if (!m_trtEngine) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::DetectObjects", "TensorRT engine is not available", __FILE__, __LINE__);
+                return {};
+            }
+            const int inferenceGpu = m_trtEngine->getPreferredDeviceIndex();
+            const auto& inputDims = m_trtEngine->getInputDims();
+            if (inputDims.empty()) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::DetectObjects", "TensorRT engine reports no input dimensions", __FILE__, __LINE__);
+                return {};
+            }
+            const int inputW = inputDims[0].d[2];
+            const int inputH = inputDims[0].d[1];
+
+            auto nv12 = m_nv12Helper.tryNV12(inputImage, inferenceGpu, inputW, inputH,
+                                              NV12PreprocessHelper::defaultYOLOLauncher(),
+                                              _logger, "ANSYOLOV12RTOD");
+            if (nv12.succeeded) {
+                // The helper already produced the network input on the GPU
+                meta.imgWidth  = nv12.metaWidth;
+                meta.imgHeight = nv12.metaHeight;
+                meta.ratio     = nv12.ratio;
+                input = {{ std::move(nv12.gpuRGB) }};
+            }
+            else if (nv12.useBgrFullRes) {
+                // The helper supplied a full-resolution BGR image; results are scaled back below
+                input = Preprocess(nv12.bgrFullResImg, meta);
+                bgrFullResScaleX = nv12.bgrFullResScaleX;
+                bgrFullResScaleY = nv12.bgrFullResScaleY;
+            }
+
+            // Fall back to the regular path on the caller's image
+            if (input.empty()) {
+                input = Preprocess(inputImage, meta);
+            }
+            m_nv12Helper.tickInference();
+        }
+        if (input.empty()) return {};
+
+        // Phase 2: Inference - not locked by this function
+        std::vector<std::vector<std::vector<float>>> featureVectors;
+        auto succ = m_trtEngine->runInference(input, featureVectors);
+        if (!succ) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::DetectObjects", "Error running inference", __FILE__, __LINE__);
+            return {};
+        }
+
+        // Phase 3: Postprocess under lock
+        std::lock_guard<std::recursive_mutex> lock(_mutex);
+        std::vector<float> featureVector;
+        Engine<float>::transformOutput(featureVectors, featureVector);
+        auto ret = Postprocess(featureVector, camera_id, meta);
+
+        // Rescale coords from full-res to display-res (BGR full-res path)
+        if (bgrFullResScaleX != 1.0f || bgrFullResScaleY != 1.0f) {
+            for (auto& obj : ret) {
+                obj.box.x      = static_cast<int>(obj.box.x      * bgrFullResScaleX);
+                obj.box.y      = static_cast<int>(obj.box.y      * bgrFullResScaleY);
+                obj.box.width  = static_cast<int>(obj.box.width  * bgrFullResScaleX);
+                obj.box.height = static_cast<int>(obj.box.height * bgrFullResScaleY);
+                for (auto& pt : obj.polygon) {
+                    pt.x *= bgrFullResScaleX;
+                    pt.y *= bgrFullResScaleY;
+                }
+                // Keypoints are scaled in strides of three: x, y, then a third value left untouched
+                for (size_t k = 0; k + 2 < obj.kps.size(); k += 3) {
+                    obj.kps[k]     *= bgrFullResScaleX;
+                    obj.kps[k + 1] *= bgrFullResScaleY;
+                }
+            }
+        }
+
+        if (_trackerEnabled) {
+            ret = ApplyTracking(ret, camera_id);
+            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
+        }
+        return ret;
+    }
+    /// Uploads an image to the GPU, converts it to RGB and resizes it to the
+    /// engine's input size (aspect ratio kept, padded right/bottom).
+    ///
+    /// @param inputImage  Source image; single-channel input is expanded to BGR first.
+    /// @param outMeta     Out: source width/height and the scale factor that maps
+    ///                    network coordinates back to source coordinates.
+    /// @return A one-image batch holding the prepared GpuMat, or an empty
+    ///         vector when the license is invalid, the engine is unavailable,
+    ///         the image is empty, or a std::exception is caught.
+    std::vector<std::vector<cv::cuda::GpuMat>> ANSYOLOV12RTOD::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {
+
+        try {
+            if (!_licenseValid) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Preprocess", "Invalid license", __FILE__, __LINE__);
+                return {};
+            }
+            if (!m_trtEngine) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Preprocess", "TensorRT engine is not available", __FILE__, __LINE__);
+                return {};
+            }
+
+            const auto& inputDims = m_trtEngine->getInputDims();
+            if (inputDims.empty()) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Preprocess", "TensorRT engine reports no input dimensions", __FILE__, __LINE__);
+                return {};
+            }
+
+            // Reject empty images before spending any GPU work on them
+            if (inputImage.empty()) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Preprocess",
+                    "Image height or width is zero (Width: " + std::to_string(inputImage.cols) +
+                    ", Height: " + std::to_string(inputImage.rows) + ")",
+                    __FILE__, __LINE__);
+                return {};
+            }
+
+            const auto targetH = inputDims[0].d[1];
+            const auto targetW = inputDims[0].d[2];
+
+            cv::cuda::Stream stream;
+            cv::cuda::GpuMat img;
+
+            // Upload to GPU
+            if (inputImage.channels() == 1) {
+                cv::Mat img3Channel;
+                cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
+                img.upload(img3Channel, stream);
+            }
+            else {
+                img.upload(inputImage, stream);
+            }
+
+            // Convert BGR to RGB
+            cv::cuda::GpuMat imgRGB;
+            cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
+            stream.waitForCompletion();
+
+            outMeta.imgHeight = imgRGB.rows;
+            outMeta.imgWidth = imgRGB.cols;
+            // Inverse of the letterbox scale: multiplies network coordinates back to source pixels
+            outMeta.ratio = 1.f / std::min(targetW / static_cast<float>(imgRGB.cols),
+                targetH / static_cast<float>(imgRGB.rows));
+
+            cv::cuda::GpuMat resized = imgRGB;
+
+            // Resize to the model's expected input size while maintaining aspect ratio with padding
+            if (resized.rows != targetH || resized.cols != targetW) {
+                resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, targetH, targetW);
+            }
+
+            // Convert to format expected by our inference engine
+            std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
+            std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
+            return inputs;
+        }
+        catch (const std::exception& e) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::Preprocess", e.what(), __FILE__, __LINE__);
+            return {};
+        }
+    }
+    /// Decodes the raw network output into detections and applies NMS.
+    ///
+    /// The output is read as a [channels x anchors] matrix whose first four
+    /// channels are the box centre/size and whose remaining channels are
+    /// per-class scores.
+    ///
+    /// @param featureVector  Flattened network output; must hold at least channels * anchors values.
+    /// @param camera_id      Identifier copied into every returned object.
+    /// @param meta           Source image size and scale factor from preprocessing.
+    /// @return Detections above the configured score threshold that survive NMS;
+    ///         empty when the output is unusable or a std::exception is caught.
+    std::vector<Object> ANSYOLOV12RTOD::Postprocess(std::vector<float>& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
+        try {
+            if (!m_trtEngine) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Postprocess", "TensorRT engine is not available", __FILE__, __LINE__);
+                return {};
+            }
+            const auto& outputDims = m_trtEngine->getOutputDims();
+            if (outputDims.empty()) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Postprocess", "TensorRT engine reports no output dimensions", __FILE__, __LINE__);
+                return {};
+            }
+            const int numChannels = static_cast<int>(outputDims[0].d[1]); // similar to detection_attribute_size
+            const int numAnchors = static_cast<int>(outputDims[0].d[2]);  // similar to num_detections
+            if (numChannels <= 4 || numAnchors <= 0) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Postprocess",
+                    "Unexpected output shape (channels: " + std::to_string(numChannels) +
+                    ", anchors: " + std::to_string(numAnchors) + ")", __FILE__, __LINE__);
+                return {};
+            }
+
+            // The buffer is wrapped without copying, so it must be large enough.
+            const size_t expectedValues = static_cast<size_t>(numChannels) * static_cast<size_t>(numAnchors);
+            if (featureVector.size() < expectedValues) {
+                this->_logger.LogFatal("ANSYOLOV12RTOD::Postprocess",
+                    "Output buffer holds " + std::to_string(featureVector.size()) +
+                    " values, expected " + std::to_string(expectedValues), __FILE__, __LINE__);
+                return {};
+            }
+
+            // Never scan more score channels than the output provides; with no
+            // class list loaded, every score channel is considered.
+            const int classNameSize = static_cast<int>(_classes.size());
+            const int scoreChannels = numChannels - 4;
+            const int numClasses = (classNameSize > 0) ? std::min(classNameSize, scoreChannels) : scoreChannels;
+
+            std::vector<cv::Rect> bboxes;
+            std::vector<float> scores;
+            std::vector<int> labels;
+            std::vector<int> indices;
+
+            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
+            output = output.t(); // one row per anchor
+
+            // Get all the YOLO proposals
+            for (int i = 0; i < numAnchors; i++) {
+                const float* rowPtr = output.row(i).ptr<float>();
+                const float* scoresPtr = rowPtr + 4;
+                const float* maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
+                const float score = *maxSPtr;
+                if (score > this->_modelConfig.detectionScoreThreshold) {
+                    const float x = rowPtr[0];
+                    const float y = rowPtr[1];
+                    const float w = rowPtr[2];
+                    const float h = rowPtr[3];
+
+                    // Centre/size to corners, scaled to source pixels and clamped to the image
+                    const float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
+                    const float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
+                    const float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
+                    const float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
+
+                    // Corners are already inside the image; only a negative extent needs guarding
+                    const cv::Rect_<float> bbox(x0, y0, std::max(x1 - x0, 0.f), std::max(y1 - y0, 0.f));
+                    bboxes.push_back(bbox);
+                    labels.push_back(static_cast<int>(maxSPtr - scoresPtr));
+                    scores.push_back(score);
+                }
+            }
+
+            // Run NMS
+            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
+            std::vector<Object> objects;
+            objects.reserve(indices.size());
+
+            // Build the result objects from the surviving proposals
+            for (const int chosenIdx : indices) {
+                if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
+                    Object obj{};
+                    obj.confidence = scores[chosenIdx];
+                    obj.classId = labels[chosenIdx];
+                    obj.box = bboxes[chosenIdx];
+                    obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
+                    if (!_classes.empty()) {
+                        if (obj.classId < classNameSize) {
+                            obj.className = _classes[obj.classId];
+                        }
+                        else {
+                            obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
+                        }
+                    }
+                    else {
+                        obj.className = "Unknown"; // Fallback if _classes is empty
+                    }
+                    obj.cameraId = camera_id;
+                    objects.push_back(obj);
+                }
+            }
+            return objects;
+        }
+        catch (const std::exception& e) {
+            this->_logger.LogFatal("ANSYOLOV12RTOD::Postprocess", e.what(), __FILE__, __LINE__);
+            return {};
+        }
+
+    }
+
+    
+    // Runs detection on a batch of host images and returns one detection list per
+    // input image, in input order.
+    //
+    // inputImages : images to process; an empty vector is rejected.
+    // camera_id   : identifier copied into every produced Object.
+    //
+    // Returns an empty outer vector when validation, preprocessing or inference
+    // fails. Otherwise the outer vector has exactly inputImages.size() entries;
+    // an image for which the engine produced no output gets an empty list.
+    // Batches larger than m_options.maxBatchSize are split into sequential chunks.
+    std::vector<std::vector<Object>> ANSYOLOV12RTOD::DetectObjectsBatch(const std::vector<cv::Mat>& inputImages, const std::string& camera_id) {
+        // Validate under brief lock
+        {
+            std::lock_guard<std::recursive_mutex> lock(_mutex);
+            if (inputImages.empty()) {
+                _logger.LogFatal("ANSYOLOV12RTOD::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);
+                return {};
+            }
+            // The engine is dereferenced below; a null engine would be undefined
+            // behaviour rather than a catchable exception.
+            if (!m_trtEngine) {
+                _logger.LogError("ANSYOLOV12RTOD::DetectObjectsBatch", "Engine not initialized", __FILE__, __LINE__);
+                return {};
+            }
+        }
+
+        const size_t numImages = inputImages.size();
+
+        // Auto-split if batch exceeds engine capacity
+        const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;
+        if (static_cast<int>(numImages) > maxBatch) {
+            std::vector<std::vector<Object>> allResults;
+            allResults.reserve(numImages);
+            // Process chunks sequentially to avoid GPU contention on the same engine
+            for (size_t start = 0; start < numImages; start += static_cast<size_t>(maxBatch)) {
+                const size_t end = std::min(start + static_cast<size_t>(maxBatch), numImages);
+                std::vector<cv::Mat> chunk(inputImages.begin() + start, inputImages.begin() + end);
+                auto chunkResults = DetectObjectsBatch(chunk, camera_id);
+                if (chunkResults.size() != chunk.size()) {
+                    _logger.LogError("ANSYOLOV12RTOD::DetectObjectsBatch",
+                        "Chunk returned " + std::to_string(chunkResults.size()) +
+                        " results, expected " + std::to_string(chunk.size()) +
+                        ". Padding with empty results.", __FILE__, __LINE__);
+                    // Pad (or trim) so that result i always belongs to image i.
+                    chunkResults.resize(chunk.size());
+                }
+                for (auto& r : chunkResults) allResults.push_back(std::move(r));
+            }
+            return allResults;
+        }
+
+        _logger.LogDebug("ANSYOLOV12RTOD::DetectObjectsBatch",
+            "Processing batch of " + std::to_string(numImages) + " images",
+            __FILE__, __LINE__);
+
+        // Phase 1: Preprocess under brief lock
+        BatchMetadata metadata;
+        std::vector<std::vector<cv::cuda::GpuMat>> inputs;
+        {
+            std::lock_guard<std::recursive_mutex> lock(_mutex);
+            inputs = PreprocessBatch(inputImages, metadata);
+        }
+        if (inputs.empty() || inputs[0].empty()) {
+            _logger.LogFatal("ANSYOLOV12RTOD::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);
+            return {};
+        }
+
+        // Phase 2: Inference - mutex released; pool dispatches to idle GPU slot
+        std::vector<std::vector<std::vector<float>>> featureVectors;
+        auto succ = m_trtEngine->runInference(inputs, featureVectors);
+        if (!succ) {
+            _logger.LogFatal("ANSYOLOV12RTOD::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);
+            return {};
+        }
+
+        if (featureVectors.size() != numImages) {
+            _logger.LogError("ANSYOLOV12RTOD::DetectObjectsBatch",
+                "Inference returned " + std::to_string(featureVectors.size()) +
+                " outputs for " + std::to_string(numImages) +
+                " images. Missing results are left empty.", __FILE__, __LINE__);
+        }
+
+        // Phase 3: Parallel postprocessing - each image is independent.
+        // Only indices that exist in both the engine output and the metadata are
+        // processed; the result vector is always sized to the input batch.
+        const size_t numBatch = std::min(featureVectors.size(), numImages);
+        std::vector<std::vector<Object>> batchDetections(numImages);
+        std::vector<std::future<std::vector<Object>>> postFutures(numBatch);
+
+        for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {
+            auto& batchOutput = featureVectors[batchIdx];
+            if (batchOutput.empty() || batchOutput[0].empty()) {
+                // Nothing to decode: skip instead of handing an empty buffer to
+                // PostprocessBatch, which wraps it in a fixed-size matrix.
+                _logger.LogError("ANSYOLOV12RTOD::DetectObjectsBatch",
+                    "No output tensor for image " + std::to_string(batchIdx), __FILE__, __LINE__);
+                continue;
+            }
+            // featureVectors is not used after this loop, so the tensor is moved
+            // into the task rather than copied.
+            postFutures[batchIdx] = std::async(std::launch::async,
+                [this, fv = std::move(batchOutput[0]), cid = camera_id,
+                 idx = batchIdx, &metadata]() mutable {
+                    return PostprocessBatch(fv, cid, idx, metadata);
+                });
+        }
+        // Gather results in original order; metadata stays alive until all futures joined
+        for (size_t i = 0; i < numBatch; ++i) {
+            if (postFutures[i].valid())
+                batchDetections[i] = postFutures[i].get();
+        }
+
+        _logger.LogDebug("ANSYOLOV12RTOD::DetectObjectsBatch",
+            "Batch processing complete. Images: " + std::to_string(numImages),
+            __FILE__, __LINE__);
+        return batchDetections;
+    }
+
+    // Prepares a batch of host images for the TensorRT engine.
+    //
+    // Each image is uploaded to the GPU (grayscale is expanded to BGR first),
+    // converted BGR -> RGB and, when its size differs from the engine input,
+    // resized with Engine<float>::resizeKeepAspectRatioPadRightBottom.
+    //
+    // inputImages : host images; any empty image aborts the whole batch.
+    // outMetadata : receives, per image, the original width/height and the scale
+    //               factor (1 / min(inputW/cols, inputH/rows)).
+    //
+    // Returns a single-element outer vector holding the processed batch, or an
+    // empty vector on any failure (invalid license, missing engine, bad input,
+    // or an exception derived from std::exception).
+    std::vector<std::vector<cv::cuda::GpuMat>> ANSYOLOV12RTOD::PreprocessBatch(const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata) {
+
+        try {
+            if (!_licenseValid) {
+                _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch", "Invalid license", __FILE__, __LINE__);
+                return {};
+            }
+
+            // Dereferencing a null engine is undefined behaviour and would not be
+            // intercepted by the catch block below.
+            if (!m_trtEngine) {
+                _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch", "Engine not initialized", __FILE__, __LINE__);
+                return {};
+            }
+
+            const auto& inputDims = m_trtEngine->getInputDims();
+            const int inputH = inputDims[0].d[1];
+            const int inputW = inputDims[0].d[2];
+
+            // A non-positive input size would make the scale factor infinite or
+            // negative and the resize target invalid.
+            if (inputH <= 0 || inputW <= 0) {
+                _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch",
+                    "Engine reports invalid input dimensions (Width: " +
+                    std::to_string(inputW) + ", Height: " + std::to_string(inputH) + ")",
+                    __FILE__, __LINE__);
+                return {};
+            }
+
+            const size_t numImages = inputImages.size();
+
+            // Store original image dimensions for each image in batch
+            outMetadata.imgHeights.resize(numImages);
+            outMetadata.imgWidths.resize(numImages);
+            outMetadata.ratios.resize(numImages);
+
+            std::vector<cv::cuda::GpuMat> batchProcessed;
+            batchProcessed.reserve(numImages);
+
+            cv::cuda::Stream stream;
+
+            // Process each image
+            for (size_t i = 0; i < numImages; ++i) {
+                const auto& inputImage = inputImages[i];
+
+                if (inputImage.empty()) {
+                    _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch",
+                        "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
+                    return {};
+                }
+
+                // Upload to GPU
+                cv::cuda::GpuMat img;
+
+                // Convert grayscale to BGR if needed
+                if (inputImage.channels() == 1) {
+                    cv::Mat img3Channel;
+                    cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
+                    img.upload(img3Channel, stream);
+                }
+                else {
+                    img.upload(inputImage, stream);
+                }
+
+                // Convert BGR to RGB
+                cv::cuda::GpuMat imgRGB;
+                cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
+
+                // Store original dimensions
+                outMetadata.imgHeights[i] = imgRGB.rows;
+                outMetadata.imgWidths[i] = imgRGB.cols;
+
+                if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
+                    _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch",
+                        "Image " + std::to_string(i) + " has invalid dimensions (Width: " +
+                        std::to_string(outMetadata.imgWidths[i]) + ", Height: " +
+                        std::to_string(outMetadata.imgHeights[i]) + ")",
+                        __FILE__, __LINE__);
+                    return {};
+                }
+
+                // Calculate ratio for this image
+                outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
+                    inputH / static_cast<float>(imgRGB.rows));
+
+                // Resize with padding (skipped when the image already matches the input size)
+                cv::cuda::GpuMat resized = imgRGB;
+                if (resized.rows != inputH || resized.cols != inputW) {
+                    resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
+                }
+
+                batchProcessed.push_back(std::move(resized));
+            }
+
+            // Make sure every queued upload/conversion has finished before the
+            // batch is handed to the caller.
+            stream.waitForCompletion();
+
+            // Return as required format
+            std::vector<std::vector<cv::cuda::GpuMat>> inputs;
+            inputs.push_back(std::move(batchProcessed));
+
+            return inputs;
+        }
+        catch (const std::exception& e) {
+            _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch", e.what(), __FILE__, __LINE__);
+            return {};
+        }
+    }
+
+    // Decodes the raw output tensor of one image in a batch into detections.
+    //
+    // featureVector : flat engine output, read as a numChannels x numAnchors
+    //                 float matrix (4 box values followed by per-class scores).
+    // camera_id     : identifier copied into every produced Object.
+    // batchIdx      : index of the image inside the batch; selects the entry of
+    //                 metadata to use.
+    // metadata      : per-image scale factor and original size recorded during
+    //                 preprocessing.
+    //
+    // Returns the detections that survive score filtering and batched NMS, with
+    // boxes scaled by the per-image ratio and clamped to the original image.
+    // Returns an empty vector when the inputs are inconsistent or an exception
+    // derived from std::exception is raised.
+    std::vector<Object> ANSYOLOV12RTOD::PostprocessBatch(std::vector<float>& featureVector,
+        const std::string& camera_id,
+        size_t batchIdx,
+        const BatchMetadata& metadata) {
+
+        try {
+            if (!m_trtEngine) {
+                _logger.LogError("ANSYOLOV12RTOD::PostprocessBatch", "Engine not initialized", __FILE__, __LINE__);
+                return {};
+            }
+
+            if (batchIdx >= metadata.ratios.size() ||
+                batchIdx >= metadata.imgWidths.size() ||
+                batchIdx >= metadata.imgHeights.size()) {
+                _logger.LogError("ANSYOLOV12RTOD::PostprocessBatch",
+                    "Batch index " + std::to_string(batchIdx) + " has no preprocessing metadata",
+                    __FILE__, __LINE__);
+                return {};
+            }
+
+            const auto& outputDims = m_trtEngine->getOutputDims();
+            const int numChannels = static_cast<int>(outputDims[0].d[1]);  // detection_attribute_size
+            const int numAnchors = static_cast<int>(outputDims[0].d[2]);   // num_detections
+
+            // Each anchor needs 4 box values plus at least one class score.
+            if (numChannels <= 4 || numAnchors <= 0) {
+                _logger.LogError("ANSYOLOV12RTOD::PostprocessBatch",
+                    "Unexpected output dimensions (channels: " + std::to_string(numChannels) +
+                    ", anchors: " + std::to_string(numAnchors) + ")", __FILE__, __LINE__);
+                return {};
+            }
+
+            // The buffer is wrapped without copying, so it must really hold the
+            // whole matrix; otherwise the transpose reads out of bounds.
+            const size_t expectedSize = static_cast<size_t>(numChannels) * static_cast<size_t>(numAnchors);
+            if (featureVector.size() < expectedSize) {
+                _logger.LogError("ANSYOLOV12RTOD::PostprocessBatch",
+                    "Feature vector has " + std::to_string(featureVector.size()) +
+                    " values, expected " + std::to_string(expectedSize), __FILE__, __LINE__);
+                return {};
+            }
+
+            // Never scan more scores than the tensor provides per anchor. When no
+            // class names are configured, fall back to the model's own class
+            // count so the arg-max is still meaningful (names become "Unknown").
+            const int modelClasses = numChannels - 4;
+            const int numClasses = _classes.empty()
+                ? modelClasses
+                : std::min(static_cast<int>(_classes.size()), modelClasses);
+
+            // Get the ratio and dimensions for this specific image in the batch
+            const float ratio = metadata.ratios[batchIdx];
+            const int imgWidth = metadata.imgWidths[batchIdx];
+            const int imgHeight = metadata.imgHeights[batchIdx];
+            const float maxX = static_cast<float>(imgWidth);
+            const float maxY = static_cast<float>(imgHeight);
+
+            std::vector<cv::Rect> bboxes;
+            std::vector<float> scores;
+            std::vector<int> labels;
+            std::vector<int> indices;
+
+            // Transpose so that every row is one anchor: [x, y, w, h, scores...]
+            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
+            output = output.t();
+
+            // Get all the YOLO proposals
+            for (int i = 0; i < numAnchors; i++) {
+                const float* rowPtr = output.ptr<float>(i);
+                const float* scoresPtr = rowPtr + 4;
+                const float* maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
+                const float score = *maxSPtr;
+
+                if (score > _modelConfig.detectionScoreThreshold) {
+                    const float x = rowPtr[0];
+                    const float y = rowPtr[1];
+                    const float w = rowPtr[2];
+                    const float h = rowPtr[3];
+
+                    // Use batch-specific ratio and dimensions
+                    const float x0 = std::clamp((x - 0.5f * w) * ratio, 0.f, maxX);
+                    const float y0 = std::clamp((y - 0.5f * h) * ratio, 0.f, maxY);
+                    const float x1 = std::clamp((x + 0.5f * w) * ratio, 0.f, maxX);
+                    const float y1 = std::clamp((y + 0.5f * h) * ratio, 0.f, maxY);
+
+                    cv::Rect_<float> bbox;
+                    bbox.x = x0;
+                    bbox.y = y0;
+                    // Clamp the extent as well: a negative predicted size would
+                    // otherwise yield a negative width/height.
+                    bbox.width = std::clamp(x1 - x0, 0.f, maxX);
+                    bbox.height = std::clamp(y1 - y0, 0.f, maxY);
+
+                    bboxes.push_back(bbox);
+                    labels.push_back(static_cast<int>(maxSPtr - scoresPtr));
+                    scores.push_back(score);
+                }
+            }
+
+            // Run NMS
+            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
+
+            const int classNameSize = static_cast<int>(_classes.size());
+            std::vector<Object> objects;
+            objects.reserve(indices.size());
+
+            // Build the final objects from the indices kept by NMS
+            for (const int chosenIdx : indices) {
+                if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
+                    Object obj{};
+                    obj.confidence = scores[chosenIdx];
+                    obj.classId = labels[chosenIdx];
+                    obj.box = bboxes[chosenIdx];
+                    obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, imgWidth, imgHeight);
+
+                    if (!_classes.empty()) {
+                        if (obj.classId < classNameSize) {
+                            obj.className = _classes[obj.classId];
+                        }
+                        else {
+                            obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
+                        }
+                    }
+                    else {
+                        obj.className = "Unknown"; // Fallback if _classes is empty
+                    }
+
+                    obj.cameraId = camera_id;
+                    objects.push_back(std::move(obj));
+                }
+            }
+
+            return objects;
+        }
+        catch (const std::exception& e) {
+            _logger.LogFatal("ANSYOLOV12RTOD::PostprocessBatch", e.what(), __FILE__, __LINE__);
+            return {};
+        }
+    }
+
+    // Public batch entry point: checks the object's state flags, then forwards to
+    // DetectObjectsBatch.
+    //
+    // inputs    : images to run detection on.
+    // camera_id : identifier forwarded to DetectObjectsBatch.
+    //
+    // Returns an empty vector when the model is not loaded, the license is
+    // invalid, the engine is not initialized, the input is empty, or
+    // DetectObjectsBatch throws an exception derived from std::exception.
+    std::vector<std::vector<Object>> ANSYOLOV12RTOD::RunInferencesBatch(
+        const std::vector<cv::Mat>& inputs, const std::string& camera_id)
+    {
+        {
+            // State flags are read (and any rejection logged) while holding the mutex.
+            std::lock_guard<std::recursive_mutex> guard(_mutex);
+
+            // First failing precondition wins; order matches the check order.
+            const char* rejection = nullptr;
+            if (!_modelLoadValid) {
+                rejection = "Model not loaded";
+            }
+            else if (!_licenseValid) {
+                rejection = "Invalid license";
+            }
+            else if (!_isInitialized) {
+                rejection = "Engine not initialized";
+            }
+
+            if (rejection != nullptr) {
+                _logger.LogError("ANSYOLOV12RTOD::RunInferencesBatch", rejection, __FILE__, __LINE__);
+                return {};
+            }
+
+            // An empty batch is not an error: return silently.
+            if (inputs.empty()) {
+                return {};
+            }
+        }
+
+        // The lock is released here; DetectObjectsBatch does its own locking.
+        try {
+            return DetectObjectsBatch(inputs, camera_id);
+        }
+        catch (const std::exception& ex) {
+            _logger.LogFatal("ANSYOLOV12RTOD::RunInferencesBatch", ex.what(), __FILE__, __LINE__);
+        }
+        return {};
+    }
+}