#include "ANSTENSORRTOD.h"
#include "Utility.h"
// NOTE(review): the two includes below lost their <...> targets during extraction
// (likely <fstream> and <mutex>) — restore from version control.
#include
#include

namespace ANSCENTER {

// Builds (or rebuilds) the TensorRT engine for the raw ONNX model, optionally in FP16.
// On success, optimizedModelFolder is set to the parent folder of the raw model file.
// Returns false if the base-class step fails, the raw model file is missing, or the build fails.
// NOTE(review): template arguments were stripped from this extract (std::lock_guard,
// std::make_unique, std::vector, ...) — the statements below reflect the original token stream.
bool TENSORRTOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    std::lock_guard lock(_mutex);
    if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) { return false; }
    if (!FileExist(_modelFilePath)) {
        this->_logger.LogFatal("TENSORRTOD::OptimizeModel", "Raw model file path does not exist", __FILE__, __LINE__);
        return false;
    }
    try {
        _fp16 = fp16;
        optimizedModelFolder = GetParentFolder(_modelFilePath);
        // Check if the engine already exists to avoid reinitializing
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = optimizedModelFolder;
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // Build the TensorRT engine
        auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
        if (!succ) {
            const std::string errMsg = "Error: Unable to build the TensorRT engine. "
                "Try increasing TensorRT log severity to kVERBOSE.";
            this->_logger.LogError("TENSORRTOD::OptimizeModel", errMsg, __FILE__, __LINE__);
            _modelLoadValid = false;
            return false;
        }
        // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
        m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
        m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        _modelLoadValid = true;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::OptimizeModel", e.what(), __FILE__, __LINE__);
        // Clear the out-parameter so callers do not act on a half-populated folder path.
        optimizedModelFolder.clear();
        return false;
    }
}

// Loads a model from a password-protected zip. Forces YOLO-style defaults
// (640x640 input, TensorRT model type) and clamps suspicious thresholds.
bool TENSORRTOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _isFixedBatch = false;
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Threshold sanity floors: anything below 0.2 is treated as unset.
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        // NOTE(review): float equality test; LoadModelFromFolder uses `<= 0` for the same
        // default — consider unifying.
        if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation (NOTE(review): duplicate of the assignment above)
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. "
                    + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Loads a model directly from an unpacked folder (continues past this extract's boundary).
bool TENSORRTOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _isFixedBatch = false;
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        // NOTE(review): assigns FP32 although _fp16 is forced true below — confirm intended default.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        _modelConfig.inpWidth = 640;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.kpsThreshold <= 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation
std::string _modelName = modelName;
        if (_modelName.empty()) { _modelName = "train_last"; }
        std::string modelFullName = _modelName + ".onnx";
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
            _classFilePath = CreateFilePath(_modelFolder, className);
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Full initialization from a model zip: delegates to the base class, then applies the
// same YOLO defaults as LoadModel. If a valid engine is already loaded, the (expensive)
// buildLoadNetwork step is skipped via engineAlreadyLoaded.
bool TENSORRTOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        // Remember whether a usable engine exists before we reset the valid flag below.
        const bool engineAlreadyLoaded = _modelLoadValid && _isInitialized && m_trtEngine != nullptr;
        _modelLoadValid = false;
        _isFixedBatch = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Parsing for YOLO only here
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        // NOTE(review): assigns FP32 although _fp16 is forced true below — confirm intended default.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation && !engineAlreadyLoaded) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Convenience overload: single image, no camera id.
std::vector TENSORRTOD::RunInference(const cv::Mat& inputImgBGR) { return RunInference(inputImgBGR, ""); }

// Runs single-image inference for a specific camera id.
std::vector TENSORRTOD::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id) {
    // Validate state under a brief lock — do NOT hold across DetectObjects so that
    // the Engine pool can run concurrent inferences on different GPU slots.
    if (!PreInferenceCheck("TENSORRTOD::RunInference")) return {};
    try {
        return DetectObjects(inputImgBGR, camera_id);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("TENSORRTOD::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}

// Batch inference entry point (body continues past this extract's boundary).
std::vector> TENSORRTOD::RunInferencesBatch(const std::vector& inputs, const std::string& camera_id) {
    // Validate state under a brief lock — do NOT hold across DetectObjectsBatch so that
    // the Engine pool can serve concurrent batch requests on different GPU slots.
if (!PreInferenceCheck("TENSORRTOD::RunInferencesBatch")) return {}; try { if (_isFixedBatch) return ANSODBase::RunInferencesBatch(inputs, camera_id); else return DetectObjectsBatch(inputs, camera_id); } catch (const std::exception& e) { this->_logger.LogFatal("TENSORRTOD::RunInferenceBatch", e.what(), __FILE__, __LINE__); return {}; } } TENSORRTOD::~TENSORRTOD() { try { Destroy(); } catch (std::exception& e) { this->_logger.LogError("TENSORRTOD::~TENSORRTOD()", e.what(), __FILE__, __LINE__); } } bool TENSORRTOD::Destroy() { try { m_trtEngine.reset(); // Releases the current engine and sets m_trtEngine to nullptr. return true; } catch (std::exception& e) { this->_logger.LogError("TENSORRTOD::~TENSORRTOD()", e.what(), __FILE__, __LINE__); return false; } } // private std::vector TENSORRTOD::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) { try { // --- 1. Set GPU device context --- if (m_trtEngine) { m_trtEngine->setDeviceContext(); } // --- 1b. CUDA context health check --- if (!m_nv12Helper.isCudaContextHealthy(_logger, "TENSORRTOD")) { return {}; } // --- 2. Preprocess under lock --- // Try NV12 fast path first, falls back to standard GPU preprocessing. ImageMetadata meta; std::vector> input; bool usedNV12 = false; float bgrFullResScaleX = 1.0f, bgrFullResScaleY = 1.0f; { std::lock_guard lock(_mutex); const int inferenceGpu = m_trtEngine ? 
m_trtEngine->getPreferredDeviceIndex() : 0; const auto& inputDims = m_trtEngine->getInputDims(); const int inputW = inputDims[0].d[2]; const int inputH = inputDims[0].d[1]; auto nv12 = m_nv12Helper.tryNV12(inputImage, inferenceGpu, inputW, inputH, NV12PreprocessHelper::defaultYOLOLauncher(), _logger, "TENSORRTOD"); if (nv12.succeeded) { meta.imgWidth = nv12.metaWidth; meta.imgHeight = nv12.metaHeight; meta.ratio = nv12.ratio; input = {{ std::move(nv12.gpuRGB) }}; usedNV12 = true; } else if (nv12.useBgrFullRes) { input = Preprocess(nv12.bgrFullResImg, meta); usedNV12 = !input.empty(); bgrFullResScaleX = nv12.bgrFullResScaleX; bgrFullResScaleY = nv12.bgrFullResScaleY; } if (input.empty()) { input = Preprocess(inputImage, meta); } m_nv12Helper.tickInference(); } if (input.empty()) { this->_logger.LogWarn("TENSORRTOD::DetectObjects", "Skipped: preprocessing returned empty input", __FILE__, __LINE__); return {}; } // Phase 2: Inference — mutex released so the Engine pool can serve concurrent callers // on different GPU slots simultaneously. std::vector>> featureVectors; if (!m_trtEngine->runInference(input, featureVectors)) { this->_logger.LogError("TENSORRTOD::DetectObjects", "Error running inference", __FILE__, __LINE__); return {}; } // Phase 3: Postprocess under lock (reads _classes and _modelConfig). 
std::lock_guard lock(_mutex);
        std::vector ret;
        // FIX: was `const auto&` bound to a temporary size_t; take by value.
        const auto numOutputs = m_trtEngine->getOutputDims().size();
        if (numOutputs == 1) {
            // Object detection or pose estimation
            std::vector featureVector;
            Engine::transformOutput(featureVectors, featureVector);
            const auto& outputDims = m_trtEngine->getOutputDims();
            int numChannels = outputDims[outputDims.size() - 1].d[1];
            // 56 channels (4 box + 1 score + 17*3 keypoints) identifies the pose head.
            if (numChannels == 56) {
                ret = PostProcessPose(featureVector, camera_id, meta);
            }
            else {
                ret = Postprocess(featureVector, camera_id, meta);
            }
        }
        else {
            // Segmentation
            std::vector> featureVector;
            Engine::transformOutput(featureVectors, featureVector);
            ret = PostProcessSegmentation(featureVector, camera_id, meta);
        }
        // --- 4b. Rescale coords from full-res to display-res (BGR full-res path) ---
        if (bgrFullResScaleX != 1.0f || bgrFullResScaleY != 1.0f) {
            for (auto& obj : ret) {
                obj.box.x = static_cast(obj.box.x * bgrFullResScaleX);
                obj.box.y = static_cast(obj.box.y * bgrFullResScaleY);
                obj.box.width = static_cast(obj.box.width * bgrFullResScaleX);
                obj.box.height = static_cast(obj.box.height * bgrFullResScaleY);
                for (auto& pt : obj.polygon) {
                    pt.x *= bgrFullResScaleX;
                    pt.y *= bgrFullResScaleY;
                }
                // kps layout is (x, y, score) triplets; score is left unscaled.
                for (size_t k = 0; k + 2 < obj.kps.size(); k += 3) {
                    obj.kps[k] *= bgrFullResScaleX;
                    obj.kps[k + 1] *= bgrFullResScaleY;
                }
            }
        }
        // Apply tracker and stabilization if enabled
        if (_trackerEnabled) {
            ret = ApplyTracking(ret, camera_id);
            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
        }
        return ret;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::DetectObjects", e.what(), __FILE__, __LINE__);
        return {};
    }
}

// CPU-side preprocessing: optional gray->BGR, aspect-preserving resize to the model
// input size, BGR->RGB, then upload to GPU. Fills outMeta from the ORIGINAL image.
std::vector> TENSORRTOD::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {
    std::lock_guard lock(_mutex);
    try {
        if (!_licenseValid) {
            this->_logger.LogFatal("TENSORRTOD::Preprocess", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        // Get model input dimensions
        const auto& inputDims = m_trtEngine->getInputDims();
        const int inputH = inputDims[0].d[1];
        const int inputW = inputDims[0].d[2];
        // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
        cv::Mat srcImg = inputImage;
        if (srcImg.channels() == 1) {
            cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
        }
        // Set image size parameters from ORIGINAL image (before resize)
        outMeta.imgHeight = static_cast(srcImg.rows);
        outMeta.imgWidth = static_cast(srcImg.cols);
        if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
            // Inverse of the letterbox scale factor; used to map outputs back to source pixels.
            outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast(srcImg.cols), inputDims[0].d[1] / static_cast(srcImg.rows));
            const auto& outputDims = m_trtEngine->getOutputDims();
            const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
            // CPU resize to model input size
            cv::Mat cpuResized;
            if (srcImg.rows != inputH || srcImg.cols != inputW) {
                if (isClassification) {
                    cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
                }
                else {
                    cpuResized = Engine::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
                }
            }
            else {
                cpuResized = srcImg;
            }
            // CPU BGR -> RGB
            cv::Mat cpuRGB;
            cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
            // Upload small image to GPU
            cv::cuda::Stream stream;
            cv::cuda::GpuMat gpuResized;
            gpuResized.upload(cpuRGB, stream);
            stream.waitForCompletion();
            // Convert to format expected by our inference engine
            std::vector input{ std::move(gpuResized) };
            std::vector> inputs{ std::move(input) };
            return inputs;
        }
        else {
            // FIX: log context previously read "TENSORRTCL::Preprocess" (wrong class tag).
            this->_logger.LogFatal("TENSORRTOD::Preprocess", "Image height or width is zero after processing (Width: " + std::to_string(outMeta.imgWidth) + ", Height: " + std::to_string(outMeta.imgHeight) + ")", __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogWarn("TENSORRTOD::Preprocess", std::string("Skipped frame: ") + e.what(), __FILE__, __LINE__);
        return {};
    }
}

// Decodes YOLO segmentation output: featureVectors[0] holds box/class/mask-coef rows,
// featureVectors[1] holds SEG_CHANNELS x (SEG_H*SEG_W) prototype masks.
std::vector TENSORRTOD::PostProcessSegmentation(std::vector>& featureVectors, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        if (!_licenseValid) {
            this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", "Invalid license", __FILE__, __LINE__);
            std::vector result;
            result.clear();
            return result;
        }
        const auto& outputDims = m_trtEngine->getOutputDims();
        int numChannels = outputDims[0].d[1];
        int numAnchors = outputDims[0].d[2];
        // Channel layout per anchor: 4 box + numClasses scores + SEG_CHANNELS mask coefs.
        const auto numClasses = numChannels - SEG_CHANNELS - 4;
        // Ensure the output lengths are correct
        if (featureVectors[0].size() != static_cast(numChannels) * numAnchors) {
            return {};
        }
        if (featureVectors[1].size() != static_cast(SEG_CHANNELS) * SEG_H * SEG_W) {
            return {};
        }
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVectors[0].data());
        output = output.t();
        cv::Mat protos = cv::Mat(SEG_CHANNELS, SEG_H * SEG_W, CV_32F, featureVectors[1].data());
        std::vector labels;
        std::vector scores;
        std::vector bboxes;
        std::vector maskConfs;
        std::vector indices;
        // Object the bounding boxes and class labels
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto maskConfsPtr = rowPtr + 4 + numClasses;
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                // cx,cy,w,h in model space -> corner coords in source space via meta.ratio.
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                int label = maxSPtr - scoresPtr;
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                cv::Mat maskConf = cv::Mat(1, SEG_CHANNELS, CV_32F, maskConfsPtr);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
                maskConfs.push_back(maskConf);
            }
        }
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        cv::Mat masks;
        int classNameSize = static_cast(_classes.size());
        std::vector objs;
        for (auto& i : indices) {
            // Candidates were already score-filtered above, so this guard is always true;
            // kept so objs/masks stay aligned with indices for the reshape below.
            if (scores[i] > _modelConfig.detectionScoreThreshold) {
                cv::Rect tmp = bboxes[i];
                Object obj;
                obj.classId = labels[i];
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.box = tmp;
                obj.confidence = scores[i];
                // FIX: removed unguarded `obj.className = _classes[labels[i]];` which
                // overwrote the bounds-checked assignment above and indexed out of range
                // when labels[i] >= _classes.size() (or when _classes was empty).
                masks.push_back(maskConfs[i]);
                objs.push_back(obj);
            }
        }
        if (!masks.empty()) {
            cv::Mat matmulRes = (masks * protos).t();
            cv::Mat maskMat = matmulRes.reshape(indices.size(), { _modelConfig.inpWidth, _modelConfig.inpHeight });
            std::vector maskChannels;
            cv::split(maskMat, maskChannels);
            const auto inputDims = m_trtEngine->getInputDims();
            // Crop away the letterbox padding (image was padded right/bottom).
            cv::Rect roi;
            if (meta.imgHeight > meta.imgWidth) {
                roi = cv::Rect(0, 0, _modelConfig.inpWidth * meta.imgWidth / meta.imgHeight, _modelConfig.inpHeight);
            }
            else {
                roi = cv::Rect(0, 0, _modelConfig.inpWidth, _modelConfig.inpHeight * meta.imgHeight / meta.imgWidth);
            }
            for (size_t i = 0; i < indices.size(); i++) {
                cv::Mat dest, mask;
                // Sigmoid over the prototype combination: 1 / (1 + e^-x).
                cv::exp(-maskChannels[i], dest);
                dest = 1.0 / (1.0 + dest);
                dest = dest(roi);
                objs[i].cameraId = camera_id;
                // FIX: cv::INTER_LINEAR was previously passed as the `fx` scale argument
                // (4th parameter); pass 0, 0 for fx/fy so it lands on `interpolation`.
                cv::resize(
                    dest,
                    mask,
                    cv::Size(static_cast(meta.imgWidth), static_cast(meta.imgHeight)),
                    0, 0,
                    cv::INTER_LINEAR
                );
                objs[i].mask = mask(objs[i].box) > _modelConfig.modelConfThreshold;// Need to check segmentation
            }
        }
        //EnqueueDetection(objs, camera_id);
        return objs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", e.what(), __FILE__, __LINE__);
        std::vectorresult; result.clear();
return result;
    }
}

// Decodes YOLO-pose output: each anchor row is 4 box values, 1 person score, then
// NUM_KPS (x, y, score) keypoint triplets. All detections are class 0 (person).
std::vector TENSORRTOD::PostProcessPose(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];
        std::vector bboxes;
        std::vector scores;
        std::vector labels;
        std::vector indices;
        std::vector> kpss;
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
        output = output.t();
        // Get all the YOLO proposals
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto kps_ptr = rowPtr + 5;
            float score = *scoresPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                // cx,cy,w,h in model space -> clamped corner coords in source space.
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                std::vector kps;
                // Rescale keypoints to source space; the confidence (3rd value) is kept as-is.
                for (int k = 0; k < NUM_KPS; k++) {
                    float kpsX = *(kps_ptr + 3 * k) * meta.ratio;
                    float kpsY = *(kps_ptr + 3 * k + 1) * meta.ratio;
                    float kpsS = *(kps_ptr + 3 * k + 2);
                    kpsX = std::clamp(kpsX, 0.f, meta.imgWidth);
                    kpsY = std::clamp(kpsY, 0.f, meta.imgHeight);
                    kps.push_back(kpsX);
                    kps.push_back(kpsY);
                    kps.push_back(kpsS);
                }
                bboxes.push_back(bbox);
                labels.push_back(0); // All detected objects are people
                scores.push_back(score);
                kpss.push_back(kps);
            }
        }
        // Run NMS
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector objects;
        int classNameSize = static_cast(_classes.size());
        // Choose the top k detections
        for (auto& chosenIdx : indices) {
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.box = bboxes[chosenIdx];
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                obj.kps = kpss[chosenIdx];
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        //EnqueueDetection(objects, camera_id);
        return objects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::PostProcessPose", e.what(), __FILE__, __LINE__);
        std::vector result;
        result.clear();
        return result;
    }
}

// Decodes plain YOLO detection output: 4 box values followed by per-class scores
// per anchor; the best-scoring class wins (continues past this extract's boundary).
std::vector TENSORRTOD::Postprocess(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];
        auto numClasses = _classes.size();
        std::vector bboxes;
        std::vector scores;
        std::vector labels;
        std::vector indices;
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
        output = output.t();
        // Get all the YOLO proposals
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                int label = maxSPtr - scoresPtr;
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
            }
        }
        // Run NMS
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector objects;
        int classNameSize = static_cast(_classes.size());
        // Choose the top k detections
        for (auto& chosenIdx : indices) {
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                obj.box = bboxes[chosenIdx];
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        //EnqueueDetection(objects, camera_id);
        return objects;
    }
    catch (std::exception& e) {
        // NOTE(review): context string has a typo ("Postproces") — left unchanged here
        // since it is runtime log text.
        this->_logger.LogFatal("TENSORRTOD::Postproces", e.what(), __FILE__, __LINE__);
        std::vector result;
        result.clear();
        return result;
    }
}

// Batch detection: auto-splits oversized batches, pads to a power of two for CUDA-graph
// reuse, runs one engine inference, then postprocesses each image on its own async task.
std::vector> TENSORRTOD::DetectObjectsBatch(const std::vector& inputImages,const std::string& camera_id) {
    if (inputImages.empty()) {
        _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);
        return {};
    }
    // Auto-split if batch exceeds engine capacity
    const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;
    if (static_cast(inputImages.size()) > maxBatch && maxBatch > 0) {
        const size_t numImages = inputImages.size();
        std::vector> allResults;
        allResults.reserve(numImages);
        // Process chunks sequentially to avoid GPU contention on the same engine
        for (size_t start = 0; start < numImages; start += static_cast(maxBatch)) {
            const size_t end = std::min(start + static_cast(maxBatch), numImages);
            std::vector chunk(inputImages.begin() + start, inputImages.begin() + end);
            // Recursive call: each chunk now fits within maxBatch.
            auto chunkResults = DetectObjectsBatch(chunk, camera_id);
            if (chunkResults.size() == chunk.size()) {
                for (auto& r : chunkResults) allResults.push_back(std::move(r));
            }
            else {
                // NOTE(review): context string has a typo ("TENSORTRTOD") — runtime text, unchanged.
                _logger.LogError("TENSORTRTOD::DetectObjectsBatch", "Chunk returned " + std::to_string(chunkResults.size()) + " results, expected " + std::to_string(chunk.size()) + ". Padding with empty results.", __FILE__, __LINE__);
                for (auto& r : chunkResults) allResults.push_back(std::move(r));
                for (size_t pad = chunkResults.size(); pad < chunk.size(); ++pad) {
                    allResults.push_back({});
                }
            }
        }
        return allResults;
    }
    try {
        const size_t realCount = inputImages.size();
        // ── Pad batch to next power-of-2 ─────────────────────────────
        // Eliminates batch-size thrashing (e.g. 3→4→3→4) and ensures
        // every inference hits a pre-warmed CUDA graph. The padding
        // images are duplicates of the last real image (cheapest option
        // — avoids allocating new cv::Mat memory).
        size_t paddedCount = 1;
        while (paddedCount < realCount) paddedCount *= 2;
        // Clamp to engine max batch
        paddedCount = std::min(paddedCount, static_cast(maxBatch));
        const std::vector* batchPtr = &inputImages;
        std::vector paddedImages;
        if (paddedCount > realCount) {
            paddedImages.reserve(paddedCount);
            paddedImages.insert(paddedImages.end(), inputImages.begin(), inputImages.end());
            // Duplicate last image for padding slots
            for (size_t p = realCount; p < paddedCount; ++p) paddedImages.push_back(inputImages.back());
            batchPtr = &paddedImages;
        }
        // Create local metadata for this batch
        BatchMetadata metadata;
        // Preprocess all images in batch (including padding)
        const auto inputs = PreprocessBatch(*batchPtr, metadata);
        if (inputs.empty() || inputs[0].empty()) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }
        // Run batch inference
        std::vector>> featureVectors;
        auto succ = m_trtEngine->runInference(inputs, featureVectors);
        if (!succ) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);
            // Do NOT set _isFixedBatch = true here. A transient failure (CUDA OOM,
            // stream error, etc.) should not permanently fall back to single-image mode.
            return {};
        }
        // Validate output size (against padded count)
        if (featureVectors.size() != paddedCount) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Output batch size mismatch", __FILE__, __LINE__);
            return {};
        }
        // Trim to real count — discard padding results
        featureVectors.resize(realCount);
        // Process results in parallel -- each image's postprocess is fully
        // independent; no shared mutable state exists between per-image calls.
        const auto& outputDims = m_trtEngine->getOutputDims();
        const size_t numOutputs = outputDims.size();
        const size_t numBatch = featureVectors.size();
        std::vector> batchDetections(numBatch);
        std::vector>> postFutures;
        postFutures.reserve(numBatch);
        for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {
            const auto& batchOutput = featureVectors[batchIdx];
            // Per-image metadata copied by value into the async lambdas below.
            ImageMetadata imgMeta;
            imgMeta.ratio = metadata.ratios[batchIdx];
            imgMeta.imgWidth = static_cast(metadata.imgWidths[batchIdx]);
            imgMeta.imgHeight = static_cast(metadata.imgHeights[batchIdx]);
            if (numOutputs == 1) {
                std::vector featureVector = batchOutput.empty() ? std::vector{} : batchOutput[0];
                if (batchOutput.empty()) {
                    _logger.LogWarn("TENSORRTOD::DetectObjectsBatch", "Empty output for image " + std::to_string(batchIdx), __FILE__, __LINE__);
                }
                const int numChannels = outputDims[0].d[1];
                // 56 channels identifies the pose head (4 box + 1 score + 17*3 keypoints).
                if (numChannels == 56) {
                    postFutures.push_back(std::async(std::launch::async, [this, fv = std::move(featureVector), cid = camera_id, meta = imgMeta]() mutable { return PostProcessPose(fv, cid, meta); }));
                }
                else {
                    // NOTE(review): captures &metadata by reference — safe only because the
                    // futures are gathered before metadata goes out of scope (see comment below).
                    postFutures.push_back(std::async(std::launch::async, [this, fv = std::move(featureVector), cid = camera_id, idx = batchIdx, &metadata]() mutable { return PostprocessBatch(fv, cid, idx, metadata); }));
                }
            }
            else {
                if (batchOutput.empty()) {
                    _logger.LogWarn("TENSORRTOD::DetectObjectsBatch", "Empty output for image " + std::to_string(batchIdx), __FILE__, __LINE__);
                }
                std::vector> featureVector2d;
                featureVector2d.reserve(batchOutput.size());
                for (const auto& out : batchOutput) featureVector2d.push_back(out);
                postFutures.push_back(std::async(std::launch::async, [this, fv2d = std::move(featureVector2d), cid = camera_id, meta = imgMeta]() mutable { return PostProcessSegmentation(fv2d, cid, meta); }));
            }
        }
        // Gather results in original order; metadata stays alive until here.
for (size_t i = 0; i < numBatch; ++i) batchDetections[i] = postFutures[i].get();
            return batchDetections;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::DetectObjectsBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Preprocesses a batch of BGR images for TensorRT inference.
    /// For detection models, resizes with aspect-ratio-preserving letterbox
    /// (pad right/bottom); for classification-shaped outputs (<=2 dims), plain
    /// resize. Converts BGR->RGB on CPU, then uploads each image to the GPU on
    /// one CUDA stream. Fills outMetadata with per-image original sizes and the
    /// ratio needed to map network coordinates back to the original image.
    /// Returns a single-element outer vector (one batched input binding)
    /// holding one GpuMat per image; returns {} on any validation failure.
    std::vector> TENSORRTOD::PreprocessBatch(const std::vector& inputImages,BatchMetadata& outMetadata) {
        if (!_licenseValid) {
            _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        if (inputImages.empty()) {
            _logger.LogError("TENSORRTOD::PreprocessBatch", "Empty input images vector", __FILE__, __LINE__);
            return {};
        }
        try {
            // Network input geometry comes from the engine bindings
            // (d[1] = height, d[2] = width for this model's layout).
            const auto& inputDims = m_trtEngine->getInputDims();
            if (inputDims.empty()) {
                _logger.LogError("TENSORRTOD::PreprocessBatch", "No input dimensions available", __FILE__, __LINE__);
                return {};
            }
            const int inputH = inputDims[0].d[1];
            const int inputW = inputDims[0].d[2];
            if (inputH <= 0 || inputW <= 0) {
                _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid model input dimensions", __FILE__, __LINE__);
                return {};
            }
            // Initialize output metadata
            outMetadata.imgHeights.resize(inputImages.size());
            outMetadata.imgWidths.resize(inputImages.size());
            outMetadata.ratios.resize(inputImages.size());
            std::vector batchProcessed;
            batchProcessed.reserve(inputImages.size());
            // All uploads share one stream; synchronized once after the loop.
            cv::cuda::Stream stream;
            for (size_t i = 0; i < inputImages.size(); ++i) {
                const auto& inputImage = inputImages[i];
                if (inputImage.empty()) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // CPU preprocessing: resize + BGR->RGB before GPU upload
                cv::Mat srcImg = inputImage;
                if (srcImg.channels() == 1) {
                    // Grayscale input: expand to 3 channels so the BGR->RGB
                    // conversion below is valid.
                    cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
                } else if (srcImg.channels() != 3) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // Store in output metadata from ORIGINAL image
                outMetadata.imgHeights[i] = srcImg.rows;
                outMetadata.imgWidths[i] = srcImg.cols;
                if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // Classification heads have rank <= 2 outputs; they take a plain
                // resize and a neutral ratio of 1 (no coordinate back-mapping).
                const auto& outputDims = m_trtEngine->getOutputDims();
                const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
                const float scaleW = inputW / static_cast(srcImg.cols);
                const float scaleH = inputH / static_cast(srcImg.rows);
                // ratio = 1 / letterbox-scale: multiply network-space coords by
                // this to return to original-image pixels.
                outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
                cv::Mat cpuResized;
                if (srcImg.rows != inputH || srcImg.cols != inputW) {
                    if (isClassification) {
                        cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
                    } else {
                        // Keep aspect ratio; pad right/bottom so boxes only need
                        // a single scale factor (no offset) to map back.
                        cpuResized = Engine::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
                    }
                } else {
                    cpuResized = srcImg;
                }
                cv::Mat cpuRGB;
                cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
                cv::cuda::GpuMat gpuResized;
                gpuResized.upload(cpuRGB, stream);
                batchProcessed.push_back(std::move(gpuResized));
            }
            // Ensure all async uploads finished before the GpuMats are consumed.
            stream.waitForCompletion();
            std::vector> inputs;
            inputs.push_back(std::move(batchProcessed));
            return inputs;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::PreprocessBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Decodes one image's raw detection output into Object results.
    /// featureVector: flat [channels x anchors] tensor for image `batchIdx`;
    /// metadata supplies that image's letterbox ratio and original size.
    /// Returns {} on any error (index out of range, exception).
    std::vector TENSORRTOD::PostprocessBatch(std::vector& featureVector,const std::string& camera_id,size_t batchIdx,const BatchMetadata& metadata) {
        try {
            // Bounds checking
            if (batchIdx >= metadata.ratios.size() || batchIdx >= metadata.imgWidths.size() || batchIdx >= metadata.imgHeights.size()) {
                _logger.LogError("TENSORRTOD::PostprocessBatch", "Batch index out of range", __FILE__, __LINE__);
                return {};
            }
            // Output layout: d[1] = channels (4 box + per-class scores),
            // d[2] = anchors. NOTE(review): featureVector.size() is not checked
            // against numChannels * numAnchors before it is wrapped in a cv::Mat
            // below — an undersized vector would read out of bounds; consider
            // validating.
            const auto& outputDims = m_trtEngine->getOutputDims();
            auto numChannels = outputDims[0].d[1];
            auto numAnchors = outputDims[0].d[2];
            auto numClasses = _classes.size();
            // Get batch-specific metadata - NO LOCK NEEDED!
const float ratio = metadata.ratios[batchIdx];
            const int imgWidth = metadata.imgWidths[batchIdx];
            const int imgHeight = metadata.imgHeights[batchIdx];
            std::vector bboxes;
            std::vector scores;
            std::vector labels;
            std::vector indices;
            // Heuristic pre-allocation: typically far fewer than 10% of anchors
            // pass the score threshold.
            bboxes.reserve(numAnchors / 10);
            scores.reserve(numAnchors / 10);
            labels.reserve(numAnchors / 10);
            // Wrap the raw buffer without copying ([channels x anchors]), then
            // transpose so each row is one anchor: [x, y, w, h, class scores...].
            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
            output = output.t();
            for (int i = 0; i < numAnchors; i++) {
                auto rowPtr = output.row(i).ptr();
                auto bboxesPtr = rowPtr;
                auto scoresPtr = rowPtr + 4;
                // Best class = argmax over the per-class scores.
                auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
                float score = *maxSPtr;
                if (score > _modelConfig.detectionScoreThreshold) {
                    // Box is center-format (cx, cy, w, h) in network space;
                    // scale by `ratio` back to original-image pixels and clamp
                    // corners to the image bounds.
                    float x = *bboxesPtr++;
                    float y = *bboxesPtr++;
                    float w = *bboxesPtr++;
                    float h = *bboxesPtr;
                    float x0 = std::clamp((x - 0.5f * w) * ratio, 0.f, static_cast(imgWidth));
                    float y0 = std::clamp((y - 0.5f * h) * ratio, 0.f, static_cast(imgHeight));
                    float x1 = std::clamp((x + 0.5f * w) * ratio, 0.f, static_cast(imgWidth));
                    float y1 = std::clamp((y + 0.5f * h) * ratio, 0.f, static_cast(imgHeight));
                    int label = static_cast(maxSPtr - scoresPtr);
                    cv::Rect_ bbox;
                    bbox.x = x0;
                    bbox.y = y0;
                    bbox.width = x1 - x0;
                    bbox.height = y1 - y0;
                    // Clamping can collapse a box to zero area; drop those.
                    if (bbox.width > 0.f && bbox.height > 0.f) {
                        bboxes.push_back(bbox);
                        labels.push_back(label);
                        scores.push_back(score);
                    }
                }
            }
            // Class-aware NMS: boxes are only suppressed against boxes of the
            // same label.
            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
            int classNameSize = static_cast(_classes.size());
            std::vector objects;
            objects.reserve(indices.size());
            for (auto& chosenIdx : indices) {
                if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                    Object obj{};
                    obj.confidence = scores[chosenIdx];
                    obj.classId = labels[chosenIdx];
                    obj.box = bboxes[chosenIdx];
                    obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(
                        obj.box, imgWidth, imgHeight);
                    // Map class id to name; out-of-range ids fall back to the
                    // last known class, and an empty class list yields "Unknown".
                    if (!_classes.empty()) {
                        if (obj.classId < classNameSize) {
                            obj.className = _classes[obj.classId];
                        } else {
                            obj.className = _classes[classNameSize - 1];
                        }
                    } else {
                        obj.className = "Unknown";
                    }
                    obj.cameraId = camera_id;
                    objects.push_back(std::move(obj));
                }
            }
            return objects;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::PostprocessBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }
}