#include "ANSTENSORRTOD.h"
#include "Utility.h"
// NOTE(review): the two includes below lost their <...> targets during extraction
// (likely <fstream> and <mutex>) — restore from version control.
#include
#include

namespace ANSCENTER {

// Builds (or rebuilds) the TensorRT engine for the raw ONNX model, optionally in FP16.
// On success, optimizedModelFolder is set to the parent folder of the raw model file.
// Returns false if the base-class step fails, the raw model file is missing, or the build fails.
// NOTE(review): template arguments were stripped from this extract (std::lock_guard,
// std::make_unique, std::vector, ...) — the statements below reflect the original token stream.
bool TENSORRTOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    std::lock_guard lock(_mutex);
    if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) { return false; }
    if (!FileExist(_modelFilePath)) {
        this->_logger.LogFatal("TENSORRTOD::OptimizeModel", "Raw model file path does not exist", __FILE__, __LINE__);
        return false;
    }
    try {
        _fp16 = fp16;
        optimizedModelFolder = GetParentFolder(_modelFilePath);
        // Check if the engine already exists to avoid reinitializing
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = optimizedModelFolder;
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // Build the TensorRT engine
        auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
        if (!succ) {
            const std::string errMsg = "Error: Unable to build the TensorRT engine. "
                "Try increasing TensorRT log severity to kVERBOSE.";
            this->_logger.LogError("TENSORRTOD::OptimizeModel", errMsg, __FILE__, __LINE__);
            _modelLoadValid = false;
            return false;
        }
        // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
        m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
        m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        _modelLoadValid = true;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::OptimizeModel", e.what(), __FILE__, __LINE__);
        // Clear the out-parameter so callers do not act on a half-populated folder path.
        optimizedModelFolder.clear();
        return false;
    }
}

// Loads a model from a password-protected zip. Forces YOLO-style defaults
// (640x640 input, TensorRT model type) and clamps suspicious thresholds.
bool TENSORRTOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _isFixedBatch = false;
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Threshold sanity floors: anything below 0.2 is treated as unset.
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        // NOTE(review): float equality test; LoadModelFromFolder uses `<= 0` for the same
        // default — consider unifying.
        if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation (NOTE(review): duplicate of the assignment above)
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. "
                    + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Loads a model directly from an unpacked folder (continues past this extract's boundary).
bool TENSORRTOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _isFixedBatch = false;
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        // NOTE(review): assigns FP32 although _fp16 is forced true below — confirm intended default.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        _modelConfig.inpWidth = 640;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.kpsThreshold <= 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation
std::string _modelName = modelName;
        if (_modelName.empty()) { _modelName = "train_last"; }
        std::string modelFullName = _modelName + ".onnx";
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
            _classFilePath = CreateFilePath(_modelFolder, className);
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Full initialization from a model zip: delegates to the base class, then applies the
// same YOLO defaults as LoadModel. If a valid engine is already loaded, the (expensive)
// buildLoadNetwork step is skipped via engineAlreadyLoaded.
bool TENSORRTOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        // Remember whether a usable engine exists before we reset the valid flag below.
        const bool engineAlreadyLoaded = _modelLoadValid && _isInitialized && m_trtEngine != nullptr;
        _modelLoadValid = false;
        _isFixedBatch = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Parsing for YOLO only here
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        // NOTE(review): assigns FP32 although _fp16 is forced true below — confirm intended default.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;
        // Load Model from Here
        // Load Model from Here
        TOP_K = 100;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;
        SEG_CHANNELS = 32; // For segmentation
        if (!m_trtEngine) {
            // Fixed batch size of 1 for this model
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = _modelFolder;
            // Use FP16 or FP32 precision based on the input flag
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            // Create the TensorRT inference engine
            m_trtEngine = std::make_unique>(m_options);
        }
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("TENSORRTOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
        // Load the TensorRT engine file
        if (this->_loadEngineOnCreation && !engineAlreadyLoaded) {
            auto succ = m_trtEngine->buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            if (!succ) {
                const std::string errMsg = "Error: Unable to load TensorRT engine weights into memory. " + _modelFilePath;
                this->_logger.LogError("TENSORRTOD::Initialize", errMsg, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // Sync GPU-capped batch sizes from engine (build may reduce based on VRAM tier)
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

// Convenience overload: single image, no camera id.
std::vector TENSORRTOD::RunInference(const cv::Mat& inputImgBGR) { return RunInference(inputImgBGR, ""); }

// Runs single-image inference for a specific camera id.
std::vector TENSORRTOD::RunInference(const cv::Mat& inputImgBGR,const std::string& camera_id) {
    // Validate state under a brief lock — do NOT hold across DetectObjects so that
    // the Engine pool can run concurrent inferences on different GPU slots.
    if (!PreInferenceCheck("TENSORRTOD::RunInference")) return {};
    try {
        return DetectObjects(inputImgBGR, camera_id);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("TENSORRTOD::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}

// Batch inference entry point (body continues past this extract's boundary).
std::vector> TENSORRTOD::RunInferencesBatch(const std::vector& inputs, const std::string& camera_id) {
    // Validate state under a brief lock — do NOT hold across DetectObjectsBatch so that
    // the Engine pool can serve concurrent batch requests on different GPU slots.
if (!PreInferenceCheck("TENSORRTOD::RunInferencesBatch")) return {}; try { if (_isFixedBatch) return ANSODBase::RunInferencesBatch(inputs, camera_id); else return DetectObjectsBatch(inputs, camera_id); } catch (const std::exception& e) { this->_logger.LogFatal("TENSORRTOD::RunInferenceBatch", e.what(), __FILE__, __LINE__); return {}; } } TENSORRTOD::~TENSORRTOD() { try { Destroy(); } catch (std::exception& e) { this->_logger.LogError("TENSORRTOD::~TENSORRTOD()", e.what(), __FILE__, __LINE__); } } bool TENSORRTOD::Destroy() { try { m_trtEngine.reset(); // Releases the current engine and sets m_trtEngine to nullptr. return true; } catch (std::exception& e) { this->_logger.LogError("TENSORRTOD::~TENSORRTOD()", e.what(), __FILE__, __LINE__); return false; } } // private std::vector TENSORRTOD::DetectObjects(const cv::Mat& inputImage, const std::string& camera_id) { try { // --- 1. Set GPU device context --- if (m_trtEngine) { m_trtEngine->setDeviceContext(); } // --- 1b. CUDA context health check --- if (!m_nv12Helper.isCudaContextHealthy(_logger, "TENSORRTOD")) { return {}; } // --- 2. Preprocess under lock --- // Try NV12 fast path first, falls back to standard GPU preprocessing. ImageMetadata meta; std::vector> input; bool usedNV12 = false; float bgrFullResScaleX = 1.0f, bgrFullResScaleY = 1.0f; { std::lock_guard lock(_mutex); const int inferenceGpu = m_trtEngine ? 
m_trtEngine->getPreferredDeviceIndex() : 0; const auto& inputDims = m_trtEngine->getInputDims(); const int inputW = inputDims[0].d[2]; const int inputH = inputDims[0].d[1]; auto nv12 = m_nv12Helper.tryNV12(inputImage, inferenceGpu, inputW, inputH, NV12PreprocessHelper::defaultYOLOLauncher(), _logger, "TENSORRTOD"); if (nv12.succeeded) { meta.imgWidth = nv12.metaWidth; meta.imgHeight = nv12.metaHeight; meta.ratio = nv12.ratio; input = {{ std::move(nv12.gpuRGB) }}; usedNV12 = true; } else if (nv12.useBgrFullRes) { input = Preprocess(nv12.bgrFullResImg, meta); usedNV12 = !input.empty(); bgrFullResScaleX = nv12.bgrFullResScaleX; bgrFullResScaleY = nv12.bgrFullResScaleY; } if (input.empty()) { input = Preprocess(inputImage, meta); } m_nv12Helper.tickInference(); } if (input.empty()) { this->_logger.LogWarn("TENSORRTOD::DetectObjects", "Skipped: preprocessing returned empty input", __FILE__, __LINE__); return {}; } // Phase 2: Inference — mutex released so the Engine pool can serve concurrent callers // on different GPU slots simultaneously. std::vector>> featureVectors; if (!m_trtEngine->runInference(input, featureVectors)) { this->_logger.LogError("TENSORRTOD::DetectObjects", "Error running inference", __FILE__, __LINE__); return {}; } // Phase 3: Postprocess under lock (reads _classes and _modelConfig). 
std::lock_guard lock(_mutex);
        std::vector ret;
        // FIX: was `const auto&` bound to a temporary size_t; take by value.
        const auto numOutputs = m_trtEngine->getOutputDims().size();
        if (numOutputs == 1) {
            // Object detection or pose estimation
            std::vector featureVector;
            Engine::transformOutput(featureVectors, featureVector);
            const auto& outputDims = m_trtEngine->getOutputDims();
            int numChannels = outputDims[outputDims.size() - 1].d[1];
            // 56 channels (4 box + 1 score + 17*3 keypoints) identifies the pose head.
            if (numChannels == 56) {
                ret = PostProcessPose(featureVector, camera_id, meta);
            }
            else {
                ret = Postprocess(featureVector, camera_id, meta);
            }
        }
        else {
            // Segmentation
            std::vector> featureVector;
            Engine::transformOutput(featureVectors, featureVector);
            ret = PostProcessSegmentation(featureVector, camera_id, meta);
        }
        // --- 4b. Rescale coords from full-res to display-res (BGR full-res path) ---
        if (bgrFullResScaleX != 1.0f || bgrFullResScaleY != 1.0f) {
            for (auto& obj : ret) {
                obj.box.x = static_cast(obj.box.x * bgrFullResScaleX);
                obj.box.y = static_cast(obj.box.y * bgrFullResScaleY);
                obj.box.width = static_cast(obj.box.width * bgrFullResScaleX);
                obj.box.height = static_cast(obj.box.height * bgrFullResScaleY);
                for (auto& pt : obj.polygon) {
                    pt.x *= bgrFullResScaleX;
                    pt.y *= bgrFullResScaleY;
                }
                // kps layout is (x, y, score) triplets; score is left unscaled.
                for (size_t k = 0; k + 2 < obj.kps.size(); k += 3) {
                    obj.kps[k] *= bgrFullResScaleX;
                    obj.kps[k + 1] *= bgrFullResScaleY;
                }
            }
        }
        // Apply tracker and stabilization if enabled
        if (_trackerEnabled) {
            ret = ApplyTracking(ret, camera_id);
            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
        }
        return ret;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::DetectObjects", e.what(), __FILE__, __LINE__);
        return {};
    }
}

// CPU-side preprocessing: optional gray->BGR, aspect-preserving resize to the model
// input size, BGR->RGB, then upload to GPU. Fills outMeta from the ORIGINAL image.
std::vector> TENSORRTOD::Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta) {
    std::lock_guard lock(_mutex);
    try {
        if (!_licenseValid) {
            this->_logger.LogFatal("TENSORRTOD::Preprocess", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        // Get model input dimensions
        const auto& inputDims = m_trtEngine->getInputDims();
        const int inputH = inputDims[0].d[1];
        const int inputW = inputDims[0].d[2];
        // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
        cv::Mat srcImg = inputImage;
        if (srcImg.channels() == 1) {
            cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
        }
        // Set image size parameters from ORIGINAL image (before resize)
        outMeta.imgHeight = static_cast(srcImg.rows);
        outMeta.imgWidth = static_cast(srcImg.cols);
        if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
            // Inverse of the letterbox scale factor; used to map outputs back to source pixels.
            outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast(srcImg.cols), inputDims[0].d[1] / static_cast(srcImg.rows));
            const auto& outputDims = m_trtEngine->getOutputDims();
            const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
            // CPU resize to model input size
            cv::Mat cpuResized;
            if (srcImg.rows != inputH || srcImg.cols != inputW) {
                if (isClassification) {
                    cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
                }
                else {
                    cpuResized = Engine::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
                }
            }
            else {
                cpuResized = srcImg;
            }
            // CPU BGR -> RGB
            cv::Mat cpuRGB;
            cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
            // Upload small image to GPU
            cv::cuda::Stream stream;
            cv::cuda::GpuMat gpuResized;
            gpuResized.upload(cpuRGB, stream);
            stream.waitForCompletion();
            // Convert to format expected by our inference engine
            std::vector input{ std::move(gpuResized) };
            std::vector> inputs{ std::move(input) };
            return inputs;
        }
        else {
            // FIX: log context previously read "TENSORRTCL::Preprocess" (wrong class tag).
            this->_logger.LogFatal("TENSORRTOD::Preprocess", "Image height or width is zero after processing (Width: " + std::to_string(outMeta.imgWidth) + ", Height: " + std::to_string(outMeta.imgHeight) + ")", __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogWarn("TENSORRTOD::Preprocess", std::string("Skipped frame: ") + e.what(), __FILE__, __LINE__);
        return {};
    }
}

// Decodes YOLO segmentation output: featureVectors[0] holds box/class/mask-coef rows,
// featureVectors[1] holds SEG_CHANNELS x (SEG_H*SEG_W) prototype masks.
std::vector TENSORRTOD::PostProcessSegmentation(std::vector>& featureVectors, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        if (!_licenseValid) {
            this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", "Invalid license", __FILE__, __LINE__);
            std::vector result;
            result.clear();
            return result;
        }
        const auto& outputDims = m_trtEngine->getOutputDims();
        int numChannels = outputDims[0].d[1];
        int numAnchors = outputDims[0].d[2];
        // Channel layout per anchor: 4 box + numClasses scores + SEG_CHANNELS mask coefs.
        const auto numClasses = numChannels - SEG_CHANNELS - 4;
        // Ensure the output lengths are correct
        if (featureVectors[0].size() != static_cast(numChannels) * numAnchors) {
            return {};
        }
        if (featureVectors[1].size() != static_cast(SEG_CHANNELS) * SEG_H * SEG_W) {
            return {};
        }
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVectors[0].data());
        output = output.t();
        cv::Mat protos = cv::Mat(SEG_CHANNELS, SEG_H * SEG_W, CV_32F, featureVectors[1].data());
        std::vector labels;
        std::vector scores;
        std::vector bboxes;
        std::vector maskConfs;
        std::vector indices;
        // Object the bounding boxes and class labels
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto maskConfsPtr = rowPtr + 4 + numClasses;
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                // cx,cy,w,h in model space -> corner coords in source space via meta.ratio.
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                int label = maxSPtr - scoresPtr;
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                cv::Mat maskConf = cv::Mat(1, SEG_CHANNELS, CV_32F, maskConfsPtr);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
                maskConfs.push_back(maskConf);
            }
        }
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        cv::Mat masks;
        int classNameSize = static_cast(_classes.size());
        std::vector objs;
        for (auto& i : indices) {
            // Candidates were already score-filtered above, so this guard is always true;
            // kept so objs/masks stay aligned with indices for the reshape below.
            if (scores[i] > _modelConfig.detectionScoreThreshold) {
                cv::Rect tmp = bboxes[i];
                Object obj;
                obj.classId = labels[i];
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.box = tmp;
                obj.confidence = scores[i];
                // FIX: removed unguarded `obj.className = _classes[labels[i]];` which
                // overwrote the bounds-checked assignment above and indexed out of range
                // when labels[i] >= _classes.size() (or when _classes was empty).
                masks.push_back(maskConfs[i]);
                objs.push_back(obj);
            }
        }
        if (!masks.empty()) {
            cv::Mat matmulRes = (masks * protos).t();
            cv::Mat maskMat = matmulRes.reshape(indices.size(), { _modelConfig.inpWidth, _modelConfig.inpHeight });
            std::vector maskChannels;
            cv::split(maskMat, maskChannels);
            const auto inputDims = m_trtEngine->getInputDims();
            // Crop away the letterbox padding (image was padded right/bottom).
            cv::Rect roi;
            if (meta.imgHeight > meta.imgWidth) {
                roi = cv::Rect(0, 0, _modelConfig.inpWidth * meta.imgWidth / meta.imgHeight, _modelConfig.inpHeight);
            }
            else {
                roi = cv::Rect(0, 0, _modelConfig.inpWidth, _modelConfig.inpHeight * meta.imgHeight / meta.imgWidth);
            }
            for (size_t i = 0; i < indices.size(); i++) {
                cv::Mat dest, mask;
                // Sigmoid over the prototype combination: 1 / (1 + e^-x).
                cv::exp(-maskChannels[i], dest);
                dest = 1.0 / (1.0 + dest);
                dest = dest(roi);
                objs[i].cameraId = camera_id;
                // FIX: cv::INTER_LINEAR was previously passed as the `fx` scale argument
                // (4th parameter); pass 0, 0 for fx/fy so it lands on `interpolation`.
                cv::resize(
                    dest,
                    mask,
                    cv::Size(static_cast(meta.imgWidth), static_cast(meta.imgHeight)),
                    0, 0,
                    cv::INTER_LINEAR
                );
                objs[i].mask = mask(objs[i].box) > _modelConfig.modelConfThreshold;// Need to check segmentation
            }
        }
        //EnqueueDetection(objs, camera_id);
        return objs;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::PostProcessSegmentation", e.what(), __FILE__, __LINE__);
        std::vectorresult; result.clear();
return result;
    }
}

// Decodes YOLO-pose output: each anchor row is 4 box values, 1 person score, then
// NUM_KPS (x, y, score) keypoint triplets. All detections are class 0 (person).
std::vector TENSORRTOD::PostProcessPose(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];
        std::vector bboxes;
        std::vector scores;
        std::vector labels;
        std::vector indices;
        std::vector> kpss;
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
        output = output.t();
        // Get all the YOLO proposals
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto kps_ptr = rowPtr + 5;
            float score = *scoresPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                // cx,cy,w,h in model space -> clamped corner coords in source space.
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                std::vector kps;
                // Rescale keypoints to source space; the confidence (3rd value) is kept as-is.
                for (int k = 0; k < NUM_KPS; k++) {
                    float kpsX = *(kps_ptr + 3 * k) * meta.ratio;
                    float kpsY = *(kps_ptr + 3 * k + 1) * meta.ratio;
                    float kpsS = *(kps_ptr + 3 * k + 2);
                    kpsX = std::clamp(kpsX, 0.f, meta.imgWidth);
                    kpsY = std::clamp(kpsY, 0.f, meta.imgHeight);
                    kps.push_back(kpsX);
                    kps.push_back(kpsY);
                    kps.push_back(kpsS);
                }
                bboxes.push_back(bbox);
                labels.push_back(0); // All detected objects are people
                scores.push_back(score);
                kpss.push_back(kps);
            }
        }
        // Run NMS
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector objects;
        int classNameSize = static_cast(_classes.size());
        // Choose the top k detections
        for (auto& chosenIdx : indices) {
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.box = bboxes[chosenIdx];
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                obj.kps = kpss[chosenIdx];
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        //EnqueueDetection(objects, camera_id);
        return objects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("TENSORRTOD::PostProcessPose", e.what(), __FILE__, __LINE__);
        std::vector result;
        result.clear();
        return result;
    }
}

// Decodes plain YOLO detection output: 4 box values followed by per-class scores
// per anchor; the best-scoring class wins (continues past this extract's boundary).
std::vector TENSORRTOD::Postprocess(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta) {
    std::lock_guard lock(_mutex);
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];
        auto numClasses = _classes.size();
        std::vector bboxes;
        std::vector scores;
        std::vector labels;
        std::vector indices;
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
        output = output.t();
        // Get all the YOLO proposals
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > this->_modelConfig.detectionScoreThreshold) {
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                int label = maxSPtr - scoresPtr;
                cv::Rect_ bbox;
                bbox.x = x0;
                bbox.y = y0;
                bbox.width = x1 - x0;
                bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
            }
        }
        // Run NMS
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector objects;
        int classNameSize = static_cast(_classes.size());
        // Choose the top k detections
        for (auto& chosenIdx : indices) {
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                obj.box = bboxes[chosenIdx];
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                if (!_classes.empty()) {
                    if (obj.classId < classNameSize) {
                        obj.className = _classes[obj.classId];
                    }
                    else {
                        obj.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
                    }
                }
                else {
                    obj.className = "Unknown"; // Fallback if _classes is empty
                }
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        //EnqueueDetection(objects, camera_id);
        return objects;
    }
    catch (std::exception& e) {
        // NOTE(review): context string has a typo ("Postproces") — left unchanged here
        // since it is runtime log text.
        this->_logger.LogFatal("TENSORRTOD::Postproces", e.what(), __FILE__, __LINE__);
        std::vector result;
        result.clear();
        return result;
    }
}

// Batch detection: auto-splits oversized batches, pads to a power of two for CUDA-graph
// reuse, runs one engine inference, then postprocesses each image on its own async task.
std::vector> TENSORRTOD::DetectObjectsBatch(const std::vector& inputImages,const std::string& camera_id) {
    if (inputImages.empty()) {
        _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);
        return {};
    }
    // Auto-split if batch exceeds engine capacity
    const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;
    if (static_cast(inputImages.size()) > maxBatch && maxBatch > 0) {
        const size_t numImages = inputImages.size();
        std::vector> allResults;
        allResults.reserve(numImages);
        // Process chunks sequentially to avoid GPU contention on the same engine
        for (size_t start = 0; start < numImages; start += static_cast(maxBatch)) {
            const size_t end = std::min(start + static_cast(maxBatch), numImages);
            std::vector chunk(inputImages.begin() + start, inputImages.begin() + end);
            // Recursive call: each chunk now fits within maxBatch.
            auto chunkResults = DetectObjectsBatch(chunk, camera_id);
            if (chunkResults.size() == chunk.size()) {
                for (auto& r : chunkResults) allResults.push_back(std::move(r));
            }
            else {
                // NOTE(review): context string has a typo ("TENSORTRTOD") — runtime text, unchanged.
                _logger.LogError("TENSORTRTOD::DetectObjectsBatch", "Chunk returned " + std::to_string(chunkResults.size()) + " results, expected " + std::to_string(chunk.size()) + ". Padding with empty results.", __FILE__, __LINE__);
                for (auto& r : chunkResults) allResults.push_back(std::move(r));
                for (size_t pad = chunkResults.size(); pad < chunk.size(); ++pad) {
                    allResults.push_back({});
                }
            }
        }
        return allResults;
    }
    try {
        const size_t realCount = inputImages.size();
        // ── Pad batch to next power-of-2 ─────────────────────────────
        // Eliminates batch-size thrashing (e.g. 3→4→3→4) and ensures
        // every inference hits a pre-warmed CUDA graph. The padding
        // images are duplicates of the last real image (cheapest option
        // — avoids allocating new cv::Mat memory).
        size_t paddedCount = 1;
        while (paddedCount < realCount) paddedCount *= 2;
        // Clamp to engine max batch
        paddedCount = std::min(paddedCount, static_cast(maxBatch));
        const std::vector* batchPtr = &inputImages;
        std::vector paddedImages;
        if (paddedCount > realCount) {
            paddedImages.reserve(paddedCount);
            paddedImages.insert(paddedImages.end(), inputImages.begin(), inputImages.end());
            // Duplicate last image for padding slots
            for (size_t p = realCount; p < paddedCount; ++p) paddedImages.push_back(inputImages.back());
            batchPtr = &paddedImages;
        }
        // Create local metadata for this batch
        BatchMetadata metadata;
        // Preprocess all images in batch (including padding)
        const auto inputs = PreprocessBatch(*batchPtr, metadata);
        if (inputs.empty() || inputs[0].empty()) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }
        // Run batch inference
        std::vector>> featureVectors;
        auto succ = m_trtEngine->runInference(inputs, featureVectors);
        if (!succ) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Error running inference", __FILE__, __LINE__);
            // Do NOT set _isFixedBatch = true here. A transient failure (CUDA OOM,
            // stream error, etc.) should not permanently fall back to single-image mode.
            return {};
        }
        // Validate output size (against padded count)
        if (featureVectors.size() != paddedCount) {
            _logger.LogError("TENSORRTOD::DetectObjectsBatch", "Output batch size mismatch", __FILE__, __LINE__);
            return {};
        }
        // Trim to real count — discard padding results
        featureVectors.resize(realCount);
        // Process results in parallel -- each image's postprocess is fully
        // independent; no shared mutable state exists between per-image calls.
        const auto& outputDims = m_trtEngine->getOutputDims();
        const size_t numOutputs = outputDims.size();
        const size_t numBatch = featureVectors.size();
        std::vector> batchDetections(numBatch);
        std::vector>> postFutures;
        postFutures.reserve(numBatch);
        for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {
            const auto& batchOutput = featureVectors[batchIdx];
            // Per-image metadata copied by value into the async lambdas below.
            ImageMetadata imgMeta;
            imgMeta.ratio = metadata.ratios[batchIdx];
            imgMeta.imgWidth = static_cast(metadata.imgWidths[batchIdx]);
            imgMeta.imgHeight = static_cast(metadata.imgHeights[batchIdx]);
            if (numOutputs == 1) {
                std::vector featureVector = batchOutput.empty() ? std::vector{} : batchOutput[0];
                if (batchOutput.empty()) {
                    _logger.LogWarn("TENSORRTOD::DetectObjectsBatch", "Empty output for image " + std::to_string(batchIdx), __FILE__, __LINE__);
                }
                const int numChannels = outputDims[0].d[1];
                // 56 channels identifies the pose head (4 box + 1 score + 17*3 keypoints).
                if (numChannels == 56) {
                    postFutures.push_back(std::async(std::launch::async, [this, fv = std::move(featureVector), cid = camera_id, meta = imgMeta]() mutable { return PostProcessPose(fv, cid, meta); }));
                }
                else {
                    // NOTE(review): captures &metadata by reference — safe only because the
                    // futures are gathered before metadata goes out of scope (see comment below).
                    postFutures.push_back(std::async(std::launch::async, [this, fv = std::move(featureVector), cid = camera_id, idx = batchIdx, &metadata]() mutable { return PostprocessBatch(fv, cid, idx, metadata); }));
                }
            }
            else {
                if (batchOutput.empty()) {
                    _logger.LogWarn("TENSORRTOD::DetectObjectsBatch", "Empty output for image " + std::to_string(batchIdx), __FILE__, __LINE__);
                }
                std::vector> featureVector2d;
                featureVector2d.reserve(batchOutput.size());
                for (const auto& out : batchOutput) featureVector2d.push_back(out);
                postFutures.push_back(std::async(std::launch::async, [this, fv2d = std::move(featureVector2d), cid = camera_id, meta = imgMeta]() mutable { return PostProcessSegmentation(fv2d, cid, meta); }));
            }
        }
        // Gather results in original order; metadata stays alive until here.
for (size_t i = 0; i < numBatch; ++i) batchDetections[i] = postFutures[i].get();
            return batchDetections;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::DetectObjectsBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Preprocesses a batch of BGR images for TensorRT inference.
    /// For detection models, resizes with aspect-ratio-preserving letterbox
    /// (pad right/bottom); for classification-shaped outputs (<=2 dims), plain
    /// resize. Converts BGR->RGB on CPU, then uploads each image to the GPU on
    /// one CUDA stream. Fills outMetadata with per-image original sizes and the
    /// ratio needed to map network coordinates back to the original image.
    /// Returns a single-element outer vector (one batched input binding)
    /// holding one GpuMat per image; returns {} on any validation failure.
    std::vector> TENSORRTOD::PreprocessBatch(const std::vector& inputImages,BatchMetadata& outMetadata) {
        if (!_licenseValid) {
            _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        if (inputImages.empty()) {
            _logger.LogError("TENSORRTOD::PreprocessBatch", "Empty input images vector", __FILE__, __LINE__);
            return {};
        }
        try {
            // Network input geometry comes from the engine bindings
            // (d[1] = height, d[2] = width for this model's layout).
            const auto& inputDims = m_trtEngine->getInputDims();
            if (inputDims.empty()) {
                _logger.LogError("TENSORRTOD::PreprocessBatch", "No input dimensions available", __FILE__, __LINE__);
                return {};
            }
            const int inputH = inputDims[0].d[1];
            const int inputW = inputDims[0].d[2];
            if (inputH <= 0 || inputW <= 0) {
                _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid model input dimensions", __FILE__, __LINE__);
                return {};
            }
            // Initialize output metadata
            outMetadata.imgHeights.resize(inputImages.size());
            outMetadata.imgWidths.resize(inputImages.size());
            outMetadata.ratios.resize(inputImages.size());
            std::vector batchProcessed;
            batchProcessed.reserve(inputImages.size());
            // All uploads share one stream; synchronized once after the loop.
            cv::cuda::Stream stream;
            for (size_t i = 0; i < inputImages.size(); ++i) {
                const auto& inputImage = inputImages[i];
                if (inputImage.empty()) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // CPU preprocessing: resize + BGR->RGB before GPU upload
                cv::Mat srcImg = inputImage;
                if (srcImg.channels() == 1) {
                    // Grayscale input: expand to 3 channels so the BGR->RGB
                    // conversion below is valid.
                    cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
                } else if (srcImg.channels() != 3) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // Store in output metadata from ORIGINAL image
                outMetadata.imgHeights[i] = srcImg.rows;
                outMetadata.imgWidths[i] = srcImg.cols;
                if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
                    _logger.LogError("TENSORRTOD::PreprocessBatch", "Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__);
                    return {};
                }
                // Classification heads have rank <= 2 outputs; they take a plain
                // resize and a neutral ratio of 1 (no coordinate back-mapping).
                const auto& outputDims = m_trtEngine->getOutputDims();
                const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
                const float scaleW = inputW / static_cast(srcImg.cols);
                const float scaleH = inputH / static_cast(srcImg.rows);
                // ratio = 1 / letterbox-scale: multiply network-space coords by
                // this to return to original-image pixels.
                outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
                cv::Mat cpuResized;
                if (srcImg.rows != inputH || srcImg.cols != inputW) {
                    if (isClassification) {
                        cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
                    } else {
                        // Keep aspect ratio; pad right/bottom so boxes only need
                        // a single scale factor (no offset) to map back.
                        cpuResized = Engine::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
                    }
                } else {
                    cpuResized = srcImg;
                }
                cv::Mat cpuRGB;
                cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
                cv::cuda::GpuMat gpuResized;
                gpuResized.upload(cpuRGB, stream);
                batchProcessed.push_back(std::move(gpuResized));
            }
            // Ensure all async uploads finished before the GpuMats are consumed.
            stream.waitForCompletion();
            std::vector> inputs;
            inputs.push_back(std::move(batchProcessed));
            return inputs;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::PreprocessBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Decodes one image's raw detection output into Object results.
    /// featureVector: flat [channels x anchors] tensor for image `batchIdx`;
    /// metadata supplies that image's letterbox ratio and original size.
    /// Returns {} on any error (index out of range, exception).
    std::vector TENSORRTOD::PostprocessBatch(std::vector& featureVector,const std::string& camera_id,size_t batchIdx,const BatchMetadata& metadata) {
        try {
            // Bounds checking
            if (batchIdx >= metadata.ratios.size() || batchIdx >= metadata.imgWidths.size() || batchIdx >= metadata.imgHeights.size()) {
                _logger.LogError("TENSORRTOD::PostprocessBatch", "Batch index out of range", __FILE__, __LINE__);
                return {};
            }
            // Output layout: d[1] = channels (4 box + per-class scores),
            // d[2] = anchors. NOTE(review): featureVector.size() is not checked
            // against numChannels * numAnchors before it is wrapped in a cv::Mat
            // below — an undersized vector would read out of bounds; consider
            // validating.
            const auto& outputDims = m_trtEngine->getOutputDims();
            auto numChannels = outputDims[0].d[1];
            auto numAnchors = outputDims[0].d[2];
            auto numClasses = _classes.size();
            // Get batch-specific metadata - NO LOCK NEEDED!
const float ratio = metadata.ratios[batchIdx];
            const int imgWidth = metadata.imgWidths[batchIdx];
            const int imgHeight = metadata.imgHeights[batchIdx];
            std::vector bboxes;
            std::vector scores;
            std::vector labels;
            std::vector indices;
            // Heuristic pre-allocation: typically far fewer than 10% of anchors
            // pass the score threshold.
            bboxes.reserve(numAnchors / 10);
            scores.reserve(numAnchors / 10);
            labels.reserve(numAnchors / 10);
            // Wrap the raw buffer without copying ([channels x anchors]), then
            // transpose so each row is one anchor: [x, y, w, h, class scores...].
            cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
            output = output.t();
            for (int i = 0; i < numAnchors; i++) {
                auto rowPtr = output.row(i).ptr();
                auto bboxesPtr = rowPtr;
                auto scoresPtr = rowPtr + 4;
                // Best class = argmax over the per-class scores.
                auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
                float score = *maxSPtr;
                if (score > _modelConfig.detectionScoreThreshold) {
                    // Box is center-format (cx, cy, w, h) in network space;
                    // scale by `ratio` back to original-image pixels and clamp
                    // corners to the image bounds.
                    float x = *bboxesPtr++;
                    float y = *bboxesPtr++;
                    float w = *bboxesPtr++;
                    float h = *bboxesPtr;
                    float x0 = std::clamp((x - 0.5f * w) * ratio, 0.f, static_cast(imgWidth));
                    float y0 = std::clamp((y - 0.5f * h) * ratio, 0.f, static_cast(imgHeight));
                    float x1 = std::clamp((x + 0.5f * w) * ratio, 0.f, static_cast(imgWidth));
                    float y1 = std::clamp((y + 0.5f * h) * ratio, 0.f, static_cast(imgHeight));
                    int label = static_cast(maxSPtr - scoresPtr);
                    cv::Rect_ bbox;
                    bbox.x = x0;
                    bbox.y = y0;
                    bbox.width = x1 - x0;
                    bbox.height = y1 - y0;
                    // Clamping can collapse a box to zero area; drop those.
                    if (bbox.width > 0.f && bbox.height > 0.f) {
                        bboxes.push_back(bbox);
                        labels.push_back(label);
                        scores.push_back(score);
                    }
                }
            }
            // Class-aware NMS: boxes are only suppressed against boxes of the
            // same label.
            cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
            int classNameSize = static_cast(_classes.size());
            std::vector objects;
            objects.reserve(indices.size());
            for (auto& chosenIdx : indices) {
                if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                    Object obj{};
                    obj.confidence = scores[chosenIdx];
                    obj.classId = labels[chosenIdx];
                    obj.box = bboxes[chosenIdx];
                    obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(
                        obj.box, imgWidth, imgHeight);
                    // Map class id to name; out-of-range ids fall back to the
                    // last known class, and an empty class list yields "Unknown".
                    if (!_classes.empty()) {
                        if (obj.classId < classNameSize) {
                            obj.className = _classes[obj.classId];
                        } else {
                            obj.className = _classes[classNameSize - 1];
                        }
                    } else {
                        obj.className = "Unknown";
                    }
                    obj.cameraId = camera_id;
                    objects.push_back(std::move(obj));
                }
            }
            return objects;
        } catch (const std::exception& e) {
            _logger.LogFatal("TENSORRTOD::PostprocessBatch", e.what(), __FILE__, __LINE__);
            return {};
        }
    }
}