2026-03-28 16:54:11 +11:00
|
|
|
|
#include "ANSRTYOLO.h"
|
|
|
|
|
|
#include "Utility.h"
|
2026-04-04 20:19:54 +11:00
|
|
|
|
#include "ANSLicense.h" // ANS_DBG macro for DebugView
|
2026-03-28 16:54:11 +11:00
|
|
|
|
#include <future>
|
|
|
|
|
|
#include <numeric>
|
|
|
|
|
|
#include <cmath>
|
|
|
|
|
|
#include <cstring>
|
|
|
|
|
|
#include <opencv2/cudaimgproc.hpp>
|
|
|
|
|
|
#include <opencv2/cudawarping.hpp>
|
|
|
|
|
|
#include <opencv2/core/cuda_stream_accessor.hpp>
|
|
|
|
|
|
|
|
|
|
|
|
namespace ANSCENTER {
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// ANSODBase interface — OptimizeModel
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Builds (or rebuilds) the TensorRT engine for the raw ONNX model at
// _modelFilePath, choosing FP16 or FP32 precision.
//
// @param fp16                  true → build the engine with FP16 precision.
// @param optimizedModelFolder  in/out: on success, set to the folder that
//                              will contain the serialized engine file.
// @return true when the engine was built successfully.
//
// Thread-safe: the whole operation runs under the instance's recursive mutex.
bool ANSRTYOLO::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Let the base class perform its own optimization bookkeeping first.
    if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) return false;
    if (!FileExist(_modelFilePath)) {
        _logger.LogFatal("ANSRTYOLO::OptimizeModel", "Raw model file path does not exist", __FILE__, __LINE__);
        return false;
    }
    try {
        _fp16 = fp16;
        // The engine is emitted next to the raw model file.
        optimizedModelFolder = GetParentFolder(_modelFilePath);
        if (!m_trtEngine) {
            // No engine yet: populate the build options from the model config
            // (batch-size limits, target GPU, dynamic input-shape ranges).
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.engineFileDir = optimizedModelFolder;
            m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);
            m_trtEngine = std::make_shared<Engine<float>>(m_options);
        }
        // buildWithRetry compiles the ONNX model into a TensorRT engine using
        // the given normalization constants (may retry on transient failures).
        auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
        if (!succ) {
            _logger.LogError("ANSRTYOLO::OptimizeModel",
                "Error: Unable to build TensorRT engine. " + _modelFilePath, __FILE__, __LINE__);
            return false;
        }
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::OptimizeModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// ANSODBase interface — LoadModel
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Loads a model from an encrypted zip archive: the base class extracts the
// archive, then this override clamps the configuration to safe defaults,
// derives post-processing constants, resolves class names, and (optionally)
// acquires a shared TensorRT engine from the engine pool.
//
// @param modelZipFilePath  path to the packaged model archive.
// @param modelZipPassword  password for the archive.
// @return true when the model (and, if requested, the engine) is ready.
//
// Thread-safe: runs entirely under the instance's recursive mutex.
bool ANSRTYOLO::LoadModel(const std::string& modelZipFilePath,
                          const std::string& modelZipPassword) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _isFixedBatch = false;
        // Base class extracts the archive and populates _modelFolder /
        // _modelConfig / _modelConfigFile.
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;

        // Clamp configuration values to sane defaults (640x640 input,
        // 0.5 thresholds, 17 keypoints for COCO-style pose).
        _modelConfig.modelType = ModelType::TENSORRT;
        if (_modelConfig.inpHeight <= 0) _modelConfig.inpHeight = 640;
        if (_modelConfig.inpWidth <= 0) _modelConfig.inpWidth = 640;
        if (_modelConfig.modelMNSThreshold < 0.2f) _modelConfig.modelMNSThreshold = 0.5f;
        if (_modelConfig.modelConfThreshold < 0.2f) _modelConfig.modelConfThreshold = 0.5f;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold <= 0) _modelConfig.kpsThreshold = 0.5f;
        // NOTE(review): FP16 is forced on regardless of config here — confirm intended.
        _fp16 = true;

        // Post-processing constants consumed by the decode/NMS stage.
        TOP_K = 300;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;

        // TensorRT engine options mirrored from the (clamped) model config.
        m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
        m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.maxInputHeight = _modelConfig.maxInputHeight;
        m_options.minInputHeight = _modelConfig.minInputHeight;
        m_options.optInputHeight = _modelConfig.optInputHeight;
        m_options.maxInputWidth = _modelConfig.maxInputWidth;
        m_options.minInputWidth = _modelConfig.minInputWidth;
        m_options.optInputWidth = _modelConfig.optInputWidth;
        m_options.engineFileDir = _modelFolder;
        m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);

        // Fixed ONNX file name for zip-packaged models.
        _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
        if (FileExist(_modelConfigFile)) {
            // Config file present: read class list and (optionally) override
            // the input shape with the one recorded in the config.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else {
            // No config file: fall back to a classes.names file, or to the
            // built-in class string when that file is absent/unreadable.
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValid(_classFilePath);
            if (!isValid) LoadClassesFromString();
            else LoadClassesFromFile();
        }

        if (this->_loadEngineOnCreation) {
            if (!m_trtEngine) {
                // Acquire a shared engine from the pool keyed by
                // (model path, precision, max batch) so multiple tasks can
                // reuse one engine per GPU.
                m_poolKey = { _modelFilePath,
                    static_cast<int>(m_options.precision), m_options.maxBatchSize };
                m_trtEngine = EnginePoolManager<float>::instance().acquire(
                    m_poolKey, m_options, _modelFilePath,
                    SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                m_usingSharedPool = (m_trtEngine != nullptr);
            }
            if (!m_trtEngine) {
                _logger.LogError("ANSRTYOLO::LoadModel",
                    "Error: Unable to load TensorRT engine. " + _modelFilePath, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // The pooled engine is authoritative for batch limits — copy them back.
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
            m_trtEngine->warmUp();
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// ANSODBase interface — LoadModelFromFolder
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Loads a model directly from an (already-extracted) folder instead of a
// zip archive. Mirrors LoadModel(): clamps the config, derives
// post-processing constants, resolves class names, fills labelMap, and
// optionally acquires a pooled TensorRT engine.
//
// @param licenseKey   runtime license key (validated by the base class).
// @param modelConfig  caller-supplied configuration; copied into _modelConfig
//                     and then clamped.
// @param modelName    ONNX file base name; empty → "train_last".
// @param className    class-names file name used when no config file exists.
// @param modelFolder  folder containing the model artifacts.
// @param labelMap     out: comma-separated class names (cleared first).
// @return true when the model (and, if requested, the engine) is ready.
//
// Thread-safe: runs entirely under the instance's recursive mutex.
bool ANSRTYOLO::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
                                    std::string modelName, std::string className,
                                    const std::string& modelFolder, std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _isFixedBatch = false;
        // Base class validates the license and records the model folder.
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig,
            modelName, className, modelFolder, labelMap);
        if (!result) return false;

        // Adopt the caller's config, then clamp to safe defaults.
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::TENSORRT;
        if (_modelConfig.inpHeight <= 0) _modelConfig.inpHeight = 640;
        if (_modelConfig.inpWidth <= 0) _modelConfig.inpWidth = 640;
        // NOTE(review): precisionType is pinned to FP32 while _fp16 below is
        // forced true (→ FP16 engine) — confirm this divergence is intended.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.modelMNSThreshold < 0.2f) _modelConfig.modelMNSThreshold = 0.5f;
        if (_modelConfig.modelConfThreshold < 0.2f) _modelConfig.modelConfThreshold = 0.5f;
        if (_modelConfig.kpsThreshold <= 0) _modelConfig.kpsThreshold = 0.5f;
        _fp16 = true;

        // Post-processing constants consumed by the decode/NMS stage.
        TOP_K = 300;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;

        // Resolve the ONNX file name (default base name: "train_last").
        std::string _modelName = modelName;
        if (_modelName.empty()) _modelName = "train_last";
        std::string modelFullName = _modelName + ".onnx";

        // TensorRT engine options mirrored from the (clamped) model config.
        m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
        m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.maxInputHeight = _modelConfig.maxInputHeight;
        m_options.minInputHeight = _modelConfig.minInputHeight;
        m_options.optInputHeight = _modelConfig.optInputHeight;
        m_options.maxInputWidth = _modelConfig.maxInputWidth;
        m_options.minInputWidth = _modelConfig.minInputWidth;
        m_options.optInputWidth = _modelConfig.optInputWidth;
        m_options.engineFileDir = _modelFolder;
        m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);

        _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
        if (FileExist(_modelConfigFile)) {
            // Config file present: read class list and (optionally) override
            // the input shape with the one recorded in the config.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else {
            // No config file: use the caller-supplied class-names file, or
            // the built-in class string when that file is absent/unreadable.
            _classFilePath = CreateFilePath(_modelFolder, className);
            std::ifstream isValid(_classFilePath);
            if (!isValid) LoadClassesFromString();
            else LoadClassesFromFile();
        }

        // Report the resolved class list back to the caller.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);

        if (this->_loadEngineOnCreation) {
            if (!m_trtEngine) {
                // Acquire a shared engine from the pool keyed by
                // (model path, precision, max batch).
                m_poolKey = { _modelFilePath,
                    static_cast<int>(m_options.precision), m_options.maxBatchSize };
                m_trtEngine = EnginePoolManager<float>::instance().acquire(
                    m_poolKey, m_options, _modelFilePath,
                    SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                m_usingSharedPool = (m_trtEngine != nullptr);
            }
            if (!m_trtEngine) {
                _logger.LogError("ANSRTYOLO::LoadModelFromFolder",
                    "Error: Unable to load TensorRT engine. " + _modelFilePath, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // The pooled engine is authoritative for batch limits — copy them back.
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
            m_trtEngine->warmUp();
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// ANSODBase interface — Initialize
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Full (re-)initialization entry point: license check + archive load via the
// base class, then the same config-clamping / class-resolution / engine
// acquisition sequence as LoadModel(). Safe to call on an already-initialized
// instance: a live engine is detected up front and not re-acquired.
//
// @param licenseKey        runtime license key (validated by the base class).
// @param modelConfig       caller-supplied configuration; copied then clamped.
// @param modelZipFilePath  path to the packaged model archive.
// @param modelZipPassword  password for the archive.
// @param labelMap          out: comma-separated class names (cleared first).
// @return true when the model (and, if requested, the engine) is ready.
//
// Thread-safe: runs entirely under the instance's recursive mutex.
bool ANSRTYOLO::Initialize(std::string licenseKey, ModelConfig modelConfig,
                           const std::string& modelZipFilePath,
                           const std::string& modelZipPassword,
                           std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // Capture whether an engine is already up BEFORE resetting state, so
        // re-initialization can skip the expensive pool acquisition + warm-up.
        const bool engineAlreadyLoaded = _modelLoadValid && _isInitialized && m_trtEngine != nullptr;
        _modelLoadValid = false;
        _isFixedBatch = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig,
            modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;

        // Adopt the caller's config, then clamp to safe defaults.
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::TENSORRT;
        if (_modelConfig.inpHeight <= 0) _modelConfig.inpHeight = 640;
        if (_modelConfig.inpWidth <= 0) _modelConfig.inpWidth = 640;
        // NOTE(review): precisionType is pinned to FP32 while _fp16 below is
        // forced true (→ FP16 engine) — confirm this divergence is intended.
        _modelConfig.precisionType = PrecisionType::FP32;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.modelMNSThreshold < 0.2f) _modelConfig.modelMNSThreshold = 0.5f;
        if (_modelConfig.modelConfThreshold < 0.2f) _modelConfig.modelConfThreshold = 0.5f;
        if (_modelConfig.kpsThreshold <= 0) _modelConfig.kpsThreshold = 0.5f;
        _fp16 = true;

        // Post-processing constants consumed by the decode/NMS stage.
        TOP_K = 300;
        SEG_CHANNELS = 32;
        PROBABILITY_THRESHOLD = _modelConfig.detectionScoreThreshold;
        NMS_THRESHOLD = _modelConfig.modelMNSThreshold;
        SEGMENTATION_THRESHOLD = 0.5f;
        SEG_H = 160;
        SEG_W = 160;
        NUM_KPS = _modelConfig.numKPS;
        KPS_THRESHOLD = _modelConfig.kpsThreshold;

        // TensorRT engine options mirrored from the (clamped) model config.
        m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
        m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.maxInputHeight = _modelConfig.maxInputHeight;
        m_options.minInputHeight = _modelConfig.minInputHeight;
        m_options.optInputHeight = _modelConfig.optInputHeight;
        m_options.maxInputWidth = _modelConfig.maxInputWidth;
        m_options.minInputWidth = _modelConfig.minInputWidth;
        m_options.optInputWidth = _modelConfig.optInputWidth;
        m_options.engineFileDir = _modelFolder;
        m_options.precision = (_fp16 ? Precision::FP16 : Precision::FP32);

        // Fixed ONNX file name for zip-packaged models.
        _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
        if (FileExist(_modelConfigFile)) {
            // Config file present: read class list and (optionally) override
            // the input shape with the one recorded in the config.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else {
            // No config file: fall back to a classes.names file, or to the
            // built-in class string when that file is absent/unreadable.
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValid(_classFilePath);
            if (!isValid) LoadClassesFromString();
            else LoadClassesFromFile();
        }

        // Report the resolved class list back to the caller.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);

        // Acquire the engine only on first initialization — a re-init with a
        // live engine keeps it (and skips the warm-up).
        if (this->_loadEngineOnCreation && !engineAlreadyLoaded) {
            if (!m_trtEngine) {
                // Shared-pool key: (model path, precision, max batch).
                m_poolKey = { _modelFilePath,
                    static_cast<int>(m_options.precision), m_options.maxBatchSize };
                m_trtEngine = EnginePoolManager<float>::instance().acquire(
                    m_poolKey, m_options, _modelFilePath,
                    SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                m_usingSharedPool = (m_trtEngine != nullptr);
            }
            if (!m_trtEngine) {
                _logger.LogError("ANSRTYOLO::Initialize",
                    "Error: Unable to load TensorRT engine. " + _modelFilePath, __FILE__, __LINE__);
                _modelLoadValid = false;
                return false;
            }
            // The pooled engine is authoritative for batch limits — copy them back.
            m_options.maxBatchSize = m_trtEngine->getOptions().maxBatchSize;
            m_options.optBatchSize = m_trtEngine->getOptions().optBatchSize;
            m_trtEngine->warmUp();
        }
        _modelLoadValid = true;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// RunInference / RunInferencesBatch / Destroy / Destructor
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Convenience overload: run single-image inference with no camera
// identifier attached (forwards an empty id to the main overload).
std::vector<Object> ANSRTYOLO::RunInference(const cv::Mat& inputImgBGR) {
    return RunInference(inputImgBGR, std::string{});
}
|
|
|
|
|
|
|
|
|
|
|
|
// Runs single-image inference for the given camera.
//
// State/input validation happens under the instance mutex; the actual
// detection call runs outside the lock. Returns an empty vector on any
// precondition failure or exception (errors are logged).
std::vector<Object> ANSRTYOLO::RunInference(const cv::Mat& inputImgBGR,
                                            const std::string& camera_id) {
    {
        std::lock_guard<std::recursive_mutex> guard(_mutex);
        // Collapse the three state guards into one log-and-bail path;
        // check order (model → license → init) matches the original.
        const char* failure = nullptr;
        if (!_modelLoadValid)
            failure = "Cannot load TensorRT model";
        else if (!_licenseValid)
            failure = "Invalid license";
        else if (!_isInitialized)
            failure = "Model not initialized";
        if (failure) {
            _logger.LogError("ANSRTYOLO::RunInference", failure, __FILE__, __LINE__);
            return {};
        }
        // Silently skip degenerate frames (empty or smaller than 10px a side).
        if (inputImgBGR.empty() || inputImgBGR.cols < 10 || inputImgBGR.rows < 10)
            return {};
    }
    try {
        return DetectObjects(inputImgBGR, camera_id);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<Object>> ANSRTYOLO::RunInferencesBatch(
|
|
|
|
|
|
const std::vector<cv::Mat>& inputs, const std::string& camera_id) {
|
|
|
|
|
|
{
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
|
|
|
|
if (!_modelLoadValid) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::RunInferencesBatch", "Cannot load the TensorRT model", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!_licenseValid) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::RunInferencesBatch", "Runtime license is not valid or expired", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!_isInitialized) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::RunInferencesBatch", "Initialisation is not valid", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
if (inputs.empty()) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::RunInferencesBatch", "Input images vector is empty", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
|
|
|
if (_isFixedBatch) return ANSODBase::RunInferencesBatch(inputs, camera_id);
|
|
|
|
|
|
else return DetectObjectsBatch(inputs, camera_id);
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::RunInferencesBatch", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Destructor: best-effort teardown via Destroy() (releases the pooled or
// owned TensorRT engine and NV12 helper resources). Exceptions must never
// escape a destructor, so any failure is logged and swallowed.
ANSRTYOLO::~ANSRTYOLO() {
    try {
        Destroy();
    }
    catch (const std::exception& e) { // catch by const& (was non-const ref)
        _logger.LogError("ANSRTYOLO::~ANSRTYOLO()", e.what(), __FILE__, __LINE__);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
bool ANSRTYOLO::Destroy() {
|
|
|
|
|
|
try {
|
|
|
|
|
|
if (m_usingSharedPool) {
|
|
|
|
|
|
// Release our reference to the shared pool.
|
|
|
|
|
|
// Pool is destroyed only when all tasks release it.
|
|
|
|
|
|
EnginePoolManager<float>::instance().release(m_poolKey);
|
|
|
|
|
|
m_trtEngine.reset(); // drop shared_ptr (pool may survive)
|
|
|
|
|
|
m_usingSharedPool = false;
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
m_trtEngine.reset();
|
|
|
|
|
|
}
|
|
|
|
|
|
m_nv12Helper.destroy();
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
_logger.LogError("ANSRTYOLO::Destroy()", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// GPU Preprocessing — single image (pinned-memory H2D path)
|
|
|
|
|
|
//
|
|
|
|
|
|
// 1. Upload the host image to the GPU asynchronously on a CUDA stream
// 2. BGR→RGB colour conversion on GPU
// 3. Resize on GPU (letterbox right-bottom pad for detection models,
//    plain resize for classification models)
//
// NOTE(review): this header previously described an explicit pinned
// (page-locked) staging buffer, but the implementation below uploads the
// cv::Mat directly; the pinned-memory fast path applies only when the
// caller supplies page-locked host memory. Pinned memory eliminates the
// internal pageable→pinned staging copy that CUDA performs for normal
// (pageable) host memory, cutting the H2D transfer of a 3840×2160 BGR
// frame (~24 MB) by 60-70%.
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// GPU preprocessing for a single BGR frame: upload → BGR-to-RGB → resize to
// the engine's input shape (letterbox pad for detection models, plain resize
// for classification models).
//
// @param inputImage  host BGR (or single-channel grayscale) frame.
// @param outMeta     out: original image dimensions and the letterbox scale
//                    ratio needed to map detections back to source pixels.
// @return a 1x1 batch of GPU mats ready for inference, or empty on failure.
//
// Thread-safe: runs under the instance's recursive mutex.
std::vector<std::vector<cv::cuda::GpuMat>> ANSRTYOLO::Preprocess(
    const cv::Mat& inputImage, ImageMetadata& outMeta) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!_licenseValid) {
            _logger.LogFatal("ANSRTYOLO::Preprocess", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        // NOTE(review): m_trtEngine is dereferenced here without a null check —
        // confirm callers guarantee the engine is loaded before Preprocess().
        const auto& inputDims = m_trtEngine->getInputDims();
        // Engine input tensor layout: d[1] = height, d[2] = width.
        const int inputH = inputDims[0].d[1];
        const int inputW = inputDims[0].d[2];

        // Early-out if CUDA context is dead (sticky error from CUVID crash etc.)
        if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) return {};

        cv::cuda::Stream stream;
        cv::cuda::GpuMat gpuImg;

        // Resolve source Mat (handle grayscale → BGR on CPU first)
        if (inputImage.channels() == 1) {
            cv::Mat img3Channel;
            cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
            gpuImg.upload(img3Channel, stream);
        } else {
            gpuImg.upload(inputImage, stream);
        }

        // GPU: BGR → RGB
        cv::cuda::GpuMat gpuRGB;
        cv::cuda::cvtColor(gpuImg, gpuRGB, cv::COLOR_BGR2RGB, 0, stream);

        // Record the source dimensions for coordinate mapping downstream.
        outMeta.imgHeight = static_cast<float>(gpuRGB.rows);
        outMeta.imgWidth = static_cast<float>(gpuRGB.cols);

        if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
            // Letterbox ratio: inverse of the smaller of the two scale
            // factors, i.e. source-pixels per network-pixel.
            outMeta.ratio = 1.f / std::min(
                inputDims[0].d[2] / static_cast<float>(gpuRGB.cols),
                inputDims[0].d[1] / static_cast<float>(gpuRGB.rows));

            // Check if model is classification (output ndims <= 2)
            const auto& outputDims = m_trtEngine->getOutputDims();
            const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;

            cv::cuda::GpuMat gpuResized;
            if (gpuRGB.rows != inputH || gpuRGB.cols != inputW) {
                if (isClassification) {
                    // Classification: direct resize (no letterbox padding)
                    cv::cuda::resize(gpuRGB, gpuResized, cv::Size(inputW, inputH),
                        0, 0, cv::INTER_LINEAR, stream);
                }
                else {
                    // Detection/Seg/Pose/OBB: letterbox resize + right-bottom pad (on GPU)
                    gpuResized = Engine<float>::resizeKeepAspectRatioPadRightBottom(
                        gpuRGB, inputH, inputW);
                }
            } else {
                // Already at network resolution — no resize needed.
                gpuResized = gpuRGB;
            }

            // Ensure all queued GPU work is finished before handing the
            // buffers to the inference stage.
            stream.waitForCompletion();

            // Batch of one image for the engine's [batch][input] layout.
            std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
            std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
            return inputs;
        }
        else {
            _logger.LogFatal("ANSRTYOLO::Preprocess",
                "Image height or width is zero (Width: " + std::to_string(outMeta.imgWidth) +
                ", Height: " + std::to_string(outMeta.imgHeight) + ")", __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        // Treat per-frame preprocessing failures as skippable (warn, not fatal).
        _logger.LogWarn("ANSRTYOLO::Preprocess", std::string("Skipped frame: ") + e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
#if 0 // PreprocessFromNV12 — moved to NV12PreprocessHelper::tryNV12()
|
|
|
|
|
|
try {
|
|
|
|
|
|
if (!gpuData || !gpuData->yPlane || !gpuData->uvPlane) {
|
|
|
|
|
|
if (!m_nv12NullLogged) {
|
|
|
|
|
|
m_nv12NullLogged = true;
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Early exit: null data — gpuData=" + std::to_string(gpuData != nullptr) +
|
|
|
|
|
|
" yPlane=" + std::to_string(gpuData ? (gpuData->yPlane != nullptr) : false) +
|
|
|
|
|
|
" uvPlane=" + std::to_string(gpuData ? (gpuData->uvPlane != nullptr) : false) +
|
|
|
|
|
|
" isCuda=" + std::to_string(gpuData ? gpuData->isCudaDevicePtr : false),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const auto& inputDims = m_trtEngine->getInputDims();
|
|
|
|
|
|
const int inputH = inputDims[0].d[1];
|
|
|
|
|
|
const int inputW = inputDims[0].d[2];
|
|
|
|
|
|
const int frameW = gpuData->width;
|
|
|
|
|
|
const int frameH = gpuData->height;
|
|
|
|
|
|
|
|
|
|
|
|
if (frameW <= 0 || frameH <= 0) {
|
|
|
|
|
|
if (!m_nv12DimLogged) {
|
|
|
|
|
|
m_nv12DimLogged = true;
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Early exit: bad dimensions — w=" + std::to_string(frameW) +
|
|
|
|
|
|
" h=" + std::to_string(frameH),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Early-out if CUDA context is dead (sticky error from CUVID crash etc.)
|
|
|
|
|
|
if (m_cudaContextDead) {
|
|
|
|
|
|
if (!m_nv12DeadLogged) {
|
|
|
|
|
|
m_nv12DeadLogged = true;
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Early exit: CUDA context dead",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Cache flag before lock is released — gpuData may be invalidated after unlock
|
|
|
|
|
|
const bool isCudaDevice = gpuData->isCudaDevicePtr;
|
|
|
|
|
|
|
|
|
|
|
|
// ── GPU index validation for zero-copy ──
|
|
|
|
|
|
// NVDEC device pointers are only valid on the CUDA context that decoded them.
|
|
|
|
|
|
// If decode GPU != inference GPU, wrapping those pointers causes
|
|
|
|
|
|
// "illegal memory access" → sticky CUDA error → entire context dies.
|
|
|
|
|
|
// Fall back to CPU memcpy+upload path when GPUs don't match.
|
|
|
|
|
|
const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
|
|
|
|
|
|
const bool gpuMatch = !isCudaDevice ||
|
|
|
|
|
|
gpuData->gpuIndex < 0 || // unknown = trust it
|
|
|
|
|
|
gpuData->gpuIndex == inferenceGpu;
|
|
|
|
|
|
const bool useZeroCopy = isCudaDevice && gpuMatch;
|
|
|
|
|
|
|
|
|
|
|
|
// Local plane pointers — default to gpuData's primary planes.
|
|
|
|
|
|
// Overridden below for cross-GPU fallback (CPU NV12 instead of CUDA).
|
|
|
|
|
|
uint8_t* effYPlane = gpuData->yPlane;
|
|
|
|
|
|
uint8_t* effUvPlane = gpuData->uvPlane;
|
|
|
|
|
|
int effYLinesize = gpuData->yLinesize;
|
|
|
|
|
|
int effUvLinesize = gpuData->uvLinesize;
|
|
|
|
|
|
|
|
|
|
|
|
if (isCudaDevice && !gpuMatch) {
|
|
|
|
|
|
// Cross-GPU: NV12 decoded on one GPU, inference on another.
|
|
|
|
|
|
// CPU NV12 fallback uploads full decode-res NV12 (e.g. 3840x2160 = 12.4 MB)
|
|
|
|
|
|
// over PCIe, which is SLOWER than BGR at display-res (1920x1080 = 6.2 MB).
|
|
|
|
|
|
// Measured: CPU NV12 cross-GPU = 15-39ms preproc vs BGR = 10-20ms.
|
|
|
|
|
|
// Just fall back to BGR — it's faster for the cross-GPU case.
|
|
|
|
|
|
if (!m_gpuMismatchLogged) {
|
|
|
|
|
|
m_gpuMismatchLogged = true;
|
|
|
|
|
|
_logger.LogInfo("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"GPU mismatch (decode GPU " + std::to_string(gpuData->gpuIndex) +
|
|
|
|
|
|
" vs inference GPU " + std::to_string(inferenceGpu) +
|
|
|
|
|
|
") — skipping NV12, using BGR (faster for cross-GPU: "
|
|
|
|
|
|
"BGR uploads " + std::to_string(displayW * displayH * 3 / 1024) +
|
|
|
|
|
|
"KB display-res vs NV12 " + std::to_string(frameW * frameH * 3 / 2 / 1024) +
|
|
|
|
|
|
"KB full-res)",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (regLock.owns_lock()) regLock.unlock();
|
|
|
|
|
|
return {}; // caller will use Preprocess(BGR) instead
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Diagnostic: log which path will be taken (once per instance)
|
|
|
|
|
|
// Note: cross-GPU case already returned {} above, so reaching here
|
|
|
|
|
|
// means either CUDA zero-copy (same GPU) or CPU NV12 upload (non-CUDA).
|
|
|
|
|
|
if (!m_nv12PathLogged) {
|
|
|
|
|
|
m_nv12PathLogged = true;
|
|
|
|
|
|
const char* pathName = useZeroCopy ? "CUDA_ZERO_COPY"
|
|
|
|
|
|
: "CPU_NV12_UPLOAD";
|
|
|
|
|
|
_logger.LogInfo("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
std::string("Path: ") + pathName +
|
|
|
|
|
|
" | isCuda=" + std::to_string(isCudaDevice) +
|
|
|
|
|
|
" gpuMatch=" + std::to_string(gpuMatch) +
|
|
|
|
|
|
" decodeGpu=" + std::to_string(gpuData->gpuIndex) +
|
|
|
|
|
|
" infGpu=" + std::to_string(inferenceGpu) +
|
|
|
|
|
|
" frame=" + std::to_string(frameW) + "x" + std::to_string(frameH) +
|
|
|
|
|
|
" effYLine=" + std::to_string(effYLinesize) +
|
|
|
|
|
|
" effUvLine=" + std::to_string(effUvLinesize) +
|
|
|
|
|
|
" effYPtr=0x" + std::to_string(reinterpret_cast<uintptr_t>(effYPlane)) +
|
|
|
|
|
|
" hasCpuFallback=" + std::to_string(gpuData->cpuYPlane != nullptr),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
cv::cuda::Stream stream;
|
|
|
|
|
|
cv::cuda::GpuMat gpuY, gpuUV;
|
|
|
|
|
|
|
|
|
|
|
|
if (useZeroCopy) {
|
|
|
|
|
|
// ── CUDA zero-copy: wrap NVDEC device pointers directly ──
|
|
|
|
|
|
// No memcpy, no device-to-device copy — data stays in NVDEC VRAM.
|
|
|
|
|
|
// The fused letterbox kernel samples only ~409K pixels from the 4K
|
|
|
|
|
|
// source (vs 8.3M full copy), completing in <1ms on RTX 5080.
|
|
|
|
|
|
// We hold the registry lock until the kernel finishes reading.
|
|
|
|
|
|
gpuY = cv::cuda::GpuMat(frameH, frameW, CV_8UC1,
|
|
|
|
|
|
effYPlane, static_cast<size_t>(effYLinesize));
|
|
|
|
|
|
gpuUV = cv::cuda::GpuMat(frameH / 2, frameW, CV_8UC1,
|
|
|
|
|
|
effUvPlane, static_cast<size_t>(effUvLinesize));
|
|
|
|
|
|
// Lock released after kernel completion (stream.waitForCompletion below)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// ── CPU path: memcpy + upload (fallback for D3D11VA / sw decode) ──
|
|
|
|
|
|
// Hold registry lock during memcpy so the AVFrame can't be freed
|
|
|
|
|
|
// by another thread calling gpu_frame_attach() on the same key.
|
|
|
|
|
|
const size_t ySize = static_cast<size_t>(frameW) * frameH;
|
|
|
|
|
|
const size_t uvSize = static_cast<size_t>(frameW) * frameH / 2;
|
|
|
|
|
|
const size_t nv12Size = ySize + uvSize;
|
|
|
|
|
|
ensurePinnedBuffer(nv12Size);
|
|
|
|
|
|
if (!m_pinnedBuf) {
|
|
|
|
|
|
if (!m_nv12PinnedLogged) {
|
|
|
|
|
|
m_nv12PinnedLogged = true;
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Early exit: pinned buffer alloc failed for " +
|
|
|
|
|
|
std::to_string(nv12Size) + " bytes",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Validate NV12 plane pointers before memcpy
|
|
|
|
|
|
const size_t yBufNeeded = (effYLinesize == frameW)
|
|
|
|
|
|
? ySize
|
|
|
|
|
|
: static_cast<size_t>(effYLinesize) * frameH;
|
|
|
|
|
|
const size_t uvBufNeeded = (effUvLinesize == frameW)
|
|
|
|
|
|
? uvSize
|
|
|
|
|
|
: static_cast<size_t>(effUvLinesize) * (frameH / 2);
|
|
|
|
|
|
|
|
|
|
|
|
if (!isMemoryReadable(effYPlane, std::min(yBufNeeded, (size_t)4096)) ||
|
|
|
|
|
|
!isMemoryReadable(effUvPlane, std::min(uvBufNeeded, (size_t)4096))) {
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"NV12 plane pointers not readable! yPlane=0x" +
|
|
|
|
|
|
std::to_string(reinterpret_cast<uintptr_t>(effYPlane)) +
|
|
|
|
|
|
" uvPlane=0x" +
|
|
|
|
|
|
std::to_string(reinterpret_cast<uintptr_t>(effUvPlane)) +
|
|
|
|
|
|
" yLinesize=" + std::to_string(effYLinesize) +
|
|
|
|
|
|
" uvLinesize=" + std::to_string(effUvLinesize) +
|
|
|
|
|
|
" w=" + std::to_string(frameW) + " h=" + std::to_string(frameH),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
if (regLock.owns_lock()) regLock.unlock();
|
|
|
|
|
|
return {}; // fall back to BGR
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
uint8_t* dst = static_cast<uint8_t*>(m_pinnedBuf);
|
|
|
|
|
|
bool cpyOk = true;
|
|
|
|
|
|
if (effYLinesize == frameW) {
|
|
|
|
|
|
cpyOk = safeMemcpy(dst, effYPlane, ySize);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
for (int row = 0; row < frameH && cpyOk; row++)
|
|
|
|
|
|
cpyOk = safeMemcpy(dst + row * frameW,
|
|
|
|
|
|
effYPlane + row * effYLinesize, frameW);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (cpyOk) {
|
|
|
|
|
|
uint8_t* uvDst = dst + ySize;
|
|
|
|
|
|
if (effUvLinesize == frameW) {
|
|
|
|
|
|
cpyOk = safeMemcpy(uvDst, effUvPlane, uvSize);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
for (int row = 0; row < frameH / 2 && cpyOk; row++)
|
|
|
|
|
|
cpyOk = safeMemcpy(uvDst + row * frameW,
|
|
|
|
|
|
effUvPlane + row * effUvLinesize, frameW);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!cpyOk) {
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Access violation during NV12 memcpy! Falling back to BGR. "
|
|
|
|
|
|
"yPlane=0x" + std::to_string(reinterpret_cast<uintptr_t>(effYPlane)) +
|
|
|
|
|
|
" uvPlane=0x" + std::to_string(reinterpret_cast<uintptr_t>(effUvPlane)) +
|
|
|
|
|
|
" yLinesize=" + std::to_string(effYLinesize) +
|
|
|
|
|
|
" uvLinesize=" + std::to_string(effUvLinesize) +
|
|
|
|
|
|
" w=" + std::to_string(frameW) + " h=" + std::to_string(frameH) +
|
|
|
|
|
|
" avframe=0x" + std::to_string(reinterpret_cast<uintptr_t>(gpuData->avframe)),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
if (regLock.owns_lock()) regLock.unlock();
|
|
|
|
|
|
return {}; // fall back to BGR
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// NV12 data safely in pinned memory — release registry lock.
|
|
|
|
|
|
// From here on we only read from m_pinnedBuf, not from gpuData.
|
|
|
|
|
|
if (regLock.owns_lock()) regLock.unlock();
|
|
|
|
|
|
|
|
|
|
|
|
cv::Mat pinnedY(frameH, frameW, CV_8UC1, m_pinnedBuf);
|
|
|
|
|
|
cv::Mat pinnedUV(frameH / 2, frameW, CV_8UC1,
|
|
|
|
|
|
static_cast<uint8_t*>(m_pinnedBuf) + ySize);
|
|
|
|
|
|
gpuY.upload(pinnedY, stream);
|
|
|
|
|
|
gpuUV.upload(pinnedUV, stream);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Use display dimensions for coordinate mapping so postprocessed
|
|
|
|
|
|
// bboxes map to the display image (1080p), not the NV12 source (4K).
|
|
|
|
|
|
const float metaW = (displayW > 0) ? static_cast<float>(displayW) : static_cast<float>(frameW);
|
|
|
|
|
|
const float metaH = (displayH > 0) ? static_cast<float>(displayH) : static_cast<float>(frameH);
|
|
|
|
|
|
outMeta.imgWidth = metaW;
|
|
|
|
|
|
outMeta.imgHeight = metaH;
|
|
|
|
|
|
|
|
|
|
|
|
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
|
|
|
|
|
outMeta.ratio = 1.f / std::min(
|
|
|
|
|
|
inputDims[0].d[2] / metaW,
|
|
|
|
|
|
inputDims[0].d[1] / metaH);
|
|
|
|
|
|
|
|
|
|
|
|
const auto& outputDims = m_trtEngine->getOutputDims();
|
|
|
|
|
|
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
|
|
|
|
|
|
|
|
|
|
|
cudaStream_t rawStream = cv::cuda::StreamAccessor::getStream(stream);
|
|
|
|
|
|
cv::cuda::GpuMat gpuResized;
|
|
|
|
|
|
|
|
|
|
|
|
if (isClassification) {
|
|
|
|
|
|
// Classification: NV12→RGB at full resolution, then simple resize
|
|
|
|
|
|
cv::cuda::GpuMat gpuRGB(frameH, frameW, CV_8UC3);
|
|
|
|
|
|
launchNV12ToRGB(
|
|
|
|
|
|
gpuY.ptr<uint8_t>(), static_cast<int>(gpuY.step),
|
|
|
|
|
|
gpuUV.ptr<uint8_t>(), static_cast<int>(gpuUV.step),
|
|
|
|
|
|
gpuRGB.ptr<uint8_t>(), static_cast<int>(gpuRGB.step),
|
|
|
|
|
|
frameW, frameH, rawStream);
|
|
|
|
|
|
cv::cuda::resize(gpuRGB, gpuResized, cv::Size(inputW, inputH),
|
|
|
|
|
|
0, 0, cv::INTER_LINEAR, stream);
|
|
|
|
|
|
} else if (frameW == inputW && frameH == inputH) {
|
|
|
|
|
|
// Source matches model input — direct NV12→RGB, no resize needed
|
|
|
|
|
|
gpuResized.create(inputH, inputW, CV_8UC3);
|
|
|
|
|
|
launchNV12ToRGB(
|
|
|
|
|
|
gpuY.ptr<uint8_t>(), static_cast<int>(gpuY.step),
|
|
|
|
|
|
gpuUV.ptr<uint8_t>(), static_cast<int>(gpuUV.step),
|
|
|
|
|
|
gpuResized.ptr<uint8_t>(), static_cast<int>(gpuResized.step),
|
|
|
|
|
|
frameW, frameH, rawStream);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Detection: fused NV12→RGB + letterbox in a SINGLE kernel at
|
|
|
|
|
|
// output resolution (e.g. 640×640). This avoids the 24MB 4K RGB
|
|
|
|
|
|
// intermediate and processes 20× fewer pixels than separate
|
|
|
|
|
|
// convert + resize for 4K→640 downscale.
|
|
|
|
|
|
float r = std::min(static_cast<float>(inputW) / frameW,
|
|
|
|
|
|
static_cast<float>(inputH) / frameH);
|
|
|
|
|
|
int unpadW = static_cast<int>(r * frameW);
|
|
|
|
|
|
int unpadH = static_cast<int>(r * frameH);
|
|
|
|
|
|
float invScale = 1.0f / r; // maps output coords → source coords
|
|
|
|
|
|
|
|
|
|
|
|
gpuResized.create(inputH, inputW, CV_8UC3);
|
|
|
|
|
|
launchNV12ToRGBLetterbox(
|
|
|
|
|
|
gpuY.ptr<uint8_t>(), static_cast<int>(gpuY.step),
|
|
|
|
|
|
gpuUV.ptr<uint8_t>(), static_cast<int>(gpuUV.step),
|
|
|
|
|
|
gpuResized.ptr<uint8_t>(), static_cast<int>(gpuResized.step),
|
|
|
|
|
|
inputW, inputH,
|
|
|
|
|
|
frameW, frameH,
|
|
|
|
|
|
unpadW, unpadH,
|
|
|
|
|
|
invScale, rawStream);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
stream.waitForCompletion();
|
|
|
|
|
|
|
|
|
|
|
|
// Release registry lock now that kernel is done reading NVDEC pointers
|
|
|
|
|
|
if (regLock.owns_lock()) regLock.unlock();
|
|
|
|
|
|
|
|
|
|
|
|
// Log NV12 fast-path usage once per instance
|
|
|
|
|
|
if (!m_nv12ActiveLogged) {
|
|
|
|
|
|
m_nv12ActiveLogged = true;
|
|
|
|
|
|
const char* mode = useZeroCopy ? "CUDA zero-copy" : "CPU upload";
|
|
|
|
|
|
const char* kernel = isClassification ? "separate" : "FUSED letterbox";
|
|
|
|
|
|
_logger.LogInfo("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
std::string(mode) + " ACTIVE (" + kernel + "): " +
|
|
|
|
|
|
std::to_string(frameW) + "x" + std::to_string(frameH) +
|
|
|
|
|
|
" NV12 -> " + std::to_string(inputW) + "x" + std::to_string(inputH) +
|
|
|
|
|
|
" display=" + std::to_string(displayW) + "x" + std::to_string(displayH),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
|
|
|
|
|
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
|
|
|
|
|
return inputs;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!m_nv12MetaLogged) {
|
|
|
|
|
|
m_nv12MetaLogged = true;
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
"Early exit: metadata dims invalid — metaW=" +
|
|
|
|
|
|
std::to_string(outMeta.imgWidth) + " metaH=" +
|
|
|
|
|
|
std::to_string(outMeta.imgHeight) +
|
|
|
|
|
|
" displayW=" + std::to_string(displayW) +
|
|
|
|
|
|
" displayH=" + std::to_string(displayH),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::PreprocessFromNV12",
|
|
|
|
|
|
std::string("NV12 fast path failed, falling back to BGR: ") + e.what(),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif // PreprocessFromNV12 moved to NV12PreprocessHelper
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// GPU Preprocessing — batch
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<std::vector<cv::cuda::GpuMat>> ANSRTYOLO::PreprocessBatch(
    const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata) {
    // Uploads a batch of host BGR (or grayscale) images to the GPU, converts
    // each to RGB and resizes to the model input size (plain resize for
    // classification heads, aspect-preserving letterbox for detection).
    //
    // inputImages : host images; 1-channel images are expanded to BGR first.
    // outMetadata : receives per-image width/height and the scale ratio the
    //               postprocessors use to map boxes back to image coordinates.
    // Returns     : { batch } with one GpuMat per image, or {} on any failure.
    if (!_licenseValid) {
        _logger.LogError("ANSRTYOLO::PreprocessBatch", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (inputImages.empty()) {
        _logger.LogError("ANSRTYOLO::PreprocessBatch", "Empty input images vector", __FILE__, __LINE__);
        return {};
    }
    if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) return {};
    try {
        const auto& inputDims = m_trtEngine->getInputDims();
        if (inputDims.empty()) {
            _logger.LogError("ANSRTYOLO::PreprocessBatch", "No input dimensions available", __FILE__, __LINE__);
            return {};
        }
        const int inputH = inputDims[0].d[1];
        const int inputW = inputDims[0].d[2];
        if (inputH <= 0 || inputW <= 0) {
            _logger.LogError("ANSRTYOLO::PreprocessBatch", "Invalid model input dimensions", __FILE__, __LINE__);
            return {};
        }

        // Hoisted out of the per-image loop: the output shape is a property
        // of the engine, not of any individual image.
        const auto& outputDims = m_trtEngine->getOutputDims();
        const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;

        outMetadata.imgHeights.resize(inputImages.size());
        outMetadata.imgWidths.resize(inputImages.size());
        outMetadata.ratios.resize(inputImages.size());

        std::vector<cv::cuda::GpuMat> batchProcessed;
        batchProcessed.reserve(inputImages.size());
        cv::cuda::Stream stream;

        // Fix: upload(..., stream) is asynchronous, so the host source buffer
        // must remain valid until stream.waitForCompletion(). The grayscale
        // conversion result was previously a loop-local temporary destroyed at
        // the end of each iteration; stage it here so it outlives the copies.
        std::vector<cv::Mat> gray2bgrStaging;
        gray2bgrStaging.reserve(inputImages.size());

        for (size_t i = 0; i < inputImages.size(); ++i) {
            const auto& inputImage = inputImages[i];
            if (inputImage.empty()) {
                _logger.LogError("ANSRTYOLO::PreprocessBatch",
                    "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
                return {};
            }
            cv::cuda::GpuMat img;
            if (inputImage.channels() == 1) {
                gray2bgrStaging.emplace_back();
                cv::cvtColor(inputImage, gray2bgrStaging.back(), cv::COLOR_GRAY2BGR);
                img.upload(gray2bgrStaging.back(), stream);
            }
            else if (inputImage.channels() == 3) {
                img.upload(inputImage, stream);
            }
            else {
                _logger.LogError("ANSRTYOLO::PreprocessBatch",
                    "Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__);
                return {};
            }

            cv::cuda::GpuMat imgRGB;
            cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);

            outMetadata.imgHeights[i] = imgRGB.rows;
            outMetadata.imgWidths[i] = imgRGB.cols;
            if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
                _logger.LogError("ANSRTYOLO::PreprocessBatch",
                    "Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__);
                return {};
            }

            // Letterbox preserves aspect ratio, so the inverse of the smaller
            // scale maps model-space coordinates back to the source image.
            const float scaleW = inputW / static_cast<float>(imgRGB.cols);
            const float scaleH = inputH / static_cast<float>(imgRGB.rows);
            outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);

            cv::cuda::GpuMat resized;
            if (imgRGB.rows != inputH || imgRGB.cols != inputW) {
                if (isClassification) {
                    cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream);
                } else {
                    resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
                }
            }
            else {
                // Already at model resolution — no resize needed.
                resized = imgRGB;
            }

            batchProcessed.push_back(std::move(resized));
        }
        // Ensure all async uploads/conversions finished before the staging
        // buffers (and caller-owned inputImages) may go away.
        stream.waitForCompletion();

        std::vector<std::vector<cv::cuda::GpuMat>> inputs;
        inputs.push_back(std::move(batchProcessed));
        return inputs;
    }
    catch (const std::exception& e) {
        _logger.LogWarn("ANSRTYOLO::PreprocessBatch", std::string("Skipped batch: ") + e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// OBB NMS helpers (Prob-IoU based) — static methods
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
void ANSRTYOLO::getCovarianceComponents(const OrientedBox& box,
|
|
|
|
|
|
float& out1, float& out2, float& out3) {
|
|
|
|
|
|
if (box.width <= 0.f || box.height <= 0.f) {
|
|
|
|
|
|
out1 = out2 = out3 = 0.f;
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
const float vw = (box.width * box.width) / 12.0f;
|
|
|
|
|
|
const float vh = (box.height * box.height) / 12.0f;
|
|
|
|
|
|
const float cosT = std::cos(box.angle);
|
|
|
|
|
|
const float sinT = std::sin(box.angle);
|
|
|
|
|
|
const float cos2 = cosT * cosT;
|
|
|
|
|
|
const float sin2 = sinT * sinT;
|
|
|
|
|
|
const float sc = sinT * cosT;
|
|
|
|
|
|
out1 = vw * cos2 + vh * sin2;
|
|
|
|
|
|
out2 = vw * sin2 + vh * cos2;
|
|
|
|
|
|
out3 = (vw - vh) * sc;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<float>> ANSRTYOLO::batchProbiou(
|
|
|
|
|
|
const std::vector<OrientedBox>& obb1,
|
|
|
|
|
|
const std::vector<OrientedBox>& obb2, float eps) {
|
|
|
|
|
|
if (obb1.empty() || obb2.empty()) return {};
|
|
|
|
|
|
const size_t n1 = obb1.size(), n2 = obb2.size();
|
|
|
|
|
|
std::vector<std::vector<float>> iouMat(n1, std::vector<float>(n2, 0.f));
|
|
|
|
|
|
|
|
|
|
|
|
struct CovData { float x, y, a, b, c; };
|
|
|
|
|
|
std::vector<CovData> cov1(n1);
|
|
|
|
|
|
for (size_t i = 0; i < n1; ++i) {
|
|
|
|
|
|
float a, b, c;
|
|
|
|
|
|
getCovarianceComponents(obb1[i], a, b, c);
|
|
|
|
|
|
cov1[i] = { obb1[i].x, obb1[i].y, a, b, c };
|
|
|
|
|
|
}
|
|
|
|
|
|
for (size_t i = 0; i < n1; ++i) {
|
|
|
|
|
|
for (size_t j = 0; j < n2; ++j) {
|
|
|
|
|
|
float a2, b2, c2;
|
|
|
|
|
|
getCovarianceComponents(obb2[j], a2, b2, c2);
|
|
|
|
|
|
float dx = cov1[i].x - obb2[j].x;
|
|
|
|
|
|
float dy = cov1[i].y - obb2[j].y;
|
|
|
|
|
|
float sA = cov1[i].a + a2, sB = cov1[i].b + b2, sC = cov1[i].c + c2;
|
|
|
|
|
|
float denom = sA * sB - sC * sC + eps;
|
|
|
|
|
|
if (denom <= eps) continue;
|
|
|
|
|
|
float t1 = ((sA*dy*dy + sB*dx*dx) * 0.25f) / denom;
|
|
|
|
|
|
float t2 = ((sC*dx*dy) * -0.5f) / denom;
|
|
|
|
|
|
float d1 = cov1[i].a*cov1[i].b - cov1[i].c*cov1[i].c;
|
|
|
|
|
|
float d2 = a2*b2 - c2*c2;
|
|
|
|
|
|
float sqrtDet = std::sqrt(std::max(d1, 0.f) * std::max(d2, 0.f) + eps);
|
|
|
|
|
|
float t3 = 0.5f * std::log((sA*sB - sC*sC) / (4.f*sqrtDet) + eps);
|
|
|
|
|
|
float bd = std::clamp(t1 + t2 + t3, eps, 100.f);
|
|
|
|
|
|
float hd = std::sqrt(1.f - std::exp(-bd) + eps);
|
|
|
|
|
|
iouMat[i][j] = 1.f - hd;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return iouMat;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<int> ANSRTYOLO::nmsRotatedImpl(
|
|
|
|
|
|
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold) {
|
|
|
|
|
|
if (sortedBoxes.empty()) return {};
|
|
|
|
|
|
if (sortedBoxes.size() == 1) return { 0 };
|
|
|
|
|
|
auto iouMat = batchProbiou(sortedBoxes, sortedBoxes);
|
|
|
|
|
|
if (iouMat.empty()) return {};
|
|
|
|
|
|
const int n = static_cast<int>(sortedBoxes.size());
|
|
|
|
|
|
std::vector<int> keep;
|
|
|
|
|
|
keep.reserve(n / 2);
|
|
|
|
|
|
for (int j = 0; j < n; ++j) {
|
|
|
|
|
|
bool shouldKeep = true;
|
|
|
|
|
|
for (int i = 0; i < j; ++i) {
|
|
|
|
|
|
if (iouMat[i][j] >= iouThreshold) { shouldKeep = false; break; }
|
|
|
|
|
|
}
|
|
|
|
|
|
if (shouldKeep) keep.push_back(j);
|
|
|
|
|
|
}
|
|
|
|
|
|
return keep;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<int> ANSRTYOLO::nmsRotated(
|
|
|
|
|
|
const std::vector<OrientedBox>& boxes,
|
|
|
|
|
|
const std::vector<float>& scores, float iouThreshold) {
|
|
|
|
|
|
if (boxes.empty() || scores.empty() || boxes.size() != scores.size()) return {};
|
|
|
|
|
|
std::vector<int> sortedIdx(boxes.size());
|
|
|
|
|
|
std::iota(sortedIdx.begin(), sortedIdx.end(), 0);
|
|
|
|
|
|
std::sort(sortedIdx.begin(), sortedIdx.end(),
|
|
|
|
|
|
[&](int a, int b) { return scores[a] > scores[b]; });
|
|
|
|
|
|
std::vector<OrientedBox> sortedBoxes;
|
|
|
|
|
|
sortedBoxes.reserve(boxes.size());
|
|
|
|
|
|
for (int i : sortedIdx) sortedBoxes.push_back(boxes[i]);
|
|
|
|
|
|
auto keepSorted = nmsRotatedImpl(sortedBoxes, iouThreshold);
|
|
|
|
|
|
std::vector<int> keepOrig;
|
|
|
|
|
|
keepOrig.reserve(keepSorted.size());
|
|
|
|
|
|
for (int si : keepSorted) keepOrig.push_back(sortedIdx[si]);
|
|
|
|
|
|
return keepOrig;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<cv::Point2f> ANSRTYOLO::OBBToPoints(const OrientedBox& obb) {
|
|
|
|
|
|
float angleDeg = obb.angle * 180.0f / static_cast<float>(CV_PI);
|
|
|
|
|
|
cv::RotatedRect rr(cv::Point2f(obb.x, obb.y),
|
|
|
|
|
|
cv::Size2f(obb.width, obb.height), angleDeg);
|
|
|
|
|
|
std::vector<cv::Point2f> corners(4);
|
|
|
|
|
|
rr.points(corners.data());
|
|
|
|
|
|
return corners;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Detection — legacy postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessDetection(
    std::vector<float>& featureVector,
    const std::string& camera_id, const ImageMetadata& meta) {
    // Decodes a raw (non-end2end) YOLO detection tensor of shape
    // [1, 4+numClasses, numAnchors] into Objects in display-image
    // coordinates, then applies class-batched NMS on the CPU.
    //
    // featureVector: flattened engine output; non-const because cv::Mat
    //                wraps the buffer in place (no copy).
    // camera_id    : stamped onto each resulting Object.
    // meta         : letterbox ratio plus image width/height used for
    //                scaling and clamping the decoded boxes.
    // Returns      : detections surviving NMS; empty vector on error.
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];
        // Derive numClasses from tensor shape (4 box coords subtracted)
        // rather than _classes.size() which may not match the model
        auto numClasses = static_cast<size_t>(numChannels - 4);
        if (!_classes.empty() && _classes.size() <= static_cast<size_t>(numChannels - 4))
            numClasses = _classes.size();

        std::vector<cv::Rect> bboxes;
        std::vector<float> scores;
        std::vector<int> labels;
        std::vector<int> indices;

        // Wrap the flat buffer as [channels x anchors] without copying, then
        // transpose so each row holds one anchor: cx, cy, w, h, class scores.
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data());
        output = output.t();

        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr<float>();
            auto bboxesPtr = rowPtr;        // first 4 floats: cx, cy, w, h
            auto scoresPtr = rowPtr + 4;    // then per-class confidences
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > _modelConfig.detectionScoreThreshold) {
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                // Center/size -> corner form, scaled by meta.ratio into the
                // display image and clamped to its bounds.
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                // Winning class index relative to the scores sub-array.
                int label = static_cast<int>(maxSPtr - scoresPtr);
                cv::Rect_<float> bbox;
                bbox.x = x0; bbox.y = y0;
                bbox.width = x1 - x0; bbox.height = y1 - y0;
                // Re-clamp width/height so the rect stays inside the image
                // even after the corner clamping above.
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
            }
        }

        // Per-class NMS; note it uses the fixed PROBABILITY_THRESHOLD while
        // candidate collection above used the configurable model threshold.
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector<Object> objects;
        int classNameSize = static_cast<int>(_classes.size());
        for (auto& chosenIdx : indices) {
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                obj.box = bboxes[chosenIdx];
                //obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                // Map class id to a readable name; ids past the end of the
                // table fall back to the last entry (legacy behavior).
                if (!_classes.empty()) {
                    obj.className = (obj.classId < classNameSize)
                        ? _classes[obj.classId] : _classes[classNameSize - 1];
                }
                else { obj.className = "Unknown"; }
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        return objects;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::PostprocessDetection", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Detection — end2end postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessDetectionE2E(
|
|
|
|
|
|
std::vector<float>& featureVector,
|
|
|
|
|
|
const std::string& camera_id, const ImageMetadata& meta) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
const auto& outputDims = m_trtEngine->getOutputDims();
|
|
|
|
|
|
int numDets = outputDims[0].d[1];
|
|
|
|
|
|
int numFeat = outputDims[0].d[2]; // 6: x1,y1,x2,y2,conf,classId
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<Object> results;
|
|
|
|
|
|
results.reserve(numDets);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < numDets; ++i) {
|
|
|
|
|
|
const float* det = featureVector.data() + i * numFeat;
|
|
|
|
|
|
float conf = det[4];
|
|
|
|
|
|
if (conf <= _modelConfig.detectionScoreThreshold) continue;
|
|
|
|
|
|
|
|
|
|
|
|
int classId = static_cast<int>(det[5]);
|
|
|
|
|
|
// Scale from model input space to original image
|
|
|
|
|
|
float x0 = std::clamp(det[0] * meta.ratio, 0.f, meta.imgWidth);
|
|
|
|
|
|
float y0 = std::clamp(det[1] * meta.ratio, 0.f, meta.imgHeight);
|
|
|
|
|
|
float x1 = std::clamp(det[2] * meta.ratio, 0.f, meta.imgWidth);
|
|
|
|
|
|
float y1 = std::clamp(det[3] * meta.ratio, 0.f, meta.imgHeight);
|
|
|
|
|
|
float w = x1 - x0, h = y1 - y0;
|
|
|
|
|
|
if (w < 1.f || h < 1.f) continue;
|
|
|
|
|
|
|
|
|
|
|
|
Object obj;
|
|
|
|
|
|
obj.classId = classId;
|
|
|
|
|
|
obj.confidence = conf;
|
|
|
|
|
|
obj.box = cv::Rect(static_cast<int>(x0), static_cast<int>(y0),
|
|
|
|
|
|
static_cast<int>(w), static_cast<int>(h));
|
|
|
|
|
|
//obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
|
|
|
|
|
|
int classNameSize = static_cast<int>(_classes.size());
|
|
|
|
|
|
if (!_classes.empty() && classId >= 0 && classId < classNameSize)
|
|
|
|
|
|
obj.className = _classes[classId];
|
|
|
|
|
|
obj.cameraId = camera_id;
|
|
|
|
|
|
results.push_back(std::move(obj));
|
|
|
|
|
|
}
|
|
|
|
|
|
return results;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::PostprocessDetectionE2E", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// OBB — legacy postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessOBB(
    std::vector<float>& featureVector,
    const std::string& camera_id, const ImageMetadata& meta) {
    // Decodes a raw oriented-bounding-box tensor of shape
    // [1, 4+numClasses+1, numAnchors] (4 box coords, per-class scores, angle)
    // into Objects, applying Prob-IoU rotated NMS and a TOP_K cap.
    //
    // featureVector: flattened engine output; non-const because cv::Mat
    //                wraps the buffer in place (no copy).
    // camera_id    : stamped onto each resulting Object.
    // meta         : letterbox ratio plus image width/height used for
    //                scaling, clamping, and polygon normalization.
    // Returns      : surviving OBB detections; empty vector on error.
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        int numChannels = outputDims[0].d[1];
        int numAnchors = outputDims[0].d[2];
        int numClasses = numChannels - 5; // 4 box + nc scores + 1 angle
        if (numClasses <= 0) return {};

        // Wrap as [channels x anchors] without copying, transpose so each row
        // is one anchor: cx, cy, w, h, scores..., angle.
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data()).t();

        // One score-thresholded anchor prior to NMS.
        struct OBBCandidate {
            OrientedBox box;   // center/size in image space, angle in radians
            float conf;        // best class score
            int classId;       // index of the best class
        };
        std::vector<OBBCandidate> candidates;
        candidates.reserve(numAnchors);

        for (int i = 0; i < numAnchors; ++i) {
            const float* row = output.ptr<float>(i);
            const float* scoresPtr = row + 4;
            // Manual argmax over the per-class scores.
            float maxScore = -FLT_MAX;
            int bestClass = -1;
            for (int c = 0; c < numClasses; ++c) {
                if (scoresPtr[c] > maxScore) { maxScore = scoresPtr[c]; bestClass = c; }
            }
            if (maxScore <= _modelConfig.detectionScoreThreshold) continue;

            // Angle sits after the class scores; box params are scaled into
            // image space by the letterbox ratio. Only the center is clamped;
            // width/height keep their scaled values.
            float angle = row[4 + numClasses];
            float cx = row[0] * meta.ratio;
            float cy = row[1] * meta.ratio;
            float bw = row[2] * meta.ratio;
            float bh = row[3] * meta.ratio;
            cx = std::clamp(cx, 0.f, meta.imgWidth);
            cy = std::clamp(cy, 0.f, meta.imgHeight);

            candidates.push_back({ { cx, cy, bw, bh, angle }, maxScore, bestClass });
        }
        if (candidates.empty()) return {};

        // Prob-IoU NMS
        std::vector<OrientedBox> boxes;
        std::vector<float> scores;
        boxes.reserve(candidates.size());
        scores.reserve(candidates.size());
        for (const auto& c : candidates) { boxes.push_back(c.box); scores.push_back(c.conf); }

        // keepIdx holds indices into `candidates`, highest scores first.
        auto keepIdx = nmsRotated(boxes, scores, NMS_THRESHOLD);

        std::vector<Object> results;
        int classNameSize = static_cast<int>(_classes.size());
        results.reserve(std::min(static_cast<int>(keepIdx.size()), TOP_K));
        for (int idx : keepIdx) {
            if (static_cast<int>(results.size()) >= TOP_K) break;
            const auto& c = candidates[idx];
            Object obj;
            obj.classId = c.classId;
            obj.confidence = c.conf;
            // kps carries the raw OBB parameters (cx, cy, w, h, angle).
            obj.kps = { c.box.x, c.box.y, c.box.width, c.box.height, c.box.angle };
            auto absCorners = OBBToPoints(c.box);
            // Axis-aligned bounding rect of the rotated corners.
            obj.box = cv::boundingRect(absCorners);
            // Normalize OBB corners to [0,1] and close the polygon
            obj.polygon.reserve(absCorners.size() + 1);
            for (const auto& pt : absCorners) {
                obj.polygon.emplace_back(
                    std::clamp(pt.x / meta.imgWidth, 0.f, 1.f),
                    std::clamp(pt.y / meta.imgHeight, 0.f, 1.f));
            }
            if (!obj.polygon.empty()) obj.polygon.push_back(obj.polygon.front()); // close
            // Out-of-range class ids leave className empty (unlike the
            // legacy detection path which falls back to the last entry).
            if (!_classes.empty() && c.classId >= 0 && c.classId < classNameSize)
                obj.className = _classes[c.classId];
            obj.cameraId = camera_id;
            results.push_back(std::move(obj));
        }
        return results;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::PostprocessOBB", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// OBB — end2end postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessOBBE2E(
|
|
|
|
|
|
std::vector<float>& featureVector,
|
|
|
|
|
|
const std::string& camera_id, const ImageMetadata& meta) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
const auto& outputDims = m_trtEngine->getOutputDims();
|
|
|
|
|
|
int numDets = outputDims[0].d[1];
|
|
|
|
|
|
int numFeat = outputDims[0].d[2]; // 7: cx,cy,w,h,angle,conf,classId
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<Object> results;
|
|
|
|
|
|
results.reserve(numDets);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < numDets; ++i) {
|
|
|
|
|
|
const float* det = featureVector.data() + i * numFeat;
|
|
|
|
|
|
float angle = det[4];
|
|
|
|
|
|
float conf = det[5];
|
|
|
|
|
|
if (conf <= _modelConfig.detectionScoreThreshold) continue;
|
|
|
|
|
|
|
|
|
|
|
|
float cx = det[0] * meta.ratio;
|
|
|
|
|
|
float cy = det[1] * meta.ratio;
|
|
|
|
|
|
float bw = det[2] * meta.ratio;
|
|
|
|
|
|
float bh = det[3] * meta.ratio;
|
|
|
|
|
|
int classId = static_cast<int>(det[6]);
|
|
|
|
|
|
|
|
|
|
|
|
cx = std::clamp(cx, 0.f, meta.imgWidth);
|
|
|
|
|
|
cy = std::clamp(cy, 0.f, meta.imgHeight);
|
|
|
|
|
|
|
|
|
|
|
|
OrientedBox obb{ cx, cy, bw, bh, angle };
|
|
|
|
|
|
|
|
|
|
|
|
Object obj;
|
|
|
|
|
|
obj.classId = classId;
|
|
|
|
|
|
obj.confidence = conf;
|
|
|
|
|
|
obj.kps = { cx, cy, bw, bh, angle };
|
|
|
|
|
|
auto absCorners = OBBToPoints(obb);
|
|
|
|
|
|
obj.box = cv::boundingRect(absCorners);
|
|
|
|
|
|
// Normalize OBB corners to [0,1] and close the polygon
|
|
|
|
|
|
obj.polygon.reserve(absCorners.size() + 1);
|
|
|
|
|
|
for (const auto& pt : absCorners) {
|
|
|
|
|
|
obj.polygon.emplace_back(
|
|
|
|
|
|
std::clamp(pt.x / meta.imgWidth, 0.f, 1.f),
|
|
|
|
|
|
std::clamp(pt.y / meta.imgHeight, 0.f, 1.f));
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!obj.polygon.empty()) obj.polygon.push_back(obj.polygon.front()); // close
|
|
|
|
|
|
int classNameSize = static_cast<int>(_classes.size());
|
|
|
|
|
|
if (!_classes.empty() && classId >= 0 && classId < classNameSize)
|
|
|
|
|
|
obj.className = _classes[classId];
|
|
|
|
|
|
obj.cameraId = camera_id;
|
|
|
|
|
|
results.push_back(std::move(obj));
|
|
|
|
|
|
}
|
|
|
|
|
|
return results;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::PostprocessOBBE2E", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Segmentation — legacy postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Decode a legacy (non-end2end) YOLO segmentation head.
// featureVectors[0]: detection tensor [numChannels x numAnchors] where
//   numChannels = 4 (box) + numClasses + SEG_CHANNELS (mask coefficients).
// featureVectors[1]: mask prototypes [SEG_CHANNELS x (SEG_H*SEG_W)].
// Returns objects with pixel-space boxes, binary masks cropped to each box,
// and normalized polygons; coordinates are mapped back from the letterboxed
// network input via meta.ratio and clamped to the original image size.
std::vector<Object> ANSRTYOLO::PostprocessSegmentation(
    std::vector<std::vector<float>>& featureVectors,
    const std::string& camera_id, const ImageMetadata& meta) {
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        int numChannels = outputDims[0].d[1];
        int numAnchors = outputDims[0].d[2];
        // Class scores occupy whatever is left after box (4) and mask coeffs.
        const auto numClasses = numChannels - SEG_CHANNELS - 4;

        // Sanity-check buffer sizes before wrapping them in cv::Mat headers
        // (the Mats below borrow the vectors' memory without copying).
        if (featureVectors[0].size() != static_cast<size_t>(numChannels) * numAnchors) return {};
        if (featureVectors[1].size() != static_cast<size_t>(SEG_CHANNELS) * SEG_H * SEG_W) return {};

        // Transpose so each row is one anchor: [x, y, w, h, cls..., maskcoef...].
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVectors[0].data()).t();
        cv::Mat protos = cv::Mat(SEG_CHANNELS, SEG_H * SEG_W, CV_32F, featureVectors[1].data());

        std::vector<int> labels;
        std::vector<float> scores;
        std::vector<cv::Rect> bboxes;
        std::vector<cv::Mat> maskConfs;
        std::vector<int> indices;

        // Pass 1: score-threshold filter + decode cxcywh -> clamped xyxy.
        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr<float>();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            auto maskConfsPtr = rowPtr + 4 + numClasses;
            auto maxSPtr = std::max_element(scoresPtr, scoresPtr + numClasses);
            float score = *maxSPtr;
            if (score > _modelConfig.detectionScoreThreshold) {
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                // meta.ratio rescales network-input coords to original image coords.
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                int label = static_cast<int>(maxSPtr - scoresPtr);
                cv::Rect_<float> bbox;
                bbox.x = x0; bbox.y = y0;
                bbox.width = x1 - x0; bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                // NOTE: this Mat header aliases the row buffer of `output`;
                // it stays valid because `output` outlives `maskConfs` usage.
                cv::Mat maskConf = cv::Mat(1, SEG_CHANNELS, CV_32F, maskConfsPtr);
                bboxes.push_back(bbox);
                labels.push_back(label);
                scores.push_back(score);
                maskConfs.push_back(maskConf);
            }
        }

        // Pass 2: per-class NMS, then build Object entries for survivors.
        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        cv::Mat masks;
        int classNameSize = static_cast<int>(_classes.size());
        std::vector<Object> objs;
        for (auto& i : indices) {
            // This threshold re-check is always true (pass 1 already filtered),
            // so objs stays index-aligned with `indices` / `masks` rows below.
            if (scores[i] > _modelConfig.detectionScoreThreshold) {
                Object obj;
                obj.classId = labels[i];
                if (!_classes.empty()) {
                    // Out-of-range ids fall back to the last known class name.
                    obj.className = (obj.classId < classNameSize)
                        ? _classes[obj.classId] : _classes[classNameSize - 1];
                }
                else { obj.className = "Unknown"; }
                obj.box = bboxes[i];
                obj.confidence = scores[i];
                obj.cameraId = camera_id;
                masks.push_back(maskConfs[i]);
                objs.push_back(obj);
            }
        }

        if (!masks.empty()) {
            // Combine mask coefficients with prototypes:
            // [N x SEG_CHANNELS] * [SEG_CHANNELS x SEG_H*SEG_W] -> one mask per object.
            cv::Mat matmulRes = (masks * protos).t();

            // Apply sigmoid while still a single-channel 2D matrix
            cv::Mat negMat;
            cv::exp(-matmulRes, negMat);
            cv::Mat sigmoidFlat = 1.0 / (1.0 + negMat);

            // Now reshape into multi-channel and split: one SEG_H x SEG_W
            // channel per kept detection, in the same order as `objs`.
            // NOTE(review): channel count uses indices.size(); this matches
            // objs.size() because the re-check above never drops entries.
            cv::Mat sigmoidMat = sigmoidFlat.reshape(static_cast<int>(indices.size()),
                { SEG_H, SEG_W });
            std::vector<cv::Mat> maskChannels;
            cv::split(sigmoidMat, maskChannels);

            // ROI in proto space (SEG_H x SEG_W), accounting for top-left letterbox padding
            // ANSRTYOLO pads right-bottom, so content starts at (0,0) in proto space
            cv::Rect roi;
            if (meta.imgHeight > meta.imgWidth) {
                int roiW = std::min(static_cast<int>(std::round(
                    static_cast<float>(SEG_W) * meta.imgWidth / meta.imgHeight)), SEG_W);
                roi = cv::Rect(0, 0, roiW, SEG_H);
            }
            else {
                int roiH = std::min(static_cast<int>(std::round(
                    static_cast<float>(SEG_H) * meta.imgHeight / meta.imgWidth)), SEG_H);
                roi = cv::Rect(0, 0, SEG_W, roiH);
            }
            roi &= cv::Rect(0, 0, SEG_W, SEG_H);

            int imgW = static_cast<int>(meta.imgWidth);
            int imgH = static_cast<int>(meta.imgHeight);

            // Precompute scale factors from proto-ROI to original image
            const float scaleX = static_cast<float>(imgW) / roi.width;
            const float scaleY = static_cast<float>(imgH) / roi.height;

            for (size_t i = 0; i < objs.size(); i++) {
                cv::Rect safeBox = objs[i].box & cv::Rect(0, 0, imgW, imgH);
                if (safeBox.area() <= 0) continue;

                // Map bounding box back to proto-ROI space and crop there
                // (floor/ceil expand outward so the crop fully covers the box).
                int px0 = std::max(static_cast<int>(std::floor(safeBox.x / scaleX)), 0);
                int py0 = std::max(static_cast<int>(std::floor(safeBox.y / scaleY)), 0);
                int px1 = std::min(static_cast<int>(std::ceil((safeBox.x + safeBox.width) / scaleX)), roi.width);
                int py1 = std::min(static_cast<int>(std::ceil((safeBox.y + safeBox.height) / scaleY)), roi.height);
                if (px1 <= px0 || py1 <= py0) continue;

                cv::Rect protoBox(roi.x + px0, roi.y + py0, px1 - px0, py1 - py0);
                protoBox &= cv::Rect(0, 0, SEG_W, SEG_H);
                if (protoBox.area() <= 0) continue;

                // Resize only the small proto crop to the bounding box size
                cv::Mat cropped = maskChannels[i](protoBox);
                cv::Mat resized;
                cv::resize(cropped, resized, cv::Size(safeBox.width, safeBox.height),
                    0, 0, cv::INTER_LINEAR);
                // Binarize at the configured confidence threshold.
                objs[i].mask = resized > _modelConfig.modelConfThreshold;
                objs[i].polygon = ANSUtilityHelper::MaskToNormalizedPolygon(
                    objs[i].mask, safeBox, meta.imgWidth, meta.imgHeight);
            }
        }
        // Fill polygon for objects that got masks
        // (objects whose mask step was skipped fall back to the box outline).
        for (auto& obj : objs) {
            if (obj.polygon.empty())
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
        }
        return objs;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::PostprocessSegmentation", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Segmentation — end2end postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Decode an end2end (NMS-embedded) YOLO segmentation head.
// featureVectors[0]: detections [numDets x numFeat], each row
//   (x0, y0, x1, y1, score, classId, maskCoeff...nm).
// featureVectors[1]: mask prototypes [nm x protoH x protoW].
// No NMS is run here — the exported engine already performed it.
std::vector<Object> ANSRTYOLO::PostprocessSegE2E(
    std::vector<std::vector<float>>& featureVectors,
    const std::string& camera_id, const ImageMetadata& meta) {
    try {
        if (featureVectors.size() < 2) return {};
        const auto& outputDims = m_trtEngine->getOutputDims();
        int numDets = outputDims[0].d[1];
        int numFeat = outputDims[0].d[2]; // 6 + nm

        // Proto dimensions from second output
        int nm = outputDims[1].d[1];
        int protoH = outputDims[1].d[2];
        // Some exports emit 3-D proto tensors; fall back to a square proto then.
        int protoW = (outputDims[1].nbDims > 3) ? outputDims[1].d[3] : outputDims[1].d[2];
        // Each detection row must at least hold box+score+class+coefficients.
        if (numFeat < 6 + nm) return {};

        const float* raw = featureVectors[0].data();
        std::vector<Object> objs;
        cv::Mat maskCoeffs;

        for (int i = 0; i < numDets; ++i) {
            const float* det = raw + i * numFeat;
            float conf = det[4];
            if (conf <= _modelConfig.detectionScoreThreshold) continue;

            int classId = static_cast<int>(det[5]);
            // Rescale xyxy from network-input space to original image space.
            float x0 = std::clamp(det[0] * meta.ratio, 0.f, meta.imgWidth);
            float y0 = std::clamp(det[1] * meta.ratio, 0.f, meta.imgHeight);
            float x1 = std::clamp(det[2] * meta.ratio, 0.f, meta.imgWidth);
            float y1 = std::clamp(det[3] * meta.ratio, 0.f, meta.imgHeight);
            float w = x1 - x0, h = y1 - y0;
            // Reject degenerate (sub-pixel) boxes.
            if (w < 1.f || h < 1.f) continue;

            Object obj;
            obj.classId = classId;
            obj.confidence = conf;
            obj.box = cv::Rect(static_cast<int>(x0), static_cast<int>(y0),
                static_cast<int>(w), static_cast<int>(h));
            int classNameSize = static_cast<int>(_classes.size());
            if (!_classes.empty() && classId >= 0 && classId < classNameSize)
                obj.className = _classes[classId];
            obj.cameraId = camera_id;
            objs.push_back(std::move(obj));

            // Keep mask coefficients row-aligned with objs.
            cv::Mat mc(1, nm, CV_32F);
            std::memcpy(mc.ptr<float>(), det + 6, nm * sizeof(float));
            maskCoeffs.push_back(mc);
        }

        if (!objs.empty() && !maskCoeffs.empty()) {
            // [N x nm] * [nm x protoH*protoW] -> one flattened mask per object.
            cv::Mat protos(nm, protoH * protoW, CV_32F, featureVectors[1].data());
            cv::Mat matmulRes = (maskCoeffs * protos).t();

            // Apply sigmoid while still a single-channel 2D matrix
            cv::Mat negMat;
            cv::exp(-matmulRes, negMat);
            cv::Mat sigmoidFlat = 1.0 / (1.0 + negMat);

            // Now reshape into multi-channel and split (one channel per object,
            // same order as objs).
            cv::Mat sigmoidMat = sigmoidFlat.reshape(static_cast<int>(objs.size()),
                { protoH, protoW });
            std::vector<cv::Mat> maskChannels;
            cv::split(sigmoidMat, maskChannels);

            // ROI in proto space, accounting for top-left letterbox padding
            // ANSRTYOLO pads right-bottom, so content starts at (0,0) in proto space
            cv::Rect roi;
            if (meta.imgHeight > meta.imgWidth) {
                int roiW = std::min(static_cast<int>(std::round(
                    static_cast<float>(protoW) * meta.imgWidth / meta.imgHeight)), protoW);
                roi = cv::Rect(0, 0, roiW, protoH);
            }
            else {
                int roiH = std::min(static_cast<int>(std::round(
                    static_cast<float>(protoH) * meta.imgHeight / meta.imgWidth)), protoH);
                roi = cv::Rect(0, 0, protoW, roiH);
            }
            roi &= cv::Rect(0, 0, protoW, protoH);

            int imgW = static_cast<int>(meta.imgWidth);
            int imgH = static_cast<int>(meta.imgHeight);

            // Scale factors from proto-ROI space to original image space.
            const float scaleX = static_cast<float>(imgW) / roi.width;
            const float scaleY = static_cast<float>(imgH) / roi.height;

            for (size_t i = 0; i < objs.size(); ++i) {
                cv::Rect safebox = objs[i].box & cv::Rect(0, 0, imgW, imgH);
                if (safebox.area() <= 0) continue;

                // Map the box into proto-ROI space (floor/ceil expand outward).
                int px0 = std::max(static_cast<int>(std::floor(safebox.x / scaleX)), 0);
                int py0 = std::max(static_cast<int>(std::floor(safebox.y / scaleY)), 0);
                int px1 = std::min(static_cast<int>(std::ceil((safebox.x + safebox.width) / scaleX)), roi.width);
                int py1 = std::min(static_cast<int>(std::ceil((safebox.y + safebox.height) / scaleY)), roi.height);
                if (px1 <= px0 || py1 <= py0) continue;

                cv::Rect protoBox(roi.x + px0, roi.y + py0, px1 - px0, py1 - py0);
                protoBox &= cv::Rect(0, 0, protoW, protoH);
                if (protoBox.area() <= 0) continue;

                // Resize only the small proto crop to the bounding-box size.
                cv::Mat cropped = maskChannels[i](protoBox);
                cv::Mat resized;
                cv::resize(cropped, resized, cv::Size(safebox.width, safebox.height),
                    0, 0, cv::INTER_LINEAR);
                // NOTE(review): binarizes at SEGMENTATION_THRESHOLD while the
                // legacy path uses _modelConfig.modelConfThreshold — confirm
                // the two paths are meant to use different thresholds.
                objs[i].mask = resized > SEGMENTATION_THRESHOLD;
                objs[i].polygon = ANSUtilityHelper::MaskToNormalizedPolygon(
                    objs[i].mask, safebox, meta.imgWidth, meta.imgHeight);
            }
        }
        // Objects without a mask-derived polygon fall back to the box outline.
        for (auto& obj : objs) {
            if (obj.polygon.empty())
                obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
        }
        return objs;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::PostprocessSegE2E", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Pose — legacy postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Decode a legacy (non-end2end) YOLO pose head.
// featureVector is a [numChannels x numAnchors] tensor where each anchor
// column is (x, y, w, h, score, kp0x, kp0y, kp0s, ...), i.e. a single-class
// pose model: one objectness/person score at offset 4, keypoints from offset 5.
// Runs class-batched NMS, then emits Objects with pixel-space boxes and
// keypoint triplets (x, y, score) in obj.kps.
std::vector<Object> ANSRTYOLO::PostprocessPose(
    std::vector<float>& featureVector,
    const std::string& camera_id, const ImageMetadata& meta) {
    try {
        const auto& outputDims = m_trtEngine->getOutputDims();
        auto numChannels = outputDims[0].d[1];
        auto numAnchors = outputDims[0].d[2];

        std::vector<cv::Rect> bboxes;
        std::vector<float> scores;
        std::vector<int> labels;
        std::vector<int> indices;
        std::vector<std::vector<float>> kpss;

        // Transpose so each row is one anchor (channel-major -> anchor-major).
        cv::Mat output = cv::Mat(numChannels, numAnchors, CV_32F, featureVector.data()).t();

        for (int i = 0; i < numAnchors; i++) {
            auto rowPtr = output.row(i).ptr<float>();
            auto bboxesPtr = rowPtr;
            auto scoresPtr = rowPtr + 4;
            // NOTE(review): keypoints assumed to start right after the single
            // score (offset 5) — only valid for one-class pose heads; confirm
            // before using with multi-class pose models.
            auto kps_ptr = rowPtr + 5;
            float score = *scoresPtr;
            if (score > _modelConfig.detectionScoreThreshold) {
                float x = *bboxesPtr++;
                float y = *bboxesPtr++;
                float w = *bboxesPtr++;
                float h = *bboxesPtr;
                // Decode cxcywh -> clamped xyxy in original-image coordinates.
                float x0 = std::clamp((x - 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y0 = std::clamp((y - 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                float x1 = std::clamp((x + 0.5f * w) * meta.ratio, 0.f, meta.imgWidth);
                float y1 = std::clamp((y + 0.5f * h) * meta.ratio, 0.f, meta.imgHeight);
                cv::Rect_<float> bbox;
                bbox.x = x0; bbox.y = y0;
                bbox.width = x1 - x0; bbox.height = y1 - y0;
                bbox.x = std::max(0.f, bbox.x);
                bbox.y = std::max(0.f, bbox.y);
                bbox.width = std::min(meta.imgWidth - bbox.x, bbox.width);
                bbox.height = std::min(meta.imgHeight - bbox.y, bbox.height);
                // Collect NUM_KPS keypoints as (x, y, score) triplets; x/y are
                // rescaled and clamped, the score is passed through unchanged.
                std::vector<float> kps;
                for (int k = 0; k < NUM_KPS; k++) {
                    float kpsX = std::clamp(*(kps_ptr + 3 * k) * meta.ratio, 0.f, meta.imgWidth);
                    float kpsY = std::clamp(*(kps_ptr + 3 * k + 1) * meta.ratio, 0.f, meta.imgHeight);
                    float kpsS = *(kps_ptr + 3 * k + 2);
                    kps.push_back(kpsX);
                    kps.push_back(kpsY);
                    kps.push_back(kpsS);
                }
                bboxes.push_back(bbox);
                // Single-class head: every candidate gets label 0.
                labels.push_back(0);
                scores.push_back(score);
                kpss.push_back(kps);
            }
        }

        cv::dnn::NMSBoxesBatched(bboxes, scores, labels, PROBABILITY_THRESHOLD, NMS_THRESHOLD, indices);
        std::vector<Object> objects;
        int classNameSize = static_cast<int>(_classes.size());
        for (auto& chosenIdx : indices) {
            // Always true after the pre-NMS filter; kept as a guard.
            if (scores[chosenIdx] > _modelConfig.detectionScoreThreshold) {
                Object obj{};
                obj.confidence = scores[chosenIdx];
                obj.classId = labels[chosenIdx];
                if (!_classes.empty()) {
                    // Out-of-range ids fall back to the last known class name.
                    obj.className = (obj.classId < classNameSize)
                        ? _classes[obj.classId] : _classes[classNameSize - 1];
                }
                else { obj.className = "Unknown"; }
                obj.box = bboxes[chosenIdx];
                //obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
                obj.kps = kpss[chosenIdx];
                obj.cameraId = camera_id;
                objects.push_back(obj);
            }
        }
        return objects;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::PostprocessPose", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Pose — end2end postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessPoseE2E(
|
|
|
|
|
|
std::vector<float>& featureVector,
|
|
|
|
|
|
const std::string& camera_id, const ImageMetadata& meta) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
const auto& outputDims = m_trtEngine->getOutputDims();
|
|
|
|
|
|
int numDets = outputDims[0].d[1];
|
|
|
|
|
|
int numFeat = outputDims[0].d[2]; // 6 + nk*3
|
|
|
|
|
|
int nk = (numFeat - 6) / 3;
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<Object> results;
|
|
|
|
|
|
results.reserve(numDets);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < numDets; ++i) {
|
|
|
|
|
|
const float* det = featureVector.data() + i * numFeat;
|
|
|
|
|
|
float conf = det[4];
|
|
|
|
|
|
if (conf <= _modelConfig.detectionScoreThreshold) continue;
|
|
|
|
|
|
|
|
|
|
|
|
int classId = static_cast<int>(det[5]);
|
|
|
|
|
|
float x0 = std::clamp(det[0] * meta.ratio, 0.f, meta.imgWidth);
|
|
|
|
|
|
float y0 = std::clamp(det[1] * meta.ratio, 0.f, meta.imgHeight);
|
|
|
|
|
|
float x1 = std::clamp(det[2] * meta.ratio, 0.f, meta.imgWidth);
|
|
|
|
|
|
float y1 = std::clamp(det[3] * meta.ratio, 0.f, meta.imgHeight);
|
|
|
|
|
|
float w = x1 - x0, h = y1 - y0;
|
|
|
|
|
|
if (w < 1.f || h < 1.f) continue;
|
|
|
|
|
|
|
|
|
|
|
|
const float* kpsPtr = det + 6;
|
|
|
|
|
|
std::vector<float> kps;
|
|
|
|
|
|
kps.reserve(nk * 3);
|
|
|
|
|
|
for (int k = 0; k < nk; ++k) {
|
|
|
|
|
|
float kx = std::clamp(kpsPtr[3*k] * meta.ratio, 0.f, meta.imgWidth);
|
|
|
|
|
|
float ky = std::clamp(kpsPtr[3*k+1] * meta.ratio, 0.f, meta.imgHeight);
|
|
|
|
|
|
float ks = kpsPtr[3*k+2];
|
|
|
|
|
|
kps.push_back(kx);
|
|
|
|
|
|
kps.push_back(ky);
|
|
|
|
|
|
kps.push_back(ks);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Object obj;
|
|
|
|
|
|
obj.classId = classId;
|
|
|
|
|
|
obj.confidence = conf;
|
|
|
|
|
|
obj.box = cv::Rect(static_cast<int>(x0), static_cast<int>(y0),
|
|
|
|
|
|
static_cast<int>(w), static_cast<int>(h));
|
|
|
|
|
|
obj.kps = std::move(kps);
|
|
|
|
|
|
int classNameSize = static_cast<int>(_classes.size());
|
|
|
|
|
|
if (!_classes.empty() && classId >= 0 && classId < classNameSize)
|
|
|
|
|
|
obj.className = _classes[classId];
|
|
|
|
|
|
obj.cameraId = camera_id;
|
|
|
|
|
|
results.push_back(std::move(obj));
|
|
|
|
|
|
}
|
|
|
|
|
|
return results;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::PostprocessPoseE2E", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Classification postprocess
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<Object> ANSRTYOLO::PostprocessClassify(
|
|
|
|
|
|
std::vector<float>& featureVector,
|
|
|
|
|
|
const std::string& camera_id, const ImageMetadata& meta) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
const int nc = static_cast<int>(featureVector.size());
|
|
|
|
|
|
if (nc == 0) return {};
|
|
|
|
|
|
|
|
|
|
|
|
// Check if output is already a probability distribution (sums to ~1.0).
|
|
|
|
|
|
// Some models include a Softmax layer; applying softmax again would
|
|
|
|
|
|
// flatten the distribution and cause wrong classifications.
|
|
|
|
|
|
float rawSum = 0.f;
|
|
|
|
|
|
bool allNonNeg = true;
|
|
|
|
|
|
for (int i = 0; i < nc; ++i) {
|
|
|
|
|
|
rawSum += featureVector[i];
|
|
|
|
|
|
if (featureVector[i] < 0.f) allNonNeg = false;
|
|
|
|
|
|
}
|
|
|
|
|
|
const bool alreadyNormalized = (allNonNeg && rawSum > 0.9f && rawSum < 1.1f);
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<float> probs(nc);
|
|
|
|
|
|
if (alreadyNormalized) {
|
|
|
|
|
|
for (int i = 0; i < nc; ++i) probs[i] = featureVector[i];
|
|
|
|
|
|
} else {
|
|
|
|
|
|
float maxVal = *std::max_element(featureVector.begin(), featureVector.end());
|
|
|
|
|
|
float sumExp = 0.f;
|
|
|
|
|
|
for (int i = 0; i < nc; ++i) {
|
|
|
|
|
|
probs[i] = std::exp(featureVector[i] - maxVal);
|
|
|
|
|
|
sumExp += probs[i];
|
|
|
|
|
|
}
|
|
|
|
|
|
for (int i = 0; i < nc; ++i) probs[i] /= sumExp;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int bestClass = 0;
|
|
|
|
|
|
float bestProb = 0.f;
|
|
|
|
|
|
for (int i = 0; i < nc; ++i) {
|
|
|
|
|
|
if (probs[i] > bestProb) { bestProb = probs[i]; bestClass = i; }
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const int imgW = static_cast<int>(meta.imgWidth);
|
|
|
|
|
|
const int imgH = static_cast<int>(meta.imgHeight);
|
|
|
|
|
|
|
|
|
|
|
|
Object obj;
|
|
|
|
|
|
if (imgW > 20 && imgH > 20) {
|
|
|
|
|
|
obj.box = cv::Rect(10, 10, imgW - 20, imgH - 20);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
obj.box = cv::Rect(0, 0, imgW, imgH);
|
|
|
|
|
|
}
|
|
|
|
|
|
//obj.polygon = ANSUtilityHelper::RectToNormalizedPolygon(obj.box, meta.imgWidth, meta.imgHeight);
|
|
|
|
|
|
obj.classId = bestClass;
|
|
|
|
|
|
obj.confidence = bestProb;
|
|
|
|
|
|
obj.cameraId = camera_id;
|
|
|
|
|
|
int classNameSize = static_cast<int>(_classes.size());
|
|
|
|
|
|
if (!_classes.empty() && bestClass >= 0 && bestClass < classNameSize)
|
|
|
|
|
|
obj.className = _classes[bestClass];
|
|
|
|
|
|
return { std::move(obj) };
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::PostprocessClassify", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// DetectObjects — single image with auto-detection of task type
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// DetectObjects: full single-image pipeline — GPU context setup, preprocess
// (NV12 fast path, full-res BGR registry path, or plain BGR fallback),
// TensorRT inference, task-type routing by output tensor shape, coordinate
// remapping for the full-res path, then optional tracking + stabilization.
// Thread-safety: preprocess and output-transform phases hold _mutex; the
// inference call itself runs outside the lock for concurrent GPU slots.
std::vector<Object> ANSRTYOLO::DetectObjects(const cv::Mat& inputImage,
    const std::string& camera_id) {
    try {
        // --- Debug timer helper (zero-cost when _debugFlag == false) ---
        using Clock = std::chrono::steady_clock;
        const bool dbg = _debugFlag;
        auto t0 = dbg ? Clock::now() : Clock::time_point{};
        auto tPrev = t0;
        // elapsed(): ms since the previous call; mutates tPrev (stopwatch-lap style).
        auto elapsed = [&]() -> double {
            auto now = Clock::now();
            double ms = std::chrono::duration<double, std::milli>(now - tPrev).count();
            tPrev = now;
            return ms;
        };

        // --- 1. Set GPU device context ---
        if (m_trtEngine) {
            m_trtEngine->setDeviceContext();
        }
        double msSetDevice = dbg ? elapsed() : 0;

        // --- 1b. CUDA context health check ---
        if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) {
            return {};
        }

        // --- 2. Preprocess under lock ---
        ANS_DBG("YOLO", "Preprocess START %dx%d", inputImage.cols, inputImage.rows);
        ImageMetadata meta;
        std::vector<std::vector<cv::cuda::GpuMat>> input;
        bool usedNV12 = false;
        // Scale factors for remapping full-res coords back to display-res (step 4b).
        float bgrFullResScaleX = 1.0f, bgrFullResScaleY = 1.0f;
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
            const auto& inputDims = m_trtEngine->getInputDims();
            const int inputW = inputDims[0].d[2];
            const int inputH = inputDims[0].d[1];

            // Preferred path: consume the decoder's NV12 frame directly on GPU.
            auto nv12 = m_nv12Helper.tryNV12(inputImage, inferenceGpu, inputW, inputH,
                NV12PreprocessHelper::defaultYOLOLauncher(),
                _logger, "ANSRTYOLO");
            if (nv12.succeeded) {
                meta.imgWidth = nv12.metaWidth;
                meta.imgHeight = nv12.metaHeight;
                meta.ratio = nv12.ratio;
                input = {{ std::move(nv12.gpuRGB) }};
                usedNV12 = true;
            }
            else if (nv12.useBgrFullRes) {
                // Full-res BGR frame from the registry: preprocess it instead
                // of the caller's (display-res) image; remember the remap scale.
                input = Preprocess(nv12.bgrFullResImg, meta);
                usedNV12 = !input.empty();
                bgrFullResScaleX = nv12.bgrFullResScaleX;
                bgrFullResScaleY = nv12.bgrFullResScaleY;
            }

            // Last resort: preprocess the caller-supplied image.
            if (input.empty()) {
                input = Preprocess(inputImage, meta);
            }
            m_nv12Helper.tickInference();
        }
        double msPreprocess = dbg ? elapsed() : 0;

        if (input.empty()) {
            _logger.LogWarn("ANSRTYOLO::DetectObjects", "Skipped: preprocessing returned empty input", __FILE__, __LINE__);
            return {};
        }

        // --- 3. TRT Inference (mutex released for concurrent GPU slots) ---
        ANS_DBG("YOLO", "TRT inference START nv12=%d inputSize=%dx%d",
            (int)usedNV12,
            input.empty() ? 0 : (input[0].empty() ? 0 : input[0][0].cols),
            input.empty() ? 0 : (input[0].empty() ? 0 : input[0][0].rows));
        auto _trtStart = std::chrono::steady_clock::now();
        std::vector<std::vector<std::vector<float>>> featureVectors;
        if (!m_trtEngine->runInference(input, featureVectors)) {
            ANS_DBG("YOLO", "ERROR: TRT runInference FAILED");
            _logger.LogError("ANSRTYOLO::DetectObjects", "Error running inference", __FILE__, __LINE__);
            return {};
        }
        auto _trtEnd = std::chrono::steady_clock::now();
        double _trtMs = std::chrono::duration<double, std::milli>(_trtEnd - _trtStart).count();
        // Flag pathologically slow inferences unconditionally (not gated on dbg).
        if (_trtMs > 500.0) {
            ANS_DBG("YOLO", "SLOW TRT inference: %.1fms", _trtMs);
        }
        double msInference = dbg ? elapsed() : 0;

        // --- 4. Transform output ---
        // Route to the right postprocessor purely from the output tensor shapes.
        std::vector<Object> results;
        bool isClassification = false;
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            const auto& outputDims = m_trtEngine->getOutputDims();
            const size_t numOutputs = outputDims.size();

            // Two or more outputs => segmentation (detections + mask protos).
            if (numOutputs >= 2) {
                std::vector<std::vector<float>> featureVector2d;
                Engine<float>::transformOutput(featureVectors, featureVector2d);
                double msTransform = dbg ? elapsed() : 0;

                // Heuristic: end2end seg heads are [numDets x smallFeat]
                // (rows >= cols or few columns); legacy heads are
                // [channels x manyAnchors]. TODO confirm for exotic exports.
                int dim1 = outputDims[0].d[1];
                int dim2 = outputDims[0].d[2];
                if (dim1 > dim2 || dim2 <= 20)
                    results = PostprocessSegE2E(featureVector2d, camera_id, meta);
                else
                    results = PostprocessSegmentation(featureVector2d, camera_id, meta);

                if (dbg) {
                    double msPostprocess = elapsed();
                    _logger.LogInfo("ANSRTYOLO::DetectObjects",
                        "[DEBUG] Seg | " + std::string(usedNV12 ? "NV12" : "BGR") +
                        " | SetDev=" + std::to_string(msSetDevice) +
                        "ms Preproc=" + std::to_string(msPreprocess) +
                        "ms Inf=" + std::to_string(msInference) +
                        "ms Transform=" + std::to_string(msTransform) +
                        "ms Postproc=" + std::to_string(msPostprocess) +
                        "ms Det=" + std::to_string(results.size()),
                        __FILE__, __LINE__);
                }
            }
            else {
                // Single output: classify / detect / OBB / pose.
                std::vector<float> featureVector;
                Engine<float>::transformOutput(featureVectors, featureVector);
                double msTransform = dbg ? elapsed() : 0;

                // 1-D/2-D output => classification logits/probabilities.
                if (outputDims[0].nbDims <= 2) {
                    results = PostprocessClassify(featureVector, camera_id, meta);
                    isClassification = true;
                }
                else {
                    int dim1 = outputDims[0].d[1];
                    int dim2 = outputDims[0].d[2];
                    int nc = static_cast<int>(_classes.size());

                    // Same E2E-vs-legacy shape heuristic as the seg branch.
                    const bool isEndToEnd = (dim1 > dim2) || (dim2 <= 20);
                    if (isEndToEnd) {
                        // Per-row width identifies the head: 6 = plain detect,
                        // 7 = OBB (extra angle), 6 + 3k = pose keypoints.
                        if (dim2 == 6)
                            results = PostprocessDetectionE2E(featureVector, camera_id, meta);
                        else if (dim2 == 7)
                            results = PostprocessOBBE2E(featureVector, camera_id, meta);
                        else if (dim2 > 7 && (dim2 - 6) % 3 == 0)
                            results = PostprocessPoseE2E(featureVector, camera_id, meta);
                        else
                            results = PostprocessDetectionE2E(featureVector, camera_id, meta);
                    }
                    else {
                        // Legacy head: channels beyond the 4 box values carry
                        // class scores (+ optional angle / keypoints).
                        int extra = dim1 - 4;
                        bool routed = false;
                        if (nc > 0 && nc <= extra) {
                            if (extra == nc) {
                                results = PostprocessDetection(featureVector, camera_id, meta);
                                routed = true;
                            }
                            else if (extra == nc + 1) {
                                // One extra channel beyond the classes => OBB angle.
                                results = PostprocessOBB(featureVector, camera_id, meta);
                                routed = true;
                            }
                            else if ((extra - nc) % 3 == 0 && (extra - nc) >= 3) {
                                // Leftover channels in (x, y, score) triplets => pose.
                                results = PostprocessPose(featureVector, camera_id, meta);
                                routed = true;
                            }
                        }

                        if (!routed) {
                            // Class list unavailable or inconsistent: probe the
                            // data itself. If the last channel looks like radians
                            // (within ~[-pi, pi]) for most sampled anchors,
                            // presume an OBB head. NOTE(review): heuristic —
                            // could misfire on heads whose last class score is
                            // always small; confirm with the deployed models.
                            if (extra >= 2) {
                                cv::Mat probe = cv::Mat(dim1, dim2, CV_32F, featureVector.data()).t();
                                int lastCol = dim1 - 1;
                                int numSamples = std::min(dim2, 100);
                                int angleCount = 0;
                                for (int s = 0; s < numSamples; ++s) {
                                    float v = probe.at<float>(s, lastCol);
                                    if (v >= -3.15f && v <= 3.15f) ++angleCount;
                                }
                                if (angleCount > numSamples * 8 / 10) {
                                    results = PostprocessOBB(featureVector, camera_id, meta);
                                    routed = true;
                                }
                            }
                            // 56 channels = 4 box + 1 score + 17 kps * 3 (COCO pose).
                            if (!routed && dim1 == 56)
                                results = PostprocessPose(featureVector, camera_id, meta);
                            else if (!routed)
                                results = PostprocessDetection(featureVector, camera_id, meta);
                        }
                    }
                }

                if (dbg) {
                    double msPostprocess = elapsed();
                    _logger.LogInfo("ANSRTYOLO::DetectObjects",
                        "[DEBUG] " + camera_id +
                        " | " + std::string(usedNV12 ? "NV12" : "BGR") +
                        " | SetDev=" + std::to_string(msSetDevice) +
                        "ms Preproc=" + std::to_string(msPreprocess) +
                        "ms Inf=" + std::to_string(msInference) +
                        "ms Transform=" + std::to_string(msTransform) +
                        "ms Postproc=" + std::to_string(msPostprocess) +
                        "ms Det=" + std::to_string(results.size()) +
                        (isClassification ? " [classify]" : " [detect]"),
                        __FILE__, __LINE__);
                }
            }
        }

        // --- 4b. Rescale coords from full-res to display-res (BGR full-res path) ---
        // When ANSVideoPlayer provides full-res BGR via the registry, Preprocess
        // and Postprocess operate in full-res coordinates. But the caller passed
        // a display-res inputImage and expects coords in that space. Remap here.
        if (bgrFullResScaleX != 1.0f || bgrFullResScaleY != 1.0f) {
            for (auto& obj : results) {
                obj.box.x = static_cast<int>(obj.box.x * bgrFullResScaleX);
                obj.box.y = static_cast<int>(obj.box.y * bgrFullResScaleY);
                obj.box.width = static_cast<int>(obj.box.width * bgrFullResScaleX);
                obj.box.height = static_cast<int>(obj.box.height * bgrFullResScaleY);
                // Rescale polygon points if present (segmentation / OBB)
                for (auto& pt : obj.polygon) {
                    pt.x *= bgrFullResScaleX;
                    pt.y *= bgrFullResScaleY;
                }
                // Rescale keypoints if present (pose: x,y,conf triplets)
                for (size_t k = 0; k + 2 < obj.kps.size(); k += 3) {
                    obj.kps[k] *= bgrFullResScaleX;
                    obj.kps[k + 1] *= bgrFullResScaleY;
                }
            }
        }

        // --- 5. Tracking + Stabilization ---
        // Skipped for classification results (no boxes to track).
        if (_trackerEnabled && !isClassification) {
            results = ApplyTracking(results, camera_id);
            double msTracking = dbg ? elapsed() : 0;
            if (_stabilizationEnabled) {
                results = StabilizeDetections(results, camera_id);
            }
            double msStabilize = dbg ? elapsed() : 0;
            if (dbg) {
                _logger.LogInfo("ANSRTYOLO::DetectObjects",
                    "[DEBUG] " + camera_id +
                    " | Tracking=" + std::to_string(msTracking) +
                    "ms Stabilize=" + std::to_string(msStabilize) + "ms",
                    __FILE__, __LINE__);
            }
        }

        // --- 6. Total pipeline time ---
        if (dbg) {
            double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
            _logger.LogInfo("ANSRTYOLO::DetectObjects",
                "[DEBUG] " + camera_id + " | TOTAL=" + std::to_string(msTotal) +
                "ms (" + std::to_string(inputImage.cols) + "x" + std::to_string(inputImage.rows) +
                ") Results=" + std::to_string(results.size()),
                __FILE__, __LINE__);
        }

        return results;
    }
    catch (std::exception& e) {
        _logger.LogFatal("ANSRTYOLO::DetectObjects", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// DetectObjectsBatch — batch inference with auto-detection
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
std::vector<std::vector<Object>> ANSRTYOLO::DetectObjectsBatch(
|
|
|
|
|
|
const std::vector<cv::Mat>& inputImages, const std::string& camera_id) {
|
|
|
|
|
|
if (inputImages.empty()) {
|
|
|
|
|
|
_logger.LogError("ANSRTYOLO::DetectObjectsBatch", "Empty input images vector", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Auto-split if batch exceeds engine capacity
|
|
|
|
|
|
const int maxBatch = m_options.maxBatchSize > 0 ? m_options.maxBatchSize : 1;
|
|
|
|
|
|
if (static_cast<int>(inputImages.size()) > maxBatch && maxBatch > 0) {
|
|
|
|
|
|
const size_t numImages = inputImages.size();
|
|
|
|
|
|
std::vector<std::vector<Object>> allResults;
|
|
|
|
|
|
allResults.reserve(numImages);
|
|
|
|
|
|
for (size_t start = 0; start < numImages; start += static_cast<size_t>(maxBatch)) {
|
|
|
|
|
|
const size_t end = std::min(start + static_cast<size_t>(maxBatch), numImages);
|
|
|
|
|
|
std::vector<cv::Mat> chunk(inputImages.begin() + start, inputImages.begin() + end);
|
|
|
|
|
|
auto chunkResults = DetectObjectsBatch(chunk, camera_id);
|
|
|
|
|
|
if (chunkResults.size() == chunk.size()) {
|
|
|
|
|
|
for (auto& r : chunkResults) allResults.push_back(std::move(r));
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
_logger.LogError("ANSRTYOLO::DetectObjectsBatch",
|
|
|
|
|
|
"Chunk returned " + std::to_string(chunkResults.size()) +
|
|
|
|
|
|
" results, expected " + std::to_string(chunk.size()), __FILE__, __LINE__);
|
|
|
|
|
|
for (auto& r : chunkResults) allResults.push_back(std::move(r));
|
|
|
|
|
|
for (size_t pad = chunkResults.size(); pad < chunk.size(); ++pad)
|
|
|
|
|
|
allResults.push_back({});
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return allResults;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
// --- Debug timer helper ---
|
|
|
|
|
|
using Clock = std::chrono::steady_clock;
|
|
|
|
|
|
const bool dbg = _debugFlag;
|
|
|
|
|
|
auto t0 = dbg ? Clock::now() : Clock::time_point{};
|
|
|
|
|
|
auto tPrev = t0;
|
|
|
|
|
|
auto elapsed = [&]() -> double {
|
|
|
|
|
|
auto now = Clock::now();
|
|
|
|
|
|
double ms = std::chrono::duration<double, std::milli>(now - tPrev).count();
|
|
|
|
|
|
tPrev = now;
|
|
|
|
|
|
return ms;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Ensure correct GPU context for preprocessing (multi-GPU safety)
|
|
|
|
|
|
if (m_trtEngine) {
|
|
|
|
|
|
m_trtEngine->setDeviceContext();
|
|
|
|
|
|
}
|
|
|
|
|
|
double msSetDevice = dbg ? elapsed() : 0;
|
|
|
|
|
|
|
|
|
|
|
|
// CUDA context health check (same as DetectObjects)
|
|
|
|
|
|
if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) {
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const size_t realCount = inputImages.size();
|
|
|
|
|
|
|
|
|
|
|
|
// Pad batch to next power-of-2
|
|
|
|
|
|
size_t paddedCount = 1;
|
|
|
|
|
|
while (paddedCount < realCount) paddedCount *= 2;
|
|
|
|
|
|
paddedCount = std::min(paddedCount, static_cast<size_t>(maxBatch));
|
|
|
|
|
|
|
|
|
|
|
|
const std::vector<cv::Mat>* batchPtr = &inputImages;
|
|
|
|
|
|
std::vector<cv::Mat> paddedImages;
|
|
|
|
|
|
if (paddedCount > realCount) {
|
|
|
|
|
|
paddedImages.reserve(paddedCount);
|
|
|
|
|
|
paddedImages.insert(paddedImages.end(), inputImages.begin(), inputImages.end());
|
|
|
|
|
|
for (size_t p = realCount; p < paddedCount; ++p)
|
|
|
|
|
|
paddedImages.push_back(inputImages.back());
|
|
|
|
|
|
batchPtr = &paddedImages;
|
|
|
|
|
|
}
|
|
|
|
|
|
double msPad = dbg ? elapsed() : 0;
|
|
|
|
|
|
|
|
|
|
|
|
BatchMetadata metadata;
|
|
|
|
|
|
const auto inputs = PreprocessBatch(*batchPtr, metadata);
|
|
|
|
|
|
double msPreprocess = dbg ? elapsed() : 0;
|
|
|
|
|
|
if (inputs.empty() || inputs[0].empty()) {
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::DetectObjectsBatch", "Skipped: preprocessing failed", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check for prior CUDA errors before inference.
|
|
|
|
|
|
cudaError_t priorErr = cudaGetLastError();
|
|
|
|
|
|
if (priorErr != cudaSuccess) {
|
|
|
|
|
|
_logger.LogWarn("ANSRTYOLO::DetectObjectsBatch",
|
|
|
|
|
|
std::string("Cleared prior CUDA error before inference: ")
|
|
|
|
|
|
+ cudaGetErrorString(priorErr),
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<std::vector<float>>> featureVectors;
|
|
|
|
|
|
auto succ = m_trtEngine->runInference(inputs, featureVectors);
|
|
|
|
|
|
if (!succ) {
|
|
|
|
|
|
cudaError_t postErr = cudaPeekAtLastError();
|
|
|
|
|
|
std::string detail = "runInference returned false, batchSize="
|
|
|
|
|
|
+ std::to_string(inputs[0].size());
|
|
|
|
|
|
if (postErr != cudaSuccess) {
|
|
|
|
|
|
detail += ", CUDA error: ";
|
|
|
|
|
|
detail += cudaGetErrorString(postErr);
|
|
|
|
|
|
}
|
|
|
|
|
|
_logger.LogError("ANSRTYOLO::DetectObjectsBatch", detail, __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
double msInference = dbg ? elapsed() : 0;
|
|
|
|
|
|
|
|
|
|
|
|
if (featureVectors.size() != paddedCount) {
|
|
|
|
|
|
_logger.LogError("ANSRTYOLO::DetectObjectsBatch", "Output batch size mismatch", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
featureVectors.resize(realCount);
|
|
|
|
|
|
|
|
|
|
|
|
const auto& outputDims = m_trtEngine->getOutputDims();
|
|
|
|
|
|
const size_t numOutputs = outputDims.size();
|
|
|
|
|
|
const size_t numBatch = featureVectors.size();
|
|
|
|
|
|
|
|
|
|
|
|
// Determine task type once (same model for all images in batch)
|
|
|
|
|
|
int dim1 = outputDims[0].d[1];
|
|
|
|
|
|
int dim2 = outputDims[0].d[2];
|
|
|
|
|
|
int nc = static_cast<int>(_classes.size());
|
|
|
|
|
|
|
|
|
|
|
|
enum class TaskType { DetLegacy, DetE2E, OBBLegacy, OBBE2E,
|
|
|
|
|
|
SegLegacy, SegE2E, PoseLegacy, PoseE2E, Classify };
|
|
|
|
|
|
TaskType taskType = TaskType::DetLegacy; // default
|
|
|
|
|
|
|
|
|
|
|
|
// E2E: dim1 > dim2 (e.g. [B,300,6]); Legacy: dim1 < dim2 (e.g. [B,84,8400])
|
|
|
|
|
|
const bool isEndToEnd = (dim1 > dim2) || (dim2 <= 20);
|
|
|
|
|
|
|
|
|
|
|
|
if (numOutputs >= 2) {
|
|
|
|
|
|
taskType = isEndToEnd ? TaskType::SegE2E : TaskType::SegLegacy;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (outputDims[0].nbDims <= 2) {
|
|
|
|
|
|
taskType = TaskType::Classify;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (isEndToEnd) {
|
|
|
|
|
|
if (dim2 == 6) taskType = TaskType::DetE2E;
|
|
|
|
|
|
else if (dim2 == 7) taskType = TaskType::OBBE2E;
|
|
|
|
|
|
else if (dim2 > 7 && (dim2-6) % 3 == 0) taskType = TaskType::PoseE2E;
|
|
|
|
|
|
else taskType = TaskType::DetE2E;
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
int extra = dim1 - 4;
|
|
|
|
|
|
bool routed = false;
|
|
|
|
|
|
|
|
|
|
|
|
// Try class-list-based routing first (only if class count fits within tensor)
|
|
|
|
|
|
if (nc > 0 && nc <= extra) {
|
|
|
|
|
|
if (extra == nc) { taskType = TaskType::DetLegacy; routed = true; }
|
|
|
|
|
|
else if (extra == nc + 1) { taskType = TaskType::OBBLegacy; routed = true; }
|
|
|
|
|
|
else if ((extra-nc) % 3 == 0 && (extra-nc) >= 3) { taskType = TaskType::PoseLegacy; routed = true; }
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Fallback: probe last channel for angle values to detect OBB
|
|
|
|
|
|
if (!routed && extra >= 2 && !featureVectors.empty() && !featureVectors[0].empty() && !featureVectors[0][0].empty()) {
|
|
|
|
|
|
// Transpose first image's feature vector and probe last column
|
|
|
|
|
|
cv::Mat raw(dim1, dim2, CV_32F, const_cast<float*>(featureVectors[0][0].data()));
|
|
|
|
|
|
cv::Mat probe;
|
|
|
|
|
|
cv::transpose(raw, probe); // [dim2, dim1]
|
|
|
|
|
|
int lastCol = dim1 - 1;
|
|
|
|
|
|
int numSamples = std::min(dim2, 100);
|
|
|
|
|
|
int angleCount = 0;
|
|
|
|
|
|
for (int s = 0; s < numSamples; ++s) {
|
|
|
|
|
|
float v = probe.at<float>(s, lastCol);
|
|
|
|
|
|
if (v >= -3.15f && v <= 3.15f) ++angleCount;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (angleCount > numSamples * 8 / 10) {
|
|
|
|
|
|
taskType = TaskType::OBBLegacy;
|
|
|
|
|
|
routed = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!routed) {
|
|
|
|
|
|
if (dim1 == 56) taskType = TaskType::PoseLegacy;
|
|
|
|
|
|
else taskType = TaskType::DetLegacy;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Process each image in parallel
|
|
|
|
|
|
std::vector<std::vector<Object>> batchDetections(numBatch);
|
|
|
|
|
|
std::vector<std::future<std::vector<Object>>> postFutures;
|
|
|
|
|
|
postFutures.reserve(numBatch);
|
|
|
|
|
|
|
|
|
|
|
|
for (size_t batchIdx = 0; batchIdx < numBatch; ++batchIdx) {
|
|
|
|
|
|
const auto& batchOutput = featureVectors[batchIdx];
|
|
|
|
|
|
|
|
|
|
|
|
ImageMetadata imgMeta;
|
|
|
|
|
|
imgMeta.ratio = metadata.ratios[batchIdx];
|
|
|
|
|
|
imgMeta.imgWidth = static_cast<float>(metadata.imgWidths[batchIdx]);
|
|
|
|
|
|
imgMeta.imgHeight = static_cast<float>(metadata.imgHeights[batchIdx]);
|
|
|
|
|
|
|
|
|
|
|
|
switch (taskType) {
|
|
|
|
|
|
case TaskType::SegLegacy:
|
|
|
|
|
|
case TaskType::SegE2E: {
|
|
|
|
|
|
std::vector<std::vector<float>> fv2d;
|
|
|
|
|
|
fv2d.reserve(batchOutput.size());
|
|
|
|
|
|
for (const auto& out : batchOutput) fv2d.push_back(out);
|
|
|
|
|
|
if (taskType == TaskType::SegE2E) {
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv2d), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessSegE2E(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv2d), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessSegmentation(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
case TaskType::Classify: {
|
|
|
|
|
|
std::vector<float> fv = batchOutput.empty() ? std::vector<float>{} : batchOutput[0];
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessClassify(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
default: {
|
|
|
|
|
|
std::vector<float> fv = batchOutput.empty() ? std::vector<float>{} : batchOutput[0];
|
|
|
|
|
|
switch (taskType) {
|
|
|
|
|
|
case TaskType::DetLegacy:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessDetection(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
case TaskType::DetE2E:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessDetectionE2E(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
case TaskType::OBBLegacy:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessOBB(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
case TaskType::OBBE2E:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessOBBE2E(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
case TaskType::PoseLegacy:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessPose(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
case TaskType::PoseE2E:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessPoseE2E(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
|
|
|
postFutures.push_back(std::async(std::launch::async,
|
|
|
|
|
|
[this, fv = std::move(fv), cid = camera_id, m = imgMeta]() mutable {
|
|
|
|
|
|
return PostprocessDetection(fv, cid, m);
|
|
|
|
|
|
}));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Gather results
|
|
|
|
|
|
for (size_t i = 0; i < numBatch; ++i)
|
|
|
|
|
|
batchDetections[i] = postFutures[i].get();
|
|
|
|
|
|
|
|
|
|
|
|
// Apply tracker per frame (skip for classification models)
|
|
|
|
|
|
if (_trackerEnabled && taskType != TaskType::Classify) {
|
|
|
|
|
|
for (auto& results : batchDetections) {
|
|
|
|
|
|
if (!results.empty()) {
|
|
|
|
|
|
results = ApplyTracking(results, camera_id);
|
|
|
|
|
|
if (_stabilizationEnabled) {
|
|
|
|
|
|
results = StabilizeDetections(results, camera_id);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (dbg) {
|
|
|
|
|
|
double msPostprocess = elapsed();
|
|
|
|
|
|
double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
|
|
|
|
|
|
_logger.LogInfo("ANSRTYOLO::DetectObjectsBatch",
|
|
|
|
|
|
"[DEBUG] " + camera_id +
|
|
|
|
|
|
" batch=" + std::to_string(realCount) +
|
|
|
|
|
|
" | SetDev=" + std::to_string(msSetDevice) +
|
|
|
|
|
|
"ms Pad=" + std::to_string(msPad) +
|
|
|
|
|
|
"ms Preproc=" + std::to_string(msPreprocess) +
|
|
|
|
|
|
"ms Inf=" + std::to_string(msInference) +
|
|
|
|
|
|
"ms Postproc=" + std::to_string(msPostprocess) +
|
|
|
|
|
|
"ms TOTAL=" + std::to_string(msTotal) + "ms",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return batchDetections;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("ANSRTYOLO::DetectObjectsBatch", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace ANSCENTER
|