2026-03-28 16:54:11 +11:00
|
|
|
|
#include "ANSYOLO12OD.h"
|
|
|
|
|
|
#include "EPLoader.h"
|
|
|
|
|
|
#ifdef USEONNXOV
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace ANSCENTER {
|
|
|
|
|
|
bool YOLO12OD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
|
|
|
|
if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (FileExist(_modelFilePath)) {
|
|
|
|
|
|
optimizedModelFolder = GetParentFolder(_modelFilePath);
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
optimizedModelFolder = "";
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bool YOLO12OD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
2026-04-13 19:48:32 +10:00
|
|
|
|
ModelLoadingGuard mlg(_modelLoading);
|
2026-03-28 16:54:11 +11:00
|
|
|
|
try {
|
|
|
|
|
|
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
|
|
|
|
|
|
if (!result) return false;
|
|
|
|
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
|
|
|
|
|
|
_modelConfig.inpHeight = 640;
|
|
|
|
|
|
_modelConfig.inpWidth = 640;
|
|
|
|
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|
|
|
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|
|
|
|
|
// 0. Check if the configuration file exist
|
|
|
|
|
|
if (FileExist(_modelConfigFile)) {
|
|
|
|
|
|
ModelType modelType;
|
|
|
|
|
|
std::vector<int> inputShape;
|
|
|
|
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|
|
|
|
|
if (inputShape.size() == 2) {
|
|
|
|
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|
|
|
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|
|
|
|
|
}
|
|
|
|
|
|
_modelConfig.modelType = modelType;
|
|
|
|
|
|
_modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
if (!FileExist(_modelFilePath)) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {// This is old version of model zip file
|
|
|
|
|
|
std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
|
|
|
|
|
|
if (std::filesystem::exists(onnxfile)) {
|
|
|
|
|
|
_modelFilePath = onnxfile;
|
|
|
|
|
|
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (FileExist(_classFilePath))
|
|
|
|
|
|
{
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromFile();
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromString();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
_isInitialized = loadModel(_modelFilePath, true);//Assume that GPU is available;
|
|
|
|
|
|
return _isInitialized;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bool YOLO12OD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName,std::string className, const std::string& modelFolder, std::string& labelMap) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
2026-04-13 19:48:32 +10:00
|
|
|
|
ModelLoadingGuard mlg(_modelLoading);
|
2026-03-28 16:54:11 +11:00
|
|
|
|
try {
|
|
|
|
|
|
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className,modelFolder, labelMap);
|
|
|
|
|
|
if (!result) return false;
|
|
|
|
|
|
std::string _modelName = modelName;
|
|
|
|
|
|
if (_modelName.empty()) {
|
|
|
|
|
|
_modelName = "train_last";
|
|
|
|
|
|
}
|
|
|
|
|
|
std::string weightFileName = _modelName + ".weights";
|
|
|
|
|
|
_modelConfig = modelConfig;
|
|
|
|
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
|
|
|
|
|
|
_modelConfig.inpHeight = 640;
|
|
|
|
|
|
_modelConfig.inpWidth = 640;
|
|
|
|
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|
|
|
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|
|
|
|
|
// 0. Check if the configuration file exist
|
|
|
|
|
|
if (FileExist(_modelConfigFile)) {
|
|
|
|
|
|
ModelType modelType;
|
|
|
|
|
|
std::vector<int> inputShape;
|
|
|
|
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|
|
|
|
|
if (inputShape.size() == 2) {
|
|
|
|
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|
|
|
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|
|
|
|
|
}
|
|
|
|
|
|
_modelConfig.modelType = modelType;
|
|
|
|
|
|
weightFileName = _modelName + ".onnx";
|
|
|
|
|
|
_modelFilePath = CreateFilePath(_modelFolder, weightFileName);
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
if (!FileExist(_modelFilePath)) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {// This is old version of model zip file
|
|
|
|
|
|
weightFileName = _modelName + ".onnx";
|
|
|
|
|
|
std::string onnxfile = CreateFilePath(_modelFolder, weightFileName);
|
|
|
|
|
|
if (std::filesystem::exists(onnxfile)) {
|
|
|
|
|
|
_modelFilePath = onnxfile;
|
|
|
|
|
|
_classFilePath = CreateFilePath(_modelFolder, className);
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
std::ifstream isValidFileName(_classFilePath);
|
|
|
|
|
|
if (!isValidFileName)
|
|
|
|
|
|
{
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromString();
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromFile();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 1. Load labelMap and engine
|
|
|
|
|
|
labelMap.clear();
|
|
|
|
|
|
if (!_classes.empty())
|
|
|
|
|
|
labelMap = VectorToCommaSeparatedString(_classes);
|
|
|
|
|
|
_isInitialized = loadModel(_modelFilePath, true);//Assume that GPU is available;
|
|
|
|
|
|
return _isInitialized;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bool YOLO12OD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
2026-04-13 19:48:32 +10:00
|
|
|
|
ModelLoadingGuard mlg(_modelLoading);
|
2026-03-28 16:54:11 +11:00
|
|
|
|
try {
|
|
|
|
|
|
bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
|
|
|
|
|
|
if (!result) return false;
|
|
|
|
|
|
// Parsing for YOLO only here
|
|
|
|
|
|
_modelConfig = modelConfig;
|
|
|
|
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
|
|
|
|
|
|
_modelConfig.inpHeight = 640;
|
|
|
|
|
|
_modelConfig.inpWidth = 640;
|
|
|
|
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|
|
|
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|
|
|
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|
|
|
|
|
// 0. Check if the configuration file exist
|
|
|
|
|
|
if (FileExist(_modelConfigFile)) {
|
|
|
|
|
|
ModelType modelType;
|
|
|
|
|
|
std::vector<int> inputShape;
|
|
|
|
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|
|
|
|
|
if (inputShape.size() == 2) {
|
|
|
|
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|
|
|
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|
|
|
|
|
}
|
|
|
|
|
|
_modelConfig.modelType = modelType;
|
|
|
|
|
|
_modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
if (!FileExist(_modelFilePath)) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {// This is old version of model zip file
|
|
|
|
|
|
std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
|
|
|
|
|
|
if (std::filesystem::exists(onnxfile)) {
|
|
|
|
|
|
// This is the yovoV5 or yolov8 format
|
|
|
|
|
|
_modelFilePath = onnxfile;
|
|
|
|
|
|
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8/Yolov5 weight", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (FileExist(_classFilePath))
|
|
|
|
|
|
{
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromFile();
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|
|
|
|
|
LoadClassesFromString();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 1. Load labelMap and engine
|
|
|
|
|
|
labelMap.clear();
|
|
|
|
|
|
if (!_classes.empty())
|
|
|
|
|
|
labelMap = VectorToCommaSeparatedString(_classes);
|
|
|
|
|
|
_isInitialized = loadModel(_modelFilePath, true);//Assume that GPU is available;
|
|
|
|
|
|
return _isInitialized;
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::Initialize", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
std::vector<Object> YOLO12OD::RunInference(const cv::Mat& input) {
|
|
|
|
|
|
return RunInference(input, "CustomCam");
|
|
|
|
|
|
}
|
|
|
|
|
|
std::vector<Object> YOLO12OD::RunInference(const cv::Mat& input,const std::string& camera_id)
|
|
|
|
|
|
{
|
2026-04-13 19:48:32 +10:00
|
|
|
|
if (!PreInferenceCheck("YOLO12OD::RunInference")) return {};
|
2026-03-28 16:54:11 +11:00
|
|
|
|
try {
|
|
|
|
|
|
// Validation
|
|
|
|
|
|
if (!_licenseValid) {
|
|
|
|
|
|
_logger.LogError("YOLO12OD::RunInference", "Invalid License",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!_isInitialized) {
|
|
|
|
|
|
_logger.LogError("YOLO12OD::RunInference", "Model is not initialized",
|
|
|
|
|
|
__FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (input.empty() || input.cols < 10 || input.rows < 10) {
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto ret = detect(input, _modelConfig.detectionScoreThreshold,
|
|
|
|
|
|
_modelConfig.modelMNSThreshold);
|
|
|
|
|
|
if (_trackerEnabled) {
|
|
|
|
|
|
ret = ApplyTracking(ret, camera_id);
|
|
|
|
|
|
if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
|
|
|
|
|
|
}
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
_logger.LogFatal("YOLO12OD::RunInference", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
YOLO12OD::~YOLO12OD() {
|
|
|
|
|
|
try {
|
|
|
|
|
|
this->_logger.LogDebug("YOLO12OD::~YOLO12OD()", "Release YOLO12OD ", __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::~YOLO12OD()", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bool YOLO12OD::Destroy() {
|
|
|
|
|
|
try {
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::Destroy()", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bool YOLO12OD::loadModel(const std::string& modelPath, bool useGPU)
|
|
|
|
|
|
{
|
|
|
|
|
|
try {
|
|
|
|
|
|
const auto& ep = ANSCENTER::EPLoader::Current();
|
|
|
|
|
|
if (Ort::Global<void>::api_ == nullptr)
|
|
|
|
|
|
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
|
|
|
|
|
|
std::cout << "[YOLO12OD] EP ready: "
|
|
|
|
|
|
<< ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;
|
|
|
|
|
|
|
|
|
|
|
|
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "ONNX_DETECTION");
|
|
|
|
|
|
sessionOptions = Ort::SessionOptions();
|
|
|
|
|
|
sessionOptions.SetIntraOpNumThreads(
|
|
|
|
|
|
std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
|
|
|
|
|
|
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
|
|
|
|
|
|
|
|
|
|
|
|
// ── Log available providers ─────────────────────────────────────────
|
|
|
|
|
|
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
|
|
|
|
|
|
std::cout << "Available Execution Providers:" << std::endl;
|
|
|
|
|
|
for (const auto& p : availableProviders)
|
|
|
|
|
|
std::cout << " - " << p << std::endl;
|
|
|
|
|
|
|
|
|
|
|
|
// ── Attach EP based on runtime-detected hardware ────────────────────
|
|
|
|
|
|
if (useGPU) {
|
|
|
|
|
|
bool attached = false;
|
|
|
|
|
|
|
|
|
|
|
|
switch (ep.type) {
|
|
|
|
|
|
|
|
|
|
|
|
case ANSCENTER::EngineType::NVIDIA_GPU: {
|
|
|
|
|
|
auto it = std::find(availableProviders.begin(),
|
|
|
|
|
|
availableProviders.end(), "CUDAExecutionProvider");
|
|
|
|
|
|
if (it == availableProviders.end()) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", "CUDAExecutionProvider not in DLL — "
|
|
|
|
|
|
"check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
|
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|
|
|
|
|
Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
|
|
|
|
|
|
|
|
|
|
|
|
const char* keys[] = { "device_id" };
|
|
|
|
|
|
const char* values[] = { "0" };
|
|
|
|
|
|
Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
|
|
|
|
|
|
|
|
|
|
|
|
sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
|
|
|
|
|
|
Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
|
|
|
|
|
|
|
|
|
|
|
|
std::cout << "[YOLO12OD] CUDA EP attached." << std::endl;
|
|
|
|
|
|
attached = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const Ort::Exception& e) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
case ANSCENTER::EngineType::AMD_GPU: {
|
|
|
|
|
|
auto it = std::find(availableProviders.begin(),
|
|
|
|
|
|
availableProviders.end(), "DmlExecutionProvider");
|
|
|
|
|
|
if (it == availableProviders.end()) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", "DmlExecutionProvider not in DLL — "
|
|
|
|
|
|
"check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
|
|
|
std::unordered_map<std::string, std::string> opts = { { "device_id", "0" } };
|
|
|
|
|
|
sessionOptions.AppendExecutionProvider("DML", opts);
|
|
|
|
|
|
std::cout << "[YOLO12OD] DirectML EP attached." << std::endl;
|
|
|
|
|
|
attached = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const Ort::Exception& e) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
case ANSCENTER::EngineType::OPENVINO_GPU: {
|
|
|
|
|
|
auto it = std::find(availableProviders.begin(),
|
|
|
|
|
|
availableProviders.end(), "OpenVINOExecutionProvider");
|
|
|
|
|
|
if (it == availableProviders.end()) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", "OpenVINOExecutionProvider not in DLL — "
|
|
|
|
|
|
"check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const std::string precision = "FP16";
|
|
|
|
|
|
const std::string numberOfThreads = "8";
|
|
|
|
|
|
const std::string numberOfStreams = "8";
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
|
|
|
|
|
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
|
|
|
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|
|
|
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
|
|
|
|
|
{ {"device_type","GPU.0"}, {"precision",precision},
|
|
|
|
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|
|
|
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
|
|
|
|
|
{ {"device_type","GPU.1"}, {"precision",precision},
|
|
|
|
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|
|
|
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
|
|
|
|
|
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
|
|
|
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|
|
|
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (const auto& config : try_configs) {
|
|
|
|
|
|
try {
|
|
|
|
|
|
sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
|
|
|
|
|
|
std::cout << "[YOLO12OD] OpenVINO EP attached ("
|
|
|
|
|
|
<< config.at("device_type") << ")." << std::endl;
|
|
|
|
|
|
attached = true;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const Ort::Exception& e) {
|
|
|
|
|
|
this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!attached)
|
|
|
|
|
|
std::cerr << "[YOLO12OD] OpenVINO EP: all device configs failed." << std::endl;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!attached) {
|
|
|
|
|
|
std::cerr << "[YOLO12OD] No GPU EP attached — running on CPU." << std::endl;
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::loadModel", "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
std::cout << "[YOLO12OD] Inference device: CPU (useGPU=false)" << std::endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Load model ──────────────────────────────────────────────────────
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
|
std::wstring w_modelPath(modelPath.begin(), modelPath.end());
|
|
|
|
|
|
session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
|
|
|
|
|
|
#else
|
|
|
|
|
|
session = Ort::Session(env, modelPath.c_str(), sessionOptions);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|
|
|
|
|
|
|
|
|
|
|
// ── Input shape ─────────────────────────────────────────────────────
|
|
|
|
|
|
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
|
|
|
|
|
|
std::vector<int64_t> inputTensorShapeVec =
|
|
|
|
|
|
inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
|
|
|
|
|
|
isDynamicInputShape = (inputTensorShapeVec.size() >= 4) &&
|
|
|
|
|
|
(inputTensorShapeVec[2] == -1 && inputTensorShapeVec[3] == -1);
|
|
|
|
|
|
|
|
|
|
|
|
if (isDynamicInputShape)
|
|
|
|
|
|
std::cout << "Dynamic input shape detected." << std::endl;
|
|
|
|
|
|
for (auto shape : inputTensorShapeVec)
|
|
|
|
|
|
std::cout << "Input shape: " << shape << std::endl;
|
|
|
|
|
|
|
|
|
|
|
|
// ── Node names ──────────────────────────────────────────────────────
|
|
|
|
|
|
auto input_name = session.GetInputNameAllocated(0, allocator);
|
|
|
|
|
|
inputNodeNameAllocatedStrings.push_back(std::move(input_name));
|
|
|
|
|
|
inputNames.push_back(inputNodeNameAllocatedStrings.back().get());
|
|
|
|
|
|
|
|
|
|
|
|
auto output_name = session.GetOutputNameAllocated(0, allocator);
|
|
|
|
|
|
outputNodeNameAllocatedStrings.push_back(std::move(output_name));
|
|
|
|
|
|
outputNames.push_back(outputNodeNameAllocatedStrings.back().get());
|
|
|
|
|
|
|
|
|
|
|
|
// ── Input image size ────────────────────────────────────────────────
|
|
|
|
|
|
if (inputTensorShapeVec.size() >= 4) {
|
|
|
|
|
|
inputImageShape = cv::Size(
|
|
|
|
|
|
static_cast<int>(inputTensorShapeVec[3]),
|
|
|
|
|
|
static_cast<int>(inputTensorShapeVec[2]));
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::loadModel", "Invalid input tensor shape.", __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
numInputNodes = session.GetInputCount();
|
|
|
|
|
|
numOutputNodes = session.GetOutputCount();
|
|
|
|
|
|
|
|
|
|
|
|
std::cout << "Model loaded successfully with "
|
|
|
|
|
|
<< numInputNodes << " input nodes and "
|
|
|
|
|
|
<< numOutputNodes << " output nodes." << std::endl;
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cv::Mat YOLO12OD::preprocess(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
|
|
|
|
m_imgWidth = image.cols;
|
|
|
|
|
|
m_imgHeight = image.rows;
|
|
|
|
|
|
try {
|
|
|
|
|
|
cv::Mat processedImage;
|
|
|
|
|
|
|
|
|
|
|
|
// Handle grayscale images by converting them to 3-channel BGR
|
|
|
|
|
|
if (image.channels() == 1) {
|
|
|
|
|
|
cv::cvtColor(image, processedImage, cv::COLOR_GRAY2BGR);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
processedImage = image.clone();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
cv::Mat resizedImage;
|
|
|
|
|
|
// Resize and pad the image using letterBox utility
|
|
|
|
|
|
letterBox(processedImage, resizedImage, inputImageShape, cv::Scalar(114, 114, 114), isDynamicInputShape, false, true, 32);
|
|
|
|
|
|
|
|
|
|
|
|
// Update input tensor shape based on resized image dimensions
|
|
|
|
|
|
inputTensorShape[2] = resizedImage.rows;
|
|
|
|
|
|
inputTensorShape[3] = resizedImage.cols;
|
|
|
|
|
|
|
|
|
|
|
|
// Convert image to float and normalize to [0, 1]
|
|
|
|
|
|
resizedImage.convertTo(resizedImage, CV_32FC3, 1 / 255.0f);
|
|
|
|
|
|
|
|
|
|
|
|
// Allocate memory for the image blob in CHW format
|
|
|
|
|
|
size_t totalSize = resizedImage.cols * resizedImage.rows * resizedImage.channels();
|
|
|
|
|
|
blob.resize(totalSize);
|
|
|
|
|
|
|
|
|
|
|
|
// Split the image into separate channels and store in the blob
|
|
|
|
|
|
std::vector<cv::Mat> chw(resizedImage.channels());
|
|
|
|
|
|
for (int i = 0; i < resizedImage.channels(); ++i) {
|
|
|
|
|
|
chw[i] = cv::Mat(resizedImage.rows, resizedImage.cols, CV_32FC1, blob.data() + i * resizedImage.cols * resizedImage.rows);
|
|
|
|
|
|
}
|
|
|
|
|
|
cv::split(resizedImage, chw); // Split channels into the blob
|
|
|
|
|
|
|
|
|
|
|
|
return resizedImage;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::preprocess", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return cv::Mat();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
std::vector<Object> YOLO12OD::postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
|
|
|
|
|
const std::vector<Ort::Value>& outputTensors,
|
|
|
|
|
|
float confThreshold, float iouThreshold)
|
|
|
|
|
|
{
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<Object> detections;
|
|
|
|
|
|
const float* rawOutput = outputTensors[0].GetTensorData<float>(); // Extract raw output data from the first output tensor
|
|
|
|
|
|
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
|
|
|
|
|
|
|
|
|
|
|
// Determine the number of features and detections
|
|
|
|
|
|
const size_t num_features = outputShape[1];
|
|
|
|
|
|
const size_t num_detections = outputShape[2];
|
|
|
|
|
|
|
|
|
|
|
|
// Early exit if no detections
|
|
|
|
|
|
if (num_detections == 0) {
|
|
|
|
|
|
return detections;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate number of classes based on output shape
|
|
|
|
|
|
const int numClasses = static_cast<int>(num_features) - 4;
|
|
|
|
|
|
if (numClasses <= 0) {
|
|
|
|
|
|
// Invalid number of classes
|
|
|
|
|
|
return detections;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Reserve memory for efficient appending
|
|
|
|
|
|
std::vector<BoundingBox> boxes;
|
|
|
|
|
|
boxes.reserve(num_detections);
|
|
|
|
|
|
std::vector<float> confs;
|
|
|
|
|
|
confs.reserve(num_detections);
|
|
|
|
|
|
std::vector<int> classIds;
|
|
|
|
|
|
classIds.reserve(num_detections);
|
|
|
|
|
|
std::vector<BoundingBox> nms_boxes;
|
|
|
|
|
|
nms_boxes.reserve(num_detections);
|
|
|
|
|
|
|
|
|
|
|
|
// Constants for indexing
|
|
|
|
|
|
const float* ptr = rawOutput;
|
|
|
|
|
|
|
|
|
|
|
|
for (size_t d = 0; d < num_detections; ++d) {
|
|
|
|
|
|
// Extract bounding box coordinates (center x, center y, width, height)
|
|
|
|
|
|
float centerX = ptr[0 * num_detections + d];
|
|
|
|
|
|
float centerY = ptr[1 * num_detections + d];
|
|
|
|
|
|
float width = ptr[2 * num_detections + d];
|
|
|
|
|
|
float height = ptr[3 * num_detections + d];
|
|
|
|
|
|
|
|
|
|
|
|
// Find class with the highest confidence score
|
|
|
|
|
|
int classId = -1;
|
|
|
|
|
|
float maxScore = -FLT_MAX;
|
|
|
|
|
|
for (int c = 0; c < numClasses; ++c) {
|
|
|
|
|
|
const float score = ptr[d + (4 + c) * num_detections];
|
|
|
|
|
|
if (score > maxScore) {
|
|
|
|
|
|
maxScore = score;
|
|
|
|
|
|
classId = c;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Proceed only if confidence exceeds threshold
|
|
|
|
|
|
if (maxScore > confThreshold) {
|
|
|
|
|
|
// Convert center coordinates to top-left (x1, y1)
|
|
|
|
|
|
float left = centerX - width / 2.0f;
|
|
|
|
|
|
float top = centerY - height / 2.0f;
|
|
|
|
|
|
|
|
|
|
|
|
// Scale to original image size
|
|
|
|
|
|
BoundingBox scaledBox = scaleCoords(
|
|
|
|
|
|
resizedImageShape,
|
|
|
|
|
|
BoundingBox(left, top, width, height),
|
|
|
|
|
|
originalImageSize,
|
|
|
|
|
|
true
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
// Round coordinates for integer pixel positions
|
|
|
|
|
|
BoundingBox roundedBox;
|
|
|
|
|
|
roundedBox.x = std::round(scaledBox.x);
|
|
|
|
|
|
roundedBox.y = std::round(scaledBox.y);
|
|
|
|
|
|
roundedBox.width = std::round(scaledBox.width);
|
|
|
|
|
|
roundedBox.height = std::round(scaledBox.height);
|
|
|
|
|
|
|
|
|
|
|
|
// Adjust NMS box coordinates to prevent overlap between classes
|
|
|
|
|
|
BoundingBox nmsBox = roundedBox;
|
|
|
|
|
|
nmsBox.x += classId * 7680; // Arbitrary offset to differentiate classes
|
|
|
|
|
|
nmsBox.y += classId * 7680;
|
|
|
|
|
|
|
|
|
|
|
|
// Add to respective containers
|
|
|
|
|
|
nms_boxes.emplace_back(nmsBox);
|
|
|
|
|
|
boxes.emplace_back(roundedBox);
|
|
|
|
|
|
confs.emplace_back(maxScore);
|
|
|
|
|
|
classIds.emplace_back(classId);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Apply Non-Maximum Suppression (NMS) to eliminate redundant detections
|
|
|
|
|
|
std::vector<int> indices;
|
|
|
|
|
|
NMSBoxes(nms_boxes, confs, confThreshold, iouThreshold, indices);
|
|
|
|
|
|
|
|
|
|
|
|
// Collect filtered detections into the result vector
|
|
|
|
|
|
int classNameSize = _classes.size();
|
|
|
|
|
|
detections.reserve(indices.size());
|
|
|
|
|
|
for (const int idx : indices) {
|
|
|
|
|
|
float conf = confs[idx];
|
|
|
|
|
|
if (conf >= confThreshold) {
|
|
|
|
|
|
Object detection;
|
|
|
|
|
|
detection.confidence = confs[idx];
|
|
|
|
|
|
detection.box.x = boxes[idx].x;
|
|
|
|
|
|
detection.box.y = boxes[idx].y;
|
|
|
|
|
|
detection.box.width = boxes[idx].width;
|
|
|
|
|
|
detection.box.height = boxes[idx].height;
|
|
|
|
|
|
detection.classId = classIds[idx];
|
|
|
|
|
|
if (!_classes.empty()) {
|
|
|
|
|
|
if (detection.classId < classNameSize) {
|
|
|
|
|
|
detection.className = _classes[detection.classId];
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
detection.className = _classes[classNameSize - 1]; // Use last valid class name if out of range
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
detection.className = "Unknown"; // Fallback if _classes is empty
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(detection.box, m_imgWidth, m_imgHeight);
|
|
|
|
|
|
detections.push_back(detection);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return detections;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::postprocess", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return std::vector<Object>();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<Object> YOLO12OD::detect(const cv::Mat& image, float confThreshold, float iouThreshold) {
|
|
|
|
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
|
|
|
|
try {
|
|
|
|
|
|
// Define the shape of the input tensor (batch size, channels, height, width)
|
|
|
|
|
|
std::vector<int64_t> inputTensorShape = { 1, 3, inputImageShape.height, inputImageShape.width };
|
|
|
|
|
|
|
|
|
|
|
|
// Preprocess the image and obtain the blob as a vector<float>
|
|
|
|
|
|
std::vector<float> blob;
|
|
|
|
|
|
cv::Mat preprocessedImage = preprocess(image, blob, inputTensorShape);
|
|
|
|
|
|
|
|
|
|
|
|
// Compute the total number of elements in the input tensor
|
|
|
|
|
|
size_t inputTensorSize = vectorProduct(inputTensorShape);
|
|
|
|
|
|
|
|
|
|
|
|
if (blob.size() != inputTensorSize) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::detect", "Mismatch between blob size and expected tensor size", __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Create an Ort memory info object (cached for efficiency)
|
|
|
|
|
|
static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
|
|
|
|
|
|
|
|
// Create input tensor object using the preprocessed data
|
|
|
|
|
|
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
|
|
|
|
|
memoryInfo,
|
|
|
|
|
|
blob.data(), // Use the vector's data directly
|
|
|
|
|
|
inputTensorSize,
|
|
|
|
|
|
inputTensorShape.data(),
|
|
|
|
|
|
inputTensorShape.size()
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
// Run the inference session with the input tensor and retrieve output tensors
|
|
|
|
|
|
std::vector<Ort::Value> outputTensors = session.Run(
|
|
|
|
|
|
Ort::RunOptions{ nullptr },
|
|
|
|
|
|
inputNames.data(),
|
|
|
|
|
|
&inputTensor,
|
|
|
|
|
|
numInputNodes,
|
|
|
|
|
|
outputNames.data(),
|
|
|
|
|
|
numOutputNodes
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
// Determine the resized image shape based on input tensor shape
|
|
|
|
|
|
cv::Size resizedImageShape(static_cast<int>(inputTensorShape[3]), static_cast<int>(inputTensorShape[2]));
|
|
|
|
|
|
|
|
|
|
|
|
// Postprocess the output tensors to obtain detections
|
|
|
|
|
|
std::vector<Object> detections = postprocess(image.size(), resizedImageShape, outputTensors, confThreshold, iouThreshold);
|
|
|
|
|
|
|
|
|
|
|
|
return detections; // Return the vector of detections
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
this->_logger.LogFatal("YOLO12OD::detect", e.what(), __FILE__, __LINE__);
|
|
|
|
|
|
return {};
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Utility function to clamp a value within a specified range
|
|
|
|
|
|
size_t YOLO12OD::vectorProduct(const std::vector<int64_t>& vector) {
|
|
|
|
|
|
return std::accumulate(vector.begin(), vector.end(), 1ull, std::multiplies<size_t>());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void YOLO12OD::letterBox(const cv::Mat& image, cv::Mat& outImage,
|
|
|
|
|
|
const cv::Size& newShape,
|
|
|
|
|
|
const cv::Scalar& color,
|
|
|
|
|
|
bool auto_,
|
|
|
|
|
|
bool scaleFill,
|
|
|
|
|
|
bool scaleUp,
|
|
|
|
|
|
int stride)
|
|
|
|
|
|
{
|
|
|
|
|
|
try {
|
|
|
|
|
|
// Calculate the scaling ratio to fit the image within the new shape
|
|
|
|
|
|
float ratio = std::min(static_cast<float>(newShape.height) / image.rows,
|
|
|
|
|
|
static_cast<float>(newShape.width) / image.cols);
|
|
|
|
|
|
|
|
|
|
|
|
// Prevent scaling up if not allowed
|
|
|
|
|
|
if (!scaleUp) {
|
|
|
|
|
|
ratio = std::min(ratio, 1.0f);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate new dimensions after scaling
|
|
|
|
|
|
int newUnpadW = static_cast<int>(std::round(image.cols * ratio));
|
|
|
|
|
|
int newUnpadH = static_cast<int>(std::round(image.rows * ratio));
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate padding needed to reach the desired shape
|
|
|
|
|
|
int dw = newShape.width - newUnpadW;
|
|
|
|
|
|
int dh = newShape.height - newUnpadH;
|
|
|
|
|
|
|
|
|
|
|
|
if (auto_) {
|
|
|
|
|
|
// Ensure padding is a multiple of stride for model compatibility
|
|
|
|
|
|
dw = (dw % stride) / 2;
|
|
|
|
|
|
dh = (dh % stride) / 2;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (scaleFill) {
|
|
|
|
|
|
// Scale to fill without maintaining aspect ratio
|
|
|
|
|
|
newUnpadW = newShape.width;
|
|
|
|
|
|
newUnpadH = newShape.height;
|
|
|
|
|
|
ratio = std::min(static_cast<float>(newShape.width) / image.cols,
|
|
|
|
|
|
static_cast<float>(newShape.height) / image.rows);
|
|
|
|
|
|
dw = 0;
|
|
|
|
|
|
dh = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
// Evenly distribute padding on both sides
|
|
|
|
|
|
// Calculate separate padding for left/right and top/bottom to handle odd padding
|
|
|
|
|
|
int padLeft = dw / 2;
|
|
|
|
|
|
int padRight = dw - padLeft;
|
|
|
|
|
|
int padTop = dh / 2;
|
|
|
|
|
|
int padBottom = dh - padTop;
|
|
|
|
|
|
|
|
|
|
|
|
// Resize the image if the new dimensions differ
|
|
|
|
|
|
if (image.cols != newUnpadW || image.rows != newUnpadH) {
|
|
|
|
|
|
cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
// Avoid unnecessary copying if dimensions are the same
|
|
|
|
|
|
outImage = image;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Apply padding to reach the desired shape
|
|
|
|
|
|
cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight, cv::BORDER_CONSTANT, color);
|
|
|
|
|
|
return; // Exit early since padding is already applied
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Resize the image if the new dimensions differ
|
|
|
|
|
|
if (image.cols != newUnpadW || image.rows != newUnpadH) {
|
|
|
|
|
|
cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
// Avoid unnecessary copying if dimensions are the same
|
|
|
|
|
|
outImage = image;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate separate padding for left/right and top/bottom to handle odd padding
|
|
|
|
|
|
int padLeft = dw / 2;
|
|
|
|
|
|
int padRight = dw - padLeft;
|
|
|
|
|
|
int padTop = dh / 2;
|
|
|
|
|
|
int padBottom = dh - padTop;
|
|
|
|
|
|
|
|
|
|
|
|
// Apply padding to reach the desired shape
|
|
|
|
|
|
cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight, cv::BORDER_CONSTANT, color);
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
std::cerr << "Error in letterBox: " << e.what() << std::endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
BoundingBox YOLO12OD::scaleCoords(const cv::Size& imageShape, BoundingBox coords,
|
|
|
|
|
|
const cv::Size& imageOriginalShape, bool p_Clip)
|
|
|
|
|
|
{
|
|
|
|
|
|
BoundingBox result;
|
|
|
|
|
|
try {
|
|
|
|
|
|
float gain = std::min(static_cast<float>(imageShape.height) / static_cast<float>(imageOriginalShape.height),
|
|
|
|
|
|
static_cast<float>(imageShape.width) / static_cast<float>(imageOriginalShape.width));
|
|
|
|
|
|
|
|
|
|
|
|
int padX = static_cast<int>(std::round((imageShape.width - imageOriginalShape.width * gain) / 2.0f));
|
|
|
|
|
|
int padY = static_cast<int>(std::round((imageShape.height - imageOriginalShape.height * gain) / 2.0f));
|
|
|
|
|
|
|
|
|
|
|
|
result.x = static_cast<int>(std::round((coords.x - padX) / gain));
|
|
|
|
|
|
result.y = static_cast<int>(std::round((coords.y - padY) / gain));
|
|
|
|
|
|
result.width = static_cast<int>(std::round(coords.width / gain));
|
|
|
|
|
|
result.height = static_cast<int>(std::round(coords.height / gain));
|
|
|
|
|
|
|
|
|
|
|
|
if (p_Clip) {
|
|
|
|
|
|
result.x = clamp(result.x, 0, imageOriginalShape.width);
|
|
|
|
|
|
result.y = clamp(result.y, 0, imageOriginalShape.height);
|
|
|
|
|
|
result.width = clamp(result.width, 0, imageOriginalShape.width - result.x);
|
|
|
|
|
|
result.height = clamp(result.height, 0, imageOriginalShape.height - result.y);
|
|
|
|
|
|
}
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
catch (const std::exception& e) {
|
|
|
|
|
|
std::cerr << "Error in scaleCoords: " << e.what() << std::endl;
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Optimized Non-Maximum Suppression Function
|
|
|
|
|
|
void YOLO12OD::NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,
|
|
|
|
|
|
const std::vector<float>& scores,
|
|
|
|
|
|
float scoreThreshold,
|
|
|
|
|
|
float nmsThreshold,
|
|
|
|
|
|
std::vector<int>& indices)
|
|
|
|
|
|
{
|
|
|
|
|
|
indices.clear();
|
|
|
|
|
|
|
|
|
|
|
|
const size_t numBoxes = boundingBoxes.size();
|
|
|
|
|
|
if (numBoxes == 0) {
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Step 1: Filter out boxes with scores below the threshold
|
|
|
|
|
|
// and create a list of indices sorted by descending scores
|
|
|
|
|
|
std::vector<int> sortedIndices;
|
|
|
|
|
|
sortedIndices.reserve(numBoxes);
|
|
|
|
|
|
for (size_t i = 0; i < numBoxes; ++i) {
|
|
|
|
|
|
if (scores[i] >= scoreThreshold) {
|
|
|
|
|
|
sortedIndices.push_back(static_cast<int>(i));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// If no boxes remain after thresholding
|
|
|
|
|
|
if (sortedIndices.empty()) {
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Sort the indices based on scores in descending order
|
|
|
|
|
|
std::sort(sortedIndices.begin(), sortedIndices.end(),
|
|
|
|
|
|
[&scores](int idx1, int idx2) {
|
|
|
|
|
|
return scores[idx1] > scores[idx2];
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
// Step 2: Precompute the areas of all boxes
|
|
|
|
|
|
std::vector<float> areas(numBoxes, 0.0f);
|
|
|
|
|
|
for (size_t i = 0; i < numBoxes; ++i) {
|
|
|
|
|
|
areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Step 3: Suppression mask to mark boxes that are suppressed
|
|
|
|
|
|
std::vector<bool> suppressed(numBoxes, false);
|
|
|
|
|
|
|
|
|
|
|
|
// Step 4: Iterate through the sorted list and suppress boxes with high IoU
|
|
|
|
|
|
for (size_t i = 0; i < sortedIndices.size(); ++i) {
|
|
|
|
|
|
int currentIdx = sortedIndices[i];
|
|
|
|
|
|
if (suppressed[currentIdx]) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Select the current box as a valid detection
|
|
|
|
|
|
indices.push_back(currentIdx);
|
|
|
|
|
|
|
|
|
|
|
|
const BoundingBox& currentBox = boundingBoxes[currentIdx];
|
|
|
|
|
|
const float x1_max = currentBox.x;
|
|
|
|
|
|
const float y1_max = currentBox.y;
|
|
|
|
|
|
const float x2_max = currentBox.x + currentBox.width;
|
|
|
|
|
|
const float y2_max = currentBox.y + currentBox.height;
|
|
|
|
|
|
const float area_current = areas[currentIdx];
|
|
|
|
|
|
|
|
|
|
|
|
// Compare IoU of the current box with the rest
|
|
|
|
|
|
for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
|
|
|
|
|
|
int compareIdx = sortedIndices[j];
|
|
|
|
|
|
if (suppressed[compareIdx]) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const BoundingBox& compareBox = boundingBoxes[compareIdx];
|
|
|
|
|
|
const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
|
|
|
|
|
|
const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
|
|
|
|
|
|
const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
|
|
|
|
|
|
const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
|
|
|
|
|
|
|
|
|
|
|
|
const float interWidth = x2 - x1;
|
|
|
|
|
|
const float interHeight = y2 - y1;
|
|
|
|
|
|
|
|
|
|
|
|
if (interWidth <= 0 || interHeight <= 0) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const float intersection = interWidth * interHeight;
|
|
|
|
|
|
const float unionArea = area_current + areas[compareIdx] - intersection;
|
|
|
|
|
|
const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
|
|
|
|
|
|
|
|
|
|
|
|
if (iou > nmsThreshold) {
|
|
|
|
|
|
suppressed[compareIdx] = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|