1849 lines
63 KiB
C++
1849 lines
63 KiB
C++
#include "ANSYOLOOD.h"
|
|
#include "Utility.h"
|
|
#include "EPLoader.h"
|
|
#include "ANSGpuFrameRegistry.h"
|
|
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
|
#ifdef USEONNXOV
|
|
//#include <openvino_provider_factory.h>
|
|
#endif
|
|
|
|
namespace ANSCENTER
|
|
{
|
|
void NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,const std::vector<float>& scores,float scoreThreshold,float nmsThreshold,std::vector<int>& indices)
|
|
{
|
|
indices.clear();
|
|
const size_t numBoxes = boundingBoxes.size();
|
|
if (numBoxes == 0) {
|
|
return;
|
|
}
|
|
|
|
// Step 1: Filter out boxes with scores below the threshold
|
|
// and create a list of indices sorted by descending scores
|
|
std::vector<int> sortedIndices;
|
|
sortedIndices.reserve(numBoxes);
|
|
for (size_t i = 0; i < numBoxes; ++i) {
|
|
if (scores[i] >= scoreThreshold) {
|
|
sortedIndices.push_back(static_cast<int>(i));
|
|
}
|
|
}
|
|
|
|
// If no boxes remain after thresholding
|
|
if (sortedIndices.empty()) {
|
|
return;
|
|
}
|
|
|
|
// Sort the indices based on scores in descending order
|
|
std::sort(sortedIndices.begin(), sortedIndices.end(),
|
|
[&scores](int idx1, int idx2) {
|
|
return scores[idx1] > scores[idx2];
|
|
});
|
|
|
|
// Step 2: Precompute the areas of all boxes
|
|
std::vector<float> areas(numBoxes, 0.0f);
|
|
for (size_t i = 0; i < numBoxes; ++i) {
|
|
areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
|
|
}
|
|
|
|
// Step 3: Suppression mask to mark boxes that are suppressed
|
|
std::vector<bool> suppressed(numBoxes, false);
|
|
|
|
// Step 4: Iterate through the sorted list and suppress boxes with high IoU
|
|
for (size_t i = 0; i < sortedIndices.size(); ++i) {
|
|
int currentIdx = sortedIndices[i];
|
|
if (suppressed[currentIdx]) {
|
|
continue;
|
|
}
|
|
|
|
// Select the current box as a valid detection
|
|
indices.push_back(currentIdx);
|
|
|
|
const BoundingBox& currentBox = boundingBoxes[currentIdx];
|
|
const float x1_max = currentBox.x;
|
|
const float y1_max = currentBox.y;
|
|
const float x2_max = currentBox.x + currentBox.width;
|
|
const float y2_max = currentBox.y + currentBox.height;
|
|
const float area_current = areas[currentIdx];
|
|
|
|
// Compare IoU of the current box with the rest
|
|
for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
|
|
int compareIdx = sortedIndices[j];
|
|
if (suppressed[compareIdx]) {
|
|
continue;
|
|
}
|
|
|
|
const BoundingBox& compareBox = boundingBoxes[compareIdx];
|
|
const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
|
|
const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
|
|
const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
|
|
const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
|
|
|
|
const float interWidth = x2 - x1;
|
|
const float interHeight = y2 - y1;
|
|
|
|
if (interWidth <= 0 || interHeight <= 0) {
|
|
continue;
|
|
}
|
|
|
|
const float intersection = interWidth * interHeight;
|
|
const float unionArea = area_current + areas[compareIdx] - intersection;
|
|
const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
|
|
|
|
if (iou > nmsThreshold) {
|
|
suppressed[compareIdx] = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// End of YOLO V8
|
|
// Utility functions
|
|
void YOLOOD::getBestClassInfo(std::vector<float>::iterator it, const int& numClasses, float& bestConf, int& bestClassId)
|
|
{
|
|
try {
|
|
// first 5 element are box and obj confidence
|
|
bestClassId = 5;
|
|
bestConf = 0;
|
|
|
|
for (int i = 5; i < numClasses + 5; i++)
|
|
{
|
|
if (it[i] > bestConf)
|
|
{
|
|
bestConf = it[i];
|
|
bestClassId = i - 5;
|
|
}
|
|
}
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::getBestClassInfo", e.what(), __FILE__, __LINE__);
|
|
}
|
|
}
|
|
size_t YOLOOD::vectorProduct(const std::vector<int64_t>& vector)
|
|
{
|
|
try {
|
|
if (vector.empty())
|
|
return 0;
|
|
size_t product = 1;
|
|
for (const auto& element : vector)
|
|
product *= element;
|
|
|
|
return product;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::vectorProduct", e.what(), __FILE__, __LINE__);
|
|
return 0;
|
|
}
|
|
}
|
|
void YOLOOD::letterbox(const cv::Mat& image, cv::Mat& outImage, const cv::Size& newShape,
    const cv::Scalar& color, bool auto_, bool scaleFill, bool scaleUp, int stride)
{
    // YOLO-style letterbox: resizes `image` into `outImage` preserving aspect
    // ratio, padding the borders with `color`.
    //  auto_     : pad only up to the next multiple of `stride` (minimal rectangle)
    //  scaleFill : stretch to newShape exactly, no padding
    //  scaleUp   : allow upscaling (otherwise images are only shrunk)
    try {
        cv::Size shape = image.size();
        // Uniform scale that fits the image inside newShape.
        float r = std::min((float)newShape.height / (float)shape.height,
            (float)newShape.width / (float)shape.width);
        if (!scaleUp)
            r = std::min(r, 1.0f);

        // Unpadded size after resizing.
        int newUnpad[2]{ (int)std::round((float)shape.width * r),
            (int)std::round((float)shape.height * r) };

        // Total padding needed to reach newShape.
        auto dw = (float)(newShape.width - newUnpad[0]);
        auto dh = (float)(newShape.height - newUnpad[1]);

        if (auto_) {
            // Minimal rectangle: keep only the remainder modulo stride.
            dw = (float)((int)dw % stride);
            dh = (float)((int)dh % stride);
        }
        else if (scaleFill) {
            // Stretch exactly to target — no padding at all.
            dw = 0.0f;
            dh = 0.0f;
            newUnpad[0] = newShape.width;
            newUnpad[1] = newShape.height;
        }

        // Half of the padding goes on each side.
        dw /= 2.0f;
        dh /= 2.0f;

        if (shape.width != newUnpad[0] || shape.height != newUnpad[1]) {
            cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
        }
        else {
            outImage = image.clone();
        }

        // FIX: bottom/right were previously derived from newShape
        // (newShape - newUnpad - top/left), which ignores the stride-modulo
        // reduction applied in auto_ mode — the output was always padded to
        // the full newShape, defeating the minimal-rectangle behaviour.
        // Derive both sides from the (possibly reduced) dw/dh instead.
        // The ±0.1 rounding reproduces the original split exactly for the
        // non-auto path: an odd total puts the extra pixel on bottom/right.
        int top = (int)std::round(dh - 0.1f);
        int bottom = (int)std::round(dh + 0.1f);
        int left = (int)std::round(dw - 0.1f);
        int right = (int)std::round(dw + 0.1f);

        cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("YOLOOD::letterbox", e.what(), __FILE__, __LINE__);
    }
}
|
|
void YOLOOD::scaleCoords(const cv::Size& imageShape, cv::Rect& coords, const cv::Size& imageOriginalShape)
{
    // Maps a box from letterboxed network coordinates back onto the original
    // image: first undo the centred padding, then undo the uniform resize gain.
    // `coords` is modified in place.
    try {
        // Same uniform gain that letterbox applied during preprocessing.
        const float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height,
                                    (float)imageShape.width / (float)imageOriginalShape.width);

        // Padding that was added symmetrically on each side.
        const int padX = (int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f);
        const int padY = (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f);

        coords.x = (int)std::round(((float)(coords.x - padX) / gain));
        coords.y = (int)std::round(((float)(coords.y - padY) / gain));
        coords.width = (int)std::round(((float)coords.width / gain));
        coords.height = (int)std::round(((float)coords.height / gain));
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("YOLOOD::scaleCoords", e.what(), __FILE__, __LINE__);
    }
}
|
|
bool YOLOOD::loadYoloModel(const std::string& modelPath, const bool& isGPU, const cv::Size& inputSize)
{
    // Creates the ONNX Runtime session for this detector and caches the
    // input/output node names and expected input geometry.
    //  modelPath : path to the .onnx model file
    //  isGPU     : when true, attach the GPU execution provider selected by
    //              EPLoader (CUDA / DirectML / OpenVINO); otherwise CPU
    //  inputSize : network input size (width x height)
    // Returns true on success; false on any failure (logged as fatal).
    try {
        const auto& ep = ANSCENTER::EPLoader::Current();
        // The ORT C API may be loaded dynamically by EPLoader; bind the C++
        // wrapper once if it has not been initialised yet.
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        std::cout << "[YOLOOD] EP ready: "
            << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;

        env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "ONNX_DETECTION");
        sessionOptions = Ort::SessionOptions();
        sessionOptions.SetIntraOpNumThreads(
            std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
        sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

        // DirectML REQUIRES these two settings per ORT documentation
        if (ep.type == ANSCENTER::EngineType::AMD_GPU) {
            sessionOptions.DisableMemPattern();
            sessionOptions.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
        }

        // ── Log available providers ─────────────────────────────────────────
        std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
        std::cout << "Available Execution Providers:" << std::endl;
        for (const auto& p : availableProviders)
            std::cout << " - " << p << std::endl;

        // ── Attach EP based on runtime-detected hardware ────────────────────
        // EPLoader owns the hardware decision; isGPU only gates whether we
        // try to attach a GPU provider at all.
        if (isGPU) {
            bool attached = false;

            switch (ep.type) {

            case ANSCENTER::EngineType::NVIDIA_GPU: {
                auto cuda_it = std::find(availableProviders.begin(),
                    availableProviders.end(),
                    "CUDAExecutionProvider");
                if (cuda_it == availableProviders.end()) {
                    this->_logger.LogError("YOLOOD::loadYoloModel. CUDA EP failed:", "CUDAExecutionProvider not in DLL — "
                        "check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    // V2 options API: create, set device, attach, release.
                    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                    Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);

                    const char* keys[] = { "device_id" };
                    const char* values[] = { "0" };
                    Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);

                    sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);

                    Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);

                    std::cout << "[YOLOOD] CUDA EP attached." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("YOLOOD::loadYoloModel. CUDA EP failed:", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::AMD_GPU: {
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(),
                    "DmlExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("YOLOOD::loadYoloModel. DirectML EP failed:", "DmlExecutionProvider not in DLL — "
                        "check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    std::unordered_map<std::string, std::string> opts = {
                        { "device_id", "0" }
                    };
                    sessionOptions.AppendExecutionProvider("DML", opts);
                    std::cout << "[YOLOOD] DirectML EP attached." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("YOLOOD::loadYoloModel. DirectML EP failed:", e.what(), __FILE__, __LINE__);
                }
                break;
            }

            case ANSCENTER::EngineType::OPENVINO_GPU: {
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(),
                    "OpenVINOExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("YOLOOD::loadYoloModel", "OpenVINOExecutionProvider not in DLL — "
                        "check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
                    break;
                }

                // Try device configs in priority order: NPU+GPU, discrete GPU,
                // secondary GPU, then GPU-with-CPU fallback.
                const std::string precision = "FP16";
                const std::string numberOfThreads = "8";
                const std::string numberOfStreams = "8";

                std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                    { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                    { {"device_type","GPU.0"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                    { {"device_type","GPU.1"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                    { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
                };

                for (const auto& config : try_configs) {
                    try {
                        sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
                        std::cout << "[YOLOOD] OpenVINO EP attached ("
                            << config.at("device_type") << ")." << std::endl;
                        attached = true;
                        break;
                    }
                    catch (const Ort::Exception& e) {
                        // This config is unavailable — log and try the next one.
                        this->_logger.LogError("YOLOOD::loadYoloModel", e.what(), __FILE__, __LINE__);
                    }
                }

                if (!attached)
                    std::cerr << "[YOLOOD] OpenVINO EP: all device configs failed." << std::endl;
                break;
            }

            default:
                break;
            }

            if (!attached) {
                // Not fatal for execution: ORT silently falls back to CPU.
                std::cerr << "[YOLOOD] No GPU EP attached — running on CPU." << std::endl;
                this->_logger.LogFatal("YOLOOD::loadYoloModel","GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
            }
        }
        else {
            std::cout << "[YOLOOD] Inference device: CPU (isGPU=false)" << std::endl;
        }

        // ── Load model ──────────────────────────────────────────────────────
        std::wstring w_modelPath = String2WString(modelPath.c_str());
        session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
        Ort::AllocatorWithDefaultOptions allocator;

        // ── Input shape ─────────────────────────────────────────────────────
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
        std::vector<int64_t> inputTensorShape =
            inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        // Dynamic H/W are reported as -1 in the model's input shape.
        this->isDynamicInputShape =
            (inputTensorShape.size() >= 4 &&
                inputTensorShape[2] == -1 &&
                inputTensorShape[3] == -1);

        if (this->isDynamicInputShape)
            std::cout << "Dynamic input shape detected." << std::endl;
        for (auto shape : inputTensorShape)
            std::cout << "Input shape: " << shape << std::endl;

        // ── Node names ──────────────────────────────────────────────────────
        // Copies intentionally outlive this scope: session.Run() consumes the
        // raw pointers for the lifetime of the detector.
        size_t inputNodesNum = session.GetInputCount();
        for (size_t i = 0; i < inputNodesNum; i++) {
            Ort::AllocatedStringPtr name = session.GetInputNameAllocated(i, allocator);
            // FIX: was `new char[50]` + strcpy — a heap buffer overflow for
            // any node name of 50+ characters. Size the buffer from the
            // actual name instead.
            char* buf = new char[strlen(name.get()) + 1];
            strcpy(buf, name.get());
            inputNodeNames.push_back(buf);
        }
        size_t outputNodesNum = session.GetOutputCount();
        for (size_t i = 0; i < outputNodesNum; i++) {
            Ort::AllocatedStringPtr name = session.GetOutputNameAllocated(i, allocator);
            // FIX: exact-size buffer (same overflow as the input-name loop).
            char* buf = new char[strlen(name.get()) + 1];
            strcpy(buf, name.get());
            outputNodeNames.push_back(buf);
        }

        this->inputImageShape = cv::Size2f(inputSize);
        std::cout << "Input image shape: " << inputImageShape << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::loadYoloModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
void YOLOOD::preprocessing(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape)
{
    // Builds a normalized NCHW float blob from a BGR (or grayscale) frame:
    // grayscale->BGR, BGR->RGB, letterbox resize, [0,1] scaling, HWC->CHW.
    // On success `inputTensorShape` is set to {1, 3, H, W} of the letterboxed
    // image and `blob` holds H*W*3 floats in planar order.
    m_imgWidth = image.cols;
    m_imgHeight = image.rows;
    try {
        // Promote a single-channel input to 3 channels.
        cv::Mat bgr;
        if (image.channels() == 1)
            cv::cvtColor(image, bgr, cv::COLOR_GRAY2BGR);
        else
            bgr = image; // shallow copy — pixel data is only read below

        // Network expects RGB channel order.
        cv::Mat rgb;
        cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);

        // Aspect-preserving resize with grey padding (separate output buffer).
        cv::Mat boxed;
        letterbox(rgb, boxed, inputImageShape,
                  cv::Scalar(114, 114, 114), isDynamicInputShape,
                  false, true, 32);

        if (boxed.empty()) {
            _logger.LogError("YOLOOD::preprocessing",
                "Letterbox operation failed",
                __FILE__, __LINE__);
            return;
        }

        // Report the actual tensor geometry back to the caller.
        inputTensorShape = { 1, 3, boxed.rows, boxed.cols };

        // Scale pixel values into [0, 1].
        cv::Mat asFloat;
        boxed.convertTo(asFloat, CV_32FC3, 1.0f / 255.0f);

        // Interleaved HWC -> planar CHW.
        const size_t planeSize = asFloat.rows * asFloat.cols;
        blob.resize(planeSize * 3);

        std::vector<cv::Mat> planes(3);
        cv::split(asFloat, planes);

        for (int c = 0; c < 3; ++c)
            std::memcpy(blob.data() + c * planeSize,
                        planes[c].data,
                        planeSize * sizeof(float));
    }
    catch (const std::exception& e) {
        _logger.LogFatal("YOLOOD::preprocessing", e.what(), __FILE__, __LINE__);
    }
}
|
|
cv::Mat YOLOOD::preprocessv11(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape)
{
    // YOLOv11 preprocessing: grayscale->BGR, letterbox resize, BGR->RGB,
    // [0,1] scaling, HWC->CHW. Fills `blob` and `inputTensorShape` and returns
    // the normalized float image (useful for visualization), or an empty Mat
    // on failure.
    try {
        // Validation
        if (image.empty()) {
            _logger.LogError("YOLOOD::preprocessv11", "Empty image provided",
                __FILE__, __LINE__);
            return cv::Mat();
        }

        // FIX: the v5 path (preprocessing) records the source frame size in
        // m_imgWidth/m_imgHeight, but this v11 path never did — so
        // postprocessv11's polygon normalization (which divides by these
        // members) used stale values whenever only detectv11 was called.
        m_imgWidth = image.cols;
        m_imgHeight = image.rows;

        // Convert grayscale to BGR if needed
        cv::Mat processedImage;
        if (image.channels() == 1) {
            cv::cvtColor(image, processedImage, cv::COLOR_GRAY2BGR);
        }
        else {
            processedImage = image; // Shallow copy - safe, read-only below
        }

        // Aspect-preserving resize with grey padding.
        cv::Mat resizedImage;
        letterbox(processedImage, resizedImage, inputImageShape,
            cv::Scalar(114, 114, 114), isDynamicInputShape,
            false, true, 32);

        if (resizedImage.empty()) {
            _logger.LogError("YOLOOD::preprocessv11",
                "Letterbox operation failed",
                __FILE__, __LINE__);
            return cv::Mat();
        }

        // Channel order expected by the network.
        cv::Mat rgbImage;
        cv::cvtColor(resizedImage, rgbImage, cv::COLOR_BGR2RGB);

        // Scale pixel values into [0, 1].
        cv::Mat floatImage;
        rgbImage.convertTo(floatImage, CV_32FC3, 1.0f / 255.0f);

        // Actual tensor geometry after letterboxing.
        const int height = floatImage.rows;
        const int width = floatImage.cols;
        const int channels = floatImage.channels();

        inputTensorShape = { 1, channels, height, width };

        // Allocate the output blob.
        const size_t totalSize = static_cast<size_t>(height) * width * channels;
        blob.resize(totalSize);

        // Interleaved HWC -> planar CHW.
        std::vector<cv::Mat> rgbChannels(channels);
        cv::split(floatImage, rgbChannels);

        const size_t channelSize = static_cast<size_t>(height) * width;
        for (int c = 0; c < channels; ++c) {
            std::memcpy(blob.data() + c * channelSize,
                rgbChannels[c].data,
                channelSize * sizeof(float));
        }

        return floatImage; // Return for visualization if needed
    }
    catch (const std::exception& e) {
        _logger.LogFatal("YOLOOD::preprocessv11", e.what(), __FILE__, __LINE__);
        return cv::Mat();
    }
}
|
|
std::vector<Object> YOLOOD::postprocessing(const cv::Size& resizedImageShape,const cv::Size& originalImageShape,std::vector<Ort::Value>& outputTensors,const float& confThreshold,const float& iouThreshold)
{
    // Decodes a YOLOv5-style output tensor and returns the final detections.
    //
    // Tensor layout (from the indexing below):
    //   outputTensors[0] has shape [batch, numDetections, 5 + numClasses];
    //   each row = cx, cy, w, h, objectness, then per-class scores.
    //
    // resizedImageShape  : letterboxed network input size (for un-mapping boxes)
    // originalImageShape : original frame size
    // confThreshold      : applied to objectness alone AND to objectness*classScore
    // iouThreshold       : IoU threshold passed to cv::dnn::NMSBoxes
    // Returns an empty vector on any exception (logged as fatal).
    try {
        // Read the output tensor in place — no copy of the buffer.
        const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
        std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();

        const int numClasses = static_cast<int>(outputShape[2]) - 5; // 5 = box(4) + objectness
        const int numDetections = static_cast<int>(outputShape[1]);
        const int stride = static_cast<int>(outputShape[2]); // floats per detection row

        // Pre-allocate (assume ~10% of detections pass the threshold).
        const size_t estimatedDetections = std::min(numDetections / 10, 1000);

        std::vector<cv::Rect> boxes;
        std::vector<float> confs;
        std::vector<int> classIds;

        boxes.reserve(estimatedDetections);
        confs.reserve(estimatedDetections);
        classIds.reserve(estimatedDetections);

        // Parse detections via raw pointer arithmetic (no per-row copies).
        for (int i = 0; i < numDetections; ++i) {
            const float* detection = rawOutput + i * stride;

            // Cheap early reject on objectness before scanning class scores.
            const float objectness = detection[4];
            if (objectness <= confThreshold) {
                continue;
            }

            // Arg-max over the class-score section of the row.
            float maxClassScore = 0.0f;
            int bestClassId = 0;

            for (int c = 0; c < numClasses; ++c) {
                const float classScore = detection[5 + c];
                if (classScore > maxClassScore) {
                    maxClassScore = classScore;
                    bestClassId = c;
                }
            }

            // Combined confidence = objectness * best class score (YOLOv5 convention).
            const float confidence = objectness * maxClassScore;
            if (confidence <= confThreshold) {
                continue;
            }

            // Convert centre format (cx, cy, w, h) to top-left corner format.
            // Coordinates are still in letterboxed network space here.
            const int centerX = static_cast<int>(detection[0]);
            const int centerY = static_cast<int>(detection[1]);
            const int width = static_cast<int>(detection[2]);
            const int height = static_cast<int>(detection[3]);
            const int left = centerX - width / 2;
            const int top = centerY - height / 2;

            boxes.emplace_back(left, top, width, height);
            confs.push_back(confidence);
            classIds.push_back(bestClassId);
        }

        // Class-agnostic NMS over all surviving boxes.
        std::vector<int> indices;
        cv::dnn::NMSBoxes(boxes, confs, confThreshold, iouThreshold, indices);

        // Build the final detections in original-image coordinates.
        std::vector<Object> detections;
        detections.reserve(indices.size());

        const int numClasses_safe = static_cast<int>(_classes.size());

        for (int idx : indices) {
            Object det;

            // Undo letterbox scaling/padding for this box (in place).
            det.box = boxes[idx];
            scaleCoords(resizedImageShape, det.box, originalImageShape);

            det.confidence = confs[idx];
            det.classId = classIds[idx];

            // Class-name lookup guarded against an out-of-range classId:
            // falls back to the last known name, or "Unknown" when no class
            // list was loaded.
            if (!_classes.empty()) {
                det.className = (det.classId < numClasses_safe)
                    ? _classes[det.classId]
                    : _classes[numClasses_safe - 1];
            }
            else {
                det.className = "Unknown";
            }

            detections.push_back(std::move(det));
        }

        return detections;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("YOLOOD::postprocessing", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
BoundingBox YOLOOD::scaleCoordsv11(const cv::Size& imageShape, BoundingBox coords, const cv::Size& imageOriginalShape, bool p_Clip)
{
    // Maps a box from letterboxed network space back to original-image space:
    // subtract the centred padding, divide by the resize gain, round to the
    // nearest pixel, and optionally clamp the result to the image bounds.
    // Returns the mapped box; the input is taken by value and left untouched.

    // Uniform gain applied during letterboxing.
    const float gain = std::min(
        static_cast<float>(imageShape.width) / imageOriginalShape.width,
        static_cast<float>(imageShape.height) / imageOriginalShape.height
    );
    const float invGain = 1.0f / gain;

    // Symmetric padding — half of the slack on each side.
    const float padX = (imageShape.width - imageOriginalShape.width * gain) * 0.5f;
    const float padY = (imageShape.height - imageOriginalShape.height * gain) * 0.5f;

    // Undo padding and scaling; +0.5f rounds to nearest for positive values.
    BoundingBox mapped;
    mapped.x = static_cast<int>((coords.x - padX) * invGain + 0.5f);
    mapped.y = static_cast<int>((coords.y - padY) * invGain + 0.5f);
    mapped.width = static_cast<int>(coords.width * invGain + 0.5f);
    mapped.height = static_cast<int>(coords.height * invGain + 0.5f);

    // Clamp into the original image if requested.
    if (p_Clip) {
        mapped.x = clamp(mapped.x, 0, imageOriginalShape.width);
        mapped.y = clamp(mapped.y, 0, imageOriginalShape.height);
        mapped.width = clamp(mapped.width, 0, imageOriginalShape.width - mapped.x);
        mapped.height = clamp(mapped.height, 0, imageOriginalShape.height - mapped.y);
    }

    return mapped;
}
|
|
std::vector<Object> YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,std::vector<Ort::Value>& outputTensors,float confThreshold,float iouThreshold)
{
    // Decodes a YOLOv8/v11-style output tensor (no objectness column) and
    // returns the final detections in original-image coordinates.
    //
    // Tensor layout (from the indexing below):
    //   outputTensors[0] has shape [batch, 4 + numClasses, numDetections],
    //   i.e. feature-major (transposed vs. YOLOv5): feature f of detection d
    //   is at rawOutput[f * numDetections + d].
    //
    // confThreshold : minimum best-class score
    // iouThreshold  : IoU threshold for class-aware NMS
    // Returns an empty vector on any exception (logged as fatal).
    try {
        // Read the output in place — no copy of the tensor buffer.
        const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
        const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();

        const size_t numFeatures = outputShape[1];   // 4 box values + class scores
        const size_t numDetections = outputShape[2];
        const int numClasses = static_cast<int>(numFeatures) - 4;

        // Early exit on an empty or malformed tensor.
        if (numDetections == 0 || numClasses <= 0) {
            return {};
        }

        // Letterbox inverse-mapping parameters, computed ONCE for all boxes.
        const float gain = std::min(
            static_cast<float>(resizedImageShape.width) / originalImageSize.width,
            static_cast<float>(resizedImageShape.height) / originalImageSize.height
        );
        const float invGain = 1.0f / gain;
        const float padX = (resizedImageShape.width - originalImageSize.width * gain) * 0.5f;
        const float padY = (resizedImageShape.height - originalImageSize.height * gain) * 0.5f;
        const int maxX = originalImageSize.width;
        const int maxY = originalImageSize.height;

        // Pre-allocate (estimate ~10% of candidates pass the threshold).
        const size_t estimatedCount = std::min(numDetections / 10, size_t(1000));
        std::vector<cv::Rect> boxes;     // final boxes in original-image space
        std::vector<cv::Rect> nmsBoxes;  // class-offset copies fed to NMS only
        std::vector<float> confs;
        std::vector<int> classIds;

        boxes.reserve(estimatedCount);
        nmsBoxes.reserve(estimatedCount);
        confs.reserve(estimatedCount);
        classIds.reserve(estimatedCount);

        // Parse detections (column d across the feature-major rows).
        for (size_t d = 0; d < numDetections; ++d) {
            // Box centre + size in letterboxed network space.
            const float centerX = rawOutput[0 * numDetections + d];
            const float centerY = rawOutput[1 * numDetections + d];
            const float width = rawOutput[2 * numDetections + d];
            const float height = rawOutput[3 * numDetections + d];

            // Arg-max over class scores (rows 4 .. 4+numClasses-1).
            int classId = 0;
            float maxScore = rawOutput[4 * numDetections + d];

            for (int c = 1; c < numClasses; ++c) {
                const float score = rawOutput[(4 + c) * numDetections + d];
                if (score > maxScore) {
                    maxScore = score;
                    classId = c;
                }
            }

            // Confidence threshold (no separate objectness in v8/v11 outputs).
            if (maxScore <= confThreshold) {
                continue;
            }

            // Centre format -> corner format, with the letterbox inverse
            // mapping folded in (subtract padding, divide by gain).
            const float left = (centerX - width * 0.5f - padX) * invGain;
            const float top = (centerY - height * 0.5f - padY) * invGain;
            const float scaledWidth = width * invGain;
            const float scaledHeight = height * invGain;

            // Round to nearest and clamp into the original image.
            const int x = clamp(static_cast<int>(left + 0.5f), 0, maxX);
            const int y = clamp(static_cast<int>(top + 0.5f), 0, maxY);
            const int w = clamp(static_cast<int>(scaledWidth + 0.5f), 0, maxX - x);
            const int h = clamp(static_cast<int>(scaledHeight + 0.5f), 0, maxY - y);

            // Store the real (un-offset) box.
            boxes.emplace_back(x, y, w, h);

            // Class-aware NMS trick: translate each class's boxes by a large
            // per-class offset so boxes of different classes can never overlap
            // and a single NMS pass suppresses only within a class.
            const int nmsOffset = classId * 7680;
            nmsBoxes.emplace_back(x + nmsOffset, y + nmsOffset, w, h);

            confs.push_back(maxScore);
            classIds.push_back(classId);
        }

        // Apply class-specific NMS on the offset boxes.
        std::vector<int> indices;
        cv::dnn::NMSBoxes(nmsBoxes, confs, confThreshold, iouThreshold, indices);

        // Build final detections.
        std::vector<Object> detections;
        detections.reserve(indices.size());

        const int numClassNames = static_cast<int>(_classes.size());

        for (int idx : indices) {
            Object det;

            // Use the original scaled box (NOT the class-offset NMS box).
            det.box.x = boxes[idx].x;
            det.box.y = boxes[idx].y;
            det.box.width = boxes[idx].width;
            det.box.height = boxes[idx].height;

            det.confidence = confs[idx];
            det.classId = classIds[idx];

            // Class-name lookup with fallback: last known name when the id is
            // out of range, "Unknown" when no class list was loaded.
            if (!_classes.empty() && det.classId < numClassNames) {
                det.className = _classes[det.classId];
            }
            else {
                det.className = _classes.empty() ? "Unknown" : _classes[numClassNames - 1];
            }

            // Normalized polygon from the pixel box.
            // NOTE(review): m_imgWidth/m_imgHeight are refreshed by
            // preprocessing() — confirm the v11 preprocessing path keeps them
            // current for the frame being decoded here.
            det.polygon = ANSUtilityHelper::RectToNormalizedPolygon(
                det.box, m_imgWidth, m_imgHeight
            );

            detections.emplace_back(std::move(det));
        }

        return detections;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("YOLOOD::postprocessv11", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
std::vector<Object> YOLOOD::detect(cv::Mat& image, const float& confThreshold, const float& iouThreshold) {
    // Full YOLOv5-style inference pass: preprocess -> ONNX Runtime -> decode + NMS.
    // Returns an empty vector on empty input or any failure (logged as fatal).
    if (image.empty()) {
        this->_logger.LogFatal("YOLOOD::detect", "Error: Empty image provided to detector", __FILE__, __LINE__);
        return {};
    }
    try {
        // Preprocess into the reusable member buffer (clear keeps capacity,
        // avoiding a fresh allocation on every frame).
        std::vector<int64_t> tensorShape;
        tensorShape.reserve(4);
        this->_blob.clear();
        this->preprocessing(image, this->_blob, tensorShape);

        // Validate the geometry preprocessing reported back.
        if (tensorShape.size() != 4 || tensorShape[2] <= 0 || tensorShape[3] <= 0) {
            this->_logger.LogFatal("YOLOOD::detect", "Invalid input tensor shape!", __FILE__, __LINE__);
            return {};
        }

        const size_t expectedElements = vectorProduct(tensorShape);
        if (this->_blob.size() != expectedElements) {
            this->_logger.LogFatal("YOLOOD::detect", "Mismatch in input tensor size!", __FILE__, __LINE__);
            return {};
        }

        // CPU memory descriptor — created once, reused on every call.
        static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
            OrtAllocatorType::OrtArenaAllocator,
            OrtMemType::OrtMemTypeDefault
        );

        // Wrap the blob (no copy) as the single network input.
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo,
            this->_blob.data(),
            this->_blob.size(),
            tensorShape.data(),
            tensorShape.size()
        );

        // Run inference.
        std::vector<Ort::Value> outputTensors = this->session.Run(
            Ort::RunOptions{ nullptr },
            inputNodeNames.data(),
            &inputTensor,
            inputNodeNames.size(),
            outputNodeNames.data(),
            outputNodeNames.size()
        );

        // Decode + NMS; returned directly so RVO applies.
        const cv::Size networkShape(
            static_cast<int>(tensorShape[3]),
            static_cast<int>(tensorShape[2])
        );
        return this->postprocessing(networkShape, image.size(), outputTensors, confThreshold, iouThreshold);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::detect", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
std::vector<Object> YOLOOD::detectv11(const cv::Mat& image, float confThreshold, float iouThreshold) {
    // YOLOv11 inference pass: preprocess -> ONNX Runtime -> class-aware NMS.
    // Returns an empty vector on empty input or any failure (logged as fatal).
    if (image.empty()) {
        this->_logger.LogFatal("YOLOOD::detectv11", "Error: Empty image provided to detector", __FILE__, __LINE__);
        return {};
    }

    try {
        // Seed the NCHW shape from the configured network input size;
        // preprocessv11 overwrites it with the actual letterboxed geometry.
        std::vector<int64_t> tensorShape = {
            1, 3, inputImageShape.height, inputImageShape.width
        };

        // Reuse the member buffer (clear keeps the existing capacity).
        this->_blob.clear();
        cv::Mat preprocessed = preprocessv11(image, this->_blob, tensorShape);

        // The blob must match the element count implied by the shape.
        const size_t expectedElements = vectorProduct(tensorShape);
        if (this->_blob.size() != expectedElements) {
            this->_logger.LogFatal("YOLOOD::detectv11", "Error: Blob size mismatch with expected input tensor size", __FILE__, __LINE__);
            return {};
        }

        // CPU memory descriptor — created once, reused across calls.
        static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
            OrtArenaAllocator,
            OrtMemTypeDefault
        );

        // Wrap the blob (no copy) as the network input.
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo,
            this->_blob.data(),
            expectedElements,
            tensorShape.data(),
            tensorShape.size()
        );

        // Run inference.
        std::vector<Ort::Value> outputTensors = this->session.Run(
            Ort::RunOptions{ nullptr },
            inputNodeNames.data(),
            &inputTensor,
            inputNodeNames.size(),
            outputNodeNames.data(),
            outputNodeNames.size()
        );

        // Decode + NMS; returned directly so RVO applies.
        const cv::Size networkShape(
            static_cast<int>(tensorShape[3]),
            static_cast<int>(tensorShape[2])
        );
        return postprocessv11(image.size(), networkShape, outputTensors, confThreshold, iouThreshold);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::detectv11", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
|
|
// YOLOOD
|
|
bool YOLOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
try {
|
|
if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
|
|
return false;
|
|
}
|
|
if (FileExist(_modelFilePath)) {
|
|
optimizedModelFolder = GetParentFolder(_modelFilePath);
|
|
this->_logger.LogDebug("YOLOOD::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__);
|
|
return true;
|
|
}
|
|
else {
|
|
optimizedModelFolder = "";
|
|
this->_logger.LogFatal("YOLOOD::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::OptimizeModel", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
|
|
}
|
|
bool YOLOOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    // Load a YOLO object-detection model from a (possibly password-protected) zip.
    // Delegates extraction to ANSODBase::LoadModel, then resolves model type
    // (YOLOv4 darknet vs YOLOv5/YOLOv8 ONNX), input size, thresholds and class
    // names. NOTE(review): unlike Initialize(), this overload does not call
    // Load*Network() — presumably the network is built elsewhere; confirm with callers.
    // Returns true on success, false on any failure (all failures are logged).
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading); // marks "model loading in progress" for this scope
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        // Defaults applied before any config file is consulted.
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp implausibly low thresholds back to 0.5.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        _letterBoxForSquare = true;
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            // New-style package: model type, input shape and classes all come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                // inputShape is {height, width}; non-positive entries keep the 640 default.
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
            _modelConfig.modelType = modelType;
            if (_modelConfig.modelType == ModelType::YOLOV4) {
                // Darknet weights.
                _isDarkNet = true;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.weights");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV5) {
                _isDarkNet = false;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV8) {
                _isDarkNet = false;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__);
                return false;
            }
            if (!FileExist(_modelFilePath)) {
                this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        else {// This is old version of model zip file
            // Legacy package: infer the model type from which weight file exists.
            std::string weightfile = CreateFilePath(_modelFolder, "train_last.weights");
            if (FileExist(weightfile)) {
                // This is the darknet Yolov4
                _modelFilePath = weightfile;
                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
                _modelConfig.modelType = ModelType::YOLOV4;
                _isDarkNet = true;
                // Darknet default input resolution.
                _modelConfig.inpHeight = 416;
                _modelConfig.inpWidth = 416;
            }
            else {
                _isDarkNet = false;
                std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
                if (std::filesystem::exists(onnxfile)) {
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                    this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            //std::ifstream isValidFileName(_classFilePath);
            // Class names: prefer the classes.names file, else fall back to the embedded string.
            if (FileExist(_classFilePath))
            {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
            else {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
        }
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("YOLOOD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
bool YOLOOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    // Load a YOLO model directly from an (already extracted) folder.
    // modelName is the weight-file stem (defaults to "train_last"); className is
    // the class-list file name used by the legacy layout. On success fills
    // labelMap with a comma-separated class list and builds the inference
    // network (darknet or ONNX). Returns true on success, false otherwise.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading); // marks "model loading in progress" for this scope
    try {
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;
        // Fall back to the conventional stem when no model name was given.
        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "train_last";
        }
        std::string weightFileName = _modelName + ".weights";
        _modelConfig = modelConfig;
        // Defaults applied before any config file is consulted.
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp implausibly low thresholds back to 0.5.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        _letterBoxForSquare = true;
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            // New-style package: model type, input shape and classes come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                // inputShape is {height, width}; non-positive entries keep the 640 default.
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
            _modelConfig.modelType = modelType;
            if (_modelConfig.modelType == ModelType::YOLOV4) {
                // Darknet weights keep the ".weights" file name.
                _isDarkNet = true;
                _modelFilePath = CreateFilePath(_modelFolder, weightFileName);
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV5) {
                _isDarkNet = false;
                weightFileName = _modelName + ".onnx";
                _modelFilePath = CreateFilePath(_modelFolder, weightFileName);
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV8) {
                _isDarkNet = false;
                weightFileName = _modelName + ".onnx";
                _modelFilePath = CreateFilePath(_modelFolder, weightFileName);
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__);
                return false;
            }
            if (!FileExist(_modelFilePath)) {
                this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        else {// This is old version of model zip file
            // Legacy layout: infer the model type from which weight file exists.
            weightFileName = _modelName + ".weights";
            std::string weightfile = CreateFilePath(_modelFolder, weightFileName);
            if (FileExist(weightfile)) {
                // This is the darknet Yolov4
                _modelFilePath = weightfile;
                _classFilePath = CreateFilePath(_modelFolder, className);
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
                _modelConfig.modelType = ModelType::YOLOV4;
                _isDarkNet = true;
                // Darknet default input resolution.
                _modelConfig.inpHeight = 416;
                _modelConfig.inpWidth = 416;
            }
            else {
                _isDarkNet = false;
                weightFileName = _modelName + ".onnx";
                std::string onnxfile = CreateFilePath(_modelFolder, weightFileName);
                if (std::filesystem::exists(onnxfile)) {
                    // This is the yovoV5 or yolov8 format
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, className);
                    this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            // Class names: an openable file wins; otherwise fall back to the embedded string.
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }

        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);

        if (_isDarkNet) {
            LoadDarknetNetwork();

        }
        else {
#ifdef USEOPENCVDNN
            // OpenCV-DNN build: YOLOv8 goes through readNetFromONNX, others through loadYoloModel.
            if (_modelConfig.modelType == ModelType::YOLOV8)
                LoadOnnxNetwork();
            else
                loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available
#else
            loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available
#endif
        }
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        // NOTE(review): the log tag says "LoadModel" — looks like copy/paste; confirm before changing.
        this->_logger.LogFatal("YOLOOD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
bool YOLOOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    // Full initialization from a model zip: base-class setup, model-type and
    // input-size resolution, class loading, then network construction
    // (darknet or ONNX). Fills labelMap with a comma-separated class list.
    // Returns true on success, false otherwise (all failures are logged).
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading); // marks "model loading in progress" for this scope
    try {
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Parsing for YOLO only here
        _modelConfig = modelConfig;
        // Defaults applied before any config file is consulted.
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp implausibly low thresholds back to 0.5.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        _letterBoxForSquare = true;
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            // New-style package: model type, input shape and classes come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                // inputShape is {height, width}; non-positive entries keep the 640 default.
                if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
            }
            _modelConfig.modelType = modelType;
            if (_modelConfig.modelType == ModelType::YOLOV4) {
                // Darknet weights.
                _isDarkNet = true;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.weights");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV5) {
                _isDarkNet = false;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else if (_modelConfig.modelType == ModelType::YOLOV8) {
                _isDarkNet = false;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__);
                return false;
            }
            if (!FileExist(_modelFilePath)) {
                this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        else {// This is old version of model zip file
            // Legacy package: infer the model type from which weight file exists.
            std::string weightfile = CreateFilePath(_modelFolder, "train_last.weights");
            if (FileExist(weightfile)) {
                // This is the darknet Yolov4
                _modelFilePath = weightfile;
                _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__);
                _modelConfig.modelType = ModelType::YOLOV4;
                _isDarkNet = true;
                // Darknet default input resolution.
                _modelConfig.inpHeight = 416;
                _modelConfig.inpWidth = 416;
            }
            else {
                _isDarkNet = false;
                std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
                if (std::filesystem::exists(onnxfile)) {
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                    this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8/Yolov5 weight", _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            // Class names: prefer the classes.names file, else fall back to the embedded string.
            if (FileExist(_classFilePath))
            {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
            else {
                this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
        }

        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);

        if (_isDarkNet) {
            LoadDarknetNetwork();
        }
        else {
#ifdef USEOPENCVDNN
            // OpenCV-DNN build: YOLOv8 goes through readNetFromONNX, others through loadYoloModel.
            if (_modelConfig.modelType == ModelType::YOLOV8)
                LoadOnnxNetwork();
            else
                loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available
#else
            loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available
#endif
        }
        _isInitialized = true;
        return true;

    }
    catch (std::exception& e) {
        this->_logger.LogFatal("YOLOOD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
std::vector<Object> YOLOOD::RunInference(const cv::Mat& input) {
    // Convenience overload: run inference under the default camera identifier.
    static const std::string kDefaultCameraId = "CustomCam";
    return RunInference(input, kDefaultCameraId);
}
|
|
std::vector<Object> YOLOOD::RunInference(const cv::Mat& input, const std::string& camera_id) {
    // Main inference entry point. Optionally swaps the (possibly downscaled)
    // display frame for a full-resolution BGR image reconstructed from the
    // thread-local GPU NV12 frame, dispatches to the model-specific runner,
    // rescales detections back to display resolution, then applies tracking
    // and stabilization if enabled. Returns an empty vector on any failure.
    if (!PreInferenceCheck("YOLOOD::RunInference")) return {};

    // Reject empty or degenerate frames early.
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        // --- NV12 fast path: try to get full-res BGR from GPU NV12 frame ---
        cv::Mat inferenceImage = input;
        // Scale factors mapping full-res coordinates back to `input` resolution.
        float bgrScaleX = 1.0f, bgrScaleY = 1.0f;
        {
            auto* gpuData = tl_currentGpuFrame();
            if (gpuData && gpuData->width > 0 && gpuData->height > 0) {
                // Both planes must be present with sane line strides.
                if (gpuData->cpuYPlane && gpuData->cpuUvPlane &&
                    gpuData->cpuYLinesize >= gpuData->width &&
                    gpuData->cpuUvLinesize >= gpuData->width) {
                    const int fw = gpuData->width;
                    const int fh = gpuData->height;
                    // NV12 requires even dimensions
                    if ((fw % 2) == 0 && (fh % 2) == 0) {
                        try {
                            // Wrap the raw plane pointers without copying.
                            cv::Mat yPlane(fh, fw, CV_8UC1,
                                gpuData->cpuYPlane, static_cast<size_t>(gpuData->cpuYLinesize));
                            cv::Mat uvPlane(fh / 2, fw / 2, CV_8UC2,
                                gpuData->cpuUvPlane, static_cast<size_t>(gpuData->cpuUvLinesize));
                            cv::Mat fullResBGR;
                            cv::cvtColorTwoPlane(yPlane, uvPlane, fullResBGR, cv::COLOR_YUV2BGR_NV12);
                            if (!fullResBGR.empty()) {
                                bgrScaleX = static_cast<float>(input.cols) / fullResBGR.cols;
                                bgrScaleY = static_cast<float>(input.rows) / fullResBGR.rows;
                                inferenceImage = fullResBGR;
                            }
                        } catch (...) { /* NV12 conversion failed — fall back to input */ }
                    }
                }
            }
        }
        // Dispatch by model type.
        std::vector<Object> ret;
        if (_isDarkNet) {
            ret = RunInferenceFromDarkNet(inferenceImage, camera_id);
        }
        else if (_modelConfig.modelType == ModelType::YOLOV5) {
            ret = RunInferenceFromYoloV5(inferenceImage, camera_id);
        }
        else {
#ifdef USEOPENCVDNN
            ret = RunInferenceFromONNX(inferenceImage, camera_id);
#else
            ret = RunInferenceFromYoloV8(inferenceImage, camera_id);
#endif
        }

        // --- Rescale coordinates from full-res back to display-res ---
        if (bgrScaleX != 1.0f || bgrScaleY != 1.0f) {
            for (auto& obj : ret) {
                obj.box.x = static_cast<int>(obj.box.x * bgrScaleX);
                obj.box.y = static_cast<int>(obj.box.y * bgrScaleY);
                obj.box.width = static_cast<int>(obj.box.width * bgrScaleX);
                obj.box.height = static_cast<int>(obj.box.height * bgrScaleY);
                // Keypoints are stored as a flat [x0, y0, x1, y1, ...] list.
                for (size_t k = 0; k < obj.kps.size(); k += 2) {
                    obj.kps[k] *= bgrScaleX; // x
                    if (k + 1 < obj.kps.size())
                        obj.kps[k + 1] *= bgrScaleY; // y
                }
                // NOTE(review): polygons come from RectToNormalizedPolygon in the
                // runners — if those coordinates are normalized (0..1), scaling
                // them again here may be double-scaling; confirm the polygon units.
                for (auto& pt : obj.polygon) {
                    pt.x *= bgrScaleX;
                    pt.y *= bgrScaleY;
                }
            }
        }

        // Optional tracker + stabilization post-processing.
        if (_trackerEnabled) {
            ret = ApplyTracking(ret, camera_id);
            if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
        }
        return ret;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
YOLOOD::~YOLOOD() {
    // Destructor only emits a trace message; network resources are released
    // explicitly through Destroy().
    try {
        this->_logger.LogDebug("YOLOOD::~YOLOOD()", "Release YOLOOD ", __FILE__, __LINE__);
    }
    catch (std::exception& ex) {
        // Never let an exception escape a destructor.
        this->_logger.LogFatal("YOLOOD::~YOLOOD()", ex.what(), __FILE__, __LINE__);
    }
}
|
|
bool YOLOOD::Destroy() {
|
|
try {
|
|
_net.reset();
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::Destroy()", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
std::vector<Object> YOLOOD::RunInferenceFromYoloV5(const cv::Mat& input, const std::string& camera_id) {
    // YOLOv5 path: normalize the frame to 3-channel BGR and delegate to detect().
    // Reject frames that are empty, null, or too small to be meaningful.
    const bool invalidFrame =
        input.empty() || input.data == nullptr || input.cols < 10 || input.rows < 10;
    if (invalidFrame) {
        return {};
    }

    try {
        // Grayscale frames are expanded to BGR; color frames pass through as a
        // shallow header copy (shares pixel data, no allocation).
        cv::Mat bgrFrame;
        if (input.channels() != 1) {
            bgrFrame = input;
        }
        else {
            cv::cvtColor(input, bgrFrame, cv::COLOR_GRAY2BGR);
        }
        return detect(bgrFrame, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold);
    }
    catch (const std::exception& ex) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV5", ex.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
std::vector<Object> YOLOOD::RunInferenceFromYoloV8(const cv::Mat& input, const std::string& camera_id) {
    // Runs ONNX Runtime inference on a YOLOv8 model (output layout
    // [1, 4 + numClasses, numAnchors]) and returns NMS-filtered detections.
    // Early validation - before try block
    if (input.empty() || input.data == nullptr) {
        return {};
    }
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        // Color conversion using reusable buffer.
        // FIX: the previous version routed the 3-channel case through a
        // const_cast<cv::Mat*>(&input) reference and later assigned the
        // letterboxed image back through it, mutating the caller's const Mat
        // header. A local Mat header copy shares the pixels (no allocation)
        // and keeps the caller's object untouched.
        cv::Mat modelInput;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            modelInput = this->_frameBuffer;
        }
        else {
            modelInput = input; // shallow header copy, shares data
        }

        // Model shape and letterbox (pad to square when the model is square).
        const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight);
        if (_letterBoxForSquare && modelShape.width == modelShape.height) {
            modelInput = ANSUtilityHelper::FormatToSquare(modelInput);
        }

        // Create blob: scale to [0,1], resize to model shape, BGR->RGB swap, no crop.
        cv::Mat blob;
        cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);

        // Create input tensor - static MemoryInfo (created once, reused across calls).
        static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        const std::array<int64_t, 4> inputShape = { 1, blob.size[1], blob.size[2], blob.size[3] };

        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo,
            reinterpret_cast<float*>(blob.data),
            blob.total(),
            inputShape.data(),
            inputShape.size()
        );

        // Run inference
        auto outputTensors = session.Run(
            Ort::RunOptions{ nullptr },
            inputNodeNames.data(), &inputTensor, inputNodeNames.size(),
            outputNodeNames.data(), outputNodeNames.size()
        );

        // FIX: guard against a model that produced no outputs before indexing [0].
        if (outputTensors.empty()) {
            this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV8",
                "Inference produced no output tensors", __FILE__, __LINE__);
            return {};
        }

        // Parse output
        const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
        const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();

        const int dimensions = static_cast<int>(outputShape[1]); // 4 + num_classes
        const int rows = static_cast<int>(outputShape[2]); // 8400
        const int numClasses = dimensions - 4;
        const int maxWidth = modelInput.cols;
        const int maxHeight = modelInput.rows;

        // Transpose to [numAnchors, 4 + numClasses] so each row is one candidate.
        cv::Mat outputMat(dimensions, rows, CV_32F, const_cast<float*>(rawOutput));
        cv::transpose(outputMat, outputMat);

        float* data = reinterpret_cast<float*>(outputMat.data);
        // Factors mapping model-space coordinates back to the letterboxed image.
        const float x_factor = static_cast<float>(modelInput.cols) / modelShape.width;
        const float y_factor = static_cast<float>(modelInput.rows) / modelShape.height;

        // Pre-allocate vectors
        std::vector<int> class_ids;
        std::vector<float> confidences;
        std::vector<cv::Rect> boxes;
        const size_t estimatedDetections = std::min(static_cast<size_t>(rows / 10), size_t{ 100 });
        class_ids.reserve(estimatedDetections);
        confidences.reserve(estimatedDetections);
        boxes.reserve(estimatedDetections);

        const float scoreThreshold = _modelConfig.detectionScoreThreshold;

        for (int i = 0; i < rows; ++i) {
            // Row layout: cx, cy, w, h, score[0..numClasses-1].
            float* classes_scores = data + 4;
            cv::Mat scores(1, numClasses, CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;
            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);

            if (maxClassScore >= scoreThreshold) {
                confidences.push_back(static_cast<float>(maxClassScore));
                class_ids.push_back(class_id.x);

                const float x = data[0];
                const float y = data[1];
                const float w = data[2];
                const float h = data[3];

                // Center format -> top-left corner, rescaled to the letterboxed image.
                int left = static_cast<int>((x - 0.5f * w) * x_factor);
                int top = static_cast<int>((y - 0.5f * h) * y_factor);
                int width = static_cast<int>(w * x_factor);
                int height = static_cast<int>(h * y_factor);

                // Clamp to image bounds (5px margin matches the rest of the file
                // — assumed intentional; TODO confirm)
                left = std::max(0, left);
                top = std::max(0, top);
                width = std::min(maxWidth - left - 5, width);
                height = std::min(maxHeight - top - 5, height);

                boxes.emplace_back(left, top, width, height);
            }
            data += dimensions;
        }

        // NMS
        std::vector<int> nms_result;
        cv::dnn::NMSBoxes(boxes, confidences, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, nms_result);

        // Build final detections
        std::vector<Object> detections;
        detections.reserve(nms_result.size());

        const size_t classNameSize = _classes.size();
        const int inputCols = input.cols;
        const int inputRows = input.rows;

        for (const int idx : nms_result) {
            if (confidences[idx] > scoreThreshold) {
                Object result;
                result.classId = class_ids[idx];
                result.confidence = confidences[idx];

                // Safe class name lookup: clamp out-of-range ids to the last entry.
                if (classNameSize > 0) {
                    const size_t safeIdx = static_cast<size_t>(result.classId) < classNameSize
                        ? static_cast<size_t>(result.classId)
                        : classNameSize - 1;
                    result.className = _classes[safeIdx];
                }
                else {
                    result.className = "Unknown";
                }

                result.box = boxes[idx];
                result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(result.box, inputCols, inputRows);
                result.cameraId = camera_id;
                detections.push_back(std::move(result));
            }
        }

        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV8", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
std::vector<Object> YOLOOD::RunInferenceFromYoloV11(const cv::Mat& input, [[maybe_unused]] const std::string& camera_id) {
    // YOLOv11 path: validate the frame, normalize to BGR, then delegate to detectv11().
    // All validation happens before the try block; all failures return {}.
    if (input.empty() || input.data == nullptr) {
        this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11",
            "Input image is empty or null", __FILE__, __LINE__);
        return {};
    }

    if (input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11",
            "Input image too small: " + std::to_string(input.cols) + "x" + std::to_string(input.rows),
            __FILE__, __LINE__);
        return {};
    }

    const int channels = input.channels();
    if (channels != 1 && channels != 3) {
        this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11",
            "Unsupported channel count: " + std::to_string(channels),
            __FILE__, __LINE__);
        return {};
    }

    try {
        // Pick the detection source: grayscale frames are expanded to BGR via
        // the reusable member buffer, color frames are used directly.
        const cv::Mat* detectSource = &input;
        if (channels == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            detectSource = &this->_frameBuffer;
        }
        return detectv11(*detectSource, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold);
    }
    catch (const cv::Exception& ex) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV11",
            "OpenCV error: " + std::string(ex.what()), __FILE__, __LINE__);
    }
    catch (const std::exception& ex) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV11",
            "Standard error: " + std::string(ex.what()), __FILE__, __LINE__);
    }

    return {};
}
|
|
std::vector<Object> YOLOOD::RunInferenceFromONNX(const cv::Mat& input, const std::string& camera_id) {
    // Runs YOLOv8-style inference through the OpenCV DNN backend (_net) and
    // returns NMS-filtered detections.
    // Early validation - before try block
    if (input.empty() || input.data == nullptr) {
        return {};
    }
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        // Color conversion using reusable buffer.
        // FIX: the previous version claimed "we don't modify it" but then
        // assigned the letterboxed image back through a const_cast reference,
        // mutating the caller's const Mat header. A local Mat header copy
        // shares the pixels (no allocation) and keeps the caller untouched.
        cv::Mat modelInput;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            modelInput = this->_frameBuffer;
        }
        else {
            modelInput = input; // shallow header copy, shares data
        }

        // Model shape
        const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight);

        if (_letterBoxForSquare && modelShape.width == modelShape.height) {
            modelInput = ANSUtilityHelper::FormatToSquare(modelInput);
        }

        // Blob creation and inference: scale to [0,1], BGR->RGB swap, no crop.
        cv::Mat blob;
        cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
        _net->setInput(blob);

        std::vector<cv::Mat> outputs;
        _net->forward(outputs, _net->getUnconnectedOutLayersNames());

        // FIX: guard against an empty output list before indexing [0].
        if (outputs.empty()) {
            this->_logger.LogFatal("YOLOOD::RunInferenceFromONNX",
                "Network produced no outputs", __FILE__, __LINE__);
            return {};
        }

        // Parse output dimensions ([1, 4 + numClasses, numAnchors]).
        const int rows = outputs[0].size[2];
        const int dimensions = outputs[0].size[1];
        const int maxWidth = modelInput.cols;
        const int maxHeight = modelInput.rows;

        // Transpose to [numAnchors, 4 + numClasses] so each row is one candidate.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);

        float* data = reinterpret_cast<float*>(outputs[0].data);
        // Factors mapping model-space coordinates back to the letterboxed image.
        const float x_factor = static_cast<float>(modelInput.cols) / modelShape.width;
        const float y_factor = static_cast<float>(modelInput.rows) / modelShape.height;

        // Pre-allocate with estimated capacity
        std::vector<int> class_ids;
        std::vector<float> confidences;
        std::vector<cv::Rect> boxes;
        const size_t estimatedDetections = std::min(static_cast<size_t>(rows / 10), size_t{ 100 });
        class_ids.reserve(estimatedDetections);
        confidences.reserve(estimatedDetections);
        boxes.reserve(estimatedDetections);

        const size_t numClasses = _classes.size();
        const float scoreThreshold = _modelConfig.detectionScoreThreshold;

        for (int i = 0; i < rows; ++i) {
            // Row layout: cx, cy, w, h, score[0..numClasses-1].
            float* classes_scores = data + 4;
            cv::Mat scores(1, static_cast<int>(numClasses), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;
            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);

            if (maxClassScore >= scoreThreshold) {
                confidences.push_back(static_cast<float>(maxClassScore));
                class_ids.push_back(class_id.x);

                const float x = data[0];
                const float y = data[1];
                const float w = data[2];
                const float h = data[3];

                // Center format -> top-left corner, rescaled to the letterboxed image.
                int left = static_cast<int>((x - 0.5f * w) * x_factor);
                int top = static_cast<int>((y - 0.5f * h) * y_factor);
                int width = static_cast<int>(w * x_factor);
                int height = static_cast<int>(h * y_factor);

                // Clamp to image bounds (5px margin matches the rest of the file)
                left = std::max(0, left);
                top = std::max(0, top);
                width = std::min(maxWidth - left - 5, width);
                height = std::min(maxHeight - top - 5, height);

                boxes.emplace_back(left, top, width, height);
            }
            data += dimensions;
        }

        // NMS
        std::vector<int> nms_result;
        cv::dnn::NMSBoxes(boxes, confidences, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, nms_result);

        // Build final detections
        std::vector<Object> detections;
        detections.reserve(nms_result.size());

        for (const int idx : nms_result) {
            if (confidences[idx] > scoreThreshold) {
                Object result;
                result.classId = class_ids[idx];
                result.confidence = confidences[idx];

                // FIX: bounds-safe class name lookup (consistent with
                // RunInferenceFromYoloV8) — previously _classes[classId] could
                // index out of bounds for ids outside the class list.
                if (numClasses > 0) {
                    const size_t safeIdx = static_cast<size_t>(result.classId) < numClasses
                        ? static_cast<size_t>(result.classId)
                        : numClasses - 1;
                    result.className = _classes[safeIdx];
                }
                else {
                    result.className = "Unknown";
                }

                result.box = boxes[idx];
                result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(result.box, input.cols, input.rows);
                result.cameraId = camera_id;
                detections.push_back(std::move(result));
            }
        }

        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromONNX", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
std::vector<Object> YOLOOD::RunInferenceFromDarkNet(const cv::Mat& input, const std::string& camera_id) {
    // Runs darknet (YOLOv4) inference through cv::dnn::DetectionModel and
    // returns score-filtered, bounds-clamped detections.
    // Early validation - before try block
    if (input.empty() || input.data == nullptr) {
        return {};
    }
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        // Color conversion using reusable buffer.
        // FIX: the previous version letterboxed through a const_cast reference
        // to the caller's const Mat, mutating its header. A local Mat header
        // copy shares the pixels (no allocation) and keeps the caller untouched.
        cv::Mat modelInput;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            modelInput = this->_frameBuffer;
        }
        else {
            modelInput = input; // shallow header copy, shares data
        }

        // Model shape
        const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight);

        if (_letterBoxForSquare && modelShape.width == modelShape.height) {
            modelInput = ANSUtilityHelper::FormatToSquare(modelInput);
        }

        // Setup detection model
        cv::dnn::DetectionModel model(*_net);
        // FIX: the input size previously passed inpHeight for BOTH dimensions
        // (cv::Size(inpHeight, inpHeight)) — harmless for the square 416x416
        // default but wrong for non-square models. Use width x height.
        model.setInputParams(1.0 / 255.0,
            cv::Size(static_cast<int>(_modelConfig.inpWidth), static_cast<int>(_modelConfig.inpHeight)),
            cv::Scalar(), true);

        // Run detection
        std::vector<int> classIds;
        std::vector<float> scores;
        std::vector<cv::Rect> boxes;
        model.detect(modelInput, classIds, scores, boxes,
            _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold);

        // Build results
        std::vector<Object> detections;
        detections.reserve(classIds.size());

        const float scoreThreshold = _modelConfig.detectionScoreThreshold;
        const int maxWidth = modelInput.cols;
        const int maxHeight = modelInput.rows;
        const int inputCols = input.cols;
        const int inputRows = input.rows;
        const size_t classNameSize = _classes.size();

        for (size_t i = 0; i < classIds.size(); ++i) {
            if (scores[i] > scoreThreshold) {
                Object result;
                result.classId = classIds[i];
                result.confidence = scores[i];

                // FIX: bounds-safe class name lookup (consistent with
                // RunInferenceFromYoloV8) — previously _classes[classId] could
                // index out of bounds.
                if (classNameSize > 0) {
                    const size_t safeIdx = static_cast<size_t>(result.classId) < classNameSize
                        ? static_cast<size_t>(result.classId)
                        : classNameSize - 1;
                    result.className = _classes[safeIdx];
                }
                else {
                    result.className = "Unknown";
                }

                // Clamp box to image bounds (5px margin matches the rest of the file)
                cv::Rect& box = boxes[i];
                box.x = std::max(0, box.x);
                box.y = std::max(0, box.y);
                box.width = std::min(maxWidth - box.x - 5, box.width);
                box.height = std::min(maxHeight - box.y - 5, box.height);

                result.box = box;
                result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(box, inputCols, inputRows);
                result.cameraId = camera_id;
                detections.push_back(std::move(result));
            }
        }

        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("YOLOOD::RunInferenceFromDarkNet", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|
void YOLOOD::SetEngineType() {
|
|
try {
|
|
EngineType engineType = ANSLicenseHelper::CheckHardwareInformation();
|
|
switch (engineType) {
|
|
case EngineType::NVIDIA_GPU: // NVIDIA CUDA
|
|
this->_logger.LogDebug("YOLOOD::SetEngineType", "Running on CUDA GPU", __FILE__, __LINE__);
|
|
_net->setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
|
_net->setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
|
break;
|
|
default: // Normal CPU
|
|
this->_logger.LogDebug("YOLOOD::SetEngineType", "Running on CPU", __FILE__, __LINE__);
|
|
_net->setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
|
_net->setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
|
break;
|
|
}
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::SetEngineType()", e.what(), __FILE__, __LINE__);
|
|
}
|
|
|
|
}
|
|
void YOLOOD::LoadOnnxNetwork() { //For Yolov5 and Yolov8
|
|
try {
|
|
_net = std::make_shared<cv::dnn::Net>(cv::dnn::readNetFromONNX(_modelFilePath));
|
|
_isDarkNet = false;
|
|
_modelConfig.inpHeight = 640;
|
|
_modelConfig.inpWidth = 640;
|
|
SetEngineType();
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::LoadOnnxNetwork()", e.what(), __FILE__, __LINE__);
|
|
}
|
|
|
|
}
|
|
void YOLOOD::LoadDarknetNetwork() {
|
|
try {
|
|
// Retrieve the cfg file
|
|
std::string modelCfgFile = CreateFilePath(_modelFolder, "test.cfg");//It is always test.cfg
|
|
_net = std::make_shared<cv::dnn::Net>(cv::dnn::readNetFromDarknet(modelCfgFile, _modelFilePath));
|
|
_modelConfig.inpHeight = 416;
|
|
_modelConfig.inpWidth = 416;
|
|
_isDarkNet = true;
|
|
SetEngineType();
|
|
}
|
|
catch (const std::exception& e) {
|
|
this->_logger.LogFatal("YOLOOD::LoadDarknetNetwork()", e.what(), __FILE__, __LINE__);
|
|
}
|
|
|
|
|
|
}
|
|
}
|