1377 lines
46 KiB
C++
1377 lines
46 KiB
C++
|
|
#include "ANSONNXSEG.h"
|
|||
|
|
#include "EPLoader.h"
|
|||
|
|
namespace ANSCENTER {
|
|||
|
|
// Definition of the static counter shared by every ANSONNXSEG object; it starts at zero.
// Presumably used to hand out per-object instance ids (the increment is not in this
// part of the file — TODO confirm).
std::atomic<int> ANSONNXSEG::instanceCounter_(0); // Initialize static member
|
|||
|
|
|
|||
|
|
// Returns the product of all entries of `shape` (1 for an empty shape).
// Each dimension is converted to size_t before multiplying, so a negative
// dimension wraps around to a very large value rather than being rejected.
size_t ANSONNXSEG::vectorProduct(const std::vector<int64_t>& shape) {
    size_t product = 1;
    for (const int64_t dim : shape) {
        product *= static_cast<size_t>(dim);
    }
    return product;
}
|
|||
|
|
// Resizes `image` so that it fits inside `newShape` without changing its aspect ratio,
// then pads the remaining space with `color`. Padding is split between the two sides of
// each axis; when it is odd the extra pixel goes to the bottom / right.
//
// image     : source image, must not be empty.
// outImage  : receives the resized and padded result.
// newShape  : target canvas size, both dimensions must be positive.
// color     : colour of the padding border.
// auto_     : when true the padding is reduced to its remainder modulo `stride`
//             (so the output can be smaller than `newShape`).
// scaleFill : when true (and `auto_` is false) the image is stretched to exactly
//             `newShape` and no padding is added.
// scaleUp   : when false the image is never enlarged (scale factor capped at 1).
// stride    : modulus used by `auto_`; a value of 0 leaves the padding unchanged.
//
// Throws cv::Exception when `image` is empty or `newShape` has a non-positive dimension.
void ANSONNXSEG::letterBox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape,
    const cv::Scalar& color,
    bool auto_,
    bool scaleFill,
    bool scaleUp,
    int stride)
{
    // Reject degenerate input up front: dividing by a zero dimension below would yield
    // inf/NaN and an undefined float-to-int conversion. cv::Exception is what cv::resize
    // raises for such input, so callers' handlers keep working.
    CV_Assert(!image.empty() && newShape.width > 0 && newShape.height > 0);

    float r = std::min(static_cast<float>(newShape.height) / static_cast<float>(image.rows),
                       static_cast<float>(newShape.width) / static_cast<float>(image.cols));
    if (!scaleUp) {
        r = std::min(r, 1.0f);
    }

    // Keep at least one pixel per side; an extreme aspect ratio could otherwise round a
    // side down to 0 and make cv::resize fail.
    int newW = std::max(1, static_cast<int>(std::round(image.cols * r)));
    int newH = std::max(1, static_cast<int>(std::round(image.rows * r)));

    int dw = newShape.width - newW;
    int dh = newShape.height - newH;

    if (auto_) {
        // Minimal padding: keep only the remainder modulo the stride.
        // A zero stride would be a division by zero, so it is treated as "no reduction".
        if (stride != 0) {
            dw %= stride;
            dh %= stride;
        }
    }
    else if (scaleFill) {
        // Stretch to the full canvas, no border at all.
        newW = newShape.width;
        newH = newShape.height;
        dw = 0;
        dh = 0;
    }

    cv::Mat resized;
    cv::resize(image, resized, cv::Size(newW, newH), 0, 0, cv::INTER_LINEAR);

    const int top = dh / 2;
    const int bottom = dh - top;
    const int left = dw / 2;
    const int right = dw - left;
    cv::copyMakeBorder(resized, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
|
|||
|
|
void ANSONNXSEG::NMSBoxes(const std::vector<BoundingBox>& boxes,
|
|||
|
|
const std::vector<float>& scores,
|
|||
|
|
float scoreThreshold,
|
|||
|
|
float nmsThreshold,
|
|||
|
|
std::vector<int>& indices)
|
|||
|
|
{
|
|||
|
|
indices.clear();
|
|||
|
|
if (boxes.empty()) {
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::vector<int> order;
|
|||
|
|
order.reserve(boxes.size());
|
|||
|
|
for (size_t i = 0; i < boxes.size(); ++i) {
|
|||
|
|
if (scores[i] >= scoreThreshold) {
|
|||
|
|
order.push_back((int)i);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if (order.empty()) return;
|
|||
|
|
|
|||
|
|
std::sort(order.begin(), order.end(),
|
|||
|
|
[&scores](int a, int b) {
|
|||
|
|
return scores[a] > scores[b];
|
|||
|
|
});
|
|||
|
|
|
|||
|
|
std::vector<float> areas(boxes.size());
|
|||
|
|
for (size_t i = 0; i < boxes.size(); ++i) {
|
|||
|
|
areas[i] = (float)(boxes[i].width * boxes[i].height);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::vector<bool> suppressed(boxes.size(), false);
|
|||
|
|
for (size_t i = 0; i < order.size(); ++i) {
|
|||
|
|
int idx = order[i];
|
|||
|
|
if (suppressed[idx]) continue;
|
|||
|
|
|
|||
|
|
indices.push_back(idx);
|
|||
|
|
|
|||
|
|
for (size_t j = i + 1; j < order.size(); ++j) {
|
|||
|
|
int idx2 = order[j];
|
|||
|
|
if (suppressed[idx2]) continue;
|
|||
|
|
|
|||
|
|
const BoundingBox& a = boxes[idx];
|
|||
|
|
const BoundingBox& b = boxes[idx2];
|
|||
|
|
int interX1 = std::max(a.x, b.x);
|
|||
|
|
int interY1 = std::max(a.y, b.y);
|
|||
|
|
int interX2 = std::min(a.x + a.width, b.x + b.width);
|
|||
|
|
int interY2 = std::min(a.y + a.height, b.y + b.height);
|
|||
|
|
|
|||
|
|
int w = interX2 - interX1;
|
|||
|
|
int h = interY2 - interY1;
|
|||
|
|
if (w > 0 && h > 0) {
|
|||
|
|
float interArea = (float)(w * h);
|
|||
|
|
float unionArea = areas[idx] + areas[idx2] - interArea;
|
|||
|
|
float iou = (unionArea > 0.f) ? (interArea / unionArea) : 0.f;
|
|||
|
|
if (iou > nmsThreshold) {
|
|||
|
|
suppressed[idx2] = true;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
// Element-wise logistic function: returns a new matrix holding 1 / (1 + exp(-src)).
// `src` is left untouched.
cv::Mat ANSONNXSEG::sigmoid(const cv::Mat& src) {
    cv::Mat expNegated;
    cv::exp(-src, expNegated);

    const cv::Mat denominator = 1.0 + expNegated;
    cv::Mat result = 1.0 / denominator;
    return result;
}
|
|||
|
|
// Maps a box expressed in letterboxed-image coordinates back to the original image.
//
// letterboxShape : size of the letterboxed image the box refers to.
// coords         : box in letterbox coordinates.
// originalShape  : size of the original image.
// p_Clip         : when true the result is clamped so that x/y lie in
//                  [0, width] / [0, height] and the box does not extend past the image.
//
// The scale factor is the smaller of the two letterbox/original ratios and the padding
// is assumed to be centred (half of the leftover space on each side).
BoundingBox ANSONNXSEG::scaleCoords(const cv::Size& letterboxShape,
    const BoundingBox& coords,
    const cv::Size& originalShape,
    bool p_Clip)
{
    const auto roundToInt = [](float value) {
        return static_cast<int>(std::round(value));
    };

    const float boxedW = static_cast<float>(letterboxShape.width);
    const float boxedH = static_cast<float>(letterboxShape.height);
    const float sourceW = static_cast<float>(originalShape.width);
    const float sourceH = static_cast<float>(originalShape.height);

    const float gain = std::min(boxedH / sourceH, boxedW / sourceW);

    // Border added on the left / top by the letterbox step.
    const int padW = roundToInt((boxedW - sourceW * gain) / 2.f);
    const int padH = roundToInt((boxedH - sourceH * gain) / 2.f);

    BoundingBox scaled;
    scaled.x = roundToInt((static_cast<float>(coords.x) - static_cast<float>(padW)) / gain);
    scaled.y = roundToInt((static_cast<float>(coords.y) - static_cast<float>(padH)) / gain);
    scaled.width = roundToInt(static_cast<float>(coords.width) / gain);
    scaled.height = roundToInt(static_cast<float>(coords.height) / gain);

    if (!p_Clip) {
        return scaled;
    }

    // Position first, then the extent relative to the clamped position.
    scaled.x = clamp(scaled.x, 0, originalShape.width);
    scaled.y = clamp(scaled.y, 0, originalShape.height);
    scaled.width = clamp(scaled.width, 0, originalShape.width - scaled.x);
    scaled.height = clamp(scaled.height, 0, originalShape.height - scaled.y);
    return scaled;
}
|
|||
|
|
std::vector<cv::Scalar> ANSONNXSEG::generateColors(const std::vector<std::string>& classNames, int seed) {
|
|||
|
|
static std::unordered_map<size_t, std::vector<cv::Scalar>> cache;
|
|||
|
|
size_t key = 0;
|
|||
|
|
for (const auto& name : classNames) {
|
|||
|
|
size_t h = std::hash<std::string>{}(name);
|
|||
|
|
key ^= (h + 0x9e3779b9 + (key << 6) + (key >> 2));
|
|||
|
|
}
|
|||
|
|
auto it = cache.find(key);
|
|||
|
|
if (it != cache.end()) {
|
|||
|
|
return it->second;
|
|||
|
|
}
|
|||
|
|
std::mt19937 rng(seed);
|
|||
|
|
std::uniform_int_distribution<int> dist(0, 255);
|
|||
|
|
std::vector<cv::Scalar> colors;
|
|||
|
|
colors.reserve(classNames.size());
|
|||
|
|
for (size_t i = 0; i < classNames.size(); ++i) {
|
|||
|
|
colors.emplace_back(cv::Scalar(dist(rng), dist(rng), dist(rng)));
|
|||
|
|
}
|
|||
|
|
cache[key] = colors;
|
|||
|
|
return colors;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Overlays the segmentation mask of every sufficiently confident result onto `image`.
//
// image     : image to draw on (modified in place). cv::addWeighted requires the
//             coloured mask and the image to have the same size and channel count,
//             so each mask is expected to match the image size.
// results   : detections; entries whose confidence is below the configured detection
//             score threshold, or whose mask is empty, are skipped.
// maskAlpha : weight of the coloured mask in the blend (the image keeps weight 1.0).
void ANSONNXSEG::drawSegmentations(cv::Mat& image,
    const std::vector<Object>& results,
    float maskAlpha) const
{
    for (const auto& seg : results) {
        if (seg.confidence < _modelConfig.detectionScoreThreshold) {
            continue;
        }
        if (seg.mask.empty()) {
            continue; // nothing to draw for this detection
        }

        // An empty palette would make the modulo below a division by zero; fall back
        // to a fixed colour instead.
        const cv::Scalar color = classColors.empty()
            ? cv::Scalar(0, 255, 0)
            : classColors[seg.classId % classColors.size()];

        // -----------------------------
        // Draw Segmentation Mask Only
        // -----------------------------
        // Ensure the mask is single-channel. cv::threshold does not modify its input,
        // so the mask can be referenced directly instead of deep-copied.
        cv::Mat mask_gray;
        if (seg.mask.channels() == 3) {
            cv::cvtColor(seg.mask, mask_gray, cv::COLOR_BGR2GRAY);
        }
        else {
            mask_gray = seg.mask;
        }

        // Threshold the mask to binary (object: 255, background: 0)
        cv::Mat mask_binary;
        cv::threshold(mask_gray, mask_binary, 127, 255, cv::THRESH_BINARY);

        // Create a colored version of the mask
        cv::Mat colored_mask;
        cv::cvtColor(mask_binary, colored_mask, cv::COLOR_GRAY2BGR);
        colored_mask.setTo(color, mask_binary); // Apply color where mask is present

        // Blend the colored mask with the original image
        cv::addWeighted(image, 1.0, colored_mask, maskAlpha, 0, image);
    }
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Draws, for every sufficiently confident result, its bounding box, a filled label
// ("<class name> <confidence>%") and its blended segmentation mask onto `image`.
//
// image     : image to draw on (modified in place). cv::addWeighted requires the
//             coloured mask and the image to have the same size and channel count,
//             so each mask is expected to match the image size.
// results   : detections; entries whose confidence is below the configured detection
//             score threshold are skipped entirely.
// maskAlpha : weight of the coloured mask in the blend (the image keeps weight 1.0).
void ANSONNXSEG::drawSegmentationsAndBoxes(cv::Mat& image,
    const std::vector<Object>& results,
    float maskAlpha) const
{
    for (const auto& seg : results) {
        if (seg.confidence < _modelConfig.detectionScoreThreshold) {
            continue;
        }

        // An empty palette would make the modulo below a division by zero; fall back
        // to a fixed colour instead.
        const cv::Scalar color = classColors.empty()
            ? cv::Scalar(0, 255, 0)
            : classColors[seg.classId % classColors.size()];

        // -----------------------------
        // 1. Draw Bounding Box
        // -----------------------------
        cv::rectangle(image,
            cv::Point(seg.box.x, seg.box.y),
            cv::Point(seg.box.x + seg.box.width, seg.box.y + seg.box.height),
            color, 2);

        // -----------------------------
        // 2. Draw Label
        // -----------------------------
        // Use the numeric id when the class id has no entry in the name table, rather
        // than indexing out of range.
        const bool hasClassName = seg.classId >= 0 &&
            static_cast<size_t>(seg.classId) < _classes.size();
        const std::string className = hasClassName
            ? _classes[seg.classId]
            : std::to_string(seg.classId);
        const std::string label = className + " " +
            std::to_string(static_cast<int>(seg.confidence * 100)) + "%";

        int baseLine = 0;
        const double fontScale = 0.5;
        const int thickness = 1;
        const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, fontScale, thickness, &baseLine);
        // Keep the label inside the image when the box touches the top edge.
        const int top = std::max(seg.box.y, labelSize.height + 5);
        cv::rectangle(image,
            cv::Point(seg.box.x, top - labelSize.height - 5),
            cv::Point(seg.box.x + labelSize.width + 5, top),
            color, cv::FILLED);
        cv::putText(image, label,
            cv::Point(seg.box.x + 2, top - 2),
            cv::FONT_HERSHEY_SIMPLEX,
            fontScale,
            cv::Scalar(255, 255, 255),
            thickness);

        // -----------------------------
        // 3. Apply Segmentation Mask
        // -----------------------------
        if (seg.mask.empty()) {
            continue;
        }

        // Ensure the mask is single-channel. cv::threshold does not modify its input,
        // so the mask can be referenced directly instead of deep-copied.
        cv::Mat mask_gray;
        if (seg.mask.channels() == 3) {
            cv::cvtColor(seg.mask, mask_gray, cv::COLOR_BGR2GRAY);
        }
        else {
            mask_gray = seg.mask;
        }

        // Threshold the mask to binary (object: 255, background: 0)
        cv::Mat mask_binary;
        cv::threshold(mask_gray, mask_binary, 127, 255, cv::THRESH_BINARY);

        // Create a colored version of the mask
        cv::Mat colored_mask;
        cv::cvtColor(mask_binary, colored_mask, cv::COLOR_GRAY2BGR);
        colored_mask.setTo(color, mask_binary); // Apply color where mask is present

        // Blend the colored mask with the original image
        cv::addWeighted(image, 1.0, colored_mask, maskAlpha, 0, image);
    }
}
|
|||
|
|
bool ANSONNXSEG::Init(const std::string& modelPath, bool useGPU, int deviceId)
|
|||
|
|
{
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
try {
|
|||
|
|
deviceId_ = deviceId;
|
|||
|
|
|
|||
|
|
const auto& ep = ANSCENTER::EPLoader::Current();
|
|||
|
|
if (Ort::Global<void>::api_ == nullptr)
|
|||
|
|
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
|
|||
|
|
std::cout << "[ANSONNXSEG] EP ready: "
|
|||
|
|
<< ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;
|
|||
|
|
|
|||
|
|
// Unique environment name per instance to avoid conflicts
|
|||
|
|
std::string envName = "ONNX_SEG_INST" + std::to_string(instanceId_);
|
|||
|
|
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, envName.c_str());
|
|||
|
|
|
|||
|
|
sessionOptions = Ort::SessionOptions();
|
|||
|
|
sessionOptions.SetIntraOpNumThreads(
|
|||
|
|
std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
|
|||
|
|
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
|
|||
|
|
|
|||
|
|
// ── Log available providers ─────────────────────────────────────────
|
|||
|
|
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] Available Execution Providers:" << std::endl;
|
|||
|
|
for (const auto& p : availableProviders)
|
|||
|
|
std::cout << " - " << p << std::endl;
|
|||
|
|
|
|||
|
|
// ── Attach EP based on runtime-detected hardware ────────────────────
|
|||
|
|
if (useGPU) {
|
|||
|
|
bool attached = false;
|
|||
|
|
|
|||
|
|
switch (ep.type) {
|
|||
|
|
|
|||
|
|
case ANSCENTER::EngineType::NVIDIA_GPU: {
|
|||
|
|
auto it = std::find(availableProviders.begin(),
|
|||
|
|
availableProviders.end(), "CUDAExecutionProvider");
|
|||
|
|
if (it == availableProviders.end()) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", "CUDAExecutionProvider not in DLL — "
|
|||
|
|
"check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
try {
|
|||
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|||
|
|
Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
|
|||
|
|
|
|||
|
|
std::string deviceIdStr = std::to_string(deviceId_);
|
|||
|
|
const char* keys[] = { "device_id" };
|
|||
|
|
const char* values[] = { deviceIdStr.c_str() };
|
|||
|
|
Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
|
|||
|
|
|
|||
|
|
sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
|
|||
|
|
Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
|
|||
|
|
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] CUDA EP attached on device "
|
|||
|
|
<< deviceId_ << "." << std::endl;
|
|||
|
|
attached = true;
|
|||
|
|
}
|
|||
|
|
catch (const Ort::Exception& e) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
case ANSCENTER::EngineType::AMD_GPU: {
|
|||
|
|
auto it = std::find(availableProviders.begin(),
|
|||
|
|
availableProviders.end(), "DmlExecutionProvider");
|
|||
|
|
if (it == availableProviders.end()) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", "DmlExecutionProvider not in DLL — "
|
|||
|
|
"check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
try {
|
|||
|
|
std::unordered_map<std::string, std::string> opts = {
|
|||
|
|
{ "device_id", std::to_string(deviceId_) }
|
|||
|
|
};
|
|||
|
|
sessionOptions.AppendExecutionProvider("DML", opts);
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] DirectML EP attached on device "
|
|||
|
|
<< deviceId_ << "." << std::endl;
|
|||
|
|
attached = true;
|
|||
|
|
}
|
|||
|
|
catch (const Ort::Exception& e) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
case ANSCENTER::EngineType::OPENVINO_GPU: {
|
|||
|
|
auto it = std::find(availableProviders.begin(),
|
|||
|
|
availableProviders.end(), "OpenVINOExecutionProvider");
|
|||
|
|
if (it == availableProviders.end()) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", "OpenVINOExecutionProvider not in DLL — "
|
|||
|
|
"check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// FP32 + single thread preserved for determinism; each instance gets its own stream and cache
|
|||
|
|
const std::string precision = "FP32";
|
|||
|
|
const std::string numberOfThreads = "1";
|
|||
|
|
const std::string numberOfStreams = std::to_string(instanceId_ + 1);
|
|||
|
|
const std::string primaryDevice = "GPU." + std::to_string(deviceId_);
|
|||
|
|
const std::string cacheDir = "./ov_cache_inst" + std::to_string(instanceId_);
|
|||
|
|
|
|||
|
|
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
|||
|
|
{ {"device_type", primaryDevice}, {"precision",precision},
|
|||
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|||
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
|
|||
|
|
{"cache_dir", cacheDir} },
|
|||
|
|
{ {"device_type","GPU"}, {"precision",precision},
|
|||
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|||
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
|
|||
|
|
{"cache_dir", cacheDir} },
|
|||
|
|
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
|||
|
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
|||
|
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
|
|||
|
|
{"cache_dir", cacheDir} }
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
for (const auto& config : try_configs) {
|
|||
|
|
try {
|
|||
|
|
sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] OpenVINO EP attached ("
|
|||
|
|
<< config.at("device_type") << ", stream: " << numberOfStreams << ")." << std::endl;
|
|||
|
|
attached = true;
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
catch (const Ort::Exception& e) {
|
|||
|
|
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if (!attached)
|
|||
|
|
std::cerr << "[Instance " << instanceId_ << "] OpenVINO EP: all device configs failed." << std::endl;
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
default:
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if (!attached) {
|
|||
|
|
std::cerr << "[Instance " << instanceId_ << "] No GPU EP attached — running on CPU." << std::endl;
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::Init", "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] Inference device: CPU (useGPU=false)" << std::endl;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ── Load model ──────────────────────────────────────────────────────
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
|
|||
|
|
session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
|
|||
|
|
#else
|
|||
|
|
session = Ort::Session(env, modelPath.c_str(), sessionOptions);
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
numInputNodes = session.GetInputCount();
|
|||
|
|
numOutputNodes = session.GetOutputCount();
|
|||
|
|
|
|||
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|||
|
|
|
|||
|
|
// ── Input node name & shape ─────────────────────────────────────────
|
|||
|
|
{
|
|||
|
|
auto inNameAlloc = session.GetInputNameAllocated(0, allocator);
|
|||
|
|
inputNameAllocs.emplace_back(std::move(inNameAlloc));
|
|||
|
|
inputNames.push_back(inputNameAllocs.back().get());
|
|||
|
|
|
|||
|
|
auto inShape = session.GetInputTypeInfo(0)
|
|||
|
|
.GetTensorTypeAndShapeInfo().GetShape();
|
|||
|
|
|
|||
|
|
if (inShape.size() == 4) {
|
|||
|
|
if (inShape[2] == -1 || inShape[3] == -1) {
|
|||
|
|
isDynamicInputShape = true;
|
|||
|
|
inputImageShape = cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight);
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] Dynamic input shape — "
|
|||
|
|
"using config default: " << inputImageShape.width
|
|||
|
|
<< "x" << inputImageShape.height << std::endl;
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
isDynamicInputShape = false;
|
|||
|
|
inputImageShape = cv::Size(
|
|||
|
|
static_cast<int>(inShape[3]),
|
|||
|
|
static_cast<int>(inShape[2]));
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] Fixed input shape: "
|
|||
|
|
<< inputImageShape.width << "x" << inputImageShape.height << std::endl;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
throw std::runtime_error("Model input is not 4D! Expect [N, C, H, W].");
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ── Output node names (segmentation always has exactly 2) ───────────
|
|||
|
|
if (numOutputNodes != 2)
|
|||
|
|
throw std::runtime_error("Expected exactly 2 output nodes: output0 and output1.");
|
|||
|
|
|
|||
|
|
for (size_t i = 0; i < numOutputNodes; ++i) {
|
|||
|
|
auto outNameAlloc = session.GetOutputNameAllocated(i, allocator);
|
|||
|
|
outputNameAllocs.emplace_back(std::move(outNameAlloc));
|
|||
|
|
outputNames.push_back(outputNameAllocs.back().get());
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::cout << "[Instance " << instanceId_ << "] Model loaded successfully — "
|
|||
|
|
<< numInputNodes << " input, " << numOutputNodes << " output nodes." << std::endl;
|
|||
|
|
|
|||
|
|
// ── Warmup ──────────────────────────────────────────────────────────
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Starting warmup...");
|
|||
|
|
warmupModel();
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup completed successfully.");
|
|||
|
|
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::Init",
|
|||
|
|
std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
|
|||
|
|
__FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void ANSONNXSEG::warmupModel() {
|
|||
|
|
try {
|
|||
|
|
// Create dummy input image with correct size
|
|||
|
|
cv::Mat dummyImage = cv::Mat::zeros(inputImageShape.height, inputImageShape.width, CV_8UC3);
|
|||
|
|
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup: dummy image "
|
|||
|
|
<< dummyImage.cols << "x" << dummyImage.rows);
|
|||
|
|
|
|||
|
|
// Run 3 warmup inferences to stabilize
|
|||
|
|
for (int i = 0; i < 3; ++i) {
|
|||
|
|
try {
|
|||
|
|
// Your preprocessing logic here
|
|||
|
|
float* blob = nullptr;
|
|||
|
|
std::vector<int64_t> inputShape;
|
|||
|
|
|
|||
|
|
// If you have a preprocess method, call it
|
|||
|
|
// Otherwise, create a simple dummy tensor
|
|||
|
|
size_t tensorSize = 1 * 3 * inputImageShape.height * inputImageShape.width;
|
|||
|
|
blob = new float[tensorSize];
|
|||
|
|
std::memset(blob, 0, tensorSize * sizeof(float));
|
|||
|
|
|
|||
|
|
inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
|
|||
|
|
|
|||
|
|
// Create input tensor
|
|||
|
|
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|||
|
|
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
|||
|
|
memoryInfo,
|
|||
|
|
blob,
|
|||
|
|
tensorSize,
|
|||
|
|
inputShape.data(),
|
|||
|
|
inputShape.size()
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// Run inference
|
|||
|
|
std::vector<Ort::Value> outputTensors = session.Run(
|
|||
|
|
Ort::RunOptions{ nullptr },
|
|||
|
|
inputNames.data(),
|
|||
|
|
&inputTensor,
|
|||
|
|
1,
|
|||
|
|
outputNames.data(),
|
|||
|
|
numOutputNodes
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// Clean up
|
|||
|
|
delete[] blob;
|
|||
|
|
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup " << (i + 1) << "/3 completed");
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup iteration " << i
|
|||
|
|
<< " failed (non-critical): " << e.what());
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup successful - all states initialized");
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogWarn("ANSONNXSEG::warmupModel",
|
|||
|
|
std::string("[Instance ") + std::to_string(instanceId_) + "] Warmup failed: " + e.what(),
|
|||
|
|
__FILE__, __LINE__);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Converts an input frame into the planar float tensor expected by the model.
//
// Steps: validate the image, bring it to three channels, letterbox it to the model
// input size, scale pixel values to [0, 1], and copy the result into a freshly
// allocated channel-planar (CHW) buffer.
//
// image            : input frame with 1, 3 or 4 channels.
// blobPtr          : in/out. Any buffer it already points to is delete[]'d; on success
//                    it points to a new float[3 * H * W] buffer that the caller must
//                    release with delete[]. Callers must therefore pass either nullptr
//                    or a pointer obtained from new[].
// inputTensorShape : in/out. Entries 2 and 3 are set to the letterboxed height and
//                    width; a vector with fewer than four entries is replaced by
//                    {1, 3, H, W}.
//
// Returns the letterboxed, normalised 3-channel float image, or an empty cv::Mat on
// failure (the reason is logged; on an exception blobPtr is freed and reset to nullptr).
//
// NOTE(review): channels are passed to the model in the image's own order; no BGR/RGB
// swap is done here — confirm this matches what the model expects.
cv::Mat ANSONNXSEG::preprocess(const cv::Mat& image, float*& blobPtr, std::vector<int64_t>& inputTensorShape) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    m_imgWidth = image.cols;
    m_imgHeight = image.rows;
    try {
        // Validate input image
        if (image.empty() || image.data == nullptr) {
            this->_logger.LogError("ANSONNXSEG::preprocess", "Input image is empty or null", __FILE__, __LINE__);
            return cv::Mat();
        }

        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXSEG::preprocess",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
                __FILE__, __LINE__);
            return cv::Mat();
        }

        // Check for NaN/Inf in input
        double minVal, maxVal;
        cv::minMaxLoc(image, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXSEG::preprocess",
                "Input image contains NaN or Inf values. Range: [" + std::to_string(minVal) +
                ", " + std::to_string(maxVal) + "]", __FILE__, __LINE__);
            return cv::Mat();
        }

        DEBUG_PRINT("[Instance " << instanceId_ << "] Input: " << image.cols << "x" << image.rows
            << ", channels=" << image.channels()
            << ", type=" << image.type()
            << ", range=[" << minVal << ", " << maxVal << "]");

        // The blob below always has three planes. A single-channel image would fill
        // only the first one and leave the rest uninitialised, so normalise the channel
        // count before going any further.
        cv::Mat bgrImage;
        switch (image.channels()) {
        case 3:
            bgrImage = image;
            break;
        case 1:
            cv::cvtColor(image, bgrImage, cv::COLOR_GRAY2BGR);
            break;
        case 4:
            cv::cvtColor(image, bgrImage, cv::COLOR_BGRA2BGR);
            break;
        default:
            this->_logger.LogError("ANSONNXSEG::preprocess",
                "Unsupported channel count: " + std::to_string(image.channels()),
                __FILE__, __LINE__);
            return cv::Mat();
        }

        // Apply letterbox preprocessing
        cv::Mat letterboxImage;
        letterBox(bgrImage, letterboxImage, inputImageShape,
            cv::Scalar(114, 114, 114),
            /*auto_=*/isDynamicInputShape,
            /*scaleFill=*/false,
            /*scaleUp=*/true,
            /*stride=*/32);

        // Validate letterbox output
        if (letterboxImage.empty() || letterboxImage.rows <= 0 || letterboxImage.cols <= 0) {
            this->_logger.LogError("ANSONNXSEG::preprocess",
                "Letterbox preprocessing failed", __FILE__, __LINE__);
            return cv::Mat();
        }

        // Update tensor shape for dynamic input. Writing entries 2 and 3 of a shorter
        // vector would be out of bounds, so give it the full NCHW layout first.
        if (inputTensorShape.size() < 4) {
            inputTensorShape.assign({ 1, 3, 0, 0 });
        }
        inputTensorShape[2] = static_cast<int64_t>(letterboxImage.rows);
        inputTensorShape[3] = static_cast<int64_t>(letterboxImage.cols);

        // Normalize to [0, 1] range
        letterboxImage.convertTo(letterboxImage, CV_32FC3, 1.0f / 255.0f);

        // Allocate blob memory
        const size_t totalPixels = static_cast<size_t>(letterboxImage.rows) *
            static_cast<size_t>(letterboxImage.cols);
        const size_t blobSize = totalPixels * 3;

        // Clean up any existing blob
        if (blobPtr != nullptr) {
            delete[] blobPtr;
            blobPtr = nullptr;
        }

        blobPtr = new float[blobSize];

        // Split channels into CHW format (NCHW for ONNX): each plane is a cv::Mat header
        // over its slice of the blob, so cv::split writes straight into the buffer.
        std::vector<cv::Mat> channels(3);
        for (int c = 0; c < 3; ++c) {
            channels[c] = cv::Mat(letterboxImage.rows, letterboxImage.cols, CV_32FC1,
                blobPtr + static_cast<size_t>(c) * totalPixels);
        }

        cv::split(letterboxImage, channels);

        return letterboxImage;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::preprocess",
            "[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
            __FILE__, __LINE__);

        if (blobPtr != nullptr) {
            delete[] blobPtr;
            blobPtr = nullptr;
        }
        return cv::Mat();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::preprocess",
            "[Instance " + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);

        if (blobPtr != nullptr) {
            delete[] blobPtr;
            blobPtr = nullptr;
        }
        return cv::Mat();
    }
}
|
|||
|
|
// Extracts the outline of the largest blob found inside `boundingBox` of a binary mask.
//
// binaryMask            : single-channel 8-bit mask (anything else yields an empty result).
// boundingBox           : region of the mask to search; it is clipped to the mask bounds.
// simplificationEpsilon : tolerance passed to cv::approxPolyDP.
// minContourArea        : contours with a smaller area are ignored.
//
// Returns the simplified outline in mask coordinates, or an empty vector when the input
// is unusable, no contour qualifies, or OpenCV raises an exception.
std::vector<cv::Point2f> ANSONNXSEG::maskToPolygon(const cv::Mat& binaryMask,
    const cv::Rect& boundingBox,
    float simplificationEpsilon,
    int minContourArea)
{
    std::vector<cv::Point2f> outline;

    try {
        const bool usableMask = !binaryMask.empty() && binaryMask.type() == CV_8UC1;
        if (!usableMask) {
            return outline;
        }

        // Clip the requested box to the mask so the sub-matrix below is always valid.
        const cv::Rect maskBounds(0, 0, binaryMask.cols, binaryMask.rows);
        const cv::Rect region = boundingBox & maskBounds;
        if (region.area() <= 0) {
            return outline;
        }

        // Work on a copy of the region so the caller's mask is never touched.
        const cv::Mat regionMask = binaryMask(region);
        std::vector<std::vector<cv::Point>> found;
        std::vector<cv::Vec4i> hierarchy;
        cv::findContours(regionMask.clone(), found, hierarchy,
            cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
        if (found.empty()) {
            return outline;
        }

        // Pick the first contour with the strictly largest area that also reaches the
        // minimum area.
        int bestIndex = 0;
        double bestArea = 0.0;
        const int contourCount = static_cast<int>(found.size());
        for (int k = 0; k < contourCount; ++k) {
            const double candidateArea = cv::contourArea(found[k]);
            if (!(candidateArea > bestArea && candidateArea >= minContourArea)) {
                continue;
            }
            bestArea = candidateArea;
            bestIndex = k;
        }
        if (bestArea < minContourArea) {
            return outline;
        }

        // Reduce the number of vertices, treating the contour as a closed curve.
        std::vector<cv::Point> reduced;
        cv::approxPolyDP(found[bestIndex], reduced, simplificationEpsilon, true);

        // Contour points are relative to the region; shift them back into mask coordinates.
        outline.reserve(reduced.size());
        for (const cv::Point& vertex : reduced) {
            const float px = static_cast<float>(vertex.x + region.x);
            const float py = static_cast<float>(vertex.y + region.y);
            outline.emplace_back(px, py);
        }
        return outline;
    }
    catch (const cv::Exception& e) {
        // Failures are swallowed on purpose: the caller simply gets no polygon.
        outline.clear();
        return outline;
    }
}
|
|||
|
|
std::vector<Object> ANSONNXSEG::postprocess(
|
|||
|
|
const cv::Size& origSize,
|
|||
|
|
const cv::Size& letterboxSize,
|
|||
|
|
const std::vector<Ort::Value>& outputs,
|
|||
|
|
const std::string& camera_id)
|
|||
|
|
{
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
|
|||
|
|
try {
|
|||
|
|
// Validate outputs
|
|||
|
|
if (outputs.size() < 2) {
|
|||
|
|
throw std::runtime_error("Insufficient model outputs. Expected at least 2, got " +
|
|||
|
|
std::to_string(outputs.size()));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Extract output tensors
|
|||
|
|
const float* detections = outputs[0].GetTensorData<float>();
|
|||
|
|
const float* prototypes = outputs[1].GetTensorData<float>();
|
|||
|
|
|
|||
|
|
// Get tensor shapes
|
|||
|
|
auto detectionShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); // [1, 116, N]
|
|||
|
|
auto prototypeShape = outputs[1].GetTensorTypeAndShapeInfo().GetShape(); // [1, 32, H, W]
|
|||
|
|
|
|||
|
|
// Validate prototype shape
|
|||
|
|
if (prototypeShape.size() != 4 || prototypeShape[0] != 1 || prototypeShape[1] != 32) {
|
|||
|
|
throw std::runtime_error("Invalid prototype shape. Expected [1, 32, H, W], got [" +
|
|||
|
|
std::to_string(prototypeShape[0]) + ", " +
|
|||
|
|
std::to_string(prototypeShape[1]) + ", " +
|
|||
|
|
std::to_string(prototypeShape[2]) + ", " +
|
|||
|
|
std::to_string(prototypeShape[3]) + "]");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Extract dimensions
|
|||
|
|
const size_t numFeatures = detectionShape[1]; // 116 = 4 bbox + 80 classes + 32 masks
|
|||
|
|
const size_t numDetections = detectionShape[2];
|
|||
|
|
const int maskH = static_cast<int>(prototypeShape[2]);
|
|||
|
|
const int maskW = static_cast<int>(prototypeShape[3]);
|
|||
|
|
|
|||
|
|
// Early exit if no detections
|
|||
|
|
if (numDetections == 0) {
|
|||
|
|
return {};
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Calculate feature offsets
|
|||
|
|
constexpr int BOX_OFFSET = 0;
|
|||
|
|
constexpr int BOX_SIZE = 4;
|
|||
|
|
constexpr int MASK_COEFFS_SIZE = 32;
|
|||
|
|
const int numClasses = static_cast<int>(numFeatures - BOX_SIZE - MASK_COEFFS_SIZE);
|
|||
|
|
|
|||
|
|
if (numClasses <= 0) {
|
|||
|
|
throw std::runtime_error("Invalid number of classes: " + std::to_string(numClasses));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
const int CLASS_CONF_OFFSET = BOX_OFFSET + BOX_SIZE;
|
|||
|
|
const int MASK_COEFF_OFFSET = CLASS_CONF_OFFSET + numClasses;
|
|||
|
|
|
|||
|
|
// 1. Extract and cache prototype masks
|
|||
|
|
std::vector<cv::Mat> prototypeMasks;
|
|||
|
|
prototypeMasks.reserve(MASK_COEFFS_SIZE);
|
|||
|
|
|
|||
|
|
const int prototypeSize = maskH * maskW;
|
|||
|
|
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
|
|||
|
|
cv::Mat proto(maskH, maskW, CV_32FC1,
|
|||
|
|
const_cast<float*>(prototypes + m * prototypeSize));
|
|||
|
|
prototypeMasks.emplace_back(proto.clone());
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 2. Process detections and filter by confidence
|
|||
|
|
std::vector<BoundingBox> boxes;
|
|||
|
|
std::vector<float> confidences;
|
|||
|
|
std::vector<int> classIds;
|
|||
|
|
std::vector<std::vector<float>> maskCoefficients;
|
|||
|
|
|
|||
|
|
boxes.reserve(numDetections);
|
|||
|
|
confidences.reserve(numDetections);
|
|||
|
|
classIds.reserve(numDetections);
|
|||
|
|
maskCoefficients.reserve(numDetections);
|
|||
|
|
|
|||
|
|
const int numBoxes = static_cast<int>(numDetections);
|
|||
|
|
|
|||
|
|
for (int i = 0; i < numBoxes; ++i) {
|
|||
|
|
// Find best class and confidence
|
|||
|
|
float maxConf = 0.0f;
|
|||
|
|
int bestClassId = -1;
|
|||
|
|
|
|||
|
|
for (int c = 0; c < numClasses; ++c) {
|
|||
|
|
const float conf = detections[(CLASS_CONF_OFFSET + c) * numBoxes + i];
|
|||
|
|
if (conf > maxConf) {
|
|||
|
|
maxConf = conf;
|
|||
|
|
bestClassId = c;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Skip low confidence detections
|
|||
|
|
if (maxConf < _modelConfig.detectionScoreThreshold) {
|
|||
|
|
continue;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Extract bounding box (xywh format)
|
|||
|
|
const float xc = detections[BOX_OFFSET * numBoxes + i];
|
|||
|
|
const float yc = detections[(BOX_OFFSET + 1) * numBoxes + i];
|
|||
|
|
const float w = detections[(BOX_OFFSET + 2) * numBoxes + i];
|
|||
|
|
const float h = detections[(BOX_OFFSET + 3) * numBoxes + i];
|
|||
|
|
|
|||
|
|
// Convert to xyxy format and store
|
|||
|
|
boxes.push_back({
|
|||
|
|
static_cast<int>(std::round(xc - w * 0.5f)),
|
|||
|
|
static_cast<int>(std::round(yc - h * 0.5f)),
|
|||
|
|
static_cast<int>(std::round(w)),
|
|||
|
|
static_cast<int>(std::round(h))
|
|||
|
|
});
|
|||
|
|
|
|||
|
|
confidences.push_back(maxConf);
|
|||
|
|
classIds.push_back(bestClassId);
|
|||
|
|
|
|||
|
|
// Extract mask coefficients
|
|||
|
|
std::vector<float> coeffs(MASK_COEFFS_SIZE);
|
|||
|
|
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
|
|||
|
|
coeffs[m] = detections[(MASK_COEFF_OFFSET + m) * numBoxes + i];
|
|||
|
|
}
|
|||
|
|
maskCoefficients.emplace_back(std::move(coeffs));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Early exit if no valid detections
|
|||
|
|
if (boxes.empty()) {
|
|||
|
|
return {};
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 3. Apply Non-Maximum Suppression
|
|||
|
|
std::vector<int> nmsIndices;
|
|||
|
|
NMSBoxes(boxes, confidences,
|
|||
|
|
_modelConfig.modelConfThreshold,
|
|||
|
|
_modelConfig.modelMNSThreshold,
|
|||
|
|
nmsIndices);
|
|||
|
|
|
|||
|
|
if (nmsIndices.empty()) {
|
|||
|
|
return {};
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 4. Calculate coordinate transformation parameters
|
|||
|
|
const float scale = std::min(
|
|||
|
|
static_cast<float>(letterboxSize.width) / origSize.width,
|
|||
|
|
static_cast<float>(letterboxSize.height) / origSize.height
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
const int scaledW = static_cast<int>(origSize.width * scale);
|
|||
|
|
const int scaledH = static_cast<int>(origSize.height * scale);
|
|||
|
|
const float padW = (letterboxSize.width - scaledW) * 0.5f;
|
|||
|
|
const float padH = (letterboxSize.height - scaledH) * 0.5f;
|
|||
|
|
|
|||
|
|
// Mask coordinate transformation
|
|||
|
|
const float maskScaleX = static_cast<float>(maskW) / letterboxSize.width;
|
|||
|
|
const float maskScaleY = static_cast<float>(maskH) / letterboxSize.height;
|
|||
|
|
|
|||
|
|
// Define crop region in mask space (with small padding to avoid edge artifacts)
|
|||
|
|
constexpr float CROP_PADDING = 0.5f;
|
|||
|
|
const int cropX1 = std::clamp(
|
|||
|
|
static_cast<int>(std::round((padW - CROP_PADDING) * maskScaleX)),
|
|||
|
|
0, maskW - 1
|
|||
|
|
);
|
|||
|
|
const int cropY1 = std::clamp(
|
|||
|
|
static_cast<int>(std::round((padH - CROP_PADDING) * maskScaleY)),
|
|||
|
|
0, maskH - 1
|
|||
|
|
);
|
|||
|
|
const int cropX2 = std::clamp(
|
|||
|
|
static_cast<int>(std::round((letterboxSize.width - padW + CROP_PADDING) * maskScaleX)),
|
|||
|
|
cropX1 + 1, maskW
|
|||
|
|
);
|
|||
|
|
const int cropY2 = std::clamp(
|
|||
|
|
static_cast<int>(std::round((letterboxSize.height - padH + CROP_PADDING) * maskScaleY)),
|
|||
|
|
cropY1 + 1, maskH
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
const cv::Rect cropRect(cropX1, cropY1, cropX2 - cropX1, cropY2 - cropY1);
|
|||
|
|
|
|||
|
|
// 5. Generate final results with masks
|
|||
|
|
std::vector<Object> results;
|
|||
|
|
results.reserve(nmsIndices.size());
|
|||
|
|
|
|||
|
|
for (const int idx : nmsIndices) {
|
|||
|
|
Object result;
|
|||
|
|
|
|||
|
|
// Scale bounding box to original image coordinates
|
|||
|
|
BoundingBox scaledBox = scaleCoords(letterboxSize, boxes[idx], origSize, true);
|
|||
|
|
|
|||
|
|
result.box.x = scaledBox.x;
|
|||
|
|
result.box.y = scaledBox.y;
|
|||
|
|
result.box.width = scaledBox.width;
|
|||
|
|
result.box.height = scaledBox.height;
|
|||
|
|
result.confidence = confidences[idx];
|
|||
|
|
result.classId = classIds[idx];
|
|||
|
|
|
|||
|
|
// Generate instance mask
|
|||
|
|
const auto& coeffs = maskCoefficients[idx];
|
|||
|
|
|
|||
|
|
// Linear combination of prototype masks
|
|||
|
|
cv::Mat combinedMask = cv::Mat::zeros(maskH, maskW, CV_32FC1);
|
|||
|
|
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
|
|||
|
|
cv::addWeighted(combinedMask, 1.0, prototypeMasks[m], coeffs[m],
|
|||
|
|
0.0, combinedMask);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Apply sigmoid activation
|
|||
|
|
combinedMask = sigmoid(combinedMask);
|
|||
|
|
|
|||
|
|
// Crop to valid region
|
|||
|
|
cv::Mat croppedMask = combinedMask(cropRect).clone();
|
|||
|
|
|
|||
|
|
// Resize to original image dimensions
|
|||
|
|
cv::Mat resizedMask;
|
|||
|
|
cv::resize(croppedMask, resizedMask, origSize, 0, 0, cv::INTER_LINEAR);
|
|||
|
|
|
|||
|
|
// Binarize mask
|
|||
|
|
cv::Mat binaryMask;
|
|||
|
|
cv::threshold(resizedMask, binaryMask, 0.5, 255.0, cv::THRESH_BINARY);
|
|||
|
|
binaryMask.convertTo(binaryMask, CV_8UC1);
|
|||
|
|
|
|||
|
|
// Crop mask to bounding box region
|
|||
|
|
cv::Rect roi(result.box.x, result.box.y, result.box.width, result.box.height);
|
|||
|
|
roi &= cv::Rect(0, 0, origSize.width, origSize.height);
|
|||
|
|
|
|||
|
|
if (roi.area() > 0) {
|
|||
|
|
cv::Mat finalMask = cv::Mat::zeros(origSize, CV_8UC1);
|
|||
|
|
binaryMask(roi).copyTo(finalMask(roi));
|
|||
|
|
result.mask = finalMask;
|
|||
|
|
|
|||
|
|
// Convert mask to polygon (single largest contour)
|
|||
|
|
result.polygon = maskToPolygon(finalMask, result.box, 2.0f, 10);
|
|||
|
|
|
|||
|
|
// Validate polygon
|
|||
|
|
if (result.polygon.size() < 3) {
|
|||
|
|
// Fallback to bounding box if polygon extraction failed
|
|||
|
|
result.polygon = {
|
|||
|
|
cv::Point2f(result.box.x, result.box.y),
|
|||
|
|
cv::Point2f(result.box.x + result.box.width, result.box.y),
|
|||
|
|
cv::Point2f(result.box.x + result.box.width, result.box.y + result.box.height),
|
|||
|
|
cv::Point2f(result.box.x, result.box.y + result.box.height)
|
|||
|
|
};
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
// Skip invalid detections
|
|||
|
|
continue;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
results.push_back(result);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return results;
|
|||
|
|
}
|
|||
|
|
catch (const cv::Exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::postprocess",
|
|||
|
|
"[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
|
|||
|
|
__FILE__, __LINE__);
|
|||
|
|
return {};
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::postprocess",
|
|||
|
|
"[Instance " + std::to_string(instanceId_) + "] " + e.what(),
|
|||
|
|
__FILE__, __LINE__);
|
|||
|
|
return {};
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
/// Runs the segmentation pipeline on a single frame:
/// preprocess -> build input tensor -> ONNX Runtime inference -> postprocess.
///
/// @param image      Input frame. Rejected (empty result) when empty, null, or
///                   when its dimensions are not positive.
/// @param camera_id  Forwarded unchanged to postprocess().
/// @return The objects produced by postprocess(), or an empty vector when any
///         step fails. Failures are logged; Ort::Exception, cv::Exception and
///         std::exception are caught here and converted into an empty result.
std::vector<Object> ANSONNXSEG::segment(const cv::Mat& image, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    float* blobPtr = nullptr;

    // Scope guard that owns the buffer preprocess() hands back through blobPtr.
    // The buffer is released with delete[] on every exit path (normal return,
    // early return, or any exception), so no branch below needs its own cleanup.
    struct BlobGuard {
        float*& blob;
        explicit BlobGuard(float*& b) : blob(b) {}
        BlobGuard(const BlobGuard&) = delete;
        BlobGuard& operator=(const BlobGuard&) = delete;
        ~BlobGuard() { release(); }
        // Safe to call repeatedly: delete[] on nullptr is a no-op.
        void release() {
            delete[] blob;
            blob = nullptr;
        }
    } blobGuard(blobPtr);

    try {
        // Validate input image
        if (image.empty() || image.data == nullptr) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Input image is empty or null", __FILE__, __LINE__);
            return {};
        }

        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" +
                std::to_string(image.rows), __FILE__, __LINE__);
            return {};
        }

        // 1. Preprocess image (fills blobPtr and may adjust inputShape)
        std::vector<int64_t> inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
        cv::Mat letterboxImg = preprocess(image, blobPtr, inputShape);

        if (letterboxImg.empty()) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }

        // Validate blob pointer after preprocessing
        if (blobPtr == nullptr) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Blob pointer is null after preprocessing", __FILE__, __LINE__);
            return {};
        }

        // 2. Prepare input tensor
        const size_t inputSize = vectorProduct(inputShape);

        if (inputSize == 0) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Invalid input tensor size", __FILE__, __LINE__);
            return {};
        }

        // Create memory info and input tensor. The tensor is built directly on
        // top of blobPtr, so the blob must stay alive until Run() has returned.
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(
            OrtArenaAllocator,
            OrtMemTypeDefault
        );

        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memInfo,
            blobPtr,
            inputSize,
            inputShape.data(),
            inputShape.size()
        );

        // Validate tensor creation
        if (!inputTensor.IsTensor()) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Failed to create input tensor", __FILE__, __LINE__);
            return {};
        }

        // 3. Run inference
        std::vector<Ort::Value> outputs;
        try {
            outputs = session.Run(
                Ort::RunOptions{ nullptr },
                inputNames.data(),
                &inputTensor,
                numInputNodes,
                outputNames.data(),
                numOutputNodes
            );
        }
        catch (const Ort::Exception& e) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "ONNX Runtime inference failed: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }

        // The input blob is no longer needed once inference has finished;
        // free it now rather than holding it during postprocessing.
        blobGuard.release();

        // Validate outputs
        if (outputs.empty()) {
            this->_logger.LogError("ANSONNXSEG::segment",
                "Model returned no outputs", __FILE__, __LINE__);
            return {};
        }

        // 4. Postprocess results (letterbox size is width x height of the network input)
        const cv::Size letterboxSize(
            static_cast<int>(inputShape[3]),
            static_cast<int>(inputShape[2])
        );

        return postprocess(image.size(), letterboxSize, outputs, camera_id);
    }
    catch (const Ort::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
            "[Instance " + std::to_string(instanceId_) + "] ONNX Runtime error: " +
            e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
            "[Instance " + std::to_string(instanceId_) + "] OpenCV error: " +
            e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
            "[Instance " + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        return {};
    }
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Public functions
|
|||
|
|
/// Destructor: delegates teardown to Destroy(); its status result is not used.
ANSONNXSEG::~ANSONNXSEG() {
    this->Destroy();
}
|
|||
|
|
/// Announces on stdout that this instance has been destroyed.
///
/// @return Always true.
bool ANSONNXSEG::Destroy() {
    // Newline followed by an explicit flush, so the message is emitted immediately.
    std::cout << "[ANSONNXSEG] Destroyed instance " << instanceId_ << '\n' << std::flush;
    return true;
}
|
|||
|
|
bool ANSONNXSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|||
|
|
if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
bool ANSONNXSEG::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
try {
|
|||
|
|
_modelLoadValid = false;
|
|||
|
|
bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
|
|||
|
|
if (!result) return false;
|
|||
|
|
// Parsing for YOLO only here
|
|||
|
|
_modelConfig = modelConfig;
|
|||
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
|
|||
|
|
_modelConfig.modelType = ModelType::ONNXSEG;
|
|||
|
|
_modelConfig.inpHeight = 640;
|
|||
|
|
_modelConfig.inpWidth = 640;
|
|||
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|||
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|||
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|||
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|||
|
|
if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
|
|||
|
|
_modelConfig.numKPS = 17;
|
|||
|
|
if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
|
|||
|
|
_fp16 = (modelConfig.precisionType == PrecisionType::FP16);
|
|||
|
|
|
|||
|
|
if (FileExist(_modelConfigFile)) {
|
|||
|
|
ModelType modelType;
|
|||
|
|
std::vector<int> inputShape;
|
|||
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|||
|
|
if (inputShape.size() == 2) {
|
|||
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|||
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {// This is old version of model zip file
|
|||
|
|
_modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
|
|||
|
|
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
|
|||
|
|
std::ifstream isValidFileName(_classFilePath);
|
|||
|
|
if (!isValidFileName)
|
|||
|
|
{
|
|||
|
|
this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromString();
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromFile();
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
// 1. Load labelMap and engine
|
|||
|
|
labelMap.clear();
|
|||
|
|
if (!_classes.empty())
|
|||
|
|
labelMap = VectorToCommaSeparatedString(_classes);
|
|||
|
|
classColors = generateColors(_classes);
|
|||
|
|
|
|||
|
|
// 2. Initialize ONNX Runtime session
|
|||
|
|
instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
|
|||
|
|
result = Init(_modelFilePath, true, 0);
|
|||
|
|
_modelLoadValid = true;
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return result;
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXCL::Initialize", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSONNXSEG::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
try {
|
|||
|
|
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
|
|||
|
|
if (!result) return false;
|
|||
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
|
|||
|
|
_modelConfig.modelType = ModelType::ONNXSEG;
|
|||
|
|
_modelConfig.inpHeight = 640;
|
|||
|
|
_modelConfig.inpWidth = 640;
|
|||
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|||
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|||
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|||
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|||
|
|
if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
|
|||
|
|
_modelConfig.numKPS = 17;
|
|||
|
|
if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
|
|||
|
|
// if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
|
|||
|
|
_fp16 = true; // Load Model from Here
|
|||
|
|
|
|||
|
|
// 0. Check if the configuration file exist
|
|||
|
|
if (FileExist(_modelConfigFile)) {
|
|||
|
|
ModelType modelType;
|
|||
|
|
std::vector<int> inputShape;
|
|||
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|||
|
|
if (inputShape.size() == 2) {
|
|||
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|||
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {// This is old version of model zip file
|
|||
|
|
_modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
|
|||
|
|
_classFilePath = CreateFilePath(_modelFolder, "classes.names");
|
|||
|
|
std::ifstream isValidFileName(_classFilePath);
|
|||
|
|
if (!isValidFileName)
|
|||
|
|
{
|
|||
|
|
this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromString();
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromFile();
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
classColors = generateColors(_classes);
|
|||
|
|
// Initialize ONNX Runtime session
|
|||
|
|
instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
|
|||
|
|
result = Init(_modelFilePath, true, 0);
|
|||
|
|
_modelLoadValid = true;
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return result;
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::LoadModel", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
bool ANSONNXSEG::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
|
|||
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|||
|
|
try {
|
|||
|
|
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
|
|||
|
|
if (!result) return false;
|
|||
|
|
std::string _modelName = modelName;
|
|||
|
|
if (_modelName.empty()) {
|
|||
|
|
_modelName = "train_last";
|
|||
|
|
}
|
|||
|
|
std::string modelFullName = _modelName + ".onnx";
|
|||
|
|
// Parsing for YOLO only here
|
|||
|
|
_modelConfig = modelConfig;
|
|||
|
|
_modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
|
|||
|
|
_modelConfig.modelType = ModelType::ONNXSEG;
|
|||
|
|
_modelConfig.inpHeight = 640;
|
|||
|
|
_modelConfig.inpWidth = 640;
|
|||
|
|
if (_modelConfig.modelMNSThreshold < 0.2)
|
|||
|
|
_modelConfig.modelMNSThreshold = 0.5;
|
|||
|
|
if (_modelConfig.modelConfThreshold < 0.2)
|
|||
|
|
_modelConfig.modelConfThreshold = 0.5;
|
|||
|
|
if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
|
|||
|
|
_modelConfig.numKPS = 17;
|
|||
|
|
if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
|
|||
|
|
_fp16 = true; // Load Model from Here
|
|||
|
|
|
|||
|
|
// 0. Check if the configuration file exist
|
|||
|
|
if (FileExist(_modelConfigFile)) {
|
|||
|
|
ModelType modelType;
|
|||
|
|
std::vector<int> inputShape;
|
|||
|
|
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
|
|||
|
|
if (inputShape.size() == 2) {
|
|||
|
|
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
|
|||
|
|
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
else {// This is old version of model zip file
|
|||
|
|
_modelFilePath = CreateFilePath(_modelFolder, modelFullName);
|
|||
|
|
_classFilePath = CreateFilePath(_modelFolder, className);
|
|||
|
|
std::ifstream isValidFileName(_classFilePath);
|
|||
|
|
if (!isValidFileName)
|
|||
|
|
{
|
|||
|
|
this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromString();
|
|||
|
|
}
|
|||
|
|
else {
|
|||
|
|
this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
|
|||
|
|
LoadClassesFromFile();
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
// 1. Load labelMap and engine
|
|||
|
|
labelMap.clear();
|
|||
|
|
if (!_classes.empty())
|
|||
|
|
labelMap = VectorToCommaSeparatedString(_classes);
|
|||
|
|
classColors = generateColors(_classes);
|
|||
|
|
// 2. Initialize ONNX Runtime session
|
|||
|
|
instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
|
|||
|
|
_modelLoadValid = true;
|
|||
|
|
_isInitialized = true;
|
|||
|
|
return result;
|
|||
|
|
}
|
|||
|
|
catch (const std::exception& e) {
|
|||
|
|
this->_logger.LogFatal("ANSONNXSEG::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
|
|||
|
|
return false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
/// Public inference entry point: checks the engine state, runs segment() on
/// the frame and optionally post-filters the detections with tracking and
/// stabilization.
///
/// @param input      Frame to analyse; frames that are empty or smaller than 5x5 yield no objects.
/// @param camera_id  Forwarded to segment(), ApplyTracking() and StabilizeDetections().
/// @return Detected objects, or an empty vector when a precondition fails or a
///         std::exception is caught (the reason is logged, except for rejected frames).
std::vector<Object> ANSONNXSEG::RunInference(const cv::Mat& input, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    // Engine state guards, checked in order: model loaded, license valid, initialized.
    if (!_modelLoadValid) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Cannot load the TensorRT model. Please check if it is exist", __FILE__, __LINE__);
        return {};
    }
    if (!_licenseValid) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Runtime license is not valid or expired. Please contact ANSCENTER", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    try {
        // Silently reject frames that are empty or too small to be meaningful.
        const bool unusableFrame = input.empty() || (input.cols < 5) || (input.rows < 5);
        if (unusableFrame) {
            return {};
        }

        std::vector<Object> detections = segment(input, camera_id);

        // Stabilization is only applied on top of tracking.
        if (_trackerEnabled) {
            detections = ApplyTracking(detections, camera_id);
            if (_stabilizationEnabled) {
                detections = StabilizeDetections(detections, camera_id);
            }
        }
        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
|||
|
|
/// Convenience overload: runs inference under the fixed camera id "CustomCam".
///
/// @param inputImgBGR  Frame forwarded to the two-argument RunInference().
/// @return Whatever the two-argument overload returns for that frame.
std::vector<Object> ANSONNXSEG::RunInference(const cv::Mat& inputImgBGR) {
    const std::string defaultCameraId = "CustomCam";
    return RunInference(inputImgBGR, defaultCameraId);
}
|
|||
|
|
}
|