Files
ANSCORE/modules/ANSODEngine/ANSONNXPOSE.cpp

1471 lines
53 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ANSONNXPOSE.h"
#include "EPLoader.h"
namespace ANSCENTER {
// Process-wide count of constructed instances; presumably the source of each
// instance's unique instanceId_ (assignment not visible here — confirm in ctor).
std::atomic<int> ANSONNXPOSE::instanceCounter_(0); // Initialize static member
// Product of all entries of a shape vector (i.e. the element count of a
// tensor with these dimensions). An empty vector yields 1.
size_t ANSONNXPOSE::vectorProduct(const std::vector<int64_t>& vector) {
    size_t product = 1ull;
    for (const int64_t dim : vector) {
        product *= static_cast<size_t>(dim);
    }
    return product;
}
// Resizes `image` into `outImage` at `newShape` using letterbox semantics:
// the image is scaled preserving aspect ratio and the remainder is filled
// with `color` padding split between opposite sides.
//   auto_     - pad only to the next multiple of `stride` instead of the full
//               target shape (minimum-rectangle mode).
//   scaleFill - stretch to fill `newShape` exactly, no padding.
//   scaleUp   - when false, never enlarge the image (ratio capped at 1).
void ANSONNXPOSE::letterBox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape,
    const cv::Scalar& color,
    bool auto_,
    bool scaleFill,
    bool scaleUp,
    int stride)
{
    // Scale factor that fits the whole image inside the target shape.
    float ratio = std::min(static_cast<float>(newShape.height) / image.rows,
        static_cast<float>(newShape.width) / image.cols);
    // Prevent scaling up if not allowed
    if (!scaleUp) {
        ratio = std::min(ratio, 1.0f);
    }
    // Dimensions of the resized (unpadded) image.
    int newUnpadW = static_cast<int>(std::round(image.cols * ratio));
    int newUnpadH = static_cast<int>(std::round(image.rows * ratio));
    // Total padding needed to reach the desired shape.
    int dw = newShape.width - newUnpadW;
    int dh = newShape.height - newUnpadH;
    if (auto_) {
        // Minimal padding that keeps each output dimension a multiple of
        // `stride`; the per-side split happens below.
        // BUG FIX: the previous code used (dw % stride) / 2 here, which —
        // combined with the padLeft/padRight halving below — applied only
        // half the required padding and produced an output whose size was
        // not a multiple of `stride`.
        dw = dw % stride;
        dh = dh % stride;
    }
    else if (scaleFill) {
        // Stretch to fill the target shape exactly; no padding needed.
        newUnpadW = newShape.width;
        newUnpadH = newShape.height;
        ratio = std::min(static_cast<float>(newShape.width) / image.cols,
            static_cast<float>(newShape.height) / image.rows);
        dw = 0;
        dh = 0;
    }
    // (The former third branch duplicated the resize/pad code below verbatim
    // and returned early; falling through is equivalent, so it was removed.)
    // Resize only when the dimensions actually change to avoid a needless copy.
    if (image.cols != newUnpadW || image.rows != newUnpadH) {
        cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
    }
    else {
        outImage = image;
    }
    // Split padding between the two sides; any odd remainder goes to
    // the right/bottom edge.
    int padLeft = dw / 2;
    int padRight = dw - padLeft;
    int padTop = dh / 2;
    int padBottom = dh - padTop;
    cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight, cv::BORDER_CONSTANT, color);
}
void ANSONNXPOSE::NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,
const std::vector<float>& scores,
float scoreThreshold,
float nmsThreshold,
std::vector<int>& indices)
{
indices.clear();
const size_t numBoxes = boundingBoxes.size();
if (numBoxes == 0) {
DEBUG_PRINT("No bounding boxes to process in NMS");
return;
}
// Step 1: Filter out boxes with scores below the threshold
// and create a list of indices sorted by descending scores
std::vector<int> sortedIndices;
sortedIndices.reserve(numBoxes);
for (size_t i = 0; i < numBoxes; ++i) {
if (scores[i] >= scoreThreshold) {
sortedIndices.push_back(static_cast<int>(i));
}
}
// If no boxes remain after thresholding
if (sortedIndices.empty()) {
DEBUG_PRINT("No bounding boxes above score threshold");
return;
}
// Sort the indices based on scores in descending order
std::sort(sortedIndices.begin(), sortedIndices.end(),
[&scores](int idx1, int idx2) {
return scores[idx1] > scores[idx2];
});
// Step 2: Precompute the areas of all boxes
std::vector<float> areas(numBoxes, 0.0f);
for (size_t i = 0; i < numBoxes; ++i) {
areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
}
// Step 3: Suppression mask to mark boxes that are suppressed
std::vector<bool> suppressed(numBoxes, false);
// Step 4: Iterate through the sorted list and suppress boxes with high IoU
for (size_t i = 0; i < sortedIndices.size(); ++i) {
int currentIdx = sortedIndices[i];
if (suppressed[currentIdx]) {
continue;
}
// Select the current box as a valid detection
indices.push_back(currentIdx);
const BoundingBox& currentBox = boundingBoxes[currentIdx];
const float x1_max = currentBox.x;
const float y1_max = currentBox.y;
const float x2_max = currentBox.x + currentBox.width;
const float y2_max = currentBox.y + currentBox.height;
const float area_current = areas[currentIdx];
// Compare IoU of the current box with the rest
for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
int compareIdx = sortedIndices[j];
if (suppressed[compareIdx]) {
continue;
}
const BoundingBox& compareBox = boundingBoxes[compareIdx];
const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
const float interWidth = x2 - x1;
const float interHeight = y2 - y1;
if (interWidth <= 0 || interHeight <= 0) {
continue;
}
const float intersection = interWidth * interHeight;
const float unionArea = area_current + areas[compareIdx] - intersection;
const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
if (iou > nmsThreshold) {
suppressed[compareIdx] = true;
}
}
}
DEBUG_PRINT("NMS completed with " + std::to_string(indices.size()) + " indices remaining");
}
// Renders pose detections onto `image`: a bounding box per detection plus
// keypoints and skeleton limbs, with drawing sizes scaled relative to a
// 1280px reference resolution so output looks consistent across image sizes.
// NOTE(review): `kptThreshold` is accepted but never used — every keypoint is
// drawn unconditionally (valid[i] is always true). Confirm whether
// per-keypoint confidence gating was intended here.
void ANSONNXPOSE::drawPoseEstimation(cv::Mat& image,
    const std::vector<Object>& detections,
    float confidenceThreshold,
    float kptThreshold)
{
    // Calculate dynamic sizes based on image dimensions
    const int min_dim = std::min(image.rows, image.cols);
    const float scale_factor = min_dim / 1280.0f; // Reference 1280px size
    // Dynamic sizing parameters (clamped so they never reach zero on small images).
    // NOTE(review): font_scale, text_thickness and text_offset are computed but
    // unused — presumably reserved for the optional label drawing noted below.
    const int line_thickness = std::max(1, static_cast<int>(2 * scale_factor));
    const int kpt_radius = std::max(2, static_cast<int>(4 * scale_factor));
    const float font_scale = 0.5f * scale_factor;
    const int text_thickness = std::max(1, static_cast<int>(1 * scale_factor));
    const int text_offset = static_cast<int>(10 * scale_factor);
    // BGR color palette shared by keypoints and limbs.
    static const std::vector<cv::Scalar> pose_palette = {
        cv::Scalar(0,128,255),   // 0
        cv::Scalar(51,153,255),  // 1
        cv::Scalar(102,178,255), // 2
        cv::Scalar(0,230,230),   // 3
        cv::Scalar(255,153,255), // 4
        cv::Scalar(255,204,153), // 5
        cv::Scalar(255,102,255), // 6
        cv::Scalar(255,51,255),  // 7
        cv::Scalar(255,178,102), // 8
        cv::Scalar(255,153,51),  // 9
        cv::Scalar(153,153,255), // 10
        cv::Scalar(102,102,255), // 11
        cv::Scalar(51,51,255),   // 12
        cv::Scalar(153,255,153), // 13
        cv::Scalar(102,255,102), // 14
        cv::Scalar(51,255,51),   // 15
        cv::Scalar(0,255,0),     // 16
        cv::Scalar(255,0,0),     // 17
        cv::Scalar(0,0,255),     // 18
        cv::Scalar(255,255,255)  // 19
    };
    // Define per-keypoint color indices (for keypoints 0 to 16)
    static const std::vector<int> kpt_color_indices = { 16,16,16,16,16,0,0,0,0,0,0,9,9,9,9,9,9 };
    // Define per-limb color indices for each skeleton connection.
    // Make sure the number of entries here matches the number of pairs in POSE_SKELETON.
    static const std::vector<int> limb_color_indices = { 9,9,9,9,7,7,7,0,0,0,0,0,16,16,16,16,16,16,16 };
    // Loop through each detection
    for (const auto& detection : detections) {
        // Skip low-confidence detections entirely.
        if (detection.confidence < confidenceThreshold)
            continue;
        // Draw bounding box (optional; remove if you prefer only pose visualization)
        const auto& box = detection.box;
        cv::rectangle(image,
            cv::Point(box.x, box.y),
            cv::Point(box.x + box.width, box.y + box.height),
            cv::Scalar(0, 255, 0), // You can change the box color if desired
            line_thickness);
        // Prepare a vector to hold keypoint positions and validity flags.
        // NOTE(review): the keypoint count comes from detection.kps but the
        // coordinates below are read from detection.polygon — confirm both
        // containers hold the same keypoints in the same order.
        const size_t numKpts = detection.kps.size();
        std::vector<cv::Point> kpt_points(numKpts, cv::Point(-1, -1));
        std::vector<bool> valid(numKpts, false);
        // Draw keypoints using the corresponding palette colors
        for (size_t i = 0; i < numKpts; i++) {
            int x = std::round(detection.polygon[i].x);
            int y = std::round(detection.polygon[i].y);
            kpt_points[i] = cv::Point(x, y);
            valid[i] = true; // no confidence gate — every keypoint marked valid
            int color_index = (i < kpt_color_indices.size()) ? kpt_color_indices[i] : 0;
            cv::circle(image, cv::Point(x, y), kpt_radius, pose_palette[color_index], -1, cv::LINE_AA);
        }
        // Draw skeleton connections based on a predefined POSE_SKELETON (vector of pairs)
        // Make sure that POSE_SKELETON is defined with 0-indexed keypoint indices.
        for (size_t j = 0; j < POSE_SKELETON.size(); j++) {
            auto [src, dst] = POSE_SKELETON[j];
            // Draw a limb only when both endpoints exist and are valid.
            if (src < numKpts && dst < numKpts && valid[src] && valid[dst]) {
                // Use the corresponding limb color from the palette
                int limb_color_index = (j < limb_color_indices.size()) ? limb_color_indices[j] : 0;
                cv::line(image, kpt_points[src], kpt_points[dst],
                    pose_palette[limb_color_index],
                    line_thickness, cv::LINE_AA);
            }
        }
        // (Optional) Add text labels such as confidence scores here if desired.
    }
}
// One-time initialization for this instance: binds the ORT C API, builds the
// session options, attaches a GPU execution provider matching the
// runtime-detected hardware (CUDA / DirectML / OpenVINO) when `useGPU` is
// set, loads the model, caches input/output node metadata, and runs a warmup
// pass. Returns true on success; logs fatally and returns false on any
// exception. Serialized against other instance methods via `_mutex`.
bool ANSONNXPOSE::Init(const std::string& modelPath, bool useGPU, int deviceId)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        deviceId_ = deviceId;
        const auto& ep = ANSCENTER::EPLoader::Current();
        // Bind the dynamically loaded ORT C API once per process.
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        std::cout << "[ANSONNXPOSE] EP ready: "
            << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;
        // Unique environment name per instance to avoid conflicts
        std::string envName = "ONNX_POSE_INST" + std::to_string(instanceId_);
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, envName.c_str());
        sessionOptions = Ort::SessionOptions();
        // Cap intra-op threads so multiple instances don't oversubscribe cores.
        sessionOptions.SetIntraOpNumThreads(
            std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
        sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        // ── Log available providers ─────────────────────────────────────────
        std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
        std::cout << "[Instance " << instanceId_ << "] Available Execution Providers:" << std::endl;
        for (const auto& p : availableProviders)
            std::cout << " - " << p << std::endl;
        // ── Attach EP based on runtime-detected hardware ────────────────────
        if (useGPU) {
            bool attached = false;
            switch (ep.type) {
            case ANSCENTER::EngineType::NVIDIA_GPU: {
                // CUDA path: requires the CUDA-enabled ORT build to be loaded.
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "CUDAExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "CUDAExecutionProvider not in DLL — "
                        "check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    // V2 provider options: only the target device id is set.
                    // NOTE(review): cuda_options is not released if an exception
                    // is thrown between creation and ReleaseCUDAProviderOptions —
                    // a minor leak on the error path; confirm acceptable.
                    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                    Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
                    std::string deviceIdStr = std::to_string(deviceId_);
                    const char* keys[] = { "device_id" };
                    const char* values[] = { deviceIdStr.c_str() };
                    Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
                    sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
                    Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
                    std::cout << "[Instance " << instanceId_ << "] CUDA EP attached on device "
                        << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::AMD_GPU: {
                // AMD path: uses DirectML (Windows) rather than ROCm.
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "DmlExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "DmlExecutionProvider not in DLL — "
                        "check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    std::unordered_map<std::string, std::string> opts = {
                        { "device_id", std::to_string(deviceId_) }
                    };
                    sessionOptions.AppendExecutionProvider("DML", opts);
                    std::cout << "[Instance " << instanceId_ << "] DirectML EP attached on device "
                        << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::OPENVINO_GPU: {
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "OpenVINOExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "OpenVINOExecutionProvider not in DLL — "
                        "check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
                    break;
                }
                // FP32 + single thread preserved for determinism; each instance gets its own stream and cache
                const std::string precision = "FP32";
                const std::string numberOfThreads = "1";
                const std::string numberOfStreams = std::to_string(instanceId_ + 1);
                const std::string primaryDevice = "GPU." + std::to_string(deviceId_);
                const std::string cacheDir = "./ov_cache_inst" + std::to_string(instanceId_);
                // Fallback ladder: specific GPU index → any GPU → AUTO (GPU with
                // CPU fallback). The first config that attaches wins.
                std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                    { {"device_type", primaryDevice}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type","GPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} }
                };
                for (const auto& config : try_configs) {
                    try {
                        sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
                        std::cout << "[Instance " << instanceId_ << "] OpenVINO EP attached ("
                            << config.at("device_type") << ", stream: " << numberOfStreams << ")." << std::endl;
                        attached = true;
                        break;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                    }
                }
                if (!attached)
                    std::cerr << "[Instance " << instanceId_ << "] OpenVINO EP: all device configs failed." << std::endl;
                break;
            }
            default:
                break;
            }
            if (!attached) {
                // Logged as fatal, but execution deliberately continues on CPU.
                std::cerr << "[Instance " << instanceId_ << "] No GPU EP attached — running on CPU." << std::endl;
                this->_logger.LogFatal("ANSONNXPOSE::Init", "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
            }
        }
        else {
            std::cout << "[Instance " << instanceId_ << "] Inference device: CPU (useGPU=false)" << std::endl;
        }
        // ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
        // NOTE(review): this byte-wise widening only round-trips ASCII paths;
        // a non-ASCII modelPath would be mangled. Confirm paths are ASCII or
        // switch to a proper UTF-8 → UTF-16 conversion.
        std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
        session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
#else
        session = Ort::Session(env, modelPath.c_str(), sessionOptions);
#endif
        Ort::AllocatorWithDefaultOptions allocator;
        numInputNodes = session.GetInputCount();
        numOutputNodes = session.GetOutputCount();
        if (numInputNodes == 0) throw std::runtime_error("Model has no input nodes.");
        if (numOutputNodes == 0) throw std::runtime_error("Model has no output nodes.");
        // ── Input shape ─────────────────────────────────────────────────────
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
        std::vector<int64_t> inputTensorShapeVec =
            inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        if (inputTensorShapeVec.size() < 4)
            throw std::runtime_error("Invalid input tensor shape - expected 4 dimensions (NCHW).");
        // -1 in H/W marks a dynamic-axis model; fall back to configured size.
        isDynamicInputShape = (inputTensorShapeVec[2] == -1 || inputTensorShapeVec[3] == -1);
        std::cout << "[Instance " << instanceId_ << "] Model input shape: ["
            << inputTensorShapeVec[0] << ", " << inputTensorShapeVec[1] << ", "
            << inputTensorShapeVec[2] << ", " << inputTensorShapeVec[3] << "]"
            << (isDynamicInputShape ? " (dynamic)" : " (fixed)") << std::endl;
        if (!isDynamicInputShape) {
            // Fixed model: shape is (N, C, H, W) → cv::Size wants (W, H).
            inputImageShape = cv::Size(
                static_cast<int>(inputTensorShapeVec[3]),
                static_cast<int>(inputTensorShapeVec[2]));
        }
        else {
            inputImageShape = cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight);
            std::cout << "[Instance " << instanceId_ << "] Using default input shape: "
                << inputImageShape.width << "x" << inputImageShape.height << std::endl;
        }
        // ── Node names ──────────────────────────────────────────────────────
        // The allocated-string wrappers own the memory; the raw pointers in
        // inputNames/outputNames stay valid only while those wrappers live.
        auto input_name = session.GetInputNameAllocated(0, allocator);
        inputNodeNameAllocatedStrings.push_back(std::move(input_name));
        inputNames.push_back(inputNodeNameAllocatedStrings.back().get());
        for (size_t i = 0; i < numOutputNodes; ++i) {
            auto output_name = session.GetOutputNameAllocated(i, allocator);
            outputNodeNameAllocatedStrings.push_back(std::move(output_name));
            outputNames.push_back(outputNodeNameAllocatedStrings.back().get());
        }
        std::cout << "[Instance " << instanceId_ << "] Model loaded successfully:" << std::endl;
        std::cout << " - Input nodes: " << numInputNodes << std::endl;
        std::cout << " - Output nodes: " << numOutputNodes << std::endl;
        std::cout << " - Input shape: " << inputImageShape.width << "x" << inputImageShape.height << std::endl;
        // ── Warmup ──────────────────────────────────────────────────────────
        DEBUG_PRINT("[Instance " << instanceId_ << "] Starting warmup...");
        warmupModel();
        DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup completed successfully.");
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::Init",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        return false;
    }
}
void ANSONNXPOSE::warmupModel() {
try {
// Create dummy input image with correct size
cv::Mat dummyImage = cv::Mat::zeros(inputImageShape.height, inputImageShape.width, CV_8UC3);
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup: dummy image "
<< dummyImage.cols << "x" << dummyImage.rows);
// Run 3 warmup inferences to stabilize
for (int i = 0; i < 3; ++i) {
try {
// Your preprocessing logic here
float* blob = nullptr;
std::vector<int64_t> inputShape;
// If you have a preprocess method, call it
// Otherwise, create a simple dummy tensor
size_t tensorSize = 1 * 3 * inputImageShape.height * inputImageShape.width;
blob = new float[tensorSize];
std::memset(blob, 0, tensorSize * sizeof(float));
inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
// Create input tensor
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
memoryInfo,
blob,
tensorSize,
inputShape.data(),
inputShape.size()
);
// Run inference
std::vector<Ort::Value> outputTensors = session.Run(
Ort::RunOptions{ nullptr },
inputNames.data(),
&inputTensor,
1,
outputNames.data(),
numOutputNodes
);
// Clean up
delete[] blob;
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup " << (i + 1) << "/3 completed");
}
catch (const std::exception& e) {
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup iteration " << i
<< " failed (non-critical): " << e.what());
}
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup successful - all states initialized");
}
catch (const std::exception& e) {
this->_logger.LogWarn("ANSONNXPOSE::warmupModel",
std::string("[Instance ") + std::to_string(instanceId_) + "] Warmup failed: " + e.what(),
__FILE__, __LINE__);
}
}
// Converts an input image into a normalized NCHW float blob for inference.
// On success: `blob` points to new[]-allocated planar CHW data in [0, 1],
// `inputTensorShape` is set to {1, 3, H, W} of the letterboxed image, and the
// intermediate CV_32FC3 image is returned. On any failure an empty cv::Mat is
// returned and `blob` is left null (any previous allocation is freed).
// Ownership: the caller owns `blob` and must delete[] it; any buffer the
// caller passes in is freed here first to prevent leaks across calls.
// NOTE(review): no BGR→RGB channel reorder is performed — confirm the model
// expects the image's native channel order.
cv::Mat ANSONNXPOSE::preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // CRITICAL: Validate input image
        if (image.empty()) {
            this->_logger.LogError("ANSONNXPOSE::preprocess", "Input image is empty", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (image.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess", "Input image data pointer is null", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Check for NaN/Inf in input
        double minVal, maxVal;
        cv::minMaxLoc(image, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Input image contains NaN or Inf values. Min: " + std::to_string(minVal) +
                ", Max: " + std::to_string(maxVal), __FILE__, __LINE__);
            return cv::Mat();
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Input image: " << image.cols << "x" << image.rows
            << ", channels: " << image.channels()
            << ", type: " << image.type()
            << ", pixel range: [" << minVal << ", " << maxVal << "]");
        // CRITICAL: Free any blob left over from a previous call so repeated
        // calls with the same pointer don't leak.
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        cv::Mat resizedImage;
        // Letterbox to the model input size; borders are filled with gray (114).
        try {
            letterBox(image, resizedImage, inputImageShape, cv::Scalar(114, 114, 114),
                isDynamicInputShape, false, true, 32);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "letterBox failed: " + std::string(e.what()), __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Validate resized image
        if (resizedImage.empty() || resizedImage.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Resized image is empty after letterBox", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (resizedImage.cols <= 0 || resizedImage.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Invalid resized dimensions: " + std::to_string(resizedImage.cols) +
                "x" + std::to_string(resizedImage.rows), __FILE__, __LINE__);
            return cv::Mat();
        }
        if (resizedImage.channels() != 3) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Expected 3 channels but got: " + std::to_string(resizedImage.channels()),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: caller must supply a shape vector with at least 4 entries.
        if (inputTensorShape.size() < 4) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Input tensor shape has insufficient dimensions: " + std::to_string(inputTensorShape.size()),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // Update input tensor shape based on resized image dimensions
        inputTensorShape[0] = 1; // Batch size
        inputTensorShape[1] = 3; // Channels
        inputTensorShape[2] = resizedImage.rows;
        inputTensorShape[3] = resizedImage.cols;
        DEBUG_PRINT("[Instance " << instanceId_ << "] Resized to: " << resizedImage.cols << "x" << resizedImage.rows);
        // Convert image to float and normalize to [0, 1]
        cv::Mat floatImage;
        try {
            resizedImage.convertTo(floatImage, CV_32FC3, 1.0 / 255.0);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "convertTo failed: " + std::string(e.what()), __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Validate after conversion
        if (floatImage.empty() || floatImage.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Float image is empty after conversion", __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Check for NaN/Inf after float conversion
        cv::minMaxLoc(floatImage, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Float image contains NaN or Inf after conversion. Min: " + std::to_string(minVal) +
                ", Max: " + std::to_string(maxVal), __FILE__, __LINE__);
            return cv::Mat();
        }
        // Calculate blob size (W * H * C floats) and allocate memory
        size_t blobSize = static_cast<size_t>(floatImage.cols) *
            static_cast<size_t>(floatImage.rows) *
            static_cast<size_t>(floatImage.channels());
        if (blobSize == 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Calculated blob size is zero", __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Allocate and zero-initialize blob memory
        blob = new float[blobSize];
        std::memset(blob, 0, blobSize * sizeof(float));
        // Wrap each plane of `blob` in a Mat header so cv::split writes the
        // planar CHW layout directly into the output buffer (no extra copy).
        std::vector<cv::Mat> chw(floatImage.channels());
        size_t channelSize = static_cast<size_t>(floatImage.cols) * static_cast<size_t>(floatImage.rows);
        for (int i = 0; i < floatImage.channels(); ++i) {
            chw[i] = cv::Mat(floatImage.rows, floatImage.cols, CV_32FC1,
                blob + i * channelSize);
        }
        try {
            cv::split(floatImage, chw);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "cv::split failed: " + std::string(e.what()), __FILE__, __LINE__);
            delete[] blob;
            blob = nullptr;
            return cv::Mat();
        }
        // CRITICAL: Final validation of blob data
        bool hasNaN = false;
        float blobSum = 0.0f;
        float blobMin = std::numeric_limits<float>::max();
        float blobMax = std::numeric_limits<float>::lowest();
        // Check first 1000 values for NaN/Inf (sampling for performance)
        size_t checkSize = std::min(blobSize, size_t(1000));
        for (size_t i = 0; i < checkSize; ++i) {
            if (std::isnan(blob[i]) || std::isinf(blob[i])) {
                this->_logger.LogError("ANSONNXPOSE::preprocess",
                    "NaN/Inf found in blob at index " + std::to_string(i) +
                    ", value: " + std::to_string(blob[i]), __FILE__, __LINE__);
                hasNaN = true;
                break;
            }
            blobSum += blob[i];
            blobMin = std::min(blobMin, blob[i]);
            blobMax = std::max(blobMax, blob[i]);
        }
        if (hasNaN) {
            // Invalid data: release the buffer so the caller sees a null blob.
            delete[] blob;
            blob = nullptr;
            return cv::Mat();
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Preprocessing completed. "
            << "Tensor shape: " << inputTensorShape[0] << "x" << inputTensorShape[1] << "x"
            << inputTensorShape[2] << "x" << inputTensorShape[3]
            << " | Blob stats (sampled) - Min: " << blobMin << ", Max: " << blobMax
            << ", Sum: " << blobSum << ", Avg: " << (blobSum / checkSize));
        return floatImage; // Return the preprocessed image for potential use
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::preprocess",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        // Clean up blob on error
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        return cv::Mat();
    }
}
std::vector<Object> ANSONNXPOSE::postprocess(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
std::vector<Object> detections;
// CRITICAL: Validate output tensors
if (outputTensors.empty()) {
this->_logger.LogError("ANSONNXPOSE::postprocess", "No output tensors", __FILE__, __LINE__);
return {};
}
const float* rawOutput = outputTensors[0].GetTensorData<float>();
if (!rawOutput) {
this->_logger.LogError("ANSONNXPOSE::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
return {};
}
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
// CRITICAL: Validate output shape
if (outputShape.size() < 3) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid output shape dimensions: " + std::to_string(outputShape.size()),
__FILE__, __LINE__);
return {};
}
const int featuresPerKeypoint = 3; // x, y, confidence
const size_t numFeatures = static_cast<size_t>(outputShape[1]);
const size_t numDetections = static_cast<size_t>(outputShape[2]);
// With this:
const int derivedKps = static_cast<int>((numFeatures - 5) / featuresPerKeypoint);
const int numKeypoints = (derivedKps > 0 && derivedKps <= 133)
? derivedKps
: _modelConfig.numKPS;
DEBUG_PRINT("[Instance " << instanceId_ << "] Keypoints: " << numKeypoints
<< " (derived=" << derivedKps << ", config=" << _modelConfig.numKPS << ")");
const size_t expectedFeatures = 4 + 1 + numKeypoints * featuresPerKeypoint; // box(4) + conf(1) + kpts(17*3)
DEBUG_PRINT("[Instance " << instanceId_ << "] Output shape: ["
<< outputShape[0] << ", " << outputShape[1] << ", " << outputShape[2] << "]");
DEBUG_PRINT("[Instance " << instanceId_ << "] Detections: " << numDetections
<< ", Features: " << numFeatures << " (expected: " << expectedFeatures << ")");
if (numFeatures != expectedFeatures) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid output shape for pose estimation. Expected " + std::to_string(expectedFeatures) +
" features, got " + std::to_string(numFeatures),
__FILE__, __LINE__);
return {};
}
if (numDetections == 0) {
DEBUG_PRINT("[Instance " << instanceId_ << "] No detections in output");
return {};
}
// CRITICAL: Validate image sizes
if (originalImageSize.width <= 0 || originalImageSize.height <= 0) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid original image size: " + std::to_string(originalImageSize.width) +
"x" + std::to_string(originalImageSize.height),
__FILE__, __LINE__);
return {};
}
if (resizedImageShape.width <= 0 || resizedImageShape.height <= 0) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid resized image size: " + std::to_string(resizedImageShape.width) +
"x" + std::to_string(resizedImageShape.height),
__FILE__, __LINE__);
return {};
}
// CRITICAL: Check for NaN/Inf in raw output (conditional based on validation level)
size_t totalSize = numFeatures * numDetections;
size_t checkSize = std::min(totalSize, size_t(1000));
for (size_t i = 0; i < checkSize; ++i) {
if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"NaN/Inf detected in model output at index " + std::to_string(i) +
", value: " + std::to_string(rawOutput[i]),
__FILE__, __LINE__);
return {};
}
}
// Calculate letterbox padding parameters
const float scaleX = static_cast<float>(resizedImageShape.width) / static_cast<float>(originalImageSize.width);
const float scaleY = static_cast<float>(resizedImageShape.height) / static_cast<float>(originalImageSize.height);
const float scale = std::min(scaleX, scaleY);
if (scale <= 0.0f || std::isnan(scale) || std::isinf(scale)) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid scale factor: " + std::to_string(scale),
__FILE__, __LINE__);
return {};
}
const cv::Size scaledSize(
static_cast<int>(originalImageSize.width * scale),
static_cast<int>(originalImageSize.height * scale)
);
const cv::Point2f padding(
(resizedImageShape.width - scaledSize.width) / 2.0f,
(resizedImageShape.height - scaledSize.height) / 2.0f
);
DEBUG_PRINT("[Instance " << instanceId_ << "] Scale: " << scale
<< ", Padding: (" << padding.x << ", " << padding.y << ")");
// Process each detection
std::vector<cv::Rect> boxes; // Use cv::Rect for NMSBoxes compatibility
std::vector<float> confidences;
std::vector<std::vector<cv::Point2f>> allKeypoints;
size_t validDetections = 0;
for (size_t d = 0; d < numDetections; ++d) {
// Get object confidence
const float objConfidence = rawOutput[4 * numDetections + d];
// CRITICAL: Validate confidence value
if (std::isnan(objConfidence) || std::isinf(objConfidence)) {
continue;
}
if (objConfidence < _modelConfig.detectionScoreThreshold) {
continue;
}
// Decode bounding box
const float cx = rawOutput[0 * numDetections + d];
const float cy = rawOutput[1 * numDetections + d];
const float w = rawOutput[2 * numDetections + d];
const float h = rawOutput[3 * numDetections + d];
// CRITICAL: Validate bounding box values
if (std::isnan(cx) || std::isnan(cy) || std::isnan(w) || std::isnan(h) ||
std::isinf(cx) || std::isinf(cy) || std::isinf(w) || std::isinf(h)) {
continue;
}
if (w <= 0 || h <= 0) {
continue; // Invalid box dimensions
}
// Convert to original image coordinates
float x1 = (cx - padding.x - w / 2.0f) / scale;
float y1 = (cy - padding.y - h / 2.0f) / scale;
float box_w = w / scale;
float box_h = h / scale;
// Clip to image boundaries
x1 = std::max(0.0f, std::min(x1, static_cast<float>(originalImageSize.width - 1)));
y1 = std::max(0.0f, std::min(y1, static_cast<float>(originalImageSize.height - 1)));
box_w = std::max(1.0f, std::min(box_w, static_cast<float>(originalImageSize.width) - x1));
box_h = std::max(1.0f, std::min(box_h, static_cast<float>(originalImageSize.height) - y1));
cv::Rect box(
static_cast<int>(x1),
static_cast<int>(y1),
static_cast<int>(box_w),
static_cast<int>(box_h)
);
// Extract keypoints
std::vector<cv::Point2f> keypoints;
keypoints.reserve(numKeypoints);
bool validKeypoints = true;
for (int k = 0; k < numKeypoints; ++k) {
const size_t offset = 5 + k * featuresPerKeypoint;
// CRITICAL: Bounds check
if ((offset + 2) * numDetections + d >= numFeatures * numDetections) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Keypoint index out of bounds", __FILE__, __LINE__);
validKeypoints = false;
break;
}
const float kpt_x = rawOutput[offset * numDetections + d];
const float kpt_y = rawOutput[(offset + 1) * numDetections + d];
const float kpt_conf_raw = rawOutput[(offset + 2) * numDetections + d];
// CRITICAL: Validate keypoint values
if (std::isnan(kpt_x) || std::isnan(kpt_y) || std::isnan(kpt_conf_raw) ||
std::isinf(kpt_x) || std::isinf(kpt_y) || std::isinf(kpt_conf_raw)) {
this->_logger.LogWarn("ANSONNXPOSE::postprocess",
"NaN/Inf in keypoint " + std::to_string(k) +
" of detection " + std::to_string(d),
__FILE__, __LINE__);
validKeypoints = false;
break;
}
cv::Point2f kpt;
kpt.x = (kpt_x - padding.x) / scale;
kpt.y = (kpt_y - padding.y) / scale;
// Sigmoid activation for confidence
//kpt.confidence = 1.0f / (1.0f + std::exp(-kpt_conf_raw));
// CRITICAL: Validate sigmoid result
//if (std::isnan(kpt.confidence) || std::isinf(kpt.confidence)) {
// kpt.confidence = 0.0f; // Fallback for extreme values
//}
// Clip keypoints to image boundaries
kpt.x = std::max(0.0f, std::min(kpt.x, static_cast<float>(originalImageSize.width - 1)));
kpt.y = std::max(0.0f, std::min(kpt.y, static_cast<float>(originalImageSize.height - 1)));
keypoints.push_back(kpt);
}
if (!validKeypoints) {
continue; // Skip this detection if keypoints are invalid
}
// Store detection components
boxes.push_back(box);
confidences.push_back(objConfidence);
allKeypoints.push_back(std::move(keypoints));
validDetections++;
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Valid detections before NMS: " << validDetections);
if (boxes.empty()) {
DEBUG_PRINT("[Instance " << instanceId_ << "] No valid detections after filtering");
return {};
}
// Apply Non-Maximum Suppression
std::vector<int> indices;
try {
cv::dnn::NMSBoxes(
boxes,
confidences,
_modelConfig.modelConfThreshold,
_modelConfig.modelMNSThreshold,
indices
);
}
catch (const cv::Exception& e) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"NMS failed: " + std::string(e.what()), __FILE__, __LINE__);
return {};
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Detections after NMS: " << indices.size());
// Create final detections
detections.reserve(indices.size());
for (int idx : indices) {
// Validate index
if (idx < 0 || idx >= static_cast<int>(boxes.size())) {
this->_logger.LogWarn("ANSONNXPOSE::postprocess",
"Invalid NMS index: " + std::to_string(idx), __FILE__, __LINE__);
continue;
}
std::stringstream keypointXss;
std::stringstream keypointYss;
std::vector<float> keypointValues;
for (size_t keypointIdx = 0; keypointIdx < allKeypoints[idx].size(); keypointIdx++) {
keypointXss << allKeypoints[idx][keypointIdx].x;
keypointYss << allKeypoints[idx][keypointIdx].y;
// Add semicolon after each value except the last one
if (keypointIdx < allKeypoints[idx].size() - 1) {
keypointXss << ";";
keypointYss << ";";
}
keypointValues.push_back(allKeypoints[idx][keypointIdx].x);
keypointValues.push_back(allKeypoints[idx][keypointIdx].y);
}
std::string keypointXString = keypointXss.str();
std::string keypointYString = keypointYss.str();
std::string keypointString = keypointXString + "|" + keypointYString;
Object det;
det.box = boxes[idx];
det.confidence = confidences[idx];
det.classId = 0;
det.className = "Person";
det.cameraId = camera_id;
det.polygon = allKeypoints[idx];
det.kps = keypointValues;
det.extraInfo = keypointString;//Convert keypoint to st;
detections.push_back(det);
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Final detections: " << detections.size());
return detections;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXPOSE::postprocess",
std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
return {};
}
}
/**
 * Runs one pose-estimation pass on a BGR image.
 *
 * Pipeline: validate input -> preprocess (letterbox + blob) -> wrap blob in an
 * ONNX Runtime tensor (zero-copy) -> session.Run -> postprocess to Objects.
 *
 * @param image     Input frame (must be non-empty with positive dimensions).
 * @param camera_id Camera identifier propagated into each detection.
 * @return Detected pose objects, or an empty vector on any failure (all
 *         errors are logged, never thrown to the caller).
 *
 * Thread-safety: serialized by _mutex (recursive, so re-entry from the same
 * thread is allowed).
 */
std::vector<Object> ANSONNXPOSE::detect(const cv::Mat& image, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Blob allocated by preprocess() with new[]. Ownership is handed to the
    // RAII guard below so that every early return and every exception path
    // frees it exactly once (replaces the scattered delete[] calls that each
    // exit path previously had to remember).
    float* blobPtr = nullptr;
    struct BlobGuard {
        float*& ptr;
        void release() { delete[] ptr; ptr = nullptr; }
        ~BlobGuard() { release(); }
    } blobGuard{ blobPtr };
    try {
        // ---- Input validation ------------------------------------------------
        if (image.empty()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input image is empty", __FILE__, __LINE__);
            return {};
        }
        if (image.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input image data pointer is null", __FILE__, __LINE__);
            return {};
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
                __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Detecting pose in "
            << image.cols << "x" << image.rows << " image");
        // NCHW input layout: batch, channels, height, width.
        std::vector<int64_t> inputTensorShape = { 1, 3, inputImageShape.height, inputImageShape.width };
        // Preprocess the image; on success blobPtr points at the float blob.
        cv::Mat preprocessedImage = preprocess(image, blobPtr, inputTensorShape);
        if (preprocessedImage.empty() || blobPtr == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }
        // preprocess() may update the shape; it must still be rank-4.
        if (inputTensorShape.size() != 4) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Invalid input tensor shape dimensions: " + std::to_string(inputTensorShape.size()),
                __FILE__, __LINE__);
            return {};
        }
        // Total element count; rejects any non-positive dimension on the way.
        size_t inputTensorSize = 1;
        for (auto dim : inputTensorShape) {
            if (dim <= 0) {
                this->_logger.LogError("ANSONNXPOSE::detect",
                    "Invalid dimension in tensor shape: " + std::to_string(dim),
                    __FILE__, __LINE__);
                return {};
            }
            inputTensorSize *= static_cast<size_t>(dim);
        }
        if (inputTensorSize == 0) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input tensor size is zero", __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Tensor shape: ["
            << inputTensorShape[0] << ", " << inputTensorShape[1] << ", "
            << inputTensorShape[2] << ", " << inputTensorShape[3] << "], Size: " << inputTensorSize);
        // Created once per process: CPU memory info is stateless and reusable.
        static Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        // Wrap the blob directly (zero-copy). The blob must stay alive until
        // session.Run() returns; the guard only frees it after inference.
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo,
            blobPtr,
            inputTensorSize,
            inputTensorShape.data(),
            inputTensorShape.size()
        );
        if (!inputTensor.IsTensor()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Failed to create input tensor", __FILE__, __LINE__);
            return {};
        }
        // ---- Inference -------------------------------------------------------
        std::vector<Ort::Value> outputTensors;
        try {
            outputTensors = session.Run(
                Ort::RunOptions{ nullptr },
                inputNames.data(),
                &inputTensor,
                numInputNodes,
                outputNames.data(),
                numOutputNodes
            );
        }
        catch (const Ort::Exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "ONNX Runtime exception during inference: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Exception during inference: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        // Inference done: the tensor no longer needs the blob. Free it now
        // (rather than at scope exit) to keep peak memory low during
        // postprocessing, matching the original release point.
        blobGuard.release();
        if (outputTensors.empty()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "No output tensors from inference", __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Inference completed, processing outputs");
        // Model-input size as (width, height) for coordinate un-letterboxing.
        cv::Size resizedImageShape(
            static_cast<int>(inputTensorShape[3]),
            static_cast<int>(inputTensorShape[2])
        );
        // ---- Postprocessing --------------------------------------------------
        std::vector<Object> detections;
        try {
            detections = postprocess(image.size(), resizedImageShape, outputTensors, camera_id);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Exception during postprocessing: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Detection completed, found "
            << detections.size() << " pose(s)");
        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::detect",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        // blobGuard's destructor releases the blob on this path too.
        return {};
    }
}
// Public functions
// Destructor: delegates all teardown to Destroy(). Note that during
// destruction a virtual call would resolve to this class's override, not a
// derived one — presumably intentional; confirm if Destroy() is virtual.
ANSONNXPOSE::~ANSONNXPOSE() {
Destroy();
}
// Tears down this engine instance. At present there is nothing to release
// here; the method only emits a teardown trace line and reports success.
bool ANSONNXPOSE::Destroy() {
    std::ostream& sink = std::cout;
    sink << "[ANSONNXPOSE] Destroyed instance " << instanceId_ << std::endl;
    return true;
}
/**
 * Produces an optimized copy of the model.
 *
 * @param fp16                 Request half-precision optimization.
 * @param optimizedModelFolder Receives the folder holding the optimized model.
 * @return true on success.
 *
 * Delegates entirely to the base class; the previous
 * `if (!x) return false; return true;` pattern was an equivalent but
 * roundabout way of forwarding the base result.
 */
bool ANSONNXPOSE::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    return ANSODBase::OptimizeModel(fp16, optimizedModelFolder);
}
/**
 * Initializes the pose engine from an encrypted model ZIP package.
 *
 * Steps: base-class initialization -> force pose-specific model config ->
 * resolve model/class file paths (new config-file layout or legacy layout) ->
 * publish the label map -> create the ONNX Runtime session via Init().
 *
 * @param licenseKey       License key validated by the base class.
 * @param modelConfig      Caller-supplied configuration (sanitized below).
 * @param modelZipFilePath Path to the model archive.
 * @param modelZipPassword Archive password (empty if unencrypted).
 * @param labelMap         Out: comma-separated class names.
 * @return true when the session was created successfully.
 */
bool ANSONNXPOSE::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _modelLoadValid = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Force pose-specific settings regardless of the incoming config.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::ONNXPOSE;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp suspiciously low thresholds to sane defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        // 133 = COCO whole-body maximum; default to 17 (COCO body skeleton).
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        _fp16 = (modelConfig.precisionType == PrecisionType::FP16);
        if (FileExist(_modelConfigFile)) {
            // New-style package: classes and input shape come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Legacy package layout: fixed file names inside the model folder.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // Fixed log component tag: previously mislabeled "ANSONNXCL".
                this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Publish the label map to the caller.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);
        // 2. Initialize the ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        // Fixed log component tag: previously mislabeled "ANSONNXCL".
        this->_logger.LogFatal("ANSONNXPOSE::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/**
 * Reloads the pose model from a model ZIP package, reusing the stored
 * license/configuration state.
 *
 * @param modelZipFilePath Path to the model archive.
 * @param modelZipPassword Archive password (empty if unencrypted).
 * @return true when the package was unpacked and the ONNX session created.
 */
bool ANSONNXPOSE::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        // NOTE(review): Initialize() sets DETECTION/ONNXPOSE here, while this
        // path sets CLASSIFICATION/TENSORRT — looks like a copy/paste
        // leftover; confirm which pair downstream consumers expect before
        // changing the values.
        _modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp suspiciously low thresholds to sane defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        // 133 = COCO whole-body maximum; default to 17 (COCO body skeleton).
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        // if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
        _fp16 = true; // Load Model from Here
        // 0. Check if the configuration file exists.
        if (FileExist(_modelConfigFile)) {
            // New-style package: classes and input shape come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Legacy package layout: fixed file names inside the model folder.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // Fixed log context: previously said "Initialize" inside LoadModel.
                this->_logger.LogDebug("ANSONNXPOSE::LoadModel. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXPOSE::LoadModel. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // Initialize the ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSONNXPOSE::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
ModelLoadingGuard mlg(_modelLoading);
try {
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
if (!result) return false;
std::string _modelName = modelName;
if (_modelName.empty()) {
_modelName = "train_last";
}
std::string modelFullName = _modelName + ".onnx";
// Parsing for YOLO only here
_modelConfig = modelConfig;
_modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
_modelConfig.modelType = ModelType::TENSORRT;
_modelConfig.inpHeight = 640;
_modelConfig.inpWidth = 640;
if (_modelConfig.modelMNSThreshold < 0.2)
_modelConfig.modelMNSThreshold = 0.5;
if (_modelConfig.modelConfThreshold < 0.2)
_modelConfig.modelConfThreshold = 0.5;
// After
if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
_modelConfig.numKPS = 17;
if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
_fp16 = true; // Load Model from Here
// 0. Check if the configuration file exist
if (FileExist(_modelConfigFile)) {
ModelType modelType;
std::vector<int> inputShape;
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
if (inputShape.size() == 2) {
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
}
}
else {// This is old version of model zip file
_modelFilePath = CreateFilePath(_modelFolder, modelFullName);
_classFilePath = CreateFilePath(_modelFolder, className);
std::ifstream isValidFileName(_classFilePath);
if (!isValidFileName)
{
this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
LoadClassesFromString();
}
else {
this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
LoadClassesFromFile();
}
}
// 1. Load labelMap and engine
labelMap.clear();
if (!_classes.empty())
labelMap = VectorToCommaSeparatedString(_classes);
// 2. Initialize ONNX Runtime session
instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
_modelLoadValid = true;
_isInitialized = true;
return result;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXPOSE::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
return false;
}
}
/**
 * Public inference entry point: validates the frame, runs pose detection,
 * and optionally applies tracking and stabilization to the raw detections.
 *
 * @param input     Input frame; frames smaller than 5x5 pixels are rejected.
 * @param camera_id Camera identifier propagated into each detection.
 * @return Detections (possibly tracked/stabilized); empty on any failure.
 */
std::vector<Object> ANSONNXPOSE::RunInference(const cv::Mat& input, const std::string& camera_id) {
    if (!PreInferenceCheck("ANSONNXPOSE::RunInference")) return {};
    try {
        // Reject empty or degenerate frames up front.
        if (input.empty() || input.cols < 5 || input.rows < 5) {
            return {};
        }
        std::vector<Object> objects = detect(input, camera_id);
        if (_trackerEnabled) {
            objects = ApplyTracking(objects, camera_id);
            if (_stabilizationEnabled) {
                objects = StabilizeDetections(objects, camera_id);
            }
        }
        return objects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
// Convenience overload: runs inference with the default camera identifier
// "CustomCam" (this exact string is part of runtime behavior — detections
// carry it as their cameraId).
std::vector<Object> ANSONNXPOSE::RunInference(const cv::Mat& inputImgBGR) {
return RunInference(inputImgBGR, "CustomCam");
}
}