Files
ANSCORE/modules/ANSODEngine/ANSONNXPOSE.cpp

1471 lines
53 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ANSONNXPOSE.h"
#include "EPLoader.h"
namespace ANSCENTER {
// Process-wide count of constructed instances; presumably the source of each
// instance's unique instanceId_ (assignment not visible here — confirm in ctor).
std::atomic<int> ANSONNXPOSE::instanceCounter_(0); // Initialize static member
// Product of all entries of a shape vector (i.e. the element count of a
// tensor with these dimensions). An empty vector yields 1.
size_t ANSONNXPOSE::vectorProduct(const std::vector<int64_t>& vector) {
    size_t product = 1ull;
    for (const int64_t dim : vector) {
        product *= static_cast<size_t>(dim);
    }
    return product;
}
// Resizes `image` into `outImage` at `newShape` using letterbox semantics:
// the image is scaled preserving aspect ratio and the remainder is filled
// with `color` padding split between opposite sides.
//   auto_     - pad only to the next multiple of `stride` instead of the full
//               target shape (minimum-rectangle mode).
//   scaleFill - stretch to fill `newShape` exactly, no padding.
//   scaleUp   - when false, never enlarge the image (ratio capped at 1).
void ANSONNXPOSE::letterBox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape,
    const cv::Scalar& color,
    bool auto_,
    bool scaleFill,
    bool scaleUp,
    int stride)
{
    // Scale factor that fits the whole image inside the target shape.
    float ratio = std::min(static_cast<float>(newShape.height) / image.rows,
        static_cast<float>(newShape.width) / image.cols);
    // Prevent scaling up if not allowed
    if (!scaleUp) {
        ratio = std::min(ratio, 1.0f);
    }
    // Dimensions of the resized (unpadded) image.
    int newUnpadW = static_cast<int>(std::round(image.cols * ratio));
    int newUnpadH = static_cast<int>(std::round(image.rows * ratio));
    // Total padding needed to reach the desired shape.
    int dw = newShape.width - newUnpadW;
    int dh = newShape.height - newUnpadH;
    if (auto_) {
        // Minimal padding that keeps each output dimension a multiple of
        // `stride`; the per-side split happens below.
        // BUG FIX: the previous code used (dw % stride) / 2 here, which —
        // combined with the padLeft/padRight halving below — applied only
        // half the required padding and produced an output whose size was
        // not a multiple of `stride`.
        dw = dw % stride;
        dh = dh % stride;
    }
    else if (scaleFill) {
        // Stretch to fill the target shape exactly; no padding needed.
        newUnpadW = newShape.width;
        newUnpadH = newShape.height;
        ratio = std::min(static_cast<float>(newShape.width) / image.cols,
            static_cast<float>(newShape.height) / image.rows);
        dw = 0;
        dh = 0;
    }
    // (The former third branch duplicated the resize/pad code below verbatim
    // and returned early; falling through is equivalent, so it was removed.)
    // Resize only when the dimensions actually change to avoid a needless copy.
    if (image.cols != newUnpadW || image.rows != newUnpadH) {
        cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
    }
    else {
        outImage = image;
    }
    // Split padding between the two sides; any odd remainder goes to
    // the right/bottom edge.
    int padLeft = dw / 2;
    int padRight = dw - padLeft;
    int padTop = dh / 2;
    int padBottom = dh - padTop;
    cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight, cv::BORDER_CONSTANT, color);
}
void ANSONNXPOSE::NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,
const std::vector<float>& scores,
float scoreThreshold,
float nmsThreshold,
std::vector<int>& indices)
{
indices.clear();
const size_t numBoxes = boundingBoxes.size();
if (numBoxes == 0) {
DEBUG_PRINT("No bounding boxes to process in NMS");
return;
}
// Step 1: Filter out boxes with scores below the threshold
// and create a list of indices sorted by descending scores
std::vector<int> sortedIndices;
sortedIndices.reserve(numBoxes);
for (size_t i = 0; i < numBoxes; ++i) {
if (scores[i] >= scoreThreshold) {
sortedIndices.push_back(static_cast<int>(i));
}
}
// If no boxes remain after thresholding
if (sortedIndices.empty()) {
DEBUG_PRINT("No bounding boxes above score threshold");
return;
}
// Sort the indices based on scores in descending order
std::sort(sortedIndices.begin(), sortedIndices.end(),
[&scores](int idx1, int idx2) {
return scores[idx1] > scores[idx2];
});
// Step 2: Precompute the areas of all boxes
std::vector<float> areas(numBoxes, 0.0f);
for (size_t i = 0; i < numBoxes; ++i) {
areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
}
// Step 3: Suppression mask to mark boxes that are suppressed
std::vector<bool> suppressed(numBoxes, false);
// Step 4: Iterate through the sorted list and suppress boxes with high IoU
for (size_t i = 0; i < sortedIndices.size(); ++i) {
int currentIdx = sortedIndices[i];
if (suppressed[currentIdx]) {
continue;
}
// Select the current box as a valid detection
indices.push_back(currentIdx);
const BoundingBox& currentBox = boundingBoxes[currentIdx];
const float x1_max = currentBox.x;
const float y1_max = currentBox.y;
const float x2_max = currentBox.x + currentBox.width;
const float y2_max = currentBox.y + currentBox.height;
const float area_current = areas[currentIdx];
// Compare IoU of the current box with the rest
for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
int compareIdx = sortedIndices[j];
if (suppressed[compareIdx]) {
continue;
}
const BoundingBox& compareBox = boundingBoxes[compareIdx];
const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
const float interWidth = x2 - x1;
const float interHeight = y2 - y1;
if (interWidth <= 0 || interHeight <= 0) {
continue;
}
const float intersection = interWidth * interHeight;
const float unionArea = area_current + areas[compareIdx] - intersection;
const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
if (iou > nmsThreshold) {
suppressed[compareIdx] = true;
}
}
}
DEBUG_PRINT("NMS completed with " + std::to_string(indices.size()) + " indices remaining");
}
// Renders pose detections onto `image`: a bounding box per detection plus
// keypoints and skeleton limbs, with drawing sizes scaled relative to a
// 1280px reference resolution so output looks consistent across image sizes.
// NOTE(review): `kptThreshold` is accepted but never used — every keypoint is
// drawn unconditionally (valid[i] is always true). Confirm whether
// per-keypoint confidence gating was intended here.
void ANSONNXPOSE::drawPoseEstimation(cv::Mat& image,
    const std::vector<Object>& detections,
    float confidenceThreshold,
    float kptThreshold)
{
    // Calculate dynamic sizes based on image dimensions
    const int min_dim = std::min(image.rows, image.cols);
    const float scale_factor = min_dim / 1280.0f; // Reference 1280px size
    // Dynamic sizing parameters (clamped so they never reach zero on small images).
    // NOTE(review): font_scale, text_thickness and text_offset are computed but
    // unused — presumably reserved for the optional label drawing noted below.
    const int line_thickness = std::max(1, static_cast<int>(2 * scale_factor));
    const int kpt_radius = std::max(2, static_cast<int>(4 * scale_factor));
    const float font_scale = 0.5f * scale_factor;
    const int text_thickness = std::max(1, static_cast<int>(1 * scale_factor));
    const int text_offset = static_cast<int>(10 * scale_factor);
    // BGR color palette shared by keypoints and limbs.
    static const std::vector<cv::Scalar> pose_palette = {
        cv::Scalar(0,128,255),   // 0
        cv::Scalar(51,153,255),  // 1
        cv::Scalar(102,178,255), // 2
        cv::Scalar(0,230,230),   // 3
        cv::Scalar(255,153,255), // 4
        cv::Scalar(255,204,153), // 5
        cv::Scalar(255,102,255), // 6
        cv::Scalar(255,51,255),  // 7
        cv::Scalar(255,178,102), // 8
        cv::Scalar(255,153,51),  // 9
        cv::Scalar(153,153,255), // 10
        cv::Scalar(102,102,255), // 11
        cv::Scalar(51,51,255),   // 12
        cv::Scalar(153,255,153), // 13
        cv::Scalar(102,255,102), // 14
        cv::Scalar(51,255,51),   // 15
        cv::Scalar(0,255,0),     // 16
        cv::Scalar(255,0,0),     // 17
        cv::Scalar(0,0,255),     // 18
        cv::Scalar(255,255,255)  // 19
    };
    // Define per-keypoint color indices (for keypoints 0 to 16)
    static const std::vector<int> kpt_color_indices = { 16,16,16,16,16,0,0,0,0,0,0,9,9,9,9,9,9 };
    // Define per-limb color indices for each skeleton connection.
    // Make sure the number of entries here matches the number of pairs in POSE_SKELETON.
    static const std::vector<int> limb_color_indices = { 9,9,9,9,7,7,7,0,0,0,0,0,16,16,16,16,16,16,16 };
    // Loop through each detection
    for (const auto& detection : detections) {
        // Skip low-confidence detections entirely.
        if (detection.confidence < confidenceThreshold)
            continue;
        // Draw bounding box (optional; remove if you prefer only pose visualization)
        const auto& box = detection.box;
        cv::rectangle(image,
            cv::Point(box.x, box.y),
            cv::Point(box.x + box.width, box.y + box.height),
            cv::Scalar(0, 255, 0), // You can change the box color if desired
            line_thickness);
        // Prepare a vector to hold keypoint positions and validity flags.
        // NOTE(review): the keypoint count comes from detection.kps but the
        // coordinates below are read from detection.polygon — confirm both
        // containers hold the same keypoints in the same order.
        const size_t numKpts = detection.kps.size();
        std::vector<cv::Point> kpt_points(numKpts, cv::Point(-1, -1));
        std::vector<bool> valid(numKpts, false);
        // Draw keypoints using the corresponding palette colors
        for (size_t i = 0; i < numKpts; i++) {
            int x = std::round(detection.polygon[i].x);
            int y = std::round(detection.polygon[i].y);
            kpt_points[i] = cv::Point(x, y);
            valid[i] = true; // no confidence gate — every keypoint marked valid
            int color_index = (i < kpt_color_indices.size()) ? kpt_color_indices[i] : 0;
            cv::circle(image, cv::Point(x, y), kpt_radius, pose_palette[color_index], -1, cv::LINE_AA);
        }
        // Draw skeleton connections based on a predefined POSE_SKELETON (vector of pairs)
        // Make sure that POSE_SKELETON is defined with 0-indexed keypoint indices.
        for (size_t j = 0; j < POSE_SKELETON.size(); j++) {
            auto [src, dst] = POSE_SKELETON[j];
            // Draw a limb only when both endpoints exist and are valid.
            if (src < numKpts && dst < numKpts && valid[src] && valid[dst]) {
                // Use the corresponding limb color from the palette
                int limb_color_index = (j < limb_color_indices.size()) ? limb_color_indices[j] : 0;
                cv::line(image, kpt_points[src], kpt_points[dst],
                    pose_palette[limb_color_index],
                    line_thickness, cv::LINE_AA);
            }
        }
        // (Optional) Add text labels such as confidence scores here if desired.
    }
}
// One-time initialization for this instance: binds the ORT C API, builds the
// session options, attaches a GPU execution provider matching the
// runtime-detected hardware (CUDA / DirectML / OpenVINO) when `useGPU` is
// set, loads the model, caches input/output node metadata, and runs a warmup
// pass. Returns true on success; logs fatally and returns false on any
// exception. Serialized against other instance methods via `_mutex`.
bool ANSONNXPOSE::Init(const std::string& modelPath, bool useGPU, int deviceId)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        deviceId_ = deviceId;
        const auto& ep = ANSCENTER::EPLoader::Current();
        // Bind the dynamically loaded ORT C API once per process.
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        std::cout << "[ANSONNXPOSE] EP ready: "
            << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;
        // Unique environment name per instance to avoid conflicts
        std::string envName = "ONNX_POSE_INST" + std::to_string(instanceId_);
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, envName.c_str());
        sessionOptions = Ort::SessionOptions();
        // Cap intra-op threads so multiple instances don't oversubscribe cores.
        sessionOptions.SetIntraOpNumThreads(
            std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
        sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        // ── Log available providers ─────────────────────────────────────────
        std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
        std::cout << "[Instance " << instanceId_ << "] Available Execution Providers:" << std::endl;
        for (const auto& p : availableProviders)
            std::cout << " - " << p << std::endl;
        // ── Attach EP based on runtime-detected hardware ────────────────────
        if (useGPU) {
            bool attached = false;
            switch (ep.type) {
            case ANSCENTER::EngineType::NVIDIA_GPU: {
                // CUDA path: requires the CUDA-enabled ORT build to be loaded.
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "CUDAExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "CUDAExecutionProvider not in DLL — "
                        "check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    // V2 provider options: only the target device id is set.
                    // NOTE(review): cuda_options is not released if an exception
                    // is thrown between creation and ReleaseCUDAProviderOptions —
                    // a minor leak on the error path; confirm acceptable.
                    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                    Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
                    std::string deviceIdStr = std::to_string(deviceId_);
                    const char* keys[] = { "device_id" };
                    const char* values[] = { deviceIdStr.c_str() };
                    Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
                    sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
                    Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
                    std::cout << "[Instance " << instanceId_ << "] CUDA EP attached on device "
                        << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::AMD_GPU: {
                // AMD path: uses DirectML (Windows) rather than ROCm.
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "DmlExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "DmlExecutionProvider not in DLL — "
                        "check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
                    break;
                }
                try {
                    std::unordered_map<std::string, std::string> opts = {
                        { "device_id", std::to_string(deviceId_) }
                    };
                    sessionOptions.AppendExecutionProvider("DML", opts);
                    std::cout << "[Instance " << instanceId_ << "] DirectML EP attached on device "
                        << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::OPENVINO_GPU: {
                auto it = std::find(availableProviders.begin(),
                    availableProviders.end(), "OpenVINOExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXPOSE::Init", "OpenVINOExecutionProvider not in DLL — "
                        "check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
                    break;
                }
                // FP32 + single thread preserved for determinism; each instance gets its own stream and cache
                const std::string precision = "FP32";
                const std::string numberOfThreads = "1";
                const std::string numberOfStreams = std::to_string(instanceId_ + 1);
                const std::string primaryDevice = "GPU." + std::to_string(deviceId_);
                const std::string cacheDir = "./ov_cache_inst" + std::to_string(instanceId_);
                // Fallback ladder: specific GPU index → any GPU → AUTO (GPU with
                // CPU fallback). The first config that attaches wins.
                std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                    { {"device_type", primaryDevice}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type","GPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
                      {"cache_dir", cacheDir} }
                };
                for (const auto& config : try_configs) {
                    try {
                        sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
                        std::cout << "[Instance " << instanceId_ << "] OpenVINO EP attached ("
                            << config.at("device_type") << ", stream: " << numberOfStreams << ")." << std::endl;
                        attached = true;
                        break;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("ANSONNXPOSE::Init", e.what(), __FILE__, __LINE__);
                    }
                }
                if (!attached)
                    std::cerr << "[Instance " << instanceId_ << "] OpenVINO EP: all device configs failed." << std::endl;
                break;
            }
            default:
                break;
            }
            if (!attached) {
                // Logged as fatal, but execution deliberately continues on CPU.
                std::cerr << "[Instance " << instanceId_ << "] No GPU EP attached — running on CPU." << std::endl;
                this->_logger.LogFatal("ANSONNXPOSE::Init", "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
            }
        }
        else {
            std::cout << "[Instance " << instanceId_ << "] Inference device: CPU (useGPU=false)" << std::endl;
        }
        // ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
        // NOTE(review): this byte-wise widening only round-trips ASCII paths;
        // a non-ASCII modelPath would be mangled. Confirm paths are ASCII or
        // switch to a proper UTF-8 → UTF-16 conversion.
        std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
        session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
#else
        session = Ort::Session(env, modelPath.c_str(), sessionOptions);
#endif
        Ort::AllocatorWithDefaultOptions allocator;
        numInputNodes = session.GetInputCount();
        numOutputNodes = session.GetOutputCount();
        if (numInputNodes == 0) throw std::runtime_error("Model has no input nodes.");
        if (numOutputNodes == 0) throw std::runtime_error("Model has no output nodes.");
        // ── Input shape ─────────────────────────────────────────────────────
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
        std::vector<int64_t> inputTensorShapeVec =
            inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
        if (inputTensorShapeVec.size() < 4)
            throw std::runtime_error("Invalid input tensor shape - expected 4 dimensions (NCHW).");
        // -1 in H/W marks a dynamic-axis model; fall back to configured size.
        isDynamicInputShape = (inputTensorShapeVec[2] == -1 || inputTensorShapeVec[3] == -1);
        std::cout << "[Instance " << instanceId_ << "] Model input shape: ["
            << inputTensorShapeVec[0] << ", " << inputTensorShapeVec[1] << ", "
            << inputTensorShapeVec[2] << ", " << inputTensorShapeVec[3] << "]"
            << (isDynamicInputShape ? " (dynamic)" : " (fixed)") << std::endl;
        if (!isDynamicInputShape) {
            // Fixed model: shape is (N, C, H, W) → cv::Size wants (W, H).
            inputImageShape = cv::Size(
                static_cast<int>(inputTensorShapeVec[3]),
                static_cast<int>(inputTensorShapeVec[2]));
        }
        else {
            inputImageShape = cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight);
            std::cout << "[Instance " << instanceId_ << "] Using default input shape: "
                << inputImageShape.width << "x" << inputImageShape.height << std::endl;
        }
        // ── Node names ──────────────────────────────────────────────────────
        // The allocated-string wrappers own the memory; the raw pointers in
        // inputNames/outputNames stay valid only while those wrappers live.
        auto input_name = session.GetInputNameAllocated(0, allocator);
        inputNodeNameAllocatedStrings.push_back(std::move(input_name));
        inputNames.push_back(inputNodeNameAllocatedStrings.back().get());
        for (size_t i = 0; i < numOutputNodes; ++i) {
            auto output_name = session.GetOutputNameAllocated(i, allocator);
            outputNodeNameAllocatedStrings.push_back(std::move(output_name));
            outputNames.push_back(outputNodeNameAllocatedStrings.back().get());
        }
        std::cout << "[Instance " << instanceId_ << "] Model loaded successfully:" << std::endl;
        std::cout << " - Input nodes: " << numInputNodes << std::endl;
        std::cout << " - Output nodes: " << numOutputNodes << std::endl;
        std::cout << " - Input shape: " << inputImageShape.width << "x" << inputImageShape.height << std::endl;
        // ── Warmup ──────────────────────────────────────────────────────────
        DEBUG_PRINT("[Instance " << instanceId_ << "] Starting warmup...");
        warmupModel();
        DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup completed successfully.");
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::Init",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        return false;
    }
}
void ANSONNXPOSE::warmupModel() {
try {
// Create dummy input image with correct size
cv::Mat dummyImage = cv::Mat::zeros(inputImageShape.height, inputImageShape.width, CV_8UC3);
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup: dummy image "
<< dummyImage.cols << "x" << dummyImage.rows);
// Run 3 warmup inferences to stabilize
for (int i = 0; i < 3; ++i) {
try {
// Your preprocessing logic here
float* blob = nullptr;
std::vector<int64_t> inputShape;
// If you have a preprocess method, call it
// Otherwise, create a simple dummy tensor
size_t tensorSize = 1 * 3 * inputImageShape.height * inputImageShape.width;
blob = new float[tensorSize];
std::memset(blob, 0, tensorSize * sizeof(float));
inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
// Create input tensor
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
memoryInfo,
blob,
tensorSize,
inputShape.data(),
inputShape.size()
);
// Run inference
std::vector<Ort::Value> outputTensors = session.Run(
Ort::RunOptions{ nullptr },
inputNames.data(),
&inputTensor,
1,
outputNames.data(),
numOutputNodes
);
// Clean up
delete[] blob;
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup " << (i + 1) << "/3 completed");
}
catch (const std::exception& e) {
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup iteration " << i
<< " failed (non-critical): " << e.what());
}
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup successful - all states initialized");
}
catch (const std::exception& e) {
this->_logger.LogWarn("ANSONNXPOSE::warmupModel",
std::string("[Instance ") + std::to_string(instanceId_) + "] Warmup failed: " + e.what(),
__FILE__, __LINE__);
}
}
// Converts an input image into a normalized NCHW float blob for inference.
// On success: `blob` points to new[]-allocated planar CHW data in [0, 1],
// `inputTensorShape` is set to {1, 3, H, W} of the letterboxed image, and the
// intermediate CV_32FC3 image is returned. On any failure an empty cv::Mat is
// returned and `blob` is left null (any previous allocation is freed).
// Ownership: the caller owns `blob` and must delete[] it; any buffer the
// caller passes in is freed here first to prevent leaks across calls.
// NOTE(review): no BGR→RGB channel reorder is performed — confirm the model
// expects the image's native channel order.
cv::Mat ANSONNXPOSE::preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // CRITICAL: Validate input image
        if (image.empty()) {
            this->_logger.LogError("ANSONNXPOSE::preprocess", "Input image is empty", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (image.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess", "Input image data pointer is null", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Check for NaN/Inf in input
        double minVal, maxVal;
        cv::minMaxLoc(image, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Input image contains NaN or Inf values. Min: " + std::to_string(minVal) +
                ", Max: " + std::to_string(maxVal), __FILE__, __LINE__);
            return cv::Mat();
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Input image: " << image.cols << "x" << image.rows
            << ", channels: " << image.channels()
            << ", type: " << image.type()
            << ", pixel range: [" << minVal << ", " << maxVal << "]");
        // CRITICAL: Free any blob left over from a previous call so repeated
        // calls with the same pointer don't leak.
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        cv::Mat resizedImage;
        // Letterbox to the model input size; borders are filled with gray (114).
        try {
            letterBox(image, resizedImage, inputImageShape, cv::Scalar(114, 114, 114),
                isDynamicInputShape, false, true, 32);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "letterBox failed: " + std::string(e.what()), __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Validate resized image
        if (resizedImage.empty() || resizedImage.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Resized image is empty after letterBox", __FILE__, __LINE__);
            return cv::Mat();
        }
        if (resizedImage.cols <= 0 || resizedImage.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Invalid resized dimensions: " + std::to_string(resizedImage.cols) +
                "x" + std::to_string(resizedImage.rows), __FILE__, __LINE__);
            return cv::Mat();
        }
        if (resizedImage.channels() != 3) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Expected 3 channels but got: " + std::to_string(resizedImage.channels()),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: caller must supply a shape vector with at least 4 entries.
        if (inputTensorShape.size() < 4) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Input tensor shape has insufficient dimensions: " + std::to_string(inputTensorShape.size()),
                __FILE__, __LINE__);
            return cv::Mat();
        }
        // Update input tensor shape based on resized image dimensions
        inputTensorShape[0] = 1; // Batch size
        inputTensorShape[1] = 3; // Channels
        inputTensorShape[2] = resizedImage.rows;
        inputTensorShape[3] = resizedImage.cols;
        DEBUG_PRINT("[Instance " << instanceId_ << "] Resized to: " << resizedImage.cols << "x" << resizedImage.rows);
        // Convert image to float and normalize to [0, 1]
        cv::Mat floatImage;
        try {
            resizedImage.convertTo(floatImage, CV_32FC3, 1.0 / 255.0);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "convertTo failed: " + std::string(e.what()), __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Validate after conversion
        if (floatImage.empty() || floatImage.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Float image is empty after conversion", __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Check for NaN/Inf after float conversion
        cv::minMaxLoc(floatImage, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Float image contains NaN or Inf after conversion. Min: " + std::to_string(minVal) +
                ", Max: " + std::to_string(maxVal), __FILE__, __LINE__);
            return cv::Mat();
        }
        // Calculate blob size (W * H * C floats) and allocate memory
        size_t blobSize = static_cast<size_t>(floatImage.cols) *
            static_cast<size_t>(floatImage.rows) *
            static_cast<size_t>(floatImage.channels());
        if (blobSize == 0) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "Calculated blob size is zero", __FILE__, __LINE__);
            return cv::Mat();
        }
        // CRITICAL: Allocate and zero-initialize blob memory
        blob = new float[blobSize];
        std::memset(blob, 0, blobSize * sizeof(float));
        // Wrap each plane of `blob` in a Mat header so cv::split writes the
        // planar CHW layout directly into the output buffer (no extra copy).
        std::vector<cv::Mat> chw(floatImage.channels());
        size_t channelSize = static_cast<size_t>(floatImage.cols) * static_cast<size_t>(floatImage.rows);
        for (int i = 0; i < floatImage.channels(); ++i) {
            chw[i] = cv::Mat(floatImage.rows, floatImage.cols, CV_32FC1,
                blob + i * channelSize);
        }
        try {
            cv::split(floatImage, chw);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::preprocess",
                "cv::split failed: " + std::string(e.what()), __FILE__, __LINE__);
            delete[] blob;
            blob = nullptr;
            return cv::Mat();
        }
        // CRITICAL: Final validation of blob data
        bool hasNaN = false;
        float blobSum = 0.0f;
        float blobMin = std::numeric_limits<float>::max();
        float blobMax = std::numeric_limits<float>::lowest();
        // Check first 1000 values for NaN/Inf (sampling for performance)
        size_t checkSize = std::min(blobSize, size_t(1000));
        for (size_t i = 0; i < checkSize; ++i) {
            if (std::isnan(blob[i]) || std::isinf(blob[i])) {
                this->_logger.LogError("ANSONNXPOSE::preprocess",
                    "NaN/Inf found in blob at index " + std::to_string(i) +
                    ", value: " + std::to_string(blob[i]), __FILE__, __LINE__);
                hasNaN = true;
                break;
            }
            blobSum += blob[i];
            blobMin = std::min(blobMin, blob[i]);
            blobMax = std::max(blobMax, blob[i]);
        }
        if (hasNaN) {
            // Invalid data: release the buffer so the caller sees a null blob.
            delete[] blob;
            blob = nullptr;
            return cv::Mat();
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Preprocessing completed. "
            << "Tensor shape: " << inputTensorShape[0] << "x" << inputTensorShape[1] << "x"
            << inputTensorShape[2] << "x" << inputTensorShape[3]
            << " | Blob stats (sampled) - Min: " << blobMin << ", Max: " << blobMax
            << ", Sum: " << blobSum << ", Avg: " << (blobSum / checkSize));
        return floatImage; // Return the preprocessed image for potential use
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::preprocess",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        // Clean up blob on error
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        return cv::Mat();
    }
}
std::vector<Object> ANSONNXPOSE::postprocess(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
std::vector<Object> detections;
// CRITICAL: Validate output tensors
if (outputTensors.empty()) {
this->_logger.LogError("ANSONNXPOSE::postprocess", "No output tensors", __FILE__, __LINE__);
return {};
}
const float* rawOutput = outputTensors[0].GetTensorData<float>();
if (!rawOutput) {
this->_logger.LogError("ANSONNXPOSE::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
return {};
}
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
// CRITICAL: Validate output shape
if (outputShape.size() < 3) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid output shape dimensions: " + std::to_string(outputShape.size()),
__FILE__, __LINE__);
return {};
}
const int featuresPerKeypoint = 3; // x, y, confidence
const size_t numFeatures = static_cast<size_t>(outputShape[1]);
const size_t numDetections = static_cast<size_t>(outputShape[2]);
// With this:
const int derivedKps = static_cast<int>((numFeatures - 5) / featuresPerKeypoint);
const int numKeypoints = (derivedKps > 0 && derivedKps <= 133)
? derivedKps
: _modelConfig.numKPS;
DEBUG_PRINT("[Instance " << instanceId_ << "] Keypoints: " << numKeypoints
<< " (derived=" << derivedKps << ", config=" << _modelConfig.numKPS << ")");
const size_t expectedFeatures = 4 + 1 + numKeypoints * featuresPerKeypoint; // box(4) + conf(1) + kpts(17*3)
DEBUG_PRINT("[Instance " << instanceId_ << "] Output shape: ["
<< outputShape[0] << ", " << outputShape[1] << ", " << outputShape[2] << "]");
DEBUG_PRINT("[Instance " << instanceId_ << "] Detections: " << numDetections
<< ", Features: " << numFeatures << " (expected: " << expectedFeatures << ")");
if (numFeatures != expectedFeatures) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid output shape for pose estimation. Expected " + std::to_string(expectedFeatures) +
" features, got " + std::to_string(numFeatures),
__FILE__, __LINE__);
return {};
}
if (numDetections == 0) {
DEBUG_PRINT("[Instance " << instanceId_ << "] No detections in output");
return {};
}
// CRITICAL: Validate image sizes
if (originalImageSize.width <= 0 || originalImageSize.height <= 0) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid original image size: " + std::to_string(originalImageSize.width) +
"x" + std::to_string(originalImageSize.height),
__FILE__, __LINE__);
return {};
}
if (resizedImageShape.width <= 0 || resizedImageShape.height <= 0) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid resized image size: " + std::to_string(resizedImageShape.width) +
"x" + std::to_string(resizedImageShape.height),
__FILE__, __LINE__);
return {};
}
// CRITICAL: Check for NaN/Inf in raw output (conditional based on validation level)
size_t totalSize = numFeatures * numDetections;
size_t checkSize = std::min(totalSize, size_t(1000));
for (size_t i = 0; i < checkSize; ++i) {
if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"NaN/Inf detected in model output at index " + std::to_string(i) +
", value: " + std::to_string(rawOutput[i]),
__FILE__, __LINE__);
return {};
}
}
// Calculate letterbox padding parameters
const float scaleX = static_cast<float>(resizedImageShape.width) / static_cast<float>(originalImageSize.width);
const float scaleY = static_cast<float>(resizedImageShape.height) / static_cast<float>(originalImageSize.height);
const float scale = std::min(scaleX, scaleY);
if (scale <= 0.0f || std::isnan(scale) || std::isinf(scale)) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Invalid scale factor: " + std::to_string(scale),
__FILE__, __LINE__);
return {};
}
const cv::Size scaledSize(
static_cast<int>(originalImageSize.width * scale),
static_cast<int>(originalImageSize.height * scale)
);
const cv::Point2f padding(
(resizedImageShape.width - scaledSize.width) / 2.0f,
(resizedImageShape.height - scaledSize.height) / 2.0f
);
DEBUG_PRINT("[Instance " << instanceId_ << "] Scale: " << scale
<< ", Padding: (" << padding.x << ", " << padding.y << ")");
// Process each detection
std::vector<cv::Rect> boxes; // Use cv::Rect for NMSBoxes compatibility
std::vector<float> confidences;
std::vector<std::vector<cv::Point2f>> allKeypoints;
size_t validDetections = 0;
for (size_t d = 0; d < numDetections; ++d) {
// Get object confidence
const float objConfidence = rawOutput[4 * numDetections + d];
// CRITICAL: Validate confidence value
if (std::isnan(objConfidence) || std::isinf(objConfidence)) {
continue;
}
if (objConfidence < _modelConfig.detectionScoreThreshold) {
continue;
}
// Decode bounding box
const float cx = rawOutput[0 * numDetections + d];
const float cy = rawOutput[1 * numDetections + d];
const float w = rawOutput[2 * numDetections + d];
const float h = rawOutput[3 * numDetections + d];
// CRITICAL: Validate bounding box values
if (std::isnan(cx) || std::isnan(cy) || std::isnan(w) || std::isnan(h) ||
std::isinf(cx) || std::isinf(cy) || std::isinf(w) || std::isinf(h)) {
continue;
}
if (w <= 0 || h <= 0) {
continue; // Invalid box dimensions
}
// Convert to original image coordinates
float x1 = (cx - padding.x - w / 2.0f) / scale;
float y1 = (cy - padding.y - h / 2.0f) / scale;
float box_w = w / scale;
float box_h = h / scale;
// Clip to image boundaries
x1 = std::max(0.0f, std::min(x1, static_cast<float>(originalImageSize.width - 1)));
y1 = std::max(0.0f, std::min(y1, static_cast<float>(originalImageSize.height - 1)));
box_w = std::max(1.0f, std::min(box_w, static_cast<float>(originalImageSize.width) - x1));
box_h = std::max(1.0f, std::min(box_h, static_cast<float>(originalImageSize.height) - y1));
cv::Rect box(
static_cast<int>(x1),
static_cast<int>(y1),
static_cast<int>(box_w),
static_cast<int>(box_h)
);
// Extract keypoints
std::vector<cv::Point2f> keypoints;
keypoints.reserve(numKeypoints);
bool validKeypoints = true;
for (int k = 0; k < numKeypoints; ++k) {
const size_t offset = 5 + k * featuresPerKeypoint;
// CRITICAL: Bounds check
if ((offset + 2) * numDetections + d >= numFeatures * numDetections) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"Keypoint index out of bounds", __FILE__, __LINE__);
validKeypoints = false;
break;
}
const float kpt_x = rawOutput[offset * numDetections + d];
const float kpt_y = rawOutput[(offset + 1) * numDetections + d];
const float kpt_conf_raw = rawOutput[(offset + 2) * numDetections + d];
// CRITICAL: Validate keypoint values
if (std::isnan(kpt_x) || std::isnan(kpt_y) || std::isnan(kpt_conf_raw) ||
std::isinf(kpt_x) || std::isinf(kpt_y) || std::isinf(kpt_conf_raw)) {
this->_logger.LogWarn("ANSONNXPOSE::postprocess",
"NaN/Inf in keypoint " + std::to_string(k) +
" of detection " + std::to_string(d),
__FILE__, __LINE__);
validKeypoints = false;
break;
}
cv::Point2f kpt;
kpt.x = (kpt_x - padding.x) / scale;
kpt.y = (kpt_y - padding.y) / scale;
// Sigmoid activation for confidence
//kpt.confidence = 1.0f / (1.0f + std::exp(-kpt_conf_raw));
// CRITICAL: Validate sigmoid result
//if (std::isnan(kpt.confidence) || std::isinf(kpt.confidence)) {
// kpt.confidence = 0.0f; // Fallback for extreme values
//}
// Clip keypoints to image boundaries
kpt.x = std::max(0.0f, std::min(kpt.x, static_cast<float>(originalImageSize.width - 1)));
kpt.y = std::max(0.0f, std::min(kpt.y, static_cast<float>(originalImageSize.height - 1)));
keypoints.push_back(kpt);
}
if (!validKeypoints) {
continue; // Skip this detection if keypoints are invalid
}
// Store detection components
boxes.push_back(box);
confidences.push_back(objConfidence);
allKeypoints.push_back(std::move(keypoints));
validDetections++;
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Valid detections before NMS: " << validDetections);
if (boxes.empty()) {
DEBUG_PRINT("[Instance " << instanceId_ << "] No valid detections after filtering");
return {};
}
// Apply Non-Maximum Suppression
std::vector<int> indices;
try {
cv::dnn::NMSBoxes(
boxes,
confidences,
_modelConfig.modelConfThreshold,
_modelConfig.modelMNSThreshold,
indices
);
}
catch (const cv::Exception& e) {
this->_logger.LogError("ANSONNXPOSE::postprocess",
"NMS failed: " + std::string(e.what()), __FILE__, __LINE__);
return {};
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Detections after NMS: " << indices.size());
// Create final detections
detections.reserve(indices.size());
for (int idx : indices) {
// Validate index
if (idx < 0 || idx >= static_cast<int>(boxes.size())) {
this->_logger.LogWarn("ANSONNXPOSE::postprocess",
"Invalid NMS index: " + std::to_string(idx), __FILE__, __LINE__);
continue;
}
std::stringstream keypointXss;
std::stringstream keypointYss;
std::vector<float> keypointValues;
for (size_t keypointIdx = 0; keypointIdx < allKeypoints[idx].size(); keypointIdx++) {
keypointXss << allKeypoints[idx][keypointIdx].x;
keypointYss << allKeypoints[idx][keypointIdx].y;
// Add semicolon after each value except the last one
if (keypointIdx < allKeypoints[idx].size() - 1) {
keypointXss << ";";
keypointYss << ";";
}
keypointValues.push_back(allKeypoints[idx][keypointIdx].x);
keypointValues.push_back(allKeypoints[idx][keypointIdx].y);
}
std::string keypointXString = keypointXss.str();
std::string keypointYString = keypointYss.str();
std::string keypointString = keypointXString + "|" + keypointYString;
Object det;
det.box = boxes[idx];
det.confidence = confidences[idx];
det.classId = 0;
det.className = "Person";
det.cameraId = camera_id;
det.polygon = allKeypoints[idx];
det.kps = keypointValues;
det.extraInfo = keypointString;//Convert keypoint to st;
detections.push_back(det);
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Final detections: " << detections.size());
return detections;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXPOSE::postprocess",
std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
return {};
}
}
/**
 * Runs one pose-estimation pass on a BGR image.
 *
 * Pipeline: validate input -> preprocess (letterbox + blob) -> wrap blob in an
 * ONNX Runtime tensor (zero-copy) -> session.Run -> postprocess to Objects.
 *
 * @param image     Input frame (must be non-empty with positive dimensions).
 * @param camera_id Camera identifier propagated into each detection.
 * @return Detected pose objects, or an empty vector on any failure (all
 *         errors are logged, never thrown to the caller).
 *
 * Thread-safety: serialized by _mutex (recursive, so re-entry from the same
 * thread is allowed).
 */
std::vector<Object> ANSONNXPOSE::detect(const cv::Mat& image, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Blob allocated by preprocess() with new[]. Ownership is handed to the
    // RAII guard below so that every early return and every exception path
    // frees it exactly once (replaces the scattered delete[] calls that each
    // exit path previously had to remember).
    float* blobPtr = nullptr;
    struct BlobGuard {
        float*& ptr;
        void release() { delete[] ptr; ptr = nullptr; }
        ~BlobGuard() { release(); }
    } blobGuard{ blobPtr };
    try {
        // ---- Input validation ------------------------------------------------
        if (image.empty()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input image is empty", __FILE__, __LINE__);
            return {};
        }
        if (image.data == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input image data pointer is null", __FILE__, __LINE__);
            return {};
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
                __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Detecting pose in "
            << image.cols << "x" << image.rows << " image");
        // NCHW input layout: batch, channels, height, width.
        std::vector<int64_t> inputTensorShape = { 1, 3, inputImageShape.height, inputImageShape.width };
        // Preprocess the image; on success blobPtr points at the float blob.
        cv::Mat preprocessedImage = preprocess(image, blobPtr, inputTensorShape);
        if (preprocessedImage.empty() || blobPtr == nullptr) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Preprocessing failed", __FILE__, __LINE__);
            return {};
        }
        // preprocess() may update the shape; it must still be rank-4.
        if (inputTensorShape.size() != 4) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Invalid input tensor shape dimensions: " + std::to_string(inputTensorShape.size()),
                __FILE__, __LINE__);
            return {};
        }
        // Total element count; rejects any non-positive dimension on the way.
        size_t inputTensorSize = 1;
        for (auto dim : inputTensorShape) {
            if (dim <= 0) {
                this->_logger.LogError("ANSONNXPOSE::detect",
                    "Invalid dimension in tensor shape: " + std::to_string(dim),
                    __FILE__, __LINE__);
                return {};
            }
            inputTensorSize *= static_cast<size_t>(dim);
        }
        if (inputTensorSize == 0) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Input tensor size is zero", __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Tensor shape: ["
            << inputTensorShape[0] << ", " << inputTensorShape[1] << ", "
            << inputTensorShape[2] << ", " << inputTensorShape[3] << "], Size: " << inputTensorSize);
        // Created once per process: CPU memory info is stateless and reusable.
        static Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        // Wrap the blob directly (zero-copy). The blob must stay alive until
        // session.Run() returns; the guard only frees it after inference.
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo,
            blobPtr,
            inputTensorSize,
            inputTensorShape.data(),
            inputTensorShape.size()
        );
        if (!inputTensor.IsTensor()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "Failed to create input tensor", __FILE__, __LINE__);
            return {};
        }
        // ---- Inference -------------------------------------------------------
        std::vector<Ort::Value> outputTensors;
        try {
            outputTensors = session.Run(
                Ort::RunOptions{ nullptr },
                inputNames.data(),
                &inputTensor,
                numInputNodes,
                outputNames.data(),
                numOutputNodes
            );
        }
        catch (const Ort::Exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "ONNX Runtime exception during inference: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Exception during inference: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        // Inference done: the tensor no longer needs the blob. Free it now
        // (rather than at scope exit) to keep peak memory low during
        // postprocessing, matching the original release point.
        blobGuard.release();
        if (outputTensors.empty()) {
            this->_logger.LogError("ANSONNXPOSE::detect", "No output tensors from inference", __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Inference completed, processing outputs");
        // Model-input size as (width, height) for coordinate un-letterboxing.
        cv::Size resizedImageShape(
            static_cast<int>(inputTensorShape[3]),
            static_cast<int>(inputTensorShape[2])
        );
        // ---- Postprocessing --------------------------------------------------
        std::vector<Object> detections;
        try {
            detections = postprocess(image.size(), resizedImageShape, outputTensors, camera_id);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXPOSE::detect",
                "Exception during postprocessing: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Detection completed, found "
            << detections.size() << " pose(s)");
        return detections;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::detect",
            std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
            __FILE__, __LINE__);
        // blobGuard's destructor releases the blob on this path too.
        return {};
    }
}
// Public functions
// Destructor: delegates all teardown to Destroy(). Note that during
// destruction a virtual call would resolve to this class's override, not a
// derived one — presumably intentional; confirm if Destroy() is virtual.
ANSONNXPOSE::~ANSONNXPOSE() {
Destroy();
}
// Tears down this engine instance. At present there is nothing to release
// here; the method only emits a teardown trace line and reports success.
bool ANSONNXPOSE::Destroy() {
    std::ostream& sink = std::cout;
    sink << "[ANSONNXPOSE] Destroyed instance " << instanceId_ << std::endl;
    return true;
}
/**
 * Produces an optimized copy of the model.
 *
 * @param fp16                 Request half-precision optimization.
 * @param optimizedModelFolder Receives the folder holding the optimized model.
 * @return true on success.
 *
 * Delegates entirely to the base class; the previous
 * `if (!x) return false; return true;` pattern was an equivalent but
 * roundabout way of forwarding the base result.
 */
bool ANSONNXPOSE::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    return ANSODBase::OptimizeModel(fp16, optimizedModelFolder);
}
/**
 * Initializes the pose engine from an encrypted model ZIP package.
 *
 * Steps: base-class initialization -> force pose-specific model config ->
 * resolve model/class file paths (new config-file layout or legacy layout) ->
 * publish the label map -> create the ONNX Runtime session via Init().
 *
 * @param licenseKey       License key validated by the base class.
 * @param modelConfig      Caller-supplied configuration (sanitized below).
 * @param modelZipFilePath Path to the model archive.
 * @param modelZipPassword Archive password (empty if unencrypted).
 * @param labelMap         Out: comma-separated class names.
 * @return true when the session was created successfully.
 */
bool ANSONNXPOSE::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        _modelLoadValid = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Force pose-specific settings regardless of the incoming config.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
        _modelConfig.modelType = ModelType::ONNXPOSE;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp suspiciously low thresholds to sane defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        // 133 = COCO whole-body maximum; default to 17 (COCO body skeleton).
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        _fp16 = (modelConfig.precisionType == PrecisionType::FP16);
        if (FileExist(_modelConfigFile)) {
            // New-style package: classes and input shape come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Legacy package layout: fixed file names inside the model folder.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // Fixed log component tag: previously mislabeled "ANSONNXCL".
                this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Publish the label map to the caller.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);
        // 2. Initialize the ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        // Fixed log component tag: previously mislabeled "ANSONNXCL".
        this->_logger.LogFatal("ANSONNXPOSE::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/**
 * Reloads the pose model from a model ZIP package, reusing the stored
 * license/configuration state.
 *
 * @param modelZipFilePath Path to the model archive.
 * @param modelZipPassword Archive password (empty if unencrypted).
 * @return true when the package was unpacked and the ONNX session created.
 */
bool ANSONNXPOSE::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    ModelLoadingGuard mlg(_modelLoading);
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        // NOTE(review): Initialize() sets DETECTION/ONNXPOSE here, while this
        // path sets CLASSIFICATION/TENSORRT — looks like a copy/paste
        // leftover; confirm which pair downstream consumers expect before
        // changing the values.
        _modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp suspiciously low thresholds to sane defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        // 133 = COCO whole-body maximum; default to 17 (COCO body skeleton).
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133)
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        // if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
        _fp16 = true; // Load Model from Here
        // 0. Check if the configuration file exists.
        if (FileExist(_modelConfigFile)) {
            // New-style package: classes and input shape come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Legacy package layout: fixed file names inside the model folder.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // Fixed log context: previously said "Initialize" inside LoadModel.
                this->_logger.LogDebug("ANSONNXPOSE::LoadModel. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXPOSE::LoadModel. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // Initialize the ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSONNXPOSE::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
ModelLoadingGuard mlg(_modelLoading);
try {
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
if (!result) return false;
std::string _modelName = modelName;
if (_modelName.empty()) {
_modelName = "train_last";
}
std::string modelFullName = _modelName + ".onnx";
// Parsing for YOLO only here
_modelConfig = modelConfig;
_modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
_modelConfig.modelType = ModelType::TENSORRT;
_modelConfig.inpHeight = 640;
_modelConfig.inpWidth = 640;
if (_modelConfig.modelMNSThreshold < 0.2)
_modelConfig.modelMNSThreshold = 0.5;
if (_modelConfig.modelConfThreshold < 0.2)
_modelConfig.modelConfThreshold = 0.5;
// After
if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
_modelConfig.numKPS = 17;
if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define
_fp16 = true; // Load Model from Here
// 0. Check if the configuration file exist
if (FileExist(_modelConfigFile)) {
ModelType modelType;
std::vector<int> inputShape;
_classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
if (inputShape.size() == 2) {
if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0];
if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1];
}
}
else {// This is old version of model zip file
_modelFilePath = CreateFilePath(_modelFolder, modelFullName);
_classFilePath = CreateFilePath(_modelFolder, className);
std::ifstream isValidFileName(_classFilePath);
if (!isValidFileName)
{
this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
LoadClassesFromString();
}
else {
this->_logger.LogDebug("ANSONNXPOSE::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
LoadClassesFromFile();
}
}
// 1. Load labelMap and engine
labelMap.clear();
if (!_classes.empty())
labelMap = VectorToCommaSeparatedString(_classes);
// 2. Initialize ONNX Runtime session
instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
_modelLoadValid = true;
_isInitialized = true;
return result;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXPOSE::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
return false;
}
}
/**
 * Public inference entry point: validates the frame, runs pose detection,
 * and optionally applies tracking and stabilization to the raw detections.
 *
 * @param input     Input frame; frames smaller than 5x5 pixels are rejected.
 * @param camera_id Camera identifier propagated into each detection.
 * @return Detections (possibly tracked/stabilized); empty on any failure.
 */
std::vector<Object> ANSONNXPOSE::RunInference(const cv::Mat& input, const std::string& camera_id) {
    if (!PreInferenceCheck("ANSONNXPOSE::RunInference")) return {};
    try {
        // Reject empty or degenerate frames up front.
        if (input.empty() || input.cols < 5 || input.rows < 5) {
            return {};
        }
        std::vector<Object> objects = detect(input, camera_id);
        if (_trackerEnabled) {
            objects = ApplyTracking(objects, camera_id);
            if (_stabilizationEnabled) {
                objects = StabilizeDetections(objects, camera_id);
            }
        }
        return objects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXPOSE::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
// Convenience overload: runs inference with the default camera identifier
// "CustomCam" (this exact string is part of runtime behavior — detections
// carry it as their cameraId).
std::vector<Object> ANSONNXPOSE::RunInference(const cv::Mat& inputImgBGR) {
return RunInference(inputImgBGR, "CustomCam");
}
}