// ANSCORE/ANSODEngine/ANSONNXSEG.cpp
// (original listing: 1377 lines, 46 KiB, C++)
#include "ANSONNXSEG.h"
#include "EPLoader.h"

#include <memory>
#include <mutex>
namespace ANSCENTER {
std::atomic<int> ANSONNXSEG::instanceCounter_(0); // Initialize static member
size_t ANSONNXSEG::vectorProduct(const std::vector<int64_t>& shape) {
    // Product of all dimensions — the element count of a tensor with this shape.
    size_t count = 1;
    for (const int64_t dim : shape) {
        count *= static_cast<size_t>(dim);
    }
    return count;
}
void ANSONNXSEG::letterBox(const cv::Mat& image, cv::Mat& outImage,
                           const cv::Size& newShape,
                           const cv::Scalar& color,
                           bool auto_,
                           bool scaleFill,
                           bool scaleUp,
                           int stride)
{
    // Resize `image` keeping its aspect ratio, then pad it out to `newShape`
    // (YOLO-style letterbox). Padding is split evenly between opposite edges.
    const float ratioH = static_cast<float>(newShape.height) / static_cast<float>(image.rows);
    const float ratioW = static_cast<float>(newShape.width) / static_cast<float>(image.cols);
    float ratio = std::min(ratioH, ratioW);
    if (!scaleUp && ratio > 1.0f) {
        ratio = 1.0f; // shrink only — never enlarge small images
    }
    int resizedW = static_cast<int>(std::round(image.cols * ratio));
    int resizedH = static_cast<int>(std::round(image.rows * ratio));
    int padTotalW = newShape.width - resizedW;
    int padTotalH = newShape.height - resizedH;
    if (auto_) {
        // Pad only up to the next multiple of `stride` instead of the full target.
        padTotalW %= stride;
        padTotalH %= stride;
    }
    else if (scaleFill) {
        // Stretch to fill the target exactly: no padding, aspect ratio not preserved.
        resizedW = newShape.width;
        resizedH = newShape.height;
        padTotalW = 0;
        padTotalH = 0;
    }
    cv::Mat scaled;
    cv::resize(image, scaled, cv::Size(resizedW, resizedH), 0, 0, cv::INTER_LINEAR);
    const int padTop = padTotalH / 2;
    const int padLeft = padTotalW / 2;
    cv::copyMakeBorder(scaled, outImage,
                       padTop, padTotalH - padTop,
                       padLeft, padTotalW - padLeft,
                       cv::BORDER_CONSTANT, color);
}
// Greedy IoU-based non-maximum suppression.
// boxes/scores   : parallel arrays of candidate boxes and their confidences.
// scoreThreshold : candidates scoring below this are discarded up front.
// nmsThreshold   : IoU above which a lower-scored overlapping box is dropped.
// indices        : out — indices of kept boxes, ordered by descending score.
void ANSONNXSEG::NMSBoxes(const std::vector<BoundingBox>& boxes,
const std::vector<float>& scores,
float scoreThreshold,
float nmsThreshold,
std::vector<int>& indices)
{
indices.clear();
// Guard against empty input and mismatched box/score arrays — the original
// indexed scores[i] without verifying the sizes agree.
if (boxes.empty() || scores.size() != boxes.size()) {
return;
}
std::vector<int> order;
order.reserve(boxes.size());
for (size_t i = 0; i < boxes.size(); ++i) {
if (scores[i] >= scoreThreshold) {
order.push_back((int)i);
}
}
if (order.empty()) return;
// Highest score first.
std::sort(order.begin(), order.end(),
[&scores](int a, int b) {
return scores[a] > scores[b];
});
// Precompute areas in float: width*height in int could overflow for huge boxes.
std::vector<float> areas(boxes.size());
for (size_t i = 0; i < boxes.size(); ++i) {
areas[i] = (float)boxes[i].width * (float)boxes[i].height;
}
std::vector<bool> suppressed(boxes.size(), false);
for (size_t i = 0; i < order.size(); ++i) {
int idx = order[i];
if (suppressed[idx]) continue;
indices.push_back(idx);
// Suppress every remaining lower-scored box overlapping this one too much.
for (size_t j = i + 1; j < order.size(); ++j) {
int idx2 = order[j];
if (suppressed[idx2]) continue;
const BoundingBox& a = boxes[idx];
const BoundingBox& b = boxes[idx2];
int interX1 = std::max(a.x, b.x);
int interY1 = std::max(a.y, b.y);
int interX2 = std::min(a.x + a.width, b.x + b.width);
int interY2 = std::min(a.y + a.height, b.y + b.height);
int w = interX2 - interX1;
int h = interY2 - interY1;
if (w > 0 && h > 0) {
float interArea = (float)w * (float)h;
float unionArea = areas[idx] + areas[idx2] - interArea;
float iou = (unionArea > 0.f) ? (interArea / unionArea) : 0.f;
if (iou > nmsThreshold) {
suppressed[idx2] = true;
}
}
}
}
}
cv::Mat ANSONNXSEG::sigmoid(const cv::Mat& src) {
    // Element-wise logistic function: 1 / (1 + exp(-x)).
    cv::Mat negExp;
    cv::exp(-src, negExp);
    cv::Mat result = 1.0 / (1.0 + negExp);
    return result;
}
BoundingBox ANSONNXSEG::scaleCoords(const cv::Size& letterboxShape,
                                    const BoundingBox& coords,
                                    const cv::Size& originalShape,
                                    bool p_Clip)
{
    // Map a box from letterboxed-image coordinates back to original-image
    // coordinates by undoing the resize gain and the symmetric padding.
    const float gainH = (float)letterboxShape.height / (float)originalShape.height;
    const float gainW = (float)letterboxShape.width / (float)originalShape.width;
    const float gain = std::min(gainH, gainW);
    // Padding that the letterbox added on each side.
    const int padX = static_cast<int>(std::round(((float)letterboxShape.width - (float)originalShape.width * gain) / 2.f));
    const int padY = static_cast<int>(std::round(((float)letterboxShape.height - (float)originalShape.height * gain) / 2.f));
    BoundingBox mapped;
    mapped.x = static_cast<int>(std::round(((float)coords.x - (float)padX) / gain));
    mapped.y = static_cast<int>(std::round(((float)coords.y - (float)padY) / gain));
    mapped.width = static_cast<int>(std::round((float)coords.width / gain));
    mapped.height = static_cast<int>(std::round((float)coords.height / gain));
    if (p_Clip) {
        // Clip to the original image bounds.
        mapped.x = clamp(mapped.x, 0, originalShape.width);
        mapped.y = clamp(mapped.y, 0, originalShape.height);
        mapped.width = clamp(mapped.width, 0, originalShape.width - mapped.x);
        mapped.height = clamp(mapped.height, 0, originalShape.height - mapped.y);
    }
    return mapped;
}
// Produces a deterministic color per class name (seeded RNG), memoized per
// distinct class-name list.
// classNames : labels whose combined hash keys the cache.
// seed       : RNG seed so the same list + seed always yields the same palette.
// Returns one BGR cv::Scalar per class name.
std::vector<cv::Scalar> ANSONNXSEG::generateColors(const std::vector<std::string>& classNames, int seed) {
// The cache is function-local static shared by every caller; this static
// method can be reached from several instances/threads at once, so guard it
// (the original read/wrote the map unsynchronized — a data race).
static std::mutex cacheMutex;
static std::unordered_map<size_t, std::vector<cv::Scalar>> cache;
// Fold all class-name hashes into a single cache key (boost::hash_combine style).
size_t key = 0;
for (const auto& name : classNames) {
size_t h = std::hash<std::string>{}(name);
key ^= (h + 0x9e3779b9 + (key << 6) + (key >> 2));
}
std::lock_guard<std::mutex> lock(cacheMutex);
auto it = cache.find(key);
if (it != cache.end()) {
return it->second;
}
std::mt19937 rng(seed);
std::uniform_int_distribution<int> dist(0, 255);
std::vector<cv::Scalar> colors;
colors.reserve(classNames.size());
for (size_t i = 0; i < classNames.size(); ++i) {
colors.emplace_back(cv::Scalar(dist(rng), dist(rng), dist(rng)));
}
cache[key] = colors;
return colors;
}
void ANSONNXSEG::drawSegmentations(cv::Mat& image,
                                   const std::vector<Object>& results,
                                   float maskAlpha) const
{
    // Overlay only the instance masks (no boxes or labels) onto `image`.
    for (const auto& obj : results) {
        if (obj.confidence < _modelConfig.detectionScoreThreshold) {
            continue; // below the configured score threshold
        }
        if (obj.mask.empty()) {
            continue; // nothing to draw for this detection
        }
        const cv::Scalar color = classColors[obj.classId % classColors.size()];
        // Reduce the mask to a single channel if it arrived as BGR.
        cv::Mat gray;
        if (obj.mask.channels() == 3) {
            cv::cvtColor(obj.mask, gray, cv::COLOR_BGR2GRAY);
        }
        else {
            gray = obj.mask.clone();
        }
        // Binarize: foreground -> 255, background -> 0.
        cv::Mat binary;
        cv::threshold(gray, binary, 127, 255, cv::THRESH_BINARY);
        // Paint the class color where the mask is set, then alpha-blend it in.
        cv::Mat overlay;
        cv::cvtColor(binary, overlay, cv::COLOR_GRAY2BGR);
        overlay.setTo(color, binary);
        cv::addWeighted(image, 1.0, overlay, maskAlpha, 0, image);
    }
}
// For each detection above the configured score threshold, draws in place:
//   1. its bounding box, 2. a "<class> <confidence>%" label, 3. its mask
//      blended over the image with weight `maskAlpha`.
void ANSONNXSEG::drawSegmentationsAndBoxes(cv::Mat& image,
const std::vector<Object>& results,
float maskAlpha) const
{
for (const auto& seg : results) {
if (seg.confidence < _modelConfig.detectionScoreThreshold) {
continue;
}
cv::Scalar color = classColors[seg.classId % classColors.size()];
// -----------------------------
// 1. Draw Bounding Box
// -----------------------------
cv::rectangle(image,
cv::Point(seg.box.x, seg.box.y),
cv::Point(seg.box.x + seg.box.width, seg.box.y + seg.box.height),
color, 2);
// -----------------------------
// 2. Draw Label
// -----------------------------
// Bounds-check the class-name lookup: the original indexed _classes with an
// unvalidated classId, which is undefined behavior when it is out of range.
const std::string className =
(seg.classId >= 0 && seg.classId < static_cast<int>(_classes.size()))
? _classes[seg.classId]
: "class_" + std::to_string(seg.classId);
std::string label = className + " " + std::to_string(static_cast<int>(seg.confidence * 100)) + "%";
int baseLine = 0;
double fontScale = 0.5;
int thickness = 1;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, fontScale, thickness, &baseLine);
// Keep the label on-screen when the box touches the top edge.
int top = std::max(seg.box.y, labelSize.height + 5);
cv::rectangle(image,
cv::Point(seg.box.x, top - labelSize.height - 5),
cv::Point(seg.box.x + labelSize.width + 5, top),
color, cv::FILLED);
cv::putText(image, label,
cv::Point(seg.box.x + 2, top - 2),
cv::FONT_HERSHEY_SIMPLEX,
fontScale,
cv::Scalar(255, 255, 255),
thickness);
// -----------------------------
// 3. Apply Segmentation Mask
// -----------------------------
if (!seg.mask.empty()) {
// Ensure the mask is single-channel
cv::Mat mask_gray;
if (seg.mask.channels() == 3) {
cv::cvtColor(seg.mask, mask_gray, cv::COLOR_BGR2GRAY);
}
else {
mask_gray = seg.mask.clone();
}
// Threshold the mask to binary (object: 255, background: 0)
cv::Mat mask_binary;
cv::threshold(mask_gray, mask_binary, 127, 255, cv::THRESH_BINARY);
// Create a colored version of the mask
cv::Mat colored_mask;
cv::cvtColor(mask_binary, colored_mask, cv::COLOR_GRAY2BGR);
colored_mask.setTo(color, mask_binary); // Apply color where mask is present
// Blend the colored mask with the original image
cv::addWeighted(image, 1.0, colored_mask, maskAlpha, 0, image);
}
}
}
// Builds the ONNX Runtime session for this instance:
//   1. binds the ORT C API from the runtime selected by EPLoader,
//   2. creates a per-instance Ort::Env and SessionOptions,
//   3. when useGPU is set, attaches the execution provider matching the
//      detected hardware (CUDA / DirectML / OpenVINO) with per-instance
//      options; if no EP attaches, inference falls back to CPU,
//   4. loads the model, resolves the input node shape and the two expected
//      output nodes, and
//   5. runs a warmup pass.
// modelPath : filesystem path to the .onnx model file.
// useGPU    : when false, no GPU EP is attached (CPU inference).
// deviceId  : GPU device index forwarded to the chosen EP.
// Returns true on success; logs fatally and returns false on any exception.
bool ANSONNXSEG::Init(const std::string& modelPath, bool useGPU, int deviceId)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
deviceId_ = deviceId;
const auto& ep = ANSCENTER::EPLoader::Current();
// Bind the ORT C API table from the dynamically loaded runtime, once per process.
if (Ort::Global<void>::api_ == nullptr)
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
std::cout << "[ANSONNXSEG] EP ready: "
<< ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;
// Unique environment name per instance to avoid conflicts
std::string envName = "ONNX_SEG_INST" + std::to_string(instanceId_);
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, envName.c_str());
sessionOptions = Ort::SessionOptions();
// Cap intra-op threads at 6 (or the hardware concurrency, whichever is lower).
sessionOptions.SetIntraOpNumThreads(
std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
// ── Log available providers ─────────────────────────────────────────
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
std::cout << "[Instance " << instanceId_ << "] Available Execution Providers:" << std::endl;
for (const auto& p : availableProviders)
std::cout << " - " << p << std::endl;
// ── Attach EP based on runtime-detected hardware ────────────────────
if (useGPU) {
bool attached = false;
switch (ep.type) {
case ANSCENTER::EngineType::NVIDIA_GPU: {
// CUDA path: requires the CUDA build of ORT to expose CUDAExecutionProvider.
auto it = std::find(availableProviders.begin(),
availableProviders.end(), "CUDAExecutionProvider");
if (it == availableProviders.end()) {
this->_logger.LogError("ANSONNXSEG::Init", "CUDAExecutionProvider not in DLL — "
"check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
break;
}
try {
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
std::string deviceIdStr = std::to_string(deviceId_);
const char* keys[] = { "device_id" };
const char* values[] = { deviceIdStr.c_str() };
Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
std::cout << "[Instance " << instanceId_ << "] CUDA EP attached on device "
<< deviceId_ << "." << std::endl;
attached = true;
}
catch (const Ort::Exception& e) {
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
}
break;
}
case ANSCENTER::EngineType::AMD_GPU: {
// AMD path: uses DirectML ("DML") when the DirectML ORT build is present.
auto it = std::find(availableProviders.begin(),
availableProviders.end(), "DmlExecutionProvider");
if (it == availableProviders.end()) {
this->_logger.LogError("ANSONNXSEG::Init", "DmlExecutionProvider not in DLL — "
"check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
break;
}
try {
std::unordered_map<std::string, std::string> opts = {
{ "device_id", std::to_string(deviceId_) }
};
sessionOptions.AppendExecutionProvider("DML", opts);
std::cout << "[Instance " << instanceId_ << "] DirectML EP attached on device "
<< deviceId_ << "." << std::endl;
attached = true;
}
catch (const Ort::Exception& e) {
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
}
break;
}
case ANSCENTER::EngineType::OPENVINO_GPU: {
auto it = std::find(availableProviders.begin(),
availableProviders.end(), "OpenVINOExecutionProvider");
if (it == availableProviders.end()) {
this->_logger.LogError("ANSONNXSEG::Init", "OpenVINOExecutionProvider not in DLL — "
"check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
break;
}
// FP32 + single thread preserved for determinism; each instance gets its own stream and cache
const std::string precision = "FP32";
const std::string numberOfThreads = "1";
const std::string numberOfStreams = std::to_string(instanceId_ + 1);
const std::string primaryDevice = "GPU." + std::to_string(deviceId_);
const std::string cacheDir = "./ov_cache_inst" + std::to_string(instanceId_);
// Try configs from most specific to most permissive:
// exact GPU index -> any GPU -> AUTO with CPU fallback.
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
{ {"device_type", primaryDevice}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
{"cache_dir", cacheDir} },
{ {"device_type","GPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
{"cache_dir", cacheDir} },
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"},
{"cache_dir", cacheDir} }
};
for (const auto& config : try_configs) {
try {
sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
std::cout << "[Instance " << instanceId_ << "] OpenVINO EP attached ("
<< config.at("device_type") << ", stream: " << numberOfStreams << ")." << std::endl;
attached = true;
break;
}
catch (const Ort::Exception& e) {
this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
}
}
if (!attached)
std::cerr << "[Instance " << instanceId_ << "] OpenVINO EP: all device configs failed." << std::endl;
break;
}
default:
break;
}
if (!attached) {
// GPU was requested but no EP attached — continue on CPU rather than fail.
std::cerr << "[Instance " << instanceId_ << "] No GPU EP attached — running on CPU." << std::endl;
this->_logger.LogFatal("ANSONNXSEG::Init", "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
}
}
else {
std::cout << "[Instance " << instanceId_ << "] Inference device: CPU (useGPU=false)" << std::endl;
}
// ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
// NOTE(review): byte-wise narrow->wide copy; non-ASCII model paths will be
// mangled on Windows. Consider a proper UTF-8 -> UTF-16 conversion
// (MultiByteToWideChar) — TODO confirm the encoding callers pass in.
std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
#else
session = Ort::Session(env, modelPath.c_str(), sessionOptions);
#endif
numInputNodes = session.GetInputCount();
numOutputNodes = session.GetOutputCount();
Ort::AllocatorWithDefaultOptions allocator;
// ── Input node name & shape ─────────────────────────────────────────
{
auto inNameAlloc = session.GetInputNameAllocated(0, allocator);
inputNameAllocs.emplace_back(std::move(inNameAlloc));
inputNames.push_back(inputNameAllocs.back().get());
auto inShape = session.GetInputTypeInfo(0)
.GetTensorTypeAndShapeInfo().GetShape();
if (inShape.size() == 4) {
// -1 in H/W marks a dynamic axis; fall back to the configured size.
if (inShape[2] == -1 || inShape[3] == -1) {
isDynamicInputShape = true;
inputImageShape = cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight);
std::cout << "[Instance " << instanceId_ << "] Dynamic input shape — "
"using config default: " << inputImageShape.width
<< "x" << inputImageShape.height << std::endl;
}
else {
isDynamicInputShape = false;
// ORT shape is [N, C, H, W]; cv::Size takes (width, height).
inputImageShape = cv::Size(
static_cast<int>(inShape[3]),
static_cast<int>(inShape[2]));
std::cout << "[Instance " << instanceId_ << "] Fixed input shape: "
<< inputImageShape.width << "x" << inputImageShape.height << std::endl;
}
}
else {
throw std::runtime_error("Model input is not 4D! Expect [N, C, H, W].");
}
}
// ── Output node names (segmentation always has exactly 2) ───────────
if (numOutputNodes != 2)
throw std::runtime_error("Expected exactly 2 output nodes: output0 and output1.");
for (size_t i = 0; i < numOutputNodes; ++i) {
auto outNameAlloc = session.GetOutputNameAllocated(i, allocator);
outputNameAllocs.emplace_back(std::move(outNameAlloc));
outputNames.push_back(outputNameAllocs.back().get());
}
std::cout << "[Instance " << instanceId_ << "] Model loaded successfully — "
<< numInputNodes << " input, " << numOutputNodes << " output nodes." << std::endl;
// ── Warmup ──────────────────────────────────────────────────────────
DEBUG_PRINT("[Instance " << instanceId_ << "] Starting warmup...");
warmupModel();
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup completed successfully.");
return true;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXSEG::Init",
std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
return false;
}
}
void ANSONNXSEG::warmupModel() {
try {
// Create dummy input image with correct size
cv::Mat dummyImage = cv::Mat::zeros(inputImageShape.height, inputImageShape.width, CV_8UC3);
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup: dummy image "
<< dummyImage.cols << "x" << dummyImage.rows);
// Run 3 warmup inferences to stabilize
for (int i = 0; i < 3; ++i) {
try {
// Your preprocessing logic here
float* blob = nullptr;
std::vector<int64_t> inputShape;
// If you have a preprocess method, call it
// Otherwise, create a simple dummy tensor
size_t tensorSize = 1 * 3 * inputImageShape.height * inputImageShape.width;
blob = new float[tensorSize];
std::memset(blob, 0, tensorSize * sizeof(float));
inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
// Create input tensor
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
memoryInfo,
blob,
tensorSize,
inputShape.data(),
inputShape.size()
);
// Run inference
std::vector<Ort::Value> outputTensors = session.Run(
Ort::RunOptions{ nullptr },
inputNames.data(),
&inputTensor,
1,
outputNames.data(),
numOutputNodes
);
// Clean up
delete[] blob;
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup " << (i + 1) << "/3 completed");
}
catch (const std::exception& e) {
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup iteration " << i
<< " failed (non-critical): " << e.what());
}
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup successful - all states initialized");
}
catch (const std::exception& e) {
this->_logger.LogWarn("ANSONNXSEG::warmupModel",
std::string("[Instance ") + std::to_string(instanceId_) + "] Warmup failed: " + e.what(),
__FILE__, __LINE__);
}
}
// Letterboxes `image` to the model input size, normalizes to [0,1], and
// writes the result into a freshly new[]-allocated NCHW float blob.
// image            : input frame (validated non-empty).
// blobPtr          : out — receives a CHW float buffer the caller must
//                    delete[]; any previous buffer is freed first.
// inputTensorShape : in/out — [1,3,H,W]; H/W updated to the letterboxed size.
// Returns the letterboxed image, or an empty Mat on any failure (logged).
cv::Mat ANSONNXSEG::preprocess(const cv::Mat& image, float*& blobPtr, std::vector<int64_t>& inputTensorShape) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
m_imgWidth = image.cols;
m_imgHeight = image.rows;
try {
// Validate input image
if (image.empty() || image.data == nullptr) {
this->_logger.LogError("ANSONNXSEG::preprocess", "Input image is empty or null", __FILE__, __LINE__);
return cv::Mat();
}
if (image.cols <= 0 || image.rows <= 0) {
this->_logger.LogError("ANSONNXSEG::preprocess",
"Invalid image dimensions: " + std::to_string(image.cols) + "x" + std::to_string(image.rows),
__FILE__, __LINE__);
return cv::Mat();
}
// Value-range sanity check. cv::minMaxLoc only accepts single-channel
// input, so flatten channels with reshape(1) first — the original called
// it on the multi-channel Mat directly, which throws for color images.
// (reshape(1) requires a continuous Mat; a non-continuous ROI still throws
// and is handled by the cv::Exception catch below, same as before.)
double minVal, maxVal;
cv::minMaxLoc(image.reshape(1), &minVal, &maxVal);
if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
this->_logger.LogError("ANSONNXSEG::preprocess",
"Input image contains NaN or Inf values. Range: [" + std::to_string(minVal) +
", " + std::to_string(maxVal) + "]", __FILE__, __LINE__);
return cv::Mat();
}
DEBUG_PRINT("[Instance " << instanceId_ << "] Input: " << image.cols << "x" << image.rows
<< ", channels=" << image.channels()
<< ", type=" << image.type()
<< ", range=[" << minVal << ", " << maxVal << "]");
// Apply letterbox preprocessing (pad value 114 matches YOLO convention).
cv::Mat letterboxImage;
letterBox(image, letterboxImage, inputImageShape,
cv::Scalar(114, 114, 114),
/*auto_=*/isDynamicInputShape,
/*scaleFill=*/false,
/*scaleUp=*/true,
/*stride=*/32);
// Validate letterbox output
if (letterboxImage.empty() || letterboxImage.rows <= 0 || letterboxImage.cols <= 0) {
this->_logger.LogError("ANSONNXSEG::preprocess",
"Letterbox preprocessing failed", __FILE__, __LINE__);
return cv::Mat();
}
// Update tensor shape for dynamic input
inputTensorShape[2] = static_cast<int64_t>(letterboxImage.rows);
inputTensorShape[3] = static_cast<int64_t>(letterboxImage.cols);
// Normalize to [0, 1] range
letterboxImage.convertTo(letterboxImage, CV_32FC3, 1.0f / 255.0f);
// Allocate blob memory
const size_t totalPixels = static_cast<size_t>(letterboxImage.rows) *
static_cast<size_t>(letterboxImage.cols);
const size_t blobSize = totalPixels * 3;
// Clean up any existing blob so repeated calls don't leak.
if (blobPtr != nullptr) {
delete[] blobPtr;
blobPtr = nullptr;
}
blobPtr = new float[blobSize];
// Split channels into CHW format (NCHW for ONNX): each channel Mat aliases
// its slice of the blob, so cv::split writes straight into the buffer.
std::vector<cv::Mat> channels(3);
const int pixelsPerChannel = letterboxImage.rows * letterboxImage.cols;
for (int c = 0; c < 3; ++c) {
channels[c] = cv::Mat(letterboxImage.rows, letterboxImage.cols, CV_32FC1,
blobPtr + c * pixelsPerChannel);
}
cv::split(letterboxImage, channels);
return letterboxImage;
}
catch (const cv::Exception& e) {
this->_logger.LogFatal("ANSONNXSEG::preprocess",
"[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
__FILE__, __LINE__);
if (blobPtr != nullptr) {
delete[] blobPtr;
blobPtr = nullptr;
}
return cv::Mat();
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXSEG::preprocess",
"[Instance " + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
if (blobPtr != nullptr) {
delete[] blobPtr;
blobPtr = nullptr;
}
return cv::Mat();
}
}
std::vector<cv::Point2f> ANSONNXSEG::maskToPolygon(const cv::Mat& binaryMask,
                                                   const cv::Rect& boundingBox,
                                                   float simplificationEpsilon,
                                                   int minContourArea)
{
    // Extracts the largest contour of `binaryMask` within `boundingBox` and
    // returns it as a simplified polygon in full-image coordinates.
    // Returns an empty polygon when the mask is invalid, no contour is found,
    // or the best contour is smaller than `minContourArea`.
    std::vector<cv::Point2f> polygon;
    try {
        if (binaryMask.empty() || binaryMask.type() != CV_8UC1) {
            return polygon; // only 8-bit single-channel masks are supported
        }
        // Restrict the search to the detection's box, clipped to the mask bounds.
        const cv::Rect roi = boundingBox & cv::Rect(0, 0, binaryMask.cols, binaryMask.rows);
        if (roi.area() <= 0) {
            return polygon;
        }
        std::vector<std::vector<cv::Point>> contours;
        std::vector<cv::Vec4i> hierarchy;
        // clone(): findContours may modify its input on older OpenCV versions.
        cv::findContours(binaryMask(roi).clone(), contours, hierarchy,
                         cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
        if (contours.empty()) {
            return polygon;
        }
        // Pick the biggest contour that also clears the minimum-area filter.
        int bestIdx = 0;
        double bestArea = 0.0;
        for (size_t i = 0; i < contours.size(); ++i) {
            const double area = cv::contourArea(contours[i]);
            if (area > bestArea && area >= minContourArea) {
                bestArea = area;
                bestIdx = static_cast<int>(i);
            }
        }
        if (bestArea < minContourArea) {
            return polygon;
        }
        // Reduce the vertex count, then shift back into full-image coordinates.
        std::vector<cv::Point> simplified;
        cv::approxPolyDP(contours[bestIdx], simplified, simplificationEpsilon, true);
        polygon.reserve(simplified.size());
        for (const auto& pt : simplified) {
            polygon.emplace_back(static_cast<float>(pt.x + roi.x),
                                 static_cast<float>(pt.y + roi.y));
        }
        return polygon;
    }
    catch (const cv::Exception&) {
        // Swallow OpenCV failures and report "no polygon" to the caller.
        polygon.clear();
        return polygon;
    }
}
// Decodes the raw YOLO-seg outputs into final detections:
//   output0 [1, 4+numClasses+32, N] — per-anchor box (xywh), class scores,
//                                     and 32 mask coefficients (layout
//                                     implied by the offsets computed below);
//   output1 [1, 32, H, W]           — prototype masks.
// Pipeline: confidence filter -> NMS -> box rescale to original image ->
// per-instance mask (coeffs x prototypes, sigmoid, crop, resize, binarize,
// clip to box) -> polygon extraction with box fallback.
// Returns an empty vector on any failure (logged) or when nothing survives.
std::vector<Object> ANSONNXSEG::postprocess(
const cv::Size& origSize,
const cv::Size& letterboxSize,
const std::vector<Ort::Value>& outputs,
const std::string& camera_id)
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Validate outputs
if (outputs.size() < 2) {
throw std::runtime_error("Insufficient model outputs. Expected at least 2, got " +
std::to_string(outputs.size()));
}
// Extract output tensors
const float* detections = outputs[0].GetTensorData<float>();
const float* prototypes = outputs[1].GetTensorData<float>();
// Get tensor shapes
auto detectionShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); // [1, 116, N]
auto prototypeShape = outputs[1].GetTensorTypeAndShapeInfo().GetShape(); // [1, 32, H, W]
// NOTE(review): detectionShape is assumed to have rank 3; indices [1]/[2]
// are read below without a size check — consider validating like the
// prototype shape is.
// Validate prototype shape
if (prototypeShape.size() != 4 || prototypeShape[0] != 1 || prototypeShape[1] != 32) {
throw std::runtime_error("Invalid prototype shape. Expected [1, 32, H, W], got [" +
std::to_string(prototypeShape[0]) + ", " +
std::to_string(prototypeShape[1]) + ", " +
std::to_string(prototypeShape[2]) + ", " +
std::to_string(prototypeShape[3]) + "]");
}
// Extract dimensions
const size_t numFeatures = detectionShape[1]; // 116 = 4 bbox + 80 classes + 32 masks
const size_t numDetections = detectionShape[2];
const int maskH = static_cast<int>(prototypeShape[2]);
const int maskW = static_cast<int>(prototypeShape[3]);
// Early exit if no detections
if (numDetections == 0) {
return {};
}
// Calculate feature offsets within each detection column.
constexpr int BOX_OFFSET = 0;
constexpr int BOX_SIZE = 4;
constexpr int MASK_COEFFS_SIZE = 32;
const int numClasses = static_cast<int>(numFeatures - BOX_SIZE - MASK_COEFFS_SIZE);
if (numClasses <= 0) {
throw std::runtime_error("Invalid number of classes: " + std::to_string(numClasses));
}
const int CLASS_CONF_OFFSET = BOX_OFFSET + BOX_SIZE;
const int MASK_COEFF_OFFSET = CLASS_CONF_OFFSET + numClasses;
// 1. Extract and cache prototype masks (cloned so they own their data,
// independent of the ORT output buffer's lifetime).
std::vector<cv::Mat> prototypeMasks;
prototypeMasks.reserve(MASK_COEFFS_SIZE);
const int prototypeSize = maskH * maskW;
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
cv::Mat proto(maskH, maskW, CV_32FC1,
const_cast<float*>(prototypes + m * prototypeSize));
prototypeMasks.emplace_back(proto.clone());
}
// 2. Process detections and filter by confidence.
// Data is feature-major: element (f, i) lives at detections[f * N + i].
std::vector<BoundingBox> boxes;
std::vector<float> confidences;
std::vector<int> classIds;
std::vector<std::vector<float>> maskCoefficients;
boxes.reserve(numDetections);
confidences.reserve(numDetections);
classIds.reserve(numDetections);
maskCoefficients.reserve(numDetections);
const int numBoxes = static_cast<int>(numDetections);
for (int i = 0; i < numBoxes; ++i) {
// Find best class and confidence
float maxConf = 0.0f;
int bestClassId = -1;
for (int c = 0; c < numClasses; ++c) {
const float conf = detections[(CLASS_CONF_OFFSET + c) * numBoxes + i];
if (conf > maxConf) {
maxConf = conf;
bestClassId = c;
}
}
// Skip low confidence detections
if (maxConf < _modelConfig.detectionScoreThreshold) {
continue;
}
// Extract bounding box (xywh format, center-based)
const float xc = detections[BOX_OFFSET * numBoxes + i];
const float yc = detections[(BOX_OFFSET + 1) * numBoxes + i];
const float w = detections[(BOX_OFFSET + 2) * numBoxes + i];
const float h = detections[(BOX_OFFSET + 3) * numBoxes + i];
// Convert center-xywh to top-left-xywh and store
boxes.push_back({
static_cast<int>(std::round(xc - w * 0.5f)),
static_cast<int>(std::round(yc - h * 0.5f)),
static_cast<int>(std::round(w)),
static_cast<int>(std::round(h))
});
confidences.push_back(maxConf);
classIds.push_back(bestClassId);
// Extract mask coefficients
std::vector<float> coeffs(MASK_COEFFS_SIZE);
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
coeffs[m] = detections[(MASK_COEFF_OFFSET + m) * numBoxes + i];
}
maskCoefficients.emplace_back(std::move(coeffs));
}
// Early exit if no valid detections
if (boxes.empty()) {
return {};
}
// 3. Apply Non-Maximum Suppression
std::vector<int> nmsIndices;
NMSBoxes(boxes, confidences,
_modelConfig.modelConfThreshold,
_modelConfig.modelMNSThreshold,
nmsIndices);
if (nmsIndices.empty()) {
return {};
}
// 4. Calculate coordinate transformation parameters (inverse of letterbox).
const float scale = std::min(
static_cast<float>(letterboxSize.width) / origSize.width,
static_cast<float>(letterboxSize.height) / origSize.height
);
const int scaledW = static_cast<int>(origSize.width * scale);
const int scaledH = static_cast<int>(origSize.height * scale);
const float padW = (letterboxSize.width - scaledW) * 0.5f;
const float padH = (letterboxSize.height - scaledH) * 0.5f;
// Mask coordinate transformation: letterbox space -> prototype mask space.
const float maskScaleX = static_cast<float>(maskW) / letterboxSize.width;
const float maskScaleY = static_cast<float>(maskH) / letterboxSize.height;
// Define crop region in mask space (with small padding to avoid edge artifacts)
constexpr float CROP_PADDING = 0.5f;
const int cropX1 = std::clamp(
static_cast<int>(std::round((padW - CROP_PADDING) * maskScaleX)),
0, maskW - 1
);
const int cropY1 = std::clamp(
static_cast<int>(std::round((padH - CROP_PADDING) * maskScaleY)),
0, maskH - 1
);
const int cropX2 = std::clamp(
static_cast<int>(std::round((letterboxSize.width - padW + CROP_PADDING) * maskScaleX)),
cropX1 + 1, maskW
);
const int cropY2 = std::clamp(
static_cast<int>(std::round((letterboxSize.height - padH + CROP_PADDING) * maskScaleY)),
cropY1 + 1, maskH
);
const cv::Rect cropRect(cropX1, cropY1, cropX2 - cropX1, cropY2 - cropY1);
// 5. Generate final results with masks
std::vector<Object> results;
results.reserve(nmsIndices.size());
for (const int idx : nmsIndices) {
Object result;
// Scale bounding box to original image coordinates
BoundingBox scaledBox = scaleCoords(letterboxSize, boxes[idx], origSize, true);
result.box.x = scaledBox.x;
result.box.y = scaledBox.y;
result.box.width = scaledBox.width;
result.box.height = scaledBox.height;
result.confidence = confidences[idx];
result.classId = classIds[idx];
// Generate instance mask
const auto& coeffs = maskCoefficients[idx];
// Linear combination of prototype masks.
// NOTE(review): 32 full-image addWeighted passes per detection; a single
// GEMM over the flattened prototypes would do the same work in one pass —
// worth profiling if postprocess shows up hot.
cv::Mat combinedMask = cv::Mat::zeros(maskH, maskW, CV_32FC1);
for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
cv::addWeighted(combinedMask, 1.0, prototypeMasks[m], coeffs[m],
0.0, combinedMask);
}
// Apply sigmoid activation
combinedMask = sigmoid(combinedMask);
// Crop to valid region
cv::Mat croppedMask = combinedMask(cropRect).clone();
// Resize to original image dimensions
cv::Mat resizedMask;
cv::resize(croppedMask, resizedMask, origSize, 0, 0, cv::INTER_LINEAR);
// Binarize mask
cv::Mat binaryMask;
cv::threshold(resizedMask, binaryMask, 0.5, 255.0, cv::THRESH_BINARY);
binaryMask.convertTo(binaryMask, CV_8UC1);
// Crop mask to bounding box region (zero outside the box).
cv::Rect roi(result.box.x, result.box.y, result.box.width, result.box.height);
roi &= cv::Rect(0, 0, origSize.width, origSize.height);
if (roi.area() > 0) {
cv::Mat finalMask = cv::Mat::zeros(origSize, CV_8UC1);
binaryMask(roi).copyTo(finalMask(roi));
result.mask = finalMask;
// Convert mask to polygon (single largest contour)
result.polygon = maskToPolygon(finalMask, result.box, 2.0f, 10);
// Validate polygon
if (result.polygon.size() < 3) {
// Fallback to bounding box if polygon extraction failed
result.polygon = {
cv::Point2f(result.box.x, result.box.y),
cv::Point2f(result.box.x + result.box.width, result.box.y),
cv::Point2f(result.box.x + result.box.width, result.box.y + result.box.height),
cv::Point2f(result.box.x, result.box.y + result.box.height)
};
}
}
else {
// Skip invalid detections
continue;
}
results.push_back(result);
}
return results;
}
catch (const cv::Exception& e) {
this->_logger.LogFatal("ANSONNXSEG::postprocess",
"[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
__FILE__, __LINE__);
return {};
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXSEG::postprocess",
"[Instance " + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
return {};
}
}
// Runs the full segmentation pipeline on one frame:
// validate -> preprocess (letterbox + blob) -> ORT inference -> postprocess.
// image     : input frame.
// camera_id : opaque identifier forwarded to postprocess().
// Returns the detections, or an empty vector on any failure (logged).
std::vector<Object> ANSONNXSEG::segment(const cv::Mat& image, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Validate input image
if (image.empty() || image.data == nullptr) {
this->_logger.LogError("ANSONNXSEG::segment",
"Input image is empty or null", __FILE__, __LINE__);
return {};
}
if (image.cols <= 0 || image.rows <= 0) {
this->_logger.LogError("ANSONNXSEG::segment",
"Invalid image dimensions: " + std::to_string(image.cols) + "x" +
std::to_string(image.rows), __FILE__, __LINE__);
return {};
}
// 1. Preprocess image
std::vector<int64_t> inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
float* rawBlob = nullptr;
cv::Mat letterboxImg = preprocess(image, rawBlob, inputShape);
// Adopt the blob into an RAII guard immediately: it is freed on every exit
// path below, replacing the hand-written delete[] in each error branch of
// the original (which was easy to get wrong and leak).
std::unique_ptr<float[]> blob(rawBlob);
if (letterboxImg.empty()) {
this->_logger.LogError("ANSONNXSEG::segment",
"Preprocessing failed", __FILE__, __LINE__);
return {};
}
// Validate blob pointer after preprocessing
if (!blob) {
this->_logger.LogError("ANSONNXSEG::segment",
"Blob pointer is null after preprocessing", __FILE__, __LINE__);
return {};
}
// 2. Prepare input tensor
const size_t inputSize = vectorProduct(inputShape);
if (inputSize == 0) {
this->_logger.LogError("ANSONNXSEG::segment",
"Invalid input tensor size", __FILE__, __LINE__);
return {};
}
// Create memory info and input tensor over the blob (no copy).
Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(
OrtArenaAllocator,
OrtMemTypeDefault
);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
memInfo,
blob.get(),
inputSize,
inputShape.data(),
inputShape.size()
);
// Validate tensor creation
if (!inputTensor.IsTensor()) {
this->_logger.LogError("ANSONNXSEG::segment",
"Failed to create input tensor", __FILE__, __LINE__);
return {};
}
// 3. Run inference
std::vector<Ort::Value> outputs;
try {
outputs = session.Run(
Ort::RunOptions{ nullptr },
inputNames.data(),
&inputTensor,
numInputNodes,
outputNames.data(),
numOutputNodes
);
}
catch (const Ort::Exception& e) {
this->_logger.LogError("ANSONNXSEG::segment",
"ONNX Runtime inference failed: " + std::string(e.what()),
__FILE__, __LINE__);
return {};
}
// The blob is only read during Run(); release it before postprocessing.
blob.reset();
// Validate outputs
if (outputs.empty()) {
this->_logger.LogError("ANSONNXSEG::segment",
"Model returned no outputs", __FILE__, __LINE__);
return {};
}
// 4. Postprocess results — inputShape[2]/[3] reflect the letterboxed H/W.
const cv::Size letterboxSize(
static_cast<int>(inputShape[3]),
static_cast<int>(inputShape[2])
);
return postprocess(image.size(), letterboxSize, outputs, camera_id);
}
catch (const Ort::Exception& e) {
this->_logger.LogFatal("ANSONNXSEG::segment",
"[Instance " + std::to_string(instanceId_) + "] ONNX Runtime error: " +
e.what(), __FILE__, __LINE__);
return {};
}
catch (const cv::Exception& e) {
this->_logger.LogFatal("ANSONNXSEG::segment",
"[Instance " + std::to_string(instanceId_) + "] OpenCV error: " +
e.what(), __FILE__, __LINE__);
return {};
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSONNXSEG::segment",
"[Instance " + std::to_string(instanceId_) + "] " + e.what(),
__FILE__, __LINE__);
return {};
}
}
// Public functions
// Destructor: delegates cleanup to Destroy(). Remaining members (ONNX
// session, buffers) are released by their own destructors (RAII).
ANSONNXSEG::~ANSONNXSEG() {
Destroy();
}
// Tears down this instance. Currently only logs the teardown to stdout;
// actual resource release happens in member destructors. Always returns true.
// NOTE(review): uses std::cout directly rather than _logger — presumably
// intentional so the message survives even if the logger is already gone.
bool ANSONNXSEG::Destroy() {
std::cout << "[ANSONNXSEG] Destroyed instance " << instanceId_ << std::endl;
return true;
}
// Optimizes the model for deployment. No segmentation-specific steps are
// needed, so this forwards directly to the base-class implementation and
// propagates its success/failure result.
bool ANSONNXSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    return ANSODBase::OptimizeModel(fp16, optimizedModelFolder);
}
// Initializes the segmentation engine from an encrypted model zip.
// Loads the base configuration, forces segmentation-specific settings,
// resolves the class list (new-style config file or legacy fixed names),
// fills labelMap with a comma-separated class list, and creates the ONNX
// Runtime session. Returns true only when every step succeeded.
bool ANSONNXSEG::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _modelLoadValid = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
        if (!result) return false;
        // Force segmentation-specific settings on top of the caller's config.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
        _modelConfig.modelType = ModelType::ONNXSEG;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp obviously-invalid thresholds to 0.5 defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        _fp16 = (modelConfig.precisionType == PrecisionType::FP16);
        if (FileExist(_modelConfigFile)) {
            // New-style zip: classes and input shape come from the config file.
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Old-style zip: fixed file names; classes from file or embedded string.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // FIX: log tag previously said "ANSONNXCL" (copy-paste from the classifier).
                this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Build the label map from the loaded class list.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);
        classColors = generateColors(_classes);
        // 2. Initialize ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        // FIX: only mark the model usable when session creation succeeded;
        // previously both flags were set to true even when Init() failed.
        _modelLoadValid = result;
        _isInitialized = result;
        return result;
    }
    catch (const std::exception& e) {
        // FIX: log tag previously said "ANSONNXCL::Initialize".
        this->_logger.LogFatal("ANSONNXSEG::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Reloads the segmentation model from an encrypted zip using the existing
// configuration. Mirrors Initialize() but always forces FP16 and does not
// rebuild the label map. Returns true only when the ONNX session is created.
bool ANSONNXSEG::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        // Force segmentation-specific settings; input defaults to 640x640
        // unless the model config file overrides it below.
        _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
        _modelConfig.modelType = ModelType::ONNXSEG;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp obviously-invalid thresholds to 0.5 defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        // if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
        _fp16 = true; // Load Model from Here
        // 0. Check if the configuration file exists.
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Old-style zip: fixed file names; classes from file or embedded string.
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // FIX: log tag previously said "ANSONNXSEG::Initialize" (copy-paste).
                this->_logger.LogDebug("ANSONNXSEG::LoadModel. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXSEG::LoadModel. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        classColors = generateColors(_classes);
        // Initialize ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        // FIX: only mark the model usable when session creation succeeded;
        // previously both flags were set to true even when Init() failed.
        _modelLoadValid = result;
        _isInitialized = result;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Loads the segmentation model from a plain (unzipped) folder. modelName
// defaults to "train_last" when empty; className names the class-list file.
// Fills labelMap with a comma-separated class list on success.
bool ANSONNXSEG::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;
        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "train_last";
        }
        std::string modelFullName = _modelName + ".onnx";
        // Force segmentation-specific settings on top of the caller's config.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
        _modelConfig.modelType = ModelType::ONNXSEG;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Clamp obviously-invalid thresholds to 0.5 defaults.
        if (_modelConfig.modelMNSThreshold < 0.2)
            _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2)
            _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not defined
        _fp16 = true; // Load Model from Here
        // 0. Check if the configuration file exists.
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // Old-style layout: caller-supplied file names; classes from file or string.
            _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
            _classFilePath = CreateFilePath(_modelFolder, className);
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName)
            {
                // FIX: log tag previously said "ANSONNXSEG::Initialize" (copy-paste).
                this->_logger.LogDebug("ANSONNXSEG::LoadModelFromFolder. Load classes from string", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXSEG::LoadModelFromFolder. Load classes from file", _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }
        // 1. Build the label map from the loaded class list.
        labelMap.clear();
        if (!_classes.empty())
            labelMap = VectorToCommaSeparatedString(_classes);
        classColors = generateColors(_classes);
        // 2. Initialize ONNX Runtime session.
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        // NOTE(review): unlike Initialize()/LoadModel(), this path never calls
        // Init(_modelFilePath, true, 0) despite the step-2 comment above —
        // confirm whether the base class creates the session, or the call is missing.
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Runs segmentation on one BGR frame for the given camera. Returns the
// detected objects, optionally passed through tracking/stabilization.
// Returns an empty vector when the model is not loaded, the license is
// invalid, the engine is uninitialized, the frame is degenerate, or an
// exception occurs (logged, never propagated).
std::vector<Object> ANSONNXSEG::RunInference(const cv::Mat& input, const std::string& camera_id) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!_modelLoadValid) {
        // FIX: message previously said "TensorRT model" — this backend is ONNX Runtime.
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Cannot load the ONNX model. Please check if it exists", __FILE__, __LINE__);
        return {};
    }
    if (!_licenseValid) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Runtime license is not valid or expired. Please contact ANSCENTER", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    try {
        // Reject empty or degenerate frames before running the model.
        if (input.empty()) return {};
        if ((input.cols < 5) || (input.rows < 5)) return {};
        std::vector<Object> result = segment(input, camera_id);
        if (_trackerEnabled) {
            result = ApplyTracking(result, camera_id);
            if (_stabilizationEnabled) result = StabilizeDetections(result, camera_id);
        }
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
// Convenience overload: runs inference under the default camera identifier
// by delegating to the camera-aware variant.
std::vector<Object> ANSONNXSEG::RunInference(const cv::Mat& inputImgBGR) {
    return this->RunInference(inputImgBGR, "CustomCam");
}
}