#include "ANSONNXSEG.h"
#include "EPLoader.h"

#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <numeric>
#include <random>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

namespace ANSCENTER {

// NOTE(review): the element type must match the declaration in ANSONNXSEG.h —
// assumed int here; confirm against the header.
std::atomic<int> ANSONNXSEG::instanceCounter_(0); // Initialize static member

namespace {
/// Blend a single instance mask onto `image` with the given colour and alpha.
/// Shared by drawSegmentations() and drawSegmentationsAndBoxes().
/// `mask` may be 1- or 3-channel; it is binarized at 127 before blending.
void overlayMask(cv::Mat& image, const cv::Mat& mask, const cv::Scalar& color, float maskAlpha) {
    if (mask.empty()) return;

    // Ensure the mask is single-channel.
    cv::Mat maskGray;
    if (mask.channels() == 3) {
        cv::cvtColor(mask, maskGray, cv::COLOR_BGR2GRAY);
    }
    else {
        maskGray = mask;
    }

    // Threshold to binary (object: 255, background: 0).
    cv::Mat maskBinary;
    cv::threshold(maskGray, maskBinary, 127, 255, cv::THRESH_BINARY);

    // Colour the object pixels; background stays 0 so addWeighted leaves it untouched.
    cv::Mat coloredMask;
    cv::cvtColor(maskBinary, coloredMask, cv::COLOR_GRAY2BGR);
    coloredMask.setTo(color, maskBinary);

    cv::addWeighted(image, 1.0, coloredMask, maskAlpha, 0, image);
}
} // namespace

/// Number of elements described by a tensor shape (product of all dims).
size_t ANSONNXSEG::vectorProduct(const std::vector<int64_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), 1ull, std::multiplies<size_t>());
}

/// YOLO-style letterbox: scale `image` uniformly into `newShape`, padding the
/// remainder with `color`. `auto_` pads only to a multiple of `stride`
/// (dynamic-shape models); `scaleFill` stretches without padding; `scaleUp`
/// false means the image is only ever shrunk.
void ANSONNXSEG::letterBox(const cv::Mat& image, cv::Mat& outImage, const cv::Size& newShape,
                           const cv::Scalar& color, bool auto_, bool scaleFill, bool scaleUp, int stride) {
    // Uniform scale that fits the image inside newShape.
    float r = std::min((float)newShape.height / (float)image.rows,
                       (float)newShape.width / (float)image.cols);
    if (!scaleUp) {
        r = std::min(r, 1.0f); // only shrink, never upscale
    }

    int newW = static_cast<int>(std::round(image.cols * r));
    int newH = static_cast<int>(std::round(image.rows * r));
    int dw = newShape.width - newW;
    int dh = newShape.height - newH;

    if (auto_) {
        // Minimal padding so each side is a multiple of `stride`.
        dw = dw % stride;
        dh = dh % stride;
    }
    else if (scaleFill) {
        // Stretch exactly to newShape; no padding at all.
        newW = newShape.width;
        newH = newShape.height;
        dw = 0;
        dh = 0;
    }

    cv::Mat resized;
    cv::resize(image, resized, cv::Size(newW, newH), 0, 0, cv::INTER_LINEAR);

    // Split the padding as evenly as possible between the two sides.
    int top = dh / 2;
    int bottom = dh - top;
    int left = dw / 2;
    int right = dw - left;
    cv::copyMakeBorder(resized, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}

/// Greedy IoU-based non-maximum suppression.
/// Boxes below `scoreThreshold` are discarded; among the rest, boxes with
/// IoU > `nmsThreshold` against an already-kept higher-scoring box are dropped.
/// Kept indices (into `boxes`) are returned in `indices`, highest score first.
void ANSONNXSEG::NMSBoxes(const std::vector<BoundingBox>& boxes, const std::vector<float>& scores,
                          float scoreThreshold, float nmsThreshold, std::vector<int>& indices) {
    indices.clear();
    if (boxes.empty()) {
        return;
    }

    // Candidates above the score threshold, sorted by descending score.
    std::vector<int> order;
    order.reserve(boxes.size());
    for (size_t i = 0; i < boxes.size(); ++i) {
        if (scores[i] >= scoreThreshold) {
            order.push_back((int)i);
        }
    }
    if (order.empty()) return;
    std::sort(order.begin(), order.end(),
              [&scores](int a, int b) { return scores[a] > scores[b]; });

    // Areas in float: int width*height can overflow for large boxes.
    std::vector<float> areas(boxes.size());
    for (size_t i = 0; i < boxes.size(); ++i) {
        areas[i] = (float)boxes[i].width * (float)boxes[i].height;
    }

    std::vector<bool> suppressed(boxes.size(), false);
    for (size_t i = 0; i < order.size(); ++i) {
        int idx = order[i];
        if (suppressed[idx]) continue;
        indices.push_back(idx);
        for (size_t j = i + 1; j < order.size(); ++j) {
            int idx2 = order[j];
            if (suppressed[idx2]) continue;
            const BoundingBox& a = boxes[idx];
            const BoundingBox& b = boxes[idx2];
            int interX1 = std::max(a.x, b.x);
            int interY1 = std::max(a.y, b.y);
            int interX2 = std::min(a.x + a.width, b.x + b.width);
            int interY2 = std::min(a.y + a.height, b.y + b.height);
            int w = interX2 - interX1;
            int h = interY2 - interY1;
            if (w > 0 && h > 0) {
                float interArea = (float)w * (float)h; // float product: avoids int overflow
                float unionArea = areas[idx] + areas[idx2] - interArea;
                float iou = (unionArea > 0.f) ? (interArea / unionArea) : 0.f;
                if (iou > nmsThreshold) {
                    suppressed[idx2] = true;
                }
            }
        }
    }
}

/// Element-wise sigmoid: 1 / (1 + exp(-x)).
cv::Mat ANSONNXSEG::sigmoid(const cv::Mat& src) {
    cv::Mat dst;
    cv::exp(-src, dst);
    dst = 1.0 / (1.0 + dst);
    return dst;
}

/// Map a box from letterbox coordinates back to original-image coordinates,
/// undoing the uniform scale and the symmetric padding added by letterBox().
/// When `p_Clip` is set the result is clamped to the original image bounds.
BoundingBox ANSONNXSEG::scaleCoords(const cv::Size& letterboxShape, const BoundingBox& coords,
                                    const cv::Size& originalShape, bool p_Clip) {
    float gain = std::min((float)letterboxShape.height / (float)originalShape.height,
                          (float)letterboxShape.width / (float)originalShape.width);
    // Padding added on each side by the letterbox step.
    int padW = static_cast<int>(std::round(((float)letterboxShape.width - (float)originalShape.width * gain) / 2.f));
    int padH = static_cast<int>(std::round(((float)letterboxShape.height - (float)originalShape.height * gain) / 2.f));

    BoundingBox ret;
    ret.x = static_cast<int>(std::round(((float)coords.x - (float)padW) / gain));
    ret.y = static_cast<int>(std::round(((float)coords.y - (float)padH) / gain));
    ret.width = static_cast<int>(std::round((float)coords.width / gain));
    ret.height = static_cast<int>(std::round((float)coords.height / gain));

    if (p_Clip) {
        ret.x = clamp(ret.x, 0, originalShape.width);
        ret.y = clamp(ret.y, 0, originalShape.height);
        ret.width = clamp(ret.width, 0, originalShape.width - ret.x);
        ret.height = clamp(ret.height, 0, originalShape.height - ret.y);
    }
    return ret;
}

/// Deterministic per-class colours (seeded RNG), memoized per class-name set.
/// The cache key is an order-sensitive hash combine of all class names.
/// The static cache is shared across instances, so access is mutex-guarded.
std::vector<cv::Scalar> ANSONNXSEG::generateColors(const std::vector<std::string>& classNames, int seed) {
    static std::mutex cacheMutex;
    static std::unordered_map<size_t, std::vector<cv::Scalar>> cache;

    // boost-style hash_combine over the class names.
    size_t key = 0;
    for (const auto& name : classNames) {
        size_t h = std::hash<std::string>{}(name);
        key ^= (h + 0x9e3779b9 + (key << 6) + (key >> 2));
    }

    std::lock_guard lock(cacheMutex); // cache is static: guard against concurrent Initialize()
    auto it = cache.find(key);
    if (it != cache.end()) {
        return it->second;
    }

    std::mt19937 rng(seed);
    std::uniform_int_distribution<int> dist(0, 255);
    std::vector<cv::Scalar> colors;
    colors.reserve(classNames.size());
    for (size_t i = 0; i < classNames.size(); ++i) {
        colors.emplace_back(cv::Scalar(dist(rng), dist(rng), dist(rng)));
    }
    cache[key] = colors;
    return colors;
}

/// Overlay only the instance masks (no boxes/labels) for results above the
/// configured detection score threshold.
void ANSONNXSEG::drawSegmentations(cv::Mat& image, const std::vector<Object>& results, float maskAlpha) const {
    if (classColors.empty()) return; // guard: classId % 0 below would be UB
    for (const auto& seg : results) {
        if (seg.confidence < _modelConfig.detectionScoreThreshold) {
            continue;
        }
        cv::Scalar color = classColors[seg.classId % classColors.size()];
        overlayMask(image, seg.mask, color, maskAlpha);
    }
}

/// Overlay bounding box, class label with confidence, and instance mask for
/// each result above the configured detection score threshold.
void ANSONNXSEG::drawSegmentationsAndBoxes(cv::Mat& image, const std::vector<Object>& results,
                                           float maskAlpha) const {
    if (classColors.empty()) return; // guard: classId % 0 below would be UB
    for (const auto& seg : results) {
        if (seg.confidence < _modelConfig.detectionScoreThreshold) {
            continue;
        }
        cv::Scalar color = classColors[seg.classId % classColors.size()];

        // 1. Bounding box
        cv::rectangle(image, cv::Point(seg.box.x, seg.box.y),
                      cv::Point(seg.box.x + seg.box.width, seg.box.y + seg.box.height), color, 2);

        // 2. Label ("name NN%") on a filled background, kept inside the image top edge
        std::string className = (seg.classId >= 0 && seg.classId < (int)_classes.size())
                                    ? _classes[seg.classId]
                                    : std::to_string(seg.classId); // fallback for out-of-range ids
        std::string label = className + " " + std::to_string(static_cast<int>(seg.confidence * 100)) + "%";
        int baseLine = 0;
        double fontScale = 0.5;
        int thickness = 1;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, fontScale, thickness, &baseLine);
        int top = std::max(seg.box.y, labelSize.height + 5);
        cv::rectangle(image, cv::Point(seg.box.x, top - labelSize.height - 5),
                      cv::Point(seg.box.x + labelSize.width + 5, top), color, cv::FILLED);
        cv::putText(image, label, cv::Point(seg.box.x + 2, top - 2), cv::FONT_HERSHEY_SIMPLEX,
                    fontScale, cv::Scalar(255, 255, 255), thickness);

        // 3. Segmentation mask
        overlayMask(image, seg.mask, color, maskAlpha);
    }
}

/// Create the ONNX Runtime environment/session for `modelPath`, attaching the
/// GPU execution provider reported by EPLoader (CUDA / DirectML / OpenVINO)
/// when `useGPU` is set, falling back to CPU otherwise. Reads input/output
/// node metadata and runs a short warmup. Returns false on any failure.
bool ANSONNXSEG::Init(const std::string& modelPath, bool useGPU, int deviceId) {
    std::lock_guard lock(_mutex);
    try {
        deviceId_ = deviceId;
        const auto& ep = ANSCENTER::EPLoader::Current();
        // Bind the ORT C API exported by the EP-specific runtime DLL before any other Ort:: call.
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        std::cout << "[ANSONNXSEG] EP ready: " << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;

        // Unique environment name per instance to avoid conflicts
        std::string envName = "ONNX_SEG_INST" + std::to_string(instanceId_);
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, envName.c_str());
        sessionOptions = Ort::SessionOptions();
        sessionOptions.SetIntraOpNumThreads(
            std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
        sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

        // ── Log available providers ─────────────────────────────────────────
        std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
        std::cout << "[Instance " << instanceId_ << "] Available Execution Providers:" << std::endl;
        for (const auto& p : availableProviders)
            std::cout << "  - " << p << std::endl;

        // ── Attach EP based on runtime-detected hardware ────────────────────
        if (useGPU) {
            bool attached = false;
            switch (ep.type) {
            case ANSCENTER::EngineType::NVIDIA_GPU: {
                auto it = std::find(availableProviders.begin(), availableProviders.end(),
                                    "CUDAExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXSEG::Init",
                                           "CUDAExecutionProvider not in DLL — "
                                           "check ep/cuda/ has the CUDA ORT build.",
                                           __FILE__, __LINE__);
                    break;
                }
                try {
                    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                    Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
                    std::string deviceIdStr = std::to_string(deviceId_);
                    const char* keys[] = { "device_id" };
                    const char* values[] = { deviceIdStr.c_str() };
                    Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
                    sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
                    Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
                    std::cout << "[Instance " << instanceId_ << "] CUDA EP attached on device "
                              << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::AMD_GPU: {
                auto it = std::find(availableProviders.begin(), availableProviders.end(),
                                    "DmlExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXSEG::Init",
                                           "DmlExecutionProvider not in DLL — "
                                           "check ep/directml/ has the DirectML ORT build.",
                                           __FILE__, __LINE__);
                    break;
                }
                try {
                    std::unordered_map<std::string, std::string> opts = {
                        { "device_id", std::to_string(deviceId_) }
                    };
                    sessionOptions.AppendExecutionProvider("DML", opts);
                    std::cout << "[Instance " << instanceId_ << "] DirectML EP attached on device "
                              << deviceId_ << "." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::OPENVINO_GPU: {
                auto it = std::find(availableProviders.begin(), availableProviders.end(),
                                    "OpenVINOExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXSEG::Init",
                                           "OpenVINOExecutionProvider not in DLL — "
                                           "check ep/openvino/ has the OpenVINO ORT build.",
                                           __FILE__, __LINE__);
                    break;
                }
                // FP32 + single thread preserved for determinism; each instance gets its own stream and cache
                const std::string precision = "FP32";
                const std::string numberOfThreads = "1";
                const std::string numberOfStreams = std::to_string(instanceId_ + 1);
                const std::string primaryDevice = "GPU." + std::to_string(deviceId_);
                const std::string cacheDir = "./ov_cache_inst" + std::to_string(instanceId_);
                // Try the specific GPU first, then any GPU, then AUTO with CPU fallback.
                std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                    { {"device_type", primaryDevice}, {"precision", precision},
                      {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
                      {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type", "GPU"}, {"precision", precision},
                      {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
                      {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "False"},
                      {"cache_dir", cacheDir} },
                    { {"device_type", "AUTO:GPU,CPU"}, {"precision", precision},
                      {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
                      {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "False"},
                      {"cache_dir", cacheDir} }
                };
                for (const auto& config : try_configs) {
                    try {
                        sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
                        std::cout << "[Instance " << instanceId_ << "] OpenVINO EP attached ("
                                  << config.at("device_type") << ", stream: " << numberOfStreams
                                  << ")." << std::endl;
                        attached = true;
                        break;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("ANSONNXSEG::Init", e.what(), __FILE__, __LINE__);
                    }
                }
                if (!attached)
                    std::cerr << "[Instance " << instanceId_
                              << "] OpenVINO EP: all device configs failed." << std::endl;
                break;
            }
            default:
                break;
            }
            if (!attached) {
                std::cerr << "[Instance " << instanceId_
                          << "] No GPU EP attached — running on CPU." << std::endl;
                this->_logger.LogFatal("ANSONNXSEG::Init",
                                       "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
            }
        }
        else {
            std::cout << "[Instance " << instanceId_ << "] Inference device: CPU (useGPU=false)"
                      << std::endl;
        }

        // ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
        // NOTE(review): byte-widening conversion is only correct for ASCII
        // paths; a UTF-8 path with non-ASCII characters will not open — TODO
        // confirm whether model paths can contain such characters.
        std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
        session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
#else
        session = Ort::Session(env, modelPath.c_str(), sessionOptions);
#endif

        numInputNodes = session.GetInputCount();
        numOutputNodes = session.GetOutputCount();
        Ort::AllocatorWithDefaultOptions allocator;

        // ── Input node name & shape ─────────────────────────────────────────
        {
            auto inNameAlloc = session.GetInputNameAllocated(0, allocator);
            inputNameAllocs.emplace_back(std::move(inNameAlloc));
            inputNames.push_back(inputNameAllocs.back().get());

            auto inShape = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
            if (inShape.size() == 4) {
                if (inShape[2] == -1 || inShape[3] == -1) {
                    // Dynamic H/W: fall back to the configured default size.
                    isDynamicInputShape = true;
                    inputImageShape = cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight);
                    std::cout << "[Instance " << instanceId_ << "] Dynamic input shape — "
                                 "using config default: "
                              << inputImageShape.width << "x" << inputImageShape.height << std::endl;
                }
                else {
                    isDynamicInputShape = false;
                    inputImageShape = cv::Size(static_cast<int>(inShape[3]),
                                               static_cast<int>(inShape[2]));
                    std::cout << "[Instance " << instanceId_ << "] Fixed input shape: "
                              << inputImageShape.width << "x" << inputImageShape.height << std::endl;
                }
            }
            else {
                throw std::runtime_error("Model input is not 4D! Expect [N, C, H, W].");
            }
        }

        // ── Output node names (segmentation always has exactly 2) ───────────
        if (numOutputNodes != 2)
            throw std::runtime_error("Expected exactly 2 output nodes: output0 and output1.");
        for (size_t i = 0; i < numOutputNodes; ++i) {
            auto outNameAlloc = session.GetOutputNameAllocated(i, allocator);
            outputNameAllocs.emplace_back(std::move(outNameAlloc));
            outputNames.push_back(outputNameAllocs.back().get());
        }

        std::cout << "[Instance " << instanceId_ << "] Model loaded successfully — "
                  << numInputNodes << " input, " << numOutputNodes << " output nodes." << std::endl;

        // ── Warmup ──────────────────────────────────────────────────────────
        DEBUG_PRINT("[Instance " << instanceId_ << "] Starting warmup...");
        warmupModel();
        DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup completed successfully.");
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::Init",
                               std::string("[Instance ") + std::to_string(instanceId_) + "] " + e.what(),
                               __FILE__, __LINE__);
        return false;
    }
}

/// Run a few zero-input inferences so lazily-initialized EP state (kernels,
/// caches) is built before the first real frame. Failures are non-critical
/// and only logged.
void ANSONNXSEG::warmupModel() {
    try {
        // Dummy input with the resolved model input size (debug visibility only).
        cv::Mat dummyImage = cv::Mat::zeros(inputImageShape.height, inputImageShape.width, CV_8UC3);
        DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup: dummy image "
                                 << dummyImage.cols << "x" << dummyImage.rows);

        const size_t tensorSize =
            static_cast<size_t>(3) * inputImageShape.height * inputImageShape.width;
        std::vector<int64_t> inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };

        // Run 3 warmup inferences to stabilize
        for (int i = 0; i < 3; ++i) {
            try {
                // RAII buffer: previously a raw new[] that leaked when Run() threw.
                std::vector<float> blob(tensorSize, 0.0f);

                Ort::MemoryInfo memoryInfo =
                    Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
                Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
                    memoryInfo, blob.data(), blob.size(), inputShape.data(), inputShape.size());

                std::vector<Ort::Value> outputTensors = session.Run(
                    Ort::RunOptions{ nullptr },
                    inputNames.data(), &inputTensor, 1,
                    outputNames.data(), numOutputNodes);

                DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup " << (i + 1) << "/3 completed");
            }
            catch (const std::exception& e) {
                DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup iteration " << i
                                         << " failed (non-critical): " << e.what());
            }
        }
        DEBUG_PRINT("[Instance " << instanceId_ << "] Warmup successful - all states initialized");
    }
    catch (const std::exception& e) {
        this->_logger.LogWarn("ANSONNXSEG::warmupModel",
                              std::string("[Instance ") + std::to_string(instanceId_) +
                                  "] Warmup failed: " + e.what(),
                              __FILE__, __LINE__);
    }
}

/// Letterbox + normalize `image` into a freshly allocated NCHW float blob.
/// On success `blobPtr` points to a new[]-allocated buffer the CALLER owns
/// (and must delete[]), `inputTensorShape[2..3]` are updated to the letterbox
/// H/W, and the letterboxed CV_32FC3 image is returned. Returns an empty Mat
/// (with `blobPtr` freed/null) on any failure.
/// NOTE(review): segment() calls this while already holding _mutex, so _mutex
/// must be a recursive mutex — confirm against the header declaration.
cv::Mat ANSONNXSEG::preprocess(const cv::Mat& image, float*& blobPtr,
                               std::vector<int64_t>& inputTensorShape) {
    std::lock_guard lock(_mutex);
    m_imgWidth = image.cols;
    m_imgHeight = image.rows;
    try {
        // Validate input image
        if (image.empty() || image.data == nullptr) {
            this->_logger.LogError("ANSONNXSEG::preprocess", "Input image is empty or null",
                                   __FILE__, __LINE__);
            return cv::Mat();
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXSEG::preprocess",
                                   "Invalid image dimensions: " + std::to_string(image.cols) + "x" +
                                       std::to_string(image.rows),
                                   __FILE__, __LINE__);
            return cv::Mat();
        }

        // Check for NaN/Inf in input
        double minVal, maxVal;
        cv::minMaxLoc(image, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) || std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXSEG::preprocess",
                                   "Input image contains NaN or Inf values. Range: [" +
                                       std::to_string(minVal) + ", " + std::to_string(maxVal) + "]",
                                   __FILE__, __LINE__);
            return cv::Mat();
        }

        DEBUG_PRINT("[Instance " << instanceId_ << "] Input: " << image.cols << "x" << image.rows
                                 << ", channels=" << image.channels() << ", type=" << image.type()
                                 << ", range=[" << minVal << ", " << maxVal << "]");

        // Apply letterbox preprocessing (114 = YOLO's standard pad value)
        cv::Mat letterboxImage;
        letterBox(image, letterboxImage, inputImageShape, cv::Scalar(114, 114, 114),
                  /*auto_=*/isDynamicInputShape, /*scaleFill=*/false, /*scaleUp=*/true, /*stride=*/32);

        if (letterboxImage.empty() || letterboxImage.rows <= 0 || letterboxImage.cols <= 0) {
            this->_logger.LogError("ANSONNXSEG::preprocess", "Letterbox preprocessing failed",
                                   __FILE__, __LINE__);
            return cv::Mat();
        }

        // Update tensor shape for dynamic input (caller passes a 4-element NCHW shape).
        inputTensorShape[2] = static_cast<int64_t>(letterboxImage.rows);
        inputTensorShape[3] = static_cast<int64_t>(letterboxImage.cols);

        // Normalize to [0, 1] range.
        // NOTE(review): channel order is left as-is (OpenCV BGR); presumably the
        // model was exported to accept BGR — confirm, as YOLO exports often expect RGB.
        letterboxImage.convertTo(letterboxImage, CV_32FC3, 1.0f / 255.0f);

        // Allocate blob memory (ownership transfers to the caller).
        const size_t totalPixels =
            static_cast<size_t>(letterboxImage.rows) * static_cast<size_t>(letterboxImage.cols);
        const size_t blobSize = totalPixels * 3;
        if (blobPtr != nullptr) { // drop any stale buffer the caller passed in
            delete[] blobPtr;
            blobPtr = nullptr;
        }
        blobPtr = new float[blobSize];

        // Split HWC into planar CHW by aliasing each plane of the blob.
        std::vector<cv::Mat> channels(3);
        const int pixelsPerChannel = letterboxImage.rows * letterboxImage.cols;
        for (int c = 0; c < 3; ++c) {
            channels[c] = cv::Mat(letterboxImage.rows, letterboxImage.cols, CV_32FC1,
                                  blobPtr + c * pixelsPerChannel);
        }
        cv::split(letterboxImage, channels);

        return letterboxImage;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::preprocess",
                               "[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
                               __FILE__, __LINE__);
        if (blobPtr != nullptr) { delete[] blobPtr; blobPtr = nullptr; }
        return cv::Mat();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::preprocess",
                               "[Instance " + std::to_string(instanceId_) + "] " + e.what(),
                               __FILE__, __LINE__);
        if (blobPtr != nullptr) { delete[] blobPtr; blobPtr = nullptr; }
        return cv::Mat();
    }
}

/// Extract the largest contour of `binaryMask` inside `boundingBox`,
/// simplify it with approxPolyDP(simplificationEpsilon), and return it in
/// full-image coordinates. Returns an empty polygon when the mask is invalid,
/// the ROI is empty, or no contour reaches `minContourArea`.
std::vector<cv::Point2f> ANSONNXSEG::maskToPolygon(const cv::Mat& binaryMask,
                                                   const cv::Rect& boundingBox,
                                                   float simplificationEpsilon,
                                                   int minContourArea) {
    std::vector<cv::Point2f> polygon;
    try {
        // Validate input
        if (binaryMask.empty() || binaryMask.type() != CV_8UC1) {
            return polygon;
        }

        // Clip the box to the mask bounds before taking the ROI.
        cv::Rect roi = boundingBox & cv::Rect(0, 0, binaryMask.cols, binaryMask.rows);
        if (roi.area() <= 0) {
            return polygon;
        }
        cv::Mat maskROI = binaryMask(roi);

        // Find contours (clone: findContours may modify its input).
        std::vector<std::vector<cv::Point>> contours;
        std::vector<cv::Vec4i> hierarchy;
        cv::findContours(maskROI.clone(), contours, hierarchy, cv::RETR_EXTERNAL,
                         cv::CHAIN_APPROX_SIMPLE);
        if (contours.empty()) {
            return polygon;
        }

        // Largest contour that also meets the minimum-area requirement.
        int largestIdx = 0;
        double largestArea = 0.0;
        for (size_t i = 0; i < contours.size(); ++i) {
            double area = cv::contourArea(contours[i]);
            if (area > largestArea && area >= minContourArea) {
                largestArea = area;
                largestIdx = static_cast<int>(i);
            }
        }
        if (largestArea < minContourArea) {
            return polygon;
        }

        // Simplify the contour to reduce the number of points.
        std::vector<cv::Point> simplifiedContour;
        cv::approxPolyDP(contours[largestIdx], simplifiedContour, simplificationEpsilon, true);

        // Convert to Point2f and shift from ROI-local back to image coordinates.
        polygon.reserve(simplifiedContour.size());
        for (const auto& pt : simplifiedContour) {
            polygon.emplace_back(static_cast<float>(pt.x + roi.x),
                                 static_cast<float>(pt.y + roi.y));
        }
        return polygon;
    }
    catch (const cv::Exception&) {
        polygon.clear();
        return polygon;
    }
}

/// Decode raw YOLO-seg outputs (output0 = [1, 4+numClasses+32, N] detections,
/// output1 = [1, 32, H, W] mask prototypes) into final Objects: confidence
/// filtering, NMS, box rescale to the original image, per-instance mask
/// reconstruction (coeffs x prototypes -> sigmoid -> crop -> resize -> binarize)
/// and polygon extraction. Returns {} on failure or when nothing survives.
std::vector<Object> ANSONNXSEG::postprocess(const cv::Size& origSize, const cv::Size& letterboxSize,
                                            const std::vector<Ort::Value>& outputs,
                                            const std::string& camera_id) {
    std::lock_guard lock(_mutex);
    try {
        // Validate outputs
        if (outputs.size() < 2) {
            throw std::runtime_error("Insufficient model outputs. Expected at least 2, got " +
                                     std::to_string(outputs.size()));
        }

        const float* detections = outputs[0].GetTensorData<float>();
        const float* prototypes = outputs[1].GetTensorData<float>();
        auto detectionShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); // [1, 116, N]
        auto prototypeShape = outputs[1].GetTensorTypeAndShapeInfo().GetShape(); // [1, 32, H, W]

        // Rank checks first: the detailed message below indexes dims 0..3, which
        // previously read out of bounds when the rank was wrong.
        if (detectionShape.size() != 3) {
            throw std::runtime_error("Invalid detection shape rank. Expected [1, F, N].");
        }
        if (prototypeShape.size() != 4) {
            throw std::runtime_error("Invalid prototype shape rank. Expected [1, 32, H, W].");
        }
        if (prototypeShape[0] != 1 || prototypeShape[1] != 32) {
            throw std::runtime_error("Invalid prototype shape. Expected [1, 32, H, W], got [" +
                                     std::to_string(prototypeShape[0]) + ", " +
                                     std::to_string(prototypeShape[1]) + ", " +
                                     std::to_string(prototypeShape[2]) + ", " +
                                     std::to_string(prototypeShape[3]) + "]");
        }

        // Extract dimensions
        const size_t numFeatures = static_cast<size_t>(detectionShape[1]); // 4 bbox + classes + 32 masks
        const size_t numDetections = static_cast<size_t>(detectionShape[2]);
        const int maskH = static_cast<int>(prototypeShape[2]);
        const int maskW = static_cast<int>(prototypeShape[3]);
        if (numDetections == 0) {
            return {};
        }

        // Feature layout within each detection column.
        constexpr int BOX_OFFSET = 0;
        constexpr int BOX_SIZE = 4;
        constexpr int MASK_COEFFS_SIZE = 32;
        const int numClasses = static_cast<int>(numFeatures) - BOX_SIZE - MASK_COEFFS_SIZE;
        if (numClasses <= 0) {
            throw std::runtime_error("Invalid number of classes: " + std::to_string(numClasses));
        }
        const int CLASS_CONF_OFFSET = BOX_OFFSET + BOX_SIZE;
        const int MASK_COEFF_OFFSET = CLASS_CONF_OFFSET + numClasses;

        // 1. Extract and cache prototype masks
        std::vector<cv::Mat> prototypeMasks;
        prototypeMasks.reserve(MASK_COEFFS_SIZE);
        const int prototypeSize = maskH * maskW;
        for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
            // Wrap the tensor plane, then clone so the Mat owns its data.
            cv::Mat proto(maskH, maskW, CV_32FC1,
                          const_cast<float*>(prototypes + m * prototypeSize));
            prototypeMasks.emplace_back(proto.clone());
        }

        // 2. Process detections and filter by confidence
        std::vector<BoundingBox> boxes;
        std::vector<float> confidences;
        std::vector<int> classIds;
        std::vector<std::vector<float>> maskCoefficients;
        boxes.reserve(numDetections);
        confidences.reserve(numDetections);
        classIds.reserve(numDetections);
        maskCoefficients.reserve(numDetections);

        const int numBoxes = static_cast<int>(numDetections);
        for (int i = 0; i < numBoxes; ++i) {
            // Best class for this detection (data is feature-major: [feature][box]).
            float maxConf = 0.0f;
            int bestClassId = -1;
            for (int c = 0; c < numClasses; ++c) {
                const float conf = detections[(CLASS_CONF_OFFSET + c) * numBoxes + i];
                if (conf > maxConf) {
                    maxConf = conf;
                    bestClassId = c;
                }
            }
            if (maxConf < _modelConfig.detectionScoreThreshold) {
                continue;
            }

            // Bounding box is center-x, center-y, width, height in letterbox space.
            const float xc = detections[BOX_OFFSET * numBoxes + i];
            const float yc = detections[(BOX_OFFSET + 1) * numBoxes + i];
            const float w = detections[(BOX_OFFSET + 2) * numBoxes + i];
            const float h = detections[(BOX_OFFSET + 3) * numBoxes + i];
            boxes.push_back({ static_cast<int>(std::round(xc - w * 0.5f)),
                              static_cast<int>(std::round(yc - h * 0.5f)),
                              static_cast<int>(std::round(w)),
                              static_cast<int>(std::round(h)) });
            confidences.push_back(maxConf);
            classIds.push_back(bestClassId);

            // Mask coefficients (one weight per prototype).
            std::vector<float> coeffs(MASK_COEFFS_SIZE);
            for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
                coeffs[m] = detections[(MASK_COEFF_OFFSET + m) * numBoxes + i];
            }
            maskCoefficients.emplace_back(std::move(coeffs));
        }
        if (boxes.empty()) {
            return {};
        }

        // 3. Apply Non-Maximum Suppression
        std::vector<int> nmsIndices;
        NMSBoxes(boxes, confidences, _modelConfig.modelConfThreshold,
                 _modelConfig.modelMNSThreshold, nmsIndices);
        if (nmsIndices.empty()) {
            return {};
        }

        // 4. Coordinate transformation parameters (letterbox scale and padding).
        const float scale = std::min(static_cast<float>(letterboxSize.width) / origSize.width,
                                     static_cast<float>(letterboxSize.height) / origSize.height);
        const int scaledW = static_cast<int>(origSize.width * scale);
        const int scaledH = static_cast<int>(origSize.height * scale);
        const float padW = (letterboxSize.width - scaledW) * 0.5f;
        const float padH = (letterboxSize.height - scaledH) * 0.5f;

        // Letterbox -> prototype-mask space.
        const float maskScaleX = static_cast<float>(maskW) / letterboxSize.width;
        const float maskScaleY = static_cast<float>(maskH) / letterboxSize.height;

        // Crop region of the prototype mask corresponding to the un-padded image
        // (small padding avoids edge artifacts).
        constexpr float CROP_PADDING = 0.5f;
        const int cropX1 = std::clamp(
            static_cast<int>(std::round((padW - CROP_PADDING) * maskScaleX)), 0, maskW - 1);
        const int cropY1 = std::clamp(
            static_cast<int>(std::round((padH - CROP_PADDING) * maskScaleY)), 0, maskH - 1);
        const int cropX2 = std::clamp(
            static_cast<int>(std::round((letterboxSize.width - padW + CROP_PADDING) * maskScaleX)),
            cropX1 + 1, maskW);
        const int cropY2 = std::clamp(
            static_cast<int>(std::round((letterboxSize.height - padH + CROP_PADDING) * maskScaleY)),
            cropY1 + 1, maskH);
        const cv::Rect cropRect(cropX1, cropY1, cropX2 - cropX1, cropY2 - cropY1);

        // 5. Generate final results with masks
        std::vector<Object> results;
        results.reserve(nmsIndices.size());
        for (const int idx : nmsIndices) {
            Object result;

            // Scale bounding box back to original image coordinates (clipped).
            BoundingBox scaledBox = scaleCoords(letterboxSize, boxes[idx], origSize, true);
            result.box.x = scaledBox.x;
            result.box.y = scaledBox.y;
            result.box.width = scaledBox.width;
            result.box.height = scaledBox.height;
            result.confidence = confidences[idx];
            result.classId = classIds[idx];

            // Instance mask = sigmoid(sum_m coeff[m] * prototype[m]).
            const auto& coeffs = maskCoefficients[idx];
            cv::Mat combinedMask = cv::Mat::zeros(maskH, maskW, CV_32FC1);
            for (int m = 0; m < MASK_COEFFS_SIZE; ++m) {
                cv::addWeighted(combinedMask, 1.0, prototypeMasks[m], coeffs[m], 0.0, combinedMask);
            }
            combinedMask = sigmoid(combinedMask);

            // Crop to the valid (un-padded) region, upscale to the original image,
            // then binarize at 0.5.
            cv::Mat croppedMask = combinedMask(cropRect).clone();
            cv::Mat resizedMask;
            cv::resize(croppedMask, resizedMask, origSize, 0, 0, cv::INTER_LINEAR);
            cv::Mat binaryMask;
            cv::threshold(resizedMask, binaryMask, 0.5, 255.0, cv::THRESH_BINARY);
            binaryMask.convertTo(binaryMask, CV_8UC1);

            // Keep only the mask pixels inside the detection's bounding box.
            cv::Rect roi(result.box.x, result.box.y, result.box.width, result.box.height);
            roi &= cv::Rect(0, 0, origSize.width, origSize.height);
            if (roi.area() > 0) {
                cv::Mat finalMask = cv::Mat::zeros(origSize, CV_8UC1);
                binaryMask(roi).copyTo(finalMask(roi));
                result.mask = finalMask;

                // Convert mask to polygon (single largest contour).
                result.polygon = maskToPolygon(finalMask, result.box, 2.0f, 10);
                if (result.polygon.size() < 3) {
                    // Fallback to the bounding box if polygon extraction failed.
                    result.polygon = {
                        cv::Point2f((float)result.box.x, (float)result.box.y),
                        cv::Point2f((float)(result.box.x + result.box.width), (float)result.box.y),
                        cv::Point2f((float)(result.box.x + result.box.width),
                                    (float)(result.box.y + result.box.height)),
                        cv::Point2f((float)result.box.x, (float)(result.box.y + result.box.height))
                    };
                }
            }
            else {
                // Box fell entirely outside the image: skip this detection.
                continue;
            }
            results.push_back(result);
        }
        return results;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::postprocess",
                               "[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
                               __FILE__, __LINE__);
        return {};
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::postprocess",
                               "[Instance " + std::to_string(instanceId_) + "] " + e.what(),
                               __FILE__, __LINE__);
        return {};
    }
}

/// End-to-end inference on one frame: preprocess (letterbox + blob), run the
/// ONNX session, and postprocess into Objects. Returns {} on any failure.
std::vector<Object> ANSONNXSEG::segment(const cv::Mat& image, const std::string& camera_id) {
    std::lock_guard lock(_mutex);
    try {
        // Validate input image
        if (image.empty() || image.data == nullptr) {
            this->_logger.LogError("ANSONNXSEG::segment", "Input image is empty or null",
                                   __FILE__, __LINE__);
            return {};
        }
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXSEG::segment",
                                   "Invalid image dimensions: " + std::to_string(image.cols) + "x" +
                                       std::to_string(image.rows),
                                   __FILE__, __LINE__);
            return {};
        }

        // 1. Preprocess image
        std::vector<int64_t> inputShape = { 1, 3, inputImageShape.height, inputImageShape.width };
        float* blobPtr = nullptr;
        cv::Mat letterboxImg = preprocess(image, blobPtr, inputShape);
        // RAII ownership of the blob: frees it on every exit path, including
        // exceptions (replaces the four hand-written delete[] cleanups).
        std::unique_ptr<float[]> blobGuard(blobPtr);

        if (letterboxImg.empty()) {
            this->_logger.LogError("ANSONNXSEG::segment", "Preprocessing failed",
                                   __FILE__, __LINE__);
            return {};
        }
        if (blobPtr == nullptr) {
            this->_logger.LogError("ANSONNXSEG::segment",
                                   "Blob pointer is null after preprocessing", __FILE__, __LINE__);
            return {};
        }

        // 2. Prepare input tensor
        const size_t inputSize = vectorProduct(inputShape);
        if (inputSize == 0) {
            this->_logger.LogError("ANSONNXSEG::segment", "Invalid input tensor size",
                                   __FILE__, __LINE__);
            return {};
        }
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memInfo, blobPtr, inputSize, inputShape.data(), inputShape.size());
        if (!inputTensor.IsTensor()) {
            this->_logger.LogError("ANSONNXSEG::segment", "Failed to create input tensor",
                                   __FILE__, __LINE__);
            return {};
        }

        // 3. Run inference. Exactly one input tensor is supplied, so pass 1 —
        // passing numInputNodes here would over-read the single-element array
        // if a model ever reported more than one input.
        std::vector<Ort::Value> outputs;
        try {
            outputs = session.Run(Ort::RunOptions{ nullptr },
                                  inputNames.data(), &inputTensor, 1,
                                  outputNames.data(), numOutputNodes);
        }
        catch (const Ort::Exception& e) {
            this->_logger.LogError("ANSONNXSEG::segment",
                                   "ONNX Runtime inference failed: " + std::string(e.what()),
                                   __FILE__, __LINE__);
            return {};
        }
        if (outputs.empty()) {
            this->_logger.LogError("ANSONNXSEG::segment", "Model returned no outputs",
                                   __FILE__, __LINE__);
            return {};
        }

        // 4. Postprocess results
        const cv::Size letterboxSize(static_cast<int>(inputShape[3]),
                                     static_cast<int>(inputShape[2]));
        return postprocess(image.size(), letterboxSize, outputs, camera_id);
    }
    catch (const Ort::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
                               "[Instance " + std::to_string(instanceId_) +
                                   "] ONNX Runtime error: " + e.what(),
                               __FILE__, __LINE__);
        return {};
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
                               "[Instance " + std::to_string(instanceId_) + "] OpenCV error: " + e.what(),
                               __FILE__, __LINE__);
        return {};
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::segment",
                               "[Instance " + std::to_string(instanceId_) + "] " + e.what(),
                               __FILE__, __LINE__);
        return {};
    }
}

// Public functions

ANSONNXSEG::~ANSONNXSEG() {
    Destroy();
}

/// Release instance resources (ORT session/env are RAII members; nothing
/// manual to free here). Always returns true.
bool ANSONNXSEG::Destroy() {
    std::cout << "[ANSONNXSEG] Destroyed instance " << instanceId_ << std::endl;
    return true;
}

/// Delegate model optimization to the base class.
bool ANSONNXSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    return ANSODBase::OptimizeModel(fp16, optimizedModelFolder);
}

/// Full initialization: base-class license/zip handling, segmentation-specific
/// config defaults, class-name loading (new config file or legacy zip layout),
/// colour table, and ONNX session creation via Init(). Returns false on failure.
bool ANSONNXSEG::Initialize(std::string licenseKey, ModelConfig modelConfig,
                            const std::string& modelZipFilePath,
                            const std::string& modelZipPassword, std::string& labelMap) {
    std::lock_guard lock(_mutex);
    try {
        _modelLoadValid = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath,
                                            modelZipPassword, labelMap);
        if (!result) return false;

        // Parsing for YOLO only here
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
        _modelConfig.modelType = ModelType::ONNXSEG;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        // Sanity defaults for thresholds/keypoints when the config left them unset.
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = (modelConfig.precisionType == PrecisionType::FP16);

        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else { // This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                // Log tags fixed: previously said "ANSONNXCL::Initialize" (copy-paste).
                this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from string",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            }
            else {
                this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }

        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
        classColors = generateColors(_classes);

        // 2. Initialize ONNX Runtime session
        instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment
        result = Init(_modelFilePath, true, 0);
        // BUGFIX: these were unconditionally set to true even when Init() failed,
        // leaving the instance marked valid with no usable session.
        _modelLoadValid = result;
        _isInitialized = result;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXSEG::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

bool ANSONNXSEG::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    std::lock_guard lock(_mutex);
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION;
        _modelConfig.modelType = ModelType::ONNXSEG;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not define
        // if (_modelConfig.precisionType == PrecisionType::FP16)_fp16 = true;
        _fp16 = true;
        // Load Model from Here
        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        }
        else {// This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("ANSONNXSEG::Initialize. 
Load classes from string", _classFilePath, __FILE__, __LINE__); LoadClassesFromString(); } else { this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__); LoadClassesFromFile(); } } classColors = generateColors(_classes); // Initialize ONNX Runtime session instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment result = Init(_modelFilePath, true, 0); _modelLoadValid = true; _isInitialized = true; return result; } catch (const std::exception& e) { this->_logger.LogFatal("ANSONNXSEG::LoadModel", e.what(), __FILE__, __LINE__); return false; } } bool ANSONNXSEG::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) { std::lock_guard lock(_mutex); try { bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap); if (!result) return false; std::string _modelName = modelName; if (_modelName.empty()) { _modelName = "train_last"; } std::string modelFullName = _modelName + ".onnx"; // Parsing for YOLO only here _modelConfig = modelConfig; _modelConfig.detectionType = ANSCENTER::DetectionType::SEGMENTATION; _modelConfig.modelType = ModelType::ONNXSEG; _modelConfig.inpHeight = 640; _modelConfig.inpWidth = 640; if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5; if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5; if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max _modelConfig.numKPS = 17; if (_modelConfig.kpsThreshold == 0)_modelConfig.kpsThreshold = 0.5; // If not define _fp16 = true; // Load Model from Here // 0. 
Check if the configuration file exist if (FileExist(_modelConfigFile)) { ModelType modelType; std::vector inputShape; _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape); if (inputShape.size() == 2) { if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0]; if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1]; } } else {// This is old version of model zip file _modelFilePath = CreateFilePath(_modelFolder, modelFullName); _classFilePath = CreateFilePath(_modelFolder, className); std::ifstream isValidFileName(_classFilePath); if (!isValidFileName) { this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__); LoadClassesFromString(); } else { this->_logger.LogDebug("ANSONNXSEG::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__); LoadClassesFromFile(); } } // 1. Load labelMap and engine labelMap.clear(); if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes); classColors = generateColors(_classes); // 2. Initialize ONNX Runtime session instanceId_ = instanceCounter_.fetch_add(1); // Atomic increment _modelLoadValid = true; _isInitialized = true; return result; } catch (const std::exception& e) { this->_logger.LogFatal("ANSONNXSEG::LoadModelFromFolder", e.what(), __FILE__, __LINE__); return false; } } std::vector ANSONNXSEG::RunInference(const cv::Mat& input, const std::string& camera_id) { std::lock_guard lock(_mutex); if (!_modelLoadValid) { this->_logger.LogFatal("ANSONNXSEG::RunInference", "Cannot load the TensorRT model. Please check if it is exist", __FILE__, __LINE__); std::vector result; result.clear(); return result; } if (!_licenseValid) { this->_logger.LogFatal("ANSONNXSEG::RunInference", "Runtime license is not valid or expired. 
Please contact ANSCENTER", __FILE__, __LINE__); std::vector result; result.clear(); return result; } if (!_isInitialized) { this->_logger.LogFatal("ANSONNXSEG::RunInference", "Model is not initialized", __FILE__, __LINE__); std::vector result; result.clear(); return result; } try { std::vector result; if (input.empty()) return result; if ((input.cols < 5) || (input.rows < 5)) return result; result = segment(input, camera_id); if (_trackerEnabled) { result = ApplyTracking(result, camera_id); if (_stabilizationEnabled) result = StabilizeDetections(result, camera_id); } return result; } catch (const std::exception& e) { this->_logger.LogFatal("ANSONNXSEG::RunInference", e.what(), __FILE__, __LINE__); return {}; } } std::vector ANSONNXSEG::RunInference(const cv::Mat& inputImgBGR) { return RunInference(inputImgBGR, "CustomCam"); } }