#include "ANSYOLO12OD.h"
#include "EPLoader.h"

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#ifdef USEONNXOV
#endif

namespace ANSCENTER {

    /// Validates the base-class optimization step, then reports the folder that
    /// holds the (already optimized) ONNX model. YOLO12 ONNX models need no
    /// further offline optimization, so this only checks that the model file exists.
    /// @param fp16                  forwarded to the base class (FP16 conversion request).
    /// @param optimizedModelFolder  out: parent folder of the model on success, "" on failure.
    /// @return true when the model file exists and base optimization succeeded.
    bool YOLO12OD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
        std::lock_guard lock(_mutex);
        if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
            return false;
        }
        if (FileExist(_modelFilePath)) {
            optimizedModelFolder = GetParentFolder(_modelFilePath);
            this->_logger.LogDebug("YOLO12OD::OptimizeModel",
                "This model is optimized. No need other optimization.", __FILE__, __LINE__);
            return true;
        }
        else {
            optimizedModelFolder = "";
            this->_logger.LogFatal("YOLO12OD::OptimizeModel",
                "This model is not exist. Please check the model path again.", __FILE__, __LINE__);
            return false;
        }
    }

    /// Loads the model from an (optionally password protected) zip package.
    /// After the base class extracts the package, this resolves the ONNX weight
    /// file ("train_last.onnx") and the class list, then creates the ORT session.
    /// @param modelZipFilePath path to the model zip package.
    /// @param modelZipPassword password for the package (may be empty).
    /// @return true when the ONNX session was created successfully.
    bool YOLO12OD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
        std::lock_guard lock(_mutex);
        ModelLoadingGuard mlg(_modelLoading);
        try {
            bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
            if (!result) return false;

            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.inpHeight = 640;   // default; may be overridden by the config file below
            _modelConfig.inpWidth = 640;
            // Clamp implausibly low thresholds back to a sane default.
            if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

            // 0. Check if the configuration file exists (new-style package).
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                // NOTE(review): template arguments were stripped in the extracted source;
                // restored as std::vector<int> — confirm against GetConfigFileContent's signature.
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
                }
                _modelConfig.modelType = modelType;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight",
                    _modelFilePath, __FILE__, __LINE__);
                if (!FileExist(_modelFilePath)) {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            else { // This is old version of model zip file
                std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
                if (std::filesystem::exists(onnxfile)) {
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                    this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight",
                        _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
                if (FileExist(_classFilePath)) {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
                else {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
            }
            _isInitialized = loadModel(_modelFilePath, true); // Assume that GPU is available
            return _isInitialized;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Loads the model from an already-extracted folder (no zip).
    /// Resolves "<modelName>.onnx" (default "train_last.onnx") and the class file,
    /// then creates the ORT session and returns the label map as a comma string.
    /// @param labelMap out: comma-separated class names (empty if none).
    /// @return true when the ONNX session was created successfully.
    bool YOLO12OD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
        std::string modelName, std::string className,
        const std::string& modelFolder, std::string& labelMap) {
        std::lock_guard lock(_mutex);
        ModelLoadingGuard mlg(_modelLoading);
        try {
            bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName,
                className, modelFolder, labelMap);
            if (!result) return false;

            std::string _modelName = modelName;
            if (_modelName.empty()) {
                _modelName = "train_last";
            }
            // (The original pre-set "<name>.weights" here, but every path below
            // overwrites it with "<name>.onnx" before use, so that value was dead.)
            std::string weightFileName;

            _modelConfig = modelConfig;
            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
            if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

            // 0. Check if the configuration file exists.
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
                }
                _modelConfig.modelType = modelType;
                weightFileName = _modelName + ".onnx";
                _modelFilePath = CreateFilePath(_modelFolder, weightFileName);
                this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV12 weight",
                    _modelFilePath, __FILE__, __LINE__);
                if (!FileExist(_modelFilePath)) {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            else { // This is old version of model zip file
                weightFileName = _modelName + ".onnx";
                std::string onnxfile = CreateFilePath(_modelFolder, weightFileName);
                if (std::filesystem::exists(onnxfile)) {
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, className);
                    this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8 weight",
                        _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
                // Probe the class file by opening it; fall back to the embedded string list.
                std::ifstream isValidFileName(_classFilePath);
                if (!isValidFileName) {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
                else {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
            }
            // 1. Load labelMap and engine
            labelMap.clear();
            if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
            _isInitialized = loadModel(_modelFilePath, true); // Assume that GPU is available
            return _isInitialized;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Full initialization from a zip package: base-class init, YOLO-specific
    /// config parsing, class-name resolution, and ORT session creation.
    /// @param labelMap out: comma-separated class names (empty if none).
    /// @return true when the ONNX session was created successfully.
    bool YOLO12OD::Initialize(std::string licenseKey, ModelConfig modelConfig,
        const std::string& modelZipFilePath, const std::string& modelZipPassword,
        std::string& labelMap) {
        std::lock_guard lock(_mutex);
        ModelLoadingGuard mlg(_modelLoading);
        try {
            bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath,
                modelZipPassword, labelMap);
            if (!result) return false;

            // Parsing for YOLO only here
            _modelConfig = modelConfig;
            _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION;
            _modelConfig.inpHeight = 640;
            _modelConfig.inpWidth = 640;
            if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
            if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;

            // 0. Check if the configuration file exists.
            if (FileExist(_modelConfigFile)) {
                ModelType modelType;
                std::vector<int> inputShape;
                _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape);
                if (inputShape.size() == 2) {
                    if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                    if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
                }
                _modelConfig.modelType = modelType;
                _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
                this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8 weight",
                    _modelFilePath, __FILE__, __LINE__);
                if (!FileExist(_modelFilePath)) {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
            else { // This is old version of model zip file
                std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx");
                if (std::filesystem::exists(onnxfile)) {
                    // This is the yovoV5 or yolov8 format
                    _modelFilePath = onnxfile;
                    _classFilePath = CreateFilePath(_modelFolder, "classes.names");
                    this->_logger.LogDebug("YOLO12OD::Initialize. Loading YoloV8/Yolov5 weight",
                        _modelFilePath, __FILE__, __LINE__);
                }
                else {
                    this->_logger.LogError("YOLO12OD::Initialize. Model file is not exist",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
                if (FileExist(_classFilePath)) {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from file",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromFile();
                }
                else {
                    this->_logger.LogDebug("YOLO12OD::Initialize. Load classes from string",
                        _classFilePath, __FILE__, __LINE__);
                    LoadClassesFromString();
                }
            }
            // 1. Load labelMap and engine
            labelMap.clear();
            if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);
            _isInitialized = loadModel(_modelFilePath, true); // Assume that GPU is available
            return _isInitialized;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Convenience overload: runs inference tagged with the default camera id.
    std::vector<Object> YOLO12OD::RunInference(const cv::Mat& input) {
        return RunInference(input, "CustomCam");
    }

    /// Runs detection on one frame, with optional per-camera tracking and
    /// stabilization applied on top of the raw detections.
    /// @param input     BGR image (ignored if empty or smaller than 10x10).
    /// @param camera_id key used by the tracker/stabilizer state.
    /// @return detections; empty on any validation failure or error.
    std::vector<Object> YOLO12OD::RunInference(const cv::Mat& input, const std::string& camera_id) {
        if (!PreInferenceCheck("YOLO12OD::RunInference")) return {};
        try {
            // Validation
            if (!_licenseValid) {
                _logger.LogError("YOLO12OD::RunInference", "Invalid License", __FILE__, __LINE__);
                return {};
            }
            if (!_isInitialized) {
                _logger.LogError("YOLO12OD::RunInference", "Model is not initialized", __FILE__, __LINE__);
                return {};
            }
            if (input.empty() || input.cols < 10 || input.rows < 10) {
                return {};
            }
            auto ret = detect(input, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold);
            if (_trackerEnabled) {
                ret = ApplyTracking(ret, camera_id);
                if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id);
            }
            return ret;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("YOLO12OD::RunInference", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Destructor: only logs; ORT members release themselves via RAII.
    YOLO12OD::~YOLO12OD() {
        try {
            this->_logger.LogDebug("YOLO12OD::~YOLO12OD()", "Release YOLO12OD ", __FILE__, __LINE__);
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::~YOLO12OD()", e.what(), __FILE__, __LINE__);
        }
    }

    /// Explicit teardown hook; currently a no-op that always succeeds.
    bool YOLO12OD::Destroy() {
        try {
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::Destroy()", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Creates the ONNX Runtime session: picks an execution provider based on
    /// the hardware detected by EPLoader (CUDA / DirectML / OpenVINO), falls
    /// back to CPU, loads the model, and caches input/output node metadata.
    /// @param modelPath path to the .onnx file.
    /// @param useGPU    when false, skip EP attachment and run on CPU.
    /// @return true on successful session creation with a valid input shape.
    bool YOLO12OD::loadModel(const std::string& modelPath, bool useGPU) {
        try {
            const auto& ep = ANSCENTER::EPLoader::Current();
            // The ORT API table is loaded dynamically; initialize it once.
            if (Ort::Global<void>::api_ == nullptr)
                Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
            std::cout << "[YOLO12OD] EP ready: "
                      << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;

            env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "ONNX_DETECTION");
            sessionOptions = Ort::SessionOptions();
            sessionOptions.SetIntraOpNumThreads(
                std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
            sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

            // ── Log available providers ─────────────────────────────────────────
            std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
            std::cout << "Available Execution Providers:" << std::endl;
            for (const auto& p : availableProviders)
                std::cout << "  - " << p << std::endl;

            // ── Attach EP based on runtime-detected hardware ────────────────────
            if (useGPU) {
                bool attached = false;
                switch (ep.type) {
                case ANSCENTER::EngineType::NVIDIA_GPU: {
                    auto it = std::find(availableProviders.begin(), availableProviders.end(),
                        "CUDAExecutionProvider");
                    if (it == availableProviders.end()) {
                        this->_logger.LogError("YOLO12OD::loadModel",
                            "CUDAExecutionProvider not in DLL — "
                            "check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__);
                        break;
                    }
                    try {
                        OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                        Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
                        const char* keys[] = { "device_id" };
                        const char* values[] = { "0" };
                        Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1);
                        sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options);
                        Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
                        std::cout << "[YOLO12OD] CUDA EP attached." << std::endl;
                        attached = true;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
                    }
                    break;
                }
                case ANSCENTER::EngineType::AMD_GPU: {
                    auto it = std::find(availableProviders.begin(), availableProviders.end(),
                        "DmlExecutionProvider");
                    if (it == availableProviders.end()) {
                        this->_logger.LogError("YOLO12OD::loadModel",
                            "DmlExecutionProvider not in DLL — "
                            "check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__);
                        break;
                    }
                    try {
                        std::unordered_map<std::string, std::string> opts = { { "device_id", "0" } };
                        sessionOptions.AppendExecutionProvider("DML", opts);
                        std::cout << "[YOLO12OD] DirectML EP attached." << std::endl;
                        attached = true;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
                    }
                    break;
                }
                case ANSCENTER::EngineType::OPENVINO_GPU: {
                    auto it = std::find(availableProviders.begin(), availableProviders.end(),
                        "OpenVINOExecutionProvider");
                    if (it == availableProviders.end()) {
                        this->_logger.LogError("YOLO12OD::loadModel",
                            "OpenVINOExecutionProvider not in DLL — "
                            "check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__);
                        break;
                    }
                    const std::string precision = "FP16";
                    const std::string numberOfThreads = "8";
                    const std::string numberOfStreams = "8";
                    // Try device targets from most to least preferred; first success wins.
                    std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                        { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
                          {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                          {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                        { {"device_type","GPU.0"}, {"precision",precision},
                          {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                          {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                        { {"device_type","GPU.1"}, {"precision",precision},
                          {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                          {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
                        { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
                          {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                          {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
                    };
                    for (const auto& config : try_configs) {
                        try {
                            sessionOptions.AppendExecutionProvider_OpenVINO_V2(config);
                            std::cout << "[YOLO12OD] OpenVINO EP attached ("
                                      << config.at("device_type") << ")." << std::endl;
                            attached = true;
                            break;
                        }
                        catch (const Ort::Exception& e) {
                            this->_logger.LogError("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
                        }
                    }
                    if (!attached)
                        std::cerr << "[YOLO12OD] OpenVINO EP: all device configs failed." << std::endl;
                    break;
                }
                default:
                    break;
                }
                if (!attached) {
                    std::cerr << "[YOLO12OD] No GPU EP attached — running on CPU." << std::endl;
                    this->_logger.LogFatal("YOLO12OD::loadModel",
                        "GPU EP not attached. Running on CPU.", __FILE__, __LINE__);
                }
            }
            else {
                std::cout << "[YOLO12OD] Inference device: CPU (useGPU=false)" << std::endl;
            }

            // ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
            // Use std::filesystem for the narrow→wide conversion: the previous
            // byte-by-byte widening corrupted any non-ASCII path.
            const std::wstring w_modelPath = std::filesystem::path(modelPath).wstring();
            session = Ort::Session(env, w_modelPath.c_str(), sessionOptions);
#else
            session = Ort::Session(env, modelPath.c_str(), sessionOptions);
#endif
            Ort::AllocatorWithDefaultOptions allocator;

            // ── Input shape ─────────────────────────────────────────────────────
            Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
            std::vector<int64_t> inputTensorShapeVec =
                inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
            // Dynamic H/W are reported as -1 in NCHW position 2 and 3.
            isDynamicInputShape = (inputTensorShapeVec.size() >= 4) &&
                (inputTensorShapeVec[2] == -1 && inputTensorShapeVec[3] == -1);
            if (isDynamicInputShape)
                std::cout << "Dynamic input shape detected." << std::endl;
            for (auto shape : inputTensorShapeVec)
                std::cout << "Input shape: " << shape << std::endl;

            // ── Node names ──────────────────────────────────────────────────────
            // Keep the allocated strings alive; inputNames/outputNames hold raw views.
            auto input_name = session.GetInputNameAllocated(0, allocator);
            inputNodeNameAllocatedStrings.push_back(std::move(input_name));
            inputNames.push_back(inputNodeNameAllocatedStrings.back().get());
            auto output_name = session.GetOutputNameAllocated(0, allocator);
            outputNodeNameAllocatedStrings.push_back(std::move(output_name));
            outputNames.push_back(outputNodeNameAllocatedStrings.back().get());

            // ── Input image size ────────────────────────────────────────────────
            if (inputTensorShapeVec.size() >= 4) {
                inputImageShape = cv::Size(
                    static_cast<int>(inputTensorShapeVec[3]),
                    static_cast<int>(inputTensorShapeVec[2]));
            }
            else {
                this->_logger.LogFatal("YOLO12OD::loadModel",
                    "Invalid input tensor shape.", __FILE__, __LINE__);
                return false;
            }
            numInputNodes = session.GetInputCount();
            numOutputNodes = session.GetOutputCount();
            std::cout << "Model loaded successfully with " << numInputNodes
                      << " input nodes and " << numOutputNodes << " output nodes." << std::endl;
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::loadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    /// Letterboxes the image to the network input size, normalizes to [0,1],
    /// and writes the result into `blob` in planar CHW order.
    /// NOTE(review): this method locks _mutex and is called from detect() which
    /// already holds it — _mutex must be a recursive mutex; confirm in the header.
    /// @param image            input image (grayscale is promoted to 3-channel).
    /// @param blob             out: CHW float data, sized W*H*C.
    /// @param inputTensorShape in/out: NCHW shape; H/W updated after letterboxing.
    /// @return the letterboxed float image, or an empty Mat on error.
    cv::Mat YOLO12OD::preprocess(const cv::Mat& image, std::vector<float>& blob,
        std::vector<int64_t>& inputTensorShape) {
        std::lock_guard lock(_mutex);
        m_imgWidth = image.cols;
        m_imgHeight = image.rows;
        try {
            cv::Mat processedImage;
            // Handle grayscale images by converting them to 3-channel BGR
            if (image.channels() == 1) {
                cv::cvtColor(image, processedImage, cv::COLOR_GRAY2BGR);
            }
            else {
                processedImage = image.clone();
            }
            cv::Mat resizedImage;
            // Resize and pad the image using letterBox utility
            letterBox(processedImage, resizedImage, inputImageShape,
                cv::Scalar(114, 114, 114), isDynamicInputShape, false, true, 32);
            // Update input tensor shape based on resized image dimensions
            inputTensorShape[2] = resizedImage.rows;
            inputTensorShape[3] = resizedImage.cols;
            // Convert image to float and normalize to [0, 1]
            resizedImage.convertTo(resizedImage, CV_32FC3, 1 / 255.0f);
            // Allocate memory for the image blob in CHW format
            size_t totalSize = static_cast<size_t>(resizedImage.cols) *
                resizedImage.rows * resizedImage.channels();
            blob.resize(totalSize);
            // Split the image into separate channels and store in the blob.
            // NOTE(review): channels are split in OpenCV's BGR order with no
            // BGR→RGB swap — verify that the exported model was trained/exported
            // to accept BGR input.
            std::vector<cv::Mat> chw(resizedImage.channels());
            for (int i = 0; i < resizedImage.channels(); ++i) {
                chw[i] = cv::Mat(resizedImage.rows, resizedImage.cols, CV_32FC1,
                    blob.data() + static_cast<size_t>(i) * resizedImage.cols * resizedImage.rows);
            }
            cv::split(resizedImage, chw); // Split channels into the blob
            return resizedImage;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::preprocess", e.what(), __FILE__, __LINE__);
            return cv::Mat();
        }
    }

    /// Decodes the raw YOLO output tensor (assumed [1, 4+numClasses, numDetections])
    /// into scaled, NMS-filtered detections in original-image coordinates.
    /// @param confThreshold minimum class score to keep a candidate box.
    /// @param iouThreshold  IoU threshold for non-maximum suppression.
    std::vector<Object> YOLO12OD::postprocess(const cv::Size& originalImageSize,
        const cv::Size& resizedImageShape, const std::vector<Ort::Value>& outputTensors,
        float confThreshold, float iouThreshold) {
        std::lock_guard lock(_mutex);
        try {
            std::vector<Object> detections;
            // Extract raw output data from the first output tensor
            const float* rawOutput = outputTensors[0].GetTensorData<float>();
            const std::vector<int64_t> outputShape =
                outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
            // Determine the number of features and detections
            const size_t num_features = outputShape[1];
            const size_t num_detections = outputShape[2];
            // Early exit if no detections
            if (num_detections == 0) {
                return detections;
            }
            // Calculate number of classes based on output shape (4 box coords first)
            const int numClasses = static_cast<int>(num_features) - 4;
            if (numClasses <= 0) {
                // Invalid number of classes
                return detections;
            }
            // Reserve memory for efficient appending
            std::vector<BoundingBox> boxes;
            boxes.reserve(num_detections);
            std::vector<float> confs;
            confs.reserve(num_detections);
            std::vector<int> classIds;
            classIds.reserve(num_detections);
            std::vector<BoundingBox> nms_boxes;
            nms_boxes.reserve(num_detections);

            const float* ptr = rawOutput;
            for (size_t d = 0; d < num_detections; ++d) {
                // Extract bounding box coordinates (center x, center y, width, height);
                // the tensor is laid out feature-major, so stride is num_detections.
                float centerX = ptr[0 * num_detections + d];
                float centerY = ptr[1 * num_detections + d];
                float width = ptr[2 * num_detections + d];
                float height = ptr[3 * num_detections + d];
                // Find class with the highest confidence score
                int classId = -1;
                float maxScore = -FLT_MAX;
                for (int c = 0; c < numClasses; ++c) {
                    const float score = ptr[d + (4 + c) * num_detections];
                    if (score > maxScore) {
                        maxScore = score;
                        classId = c;
                    }
                }
                // Proceed only if confidence exceeds threshold
                if (maxScore > confThreshold) {
                    // Convert center coordinates to top-left (x1, y1)
                    float left = centerX - width / 2.0f;
                    float top = centerY - height / 2.0f;
                    // Scale to original image size
                    BoundingBox scaledBox = scaleCoords(
                        resizedImageShape, BoundingBox(left, top, width, height),
                        originalImageSize, true);
                    // Round coordinates for integer pixel positions
                    BoundingBox roundedBox;
                    roundedBox.x = std::round(scaledBox.x);
                    roundedBox.y = std::round(scaledBox.y);
                    roundedBox.width = std::round(scaledBox.width);
                    roundedBox.height = std::round(scaledBox.height);
                    // Offset NMS boxes per class so NMS never suppresses across classes
                    BoundingBox nmsBox = roundedBox;
                    nmsBox.x += classId * 7680;
                    nmsBox.y += classId * 7680;
                    // Add to respective containers
                    nms_boxes.emplace_back(nmsBox);
                    boxes.emplace_back(roundedBox);
                    confs.emplace_back(maxScore);
                    classIds.emplace_back(classId);
                }
            }
            // Apply Non-Maximum Suppression (NMS) to eliminate redundant detections
            std::vector<int> indices;
            NMSBoxes(nms_boxes, confs, confThreshold, iouThreshold, indices);
            // Collect filtered detections into the result vector
            const int classNameSize = static_cast<int>(_classes.size());
            detections.reserve(indices.size());
            for (const int idx : indices) {
                float conf = confs[idx];
                if (conf >= confThreshold) {
                    Object detection;
                    detection.confidence = confs[idx];
                    detection.box.x = boxes[idx].x;
                    detection.box.y = boxes[idx].y;
                    detection.box.width = boxes[idx].width;
                    detection.box.height = boxes[idx].height;
                    detection.classId = classIds[idx];
                    if (!_classes.empty()) {
                        if (detection.classId < classNameSize) {
                            detection.className = _classes[detection.classId];
                        }
                        else {
                            // Use last valid class name if out of range
                            detection.className = _classes[classNameSize - 1];
                        }
                    }
                    else {
                        detection.className = "Unknown"; // Fallback if _classes is empty
                    }
                    detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(
                        detection.box, m_imgWidth, m_imgHeight);
                    detections.push_back(detection);
                }
            }
            return detections;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::postprocess", e.what(), __FILE__, __LINE__);
            return std::vector<Object>();
        }
    }

    /// End-to-end single-image inference: preprocess → ORT Run → postprocess.
    std::vector<Object> YOLO12OD::detect(const cv::Mat& image, float confThreshold,
        float iouThreshold) {
        std::lock_guard lock(_mutex);
        try {
            // Define the shape of the input tensor (batch size, channels, height, width)
            std::vector<int64_t> inputTensorShape =
                { 1, 3, inputImageShape.height, inputImageShape.width };
            // Preprocess the image and obtain the blob as a vector
            std::vector<float> blob;
            cv::Mat preprocessedImage = preprocess(image, blob, inputTensorShape);
            // Compute the total number of elements in the input tensor
            size_t inputTensorSize = vectorProduct(inputTensorShape);
            if (blob.size() != inputTensorSize) {
                this->_logger.LogFatal("YOLO12OD::detect",
                    "Mismatch between blob size and expected tensor size", __FILE__, __LINE__);
                return {};
            }
            // Create an Ort memory info object (cached for efficiency)
            static const Ort::MemoryInfo memoryInfo =
                Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
            // Create input tensor object using the preprocessed data
            Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
                memoryInfo,
                blob.data(), // Use the vector's data directly
                inputTensorSize,
                inputTensorShape.data(),
                inputTensorShape.size());
            // Run the inference session with the input tensor and retrieve output tensors
            std::vector<Ort::Value> outputTensors = session.Run(
                Ort::RunOptions{ nullptr },
                inputNames.data(),
                &inputTensor,
                numInputNodes,
                outputNames.data(),
                numOutputNodes);
            // Determine the resized image shape based on input tensor shape
            cv::Size resizedImageShape(static_cast<int>(inputTensorShape[3]),
                static_cast<int>(inputTensorShape[2]));
            // Postprocess the output tensors to obtain detections
            std::vector<Object> detections = postprocess(image.size(), resizedImageShape,
                outputTensors, confThreshold, iouThreshold);
            return detections;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("YOLO12OD::detect", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    /// Product of all dimensions — the element count of a tensor shape.
    size_t YOLO12OD::vectorProduct(const std::vector<int64_t>& vector) {
        return std::accumulate(vector.begin(), vector.end(), 1ull, std::multiplies<size_t>());
    }

    /// Resizes `image` into `outImage` at `newShape` while preserving aspect
    /// ratio, padding the remainder with `color` (classic YOLO letterbox).
    /// @param auto_     pad only to the nearest multiple of `stride`.
    /// @param scaleFill stretch to fill without preserving aspect ratio.
    /// @param scaleUp   allow upscaling of images smaller than newShape.
    void YOLO12OD::letterBox(const cv::Mat& image, cv::Mat& outImage,
        const cv::Size& newShape, const cv::Scalar& color,
        bool auto_, bool scaleFill, bool scaleUp, int stride) {
        try {
            // Calculate the scaling ratio to fit the image within the new shape
            float ratio = std::min(static_cast<float>(newShape.height) / image.rows,
                static_cast<float>(newShape.width) / image.cols);
            // Prevent scaling up if not allowed
            if (!scaleUp) {
                ratio = std::min(ratio, 1.0f);
            }
            // Calculate new dimensions after scaling
            int newUnpadW = static_cast<int>(std::round(image.cols * ratio));
            int newUnpadH = static_cast<int>(std::round(image.rows * ratio));
            // Calculate padding needed to reach the desired shape
            int dw = newShape.width - newUnpadW;
            int dh = newShape.height - newUnpadH;
            if (auto_) {
                // Ensure padding is a multiple of stride for model compatibility.
                // NOTE(review): the /2 here combined with the padLeft=dw/2 split
                // below halves the padding twice — confirm this matches the
                // intended minimal-rectangle letterbox behavior.
                dw = (dw % stride) / 2;
                dh = (dh % stride) / 2;
            }
            else if (scaleFill) {
                // Scale to fill without maintaining aspect ratio
                newUnpadW = newShape.width;
                newUnpadH = newShape.height;
                ratio = std::min(static_cast<float>(newShape.width) / image.cols,
                    static_cast<float>(newShape.height) / image.rows);
                dw = 0;
                dh = 0;
            }
            else {
                // Evenly distribute padding on both sides.
                // Calculate separate padding for left/right and top/bottom to handle odd padding
                int padLeft = dw / 2;
                int padRight = dw - padLeft;
                int padTop = dh / 2;
                int padBottom = dh - padTop;
                // Resize the image if the new dimensions differ
                if (image.cols != newUnpadW || image.rows != newUnpadH) {
                    cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
                }
                else {
                    // Avoid unnecessary copying if dimensions are the same
                    outImage = image;
                }
                // Apply padding to reach the desired shape
                cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight,
                    cv::BORDER_CONSTANT, color);
                return; // Exit early since padding is already applied
            }
            // Resize the image if the new dimensions differ
            if (image.cols != newUnpadW || image.rows != newUnpadH) {
                cv::resize(image, outImage, cv::Size(newUnpadW, newUnpadH), 0, 0, cv::INTER_LINEAR);
            }
            else {
                // Avoid unnecessary copying if dimensions are the same
                outImage = image;
            }
            // Calculate separate padding for left/right and top/bottom to handle odd padding
            int padLeft = dw / 2;
            int padRight = dw - padLeft;
            int padTop = dh / 2;
            int padBottom = dh - padTop;
            // Apply padding to reach the desired shape
            cv::copyMakeBorder(outImage, outImage, padTop, padBottom, padLeft, padRight,
                cv::BORDER_CONSTANT, color);
        }
        catch (const std::exception& e) {
            std::cerr << "Error in letterBox: " << e.what() << std::endl;
        }
    }

    /// Maps a box from letterboxed-network coordinates back to the original
    /// image, optionally clipping to the image bounds.
    BoundingBox YOLO12OD::scaleCoords(const cv::Size& imageShape, BoundingBox coords,
        const cv::Size& imageOriginalShape, bool p_Clip) {
        BoundingBox result;
        try {
            // Gain applied during letterboxing, and the symmetric padding added.
            float gain = std::min(
                static_cast<float>(imageShape.height) / static_cast<float>(imageOriginalShape.height),
                static_cast<float>(imageShape.width) / static_cast<float>(imageOriginalShape.width));
            int padX = static_cast<int>(
                std::round((imageShape.width - imageOriginalShape.width * gain) / 2.0f));
            int padY = static_cast<int>(
                std::round((imageShape.height - imageOriginalShape.height * gain) / 2.0f));
            result.x = static_cast<int>(std::round((coords.x - padX) / gain));
            result.y = static_cast<int>(std::round((coords.y - padY) / gain));
            result.width = static_cast<int>(std::round(coords.width / gain));
            result.height = static_cast<int>(std::round(coords.height / gain));
            if (p_Clip) {
                result.x = clamp(result.x, 0, imageOriginalShape.width);
                result.y = clamp(result.y, 0, imageOriginalShape.height);
                result.width = clamp(result.width, 0, imageOriginalShape.width - result.x);
                result.height = clamp(result.height, 0, imageOriginalShape.height - result.y);
            }
            return result;
        }
        catch (const std::exception& e) {
            std::cerr << "Error in scaleCoords: " << e.what() << std::endl;
            return result;
        }
    }

    /// Greedy non-maximum suppression over score-sorted boxes.
    /// @param indices out: indices of kept boxes, highest score first.
    void YOLO12OD::NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,
        const std::vector<float>& scores, float scoreThreshold,
        float nmsThreshold, std::vector<int>& indices) {
        indices.clear();
        const size_t numBoxes = boundingBoxes.size();
        if (numBoxes == 0) {
            return;
        }
        // Step 1: Filter out boxes with scores below the threshold
        // and create a list of indices sorted by descending scores
        std::vector<int> sortedIndices;
        sortedIndices.reserve(numBoxes);
        for (size_t i = 0; i < numBoxes; ++i) {
            if (scores[i] >= scoreThreshold) {
                sortedIndices.push_back(static_cast<int>(i));
            }
        }
        // If no boxes remain after thresholding
        if (sortedIndices.empty()) {
            return;
        }
        // Sort the indices based on scores in descending order
        std::sort(sortedIndices.begin(), sortedIndices.end(),
            [&scores](int idx1, int idx2) { return scores[idx1] > scores[idx2]; });
        // Step 2: Precompute the areas of all boxes
        std::vector<float> areas(numBoxes, 0.0f);
        for (size_t i = 0; i < numBoxes; ++i) {
            areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
        }
        // Step 3: Suppression mask to mark boxes that are suppressed
        std::vector<bool> suppressed(numBoxes, false);
        // Step 4: Iterate through the sorted list and suppress boxes with high IoU
        for (size_t i = 0; i < sortedIndices.size(); ++i) {
            int currentIdx = sortedIndices[i];
            if (suppressed[currentIdx]) {
                continue;
            }
            // Select the current box as a valid detection
            indices.push_back(currentIdx);
            const BoundingBox& currentBox = boundingBoxes[currentIdx];
            const float x1_max = currentBox.x;
            const float y1_max = currentBox.y;
            const float x2_max = currentBox.x + currentBox.width;
            const float y2_max = currentBox.y + currentBox.height;
            const float area_current = areas[currentIdx];
            // Compare IoU of the current box with the rest
            for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
                int compareIdx = sortedIndices[j];
                if (suppressed[compareIdx]) {
                    continue;
                }
                const BoundingBox& compareBox = boundingBoxes[compareIdx];
                const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
                const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
                const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
                const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
                const float interWidth = x2 - x1;
                const float interHeight = y2 - y1;
                if (interWidth <= 0 || interHeight <= 0) {
                    continue;
                }
                const float intersection = interWidth * interHeight;
                const float unionArea = area_current + areas[compareIdx] - intersection;
                const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
                if (iou > nmsThreshold) {
                    suppressed[compareIdx] = true;
                }
            }
        }
    }
}