#include "ANSYOLOOD.h"
#include "Utility.h"
#include "EPLoader.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()

#ifdef USEONNXOV
//#include
#endif

namespace ANSCENTER {

    // Class-agnostic non-maximum suppression over axis-aligned boxes.
    // Keeps the highest-scoring boxes and suppresses any remaining box whose
    // IoU with an already-kept box exceeds nmsThreshold. Indices of the kept
    // boxes (into boundingBoxes) are written to `indices` in descending-score
    // order. Boxes scoring below scoreThreshold are dropped up front.
    void NMSBoxes(const std::vector<BoundingBox>& boundingBoxes,
                  const std::vector<float>& scores,
                  float scoreThreshold,
                  float nmsThreshold,
                  std::vector<int>& indices)
    {
        indices.clear();
        const size_t numBoxes = boundingBoxes.size();
        if (numBoxes == 0) {
            return;
        }

        // Step 1: Filter out boxes with scores below the threshold
        // and create a list of candidate indices.
        std::vector<int> sortedIndices;
        sortedIndices.reserve(numBoxes);
        for (size_t i = 0; i < numBoxes; ++i) {
            if (scores[i] >= scoreThreshold) {
                sortedIndices.push_back(static_cast<int>(i));
            }
        }

        // If no boxes remain after thresholding
        if (sortedIndices.empty()) {
            return;
        }

        // Sort the indices based on scores in descending order
        std::sort(sortedIndices.begin(), sortedIndices.end(),
                  [&scores](int idx1, int idx2) { return scores[idx1] > scores[idx2]; });

        // Step 2: Precompute the areas of all boxes
        std::vector<float> areas(numBoxes, 0.0f);
        for (size_t i = 0; i < numBoxes; ++i) {
            areas[i] = boundingBoxes[i].width * boundingBoxes[i].height;
        }

        // Step 3: Suppression mask to mark boxes that are suppressed
        std::vector<bool> suppressed(numBoxes, false);

        // Step 4: Greedy pass — accept in score order, suppress high-IoU overlaps
        for (size_t i = 0; i < sortedIndices.size(); ++i) {
            const int currentIdx = sortedIndices[i];
            if (suppressed[currentIdx]) {
                continue;
            }

            // Select the current box as a valid detection
            indices.push_back(currentIdx);

            const BoundingBox& currentBox = boundingBoxes[currentIdx];
            const float x1_max = currentBox.x;
            const float y1_max = currentBox.y;
            const float x2_max = currentBox.x + currentBox.width;
            const float y2_max = currentBox.y + currentBox.height;
            const float area_current = areas[currentIdx];

            // Compare IoU of the current box with the rest
            for (size_t j = i + 1; j < sortedIndices.size(); ++j) {
                const int compareIdx = sortedIndices[j];
                if (suppressed[compareIdx]) {
                    continue;
                }
                const BoundingBox& compareBox = boundingBoxes[compareIdx];
                const float x1 = std::max(x1_max, static_cast<float>(compareBox.x));
                const float y1 = std::max(y1_max, static_cast<float>(compareBox.y));
                const float x2 = std::min(x2_max, static_cast<float>(compareBox.x + compareBox.width));
                const float y2 = std::min(y2_max, static_cast<float>(compareBox.y + compareBox.height));
                const float interWidth = x2 - x1;
                const float interHeight = y2 - y1;
                if (interWidth <= 0 || interHeight <= 0) {
                    continue;
                }
                const float intersection = interWidth * interHeight;
                const float unionArea = area_current + areas[compareIdx] - intersection;
                const float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f;
                if (iou > nmsThreshold) {
                    suppressed[compareIdx] = true;
                }
            }
        }
    }
    // End of YOLO V8

    // Utility functions

    // Scans the per-class scores of a YOLOv5-style output row
    // (layout: x, y, w, h, objectness, class scores...) starting at `it`
    // and returns the best class confidence / class id via the out-params.
    // NOTE(review): bestClassId is initialised to 5 (not 0 or -1); if every
    // class score is <= 0 the caller receives classId 5 — looks like a latent
    // bug, confirm intended sentinel before changing.
    void YOLOOD::getBestClassInfo(std::vector<float>::iterator it, const int& numClasses,
                                  float& bestConf, int& bestClassId)
    {
        try {
            // first 5 element are box and obj confidence
            bestClassId = 5;
            bestConf = 0;
            for (int i = 5; i < numClasses + 5; i++) {
                if (it[i] > bestConf) {
                    bestConf = it[i];
                    bestClassId = i - 5;
                }
            }
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("YOLOOD::getBestClassInfo", e.what(), __FILE__, __LINE__);
        }
    }

    // Product of all tensor-shape dimensions (total element count).
    // Returns 0 for an empty shape.
    size_t YOLOOD::vectorProduct(const std::vector<int64_t>& vector)
    {
        try {
            if (vector.empty()) return 0;
            size_t product = 1;
            for (const auto& element : vector) product *= element;
            return product;
        }
        catch (std::exception& e) {
            this->_logger.LogFatal("YOLOOD::vectorProduct", e.what(), __FILE__, __LINE__);
            return 0;
        }
    }

    // Letterbox resize: scale `image` to fit newShape while preserving aspect
    // ratio, then pad the borders with `color`. When auto_ is set the padding
    // is reduced modulo `stride` (stride-aligned output); scaleFill stretches
    // to fill instead; scaleUp=false never enlarges the image.
    void YOLOOD::letterbox(const cv::Mat& image, cv::Mat& outImage,
                           const cv::Size& newShape, const cv::Scalar& color,
                           bool auto_, bool scaleFill, bool scaleUp, int stride)
    {
        try {
            cv::Size shape = image.size();
            float r = std::min((float)newShape.height / (float)shape.height,
                               (float)newShape.width / (float)shape.width);
            if (!scaleUp) r = std::min(r, 1.0f);
            int newUnpad[2]{ (int)std::round((float)shape.width * r),
                             (int)std::round((float)shape.height *
r) }; auto dw = (float)(newShape.width - newUnpad[0]); auto dh = (float)(newShape.height - newUnpad[1]); if (auto_) { dw = (float)((int)dw % stride); dh = (float)((int)dh % stride); } else if (scaleFill) { dw = 0.0f; dh = 0.0f; newUnpad[0] = newShape.width; newUnpad[1] = newShape.height; } dw /= 2.0f; dh /= 2.0f; // Fix: Use OR instead of AND, and handle the else case if (shape.width != newUnpad[0] || shape.height != newUnpad[1]) { cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1])); } else { outImage = image.clone(); } // Fix: Better padding calculation int top = (int)dh; int bottom = newShape.height - newUnpad[1] - top; int left = (int)dw; int right = newShape.width - newUnpad[0] - left; cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::letterbox", e.what(), __FILE__, __LINE__); } } void YOLOOD::scaleCoords(const cv::Size& imageShape, cv::Rect& coords, const cv::Size& imageOriginalShape) { try { float gain = std::min((float)imageShape.height / (float)imageOriginalShape.height, (float)imageShape.width / (float)imageOriginalShape.width); int pad[2] = { (int)(((float)imageShape.width - (float)imageOriginalShape.width * gain) / 2.0f), (int)(((float)imageShape.height - (float)imageOriginalShape.height * gain) / 2.0f) }; coords.x = (int)std::round(((float)(coords.x - pad[0]) / gain)); coords.y = (int)std::round(((float)(coords.y - pad[1]) / gain)); coords.width = (int)std::round(((float)coords.width / gain)); coords.height = (int)std::round(((float)coords.height / gain)); } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::scaleCoords", e.what(), __FILE__, __LINE__); } } bool YOLOOD::loadYoloModel(const std::string& modelPath, const bool& isGPU, const cv::Size& inputSize) { try { const auto& ep = ANSCENTER::EPLoader::Current(); if (Ort::Global::api_ == nullptr) Ort::InitApi(static_cast(EPLoader::GetOrtApiRaw())); std::cout << "[YOLOOD] EP 
ready: " << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl; env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "ONNX_DETECTION"); sessionOptions = Ort::SessionOptions(); sessionOptions.SetIntraOpNumThreads( std::min(6, static_cast(std::thread::hardware_concurrency()))); sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); // ── Log available providers ───────────────────────────────────────── std::vector availableProviders = Ort::GetAvailableProviders(); std::cout << "Available Execution Providers:" << std::endl; for (const auto& p : availableProviders) std::cout << " - " << p << std::endl; // ── Attach EP based on runtime-detected hardware ──────────────────── // No #ifdef, no isGPU flag — EPLoader owns the decision if (isGPU) { bool attached = false; switch (ep.type) { case ANSCENTER::EngineType::NVIDIA_GPU: { auto cuda_it = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider"); if (cuda_it == availableProviders.end()) { this->_logger.LogError("YOLOOD::loadYoloModel. CUDA EP failed:", "CUDAExecutionProvider not in DLL — " "check ep/cuda/ has the CUDA ORT build.", __FILE__, __LINE__); break; } try { OrtCUDAProviderOptionsV2* cuda_options = nullptr; Ort::GetApi().CreateCUDAProviderOptions(&cuda_options); const char* keys[] = { "device_id" }; const char* values[] = { "0" }; Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys, values, 1); sessionOptions.AppendExecutionProvider_CUDA_V2(*cuda_options); Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); std::cout << "[YOLOOD] CUDA EP attached." << std::endl; attached = true; } catch (const Ort::Exception& e) { this->_logger.LogError("YOLOOD::loadYoloModel. 
CUDA EP failed:", e.what(), __FILE__, __LINE__); } break; } case ANSCENTER::EngineType::AMD_GPU: { auto it = std::find(availableProviders.begin(), availableProviders.end(), "DmlExecutionProvider"); if (it == availableProviders.end()) { this->_logger.LogError("YOLOOD::loadYoloModel. DirectML EP failed:", "DmlExecutionProvider not in DLL — " "check ep/directml/ has the DirectML ORT build.", __FILE__, __LINE__); break; } try { std::unordered_map opts = { { "device_id", "0" } }; sessionOptions.AppendExecutionProvider("DML", opts); std::cout << "[YOLOOD] DirectML EP attached." << std::endl; attached = true; } catch (const Ort::Exception& e) { this->_logger.LogError("YOLOOD::loadYoloModel. DirectML EP failed:", e.what(), __FILE__, __LINE__); } break; } case ANSCENTER::EngineType::OPENVINO_GPU: { auto it = std::find(availableProviders.begin(), availableProviders.end(), "OpenVINOExecutionProvider"); if (it == availableProviders.end()) { this->_logger.LogError("YOLOOD::loadYoloModel", "OpenVINOExecutionProvider not in DLL — " "check ep/openvino/ has the OpenVINO ORT build.", __FILE__, __LINE__); break; } // Try device configs in priority order const std::string precision = "FP16"; const std::string numberOfThreads = "8"; const std::string numberOfStreams = "8"; std::vector> try_configs = { { {"device_type","AUTO:NPU,GPU"}, {"precision",precision}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, { {"device_type","GPU.0"}, {"precision",precision}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, { {"device_type","GPU.1"}, {"precision",precision}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, { {"device_type","AUTO:GPU,CPU"}, {"precision",precision}, {"num_of_threads",numberOfThreads}, 
{"num_streams",numberOfStreams}, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} } }; for (const auto& config : try_configs) { try { sessionOptions.AppendExecutionProvider_OpenVINO_V2(config); std::cout << "[YOLOOD] OpenVINO EP attached (" << config.at("device_type") << ")." << std::endl; attached = true; break; } catch (const Ort::Exception& e) { // try next config this->_logger.LogError("YOLOOD::loadYoloModel", e.what(), __FILE__, __LINE__); } } if (!attached) std::cerr << "[YOLOOD] OpenVINO EP: all device configs failed." << std::endl; break; } default: break; } if (!attached) { std::cerr << "[YOLOOD] No GPU EP attached — running on CPU." << std::endl; this->_logger.LogFatal("YOLOOD::loadYoloModel","GPU EP not attached. Running on CPU.", __FILE__, __LINE__); } } else { std::cout << "[YOLOOD] Inference device: CPU (isGPU=false)" << std::endl; } // ── Load model ────────────────────────────────────────────────────── std::wstring w_modelPath = String2WString(modelPath.c_str()); session = Ort::Session(env, w_modelPath.c_str(), sessionOptions); Ort::AllocatorWithDefaultOptions allocator; // ── Input shape ───────────────────────────────────────────────────── Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0); std::vector inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape(); this->isDynamicInputShape = (inputTensorShape.size() >= 4 && inputTensorShape[2] == -1 && inputTensorShape[3] == -1); if (this->isDynamicInputShape) std::cout << "Dynamic input shape detected." 
<< std::endl; for (auto shape : inputTensorShape) std::cout << "Input shape: " << shape << std::endl; // ── Node names ────────────────────────────────────────────────────── size_t inputNodesNum = session.GetInputCount(); for (size_t i = 0; i < inputNodesNum; i++) { Ort::AllocatedStringPtr name = session.GetInputNameAllocated(i, allocator); char* buf = new char[50]; strcpy(buf, name.get()); inputNodeNames.push_back(buf); } size_t outputNodesNum = session.GetOutputCount(); for (size_t i = 0; i < outputNodesNum; i++) { Ort::AllocatedStringPtr name = session.GetOutputNameAllocated(i, allocator); char* buf = new char[50]; strcpy(buf, name.get()); outputNodeNames.push_back(buf); } this->inputImageShape = cv::Size2f(inputSize); std::cout << "Input image shape: " << inputImageShape << std::endl; return true; } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::loadYoloModel", e.what(), __FILE__, __LINE__); return false; } } void YOLOOD::preprocessing(const cv::Mat& image,std::vector& blob,std::vector& inputTensorShape) { m_imgWidth = image.cols; m_imgHeight = image.rows; try { // Convert grayscale to BGR if needed cv::Mat processedImage; if (image.channels() == 1) { cv::cvtColor(image, processedImage, cv::COLOR_GRAY2BGR); } else { processedImage = image; // Shallow copy - safe } // Convert BGR to RGB cv::Mat rgbImage; cv::cvtColor(processedImage, rgbImage, cv::COLOR_BGR2RGB); // Resize with letterbox (SEPARATE output buffer!) 
cv::Mat resizedImage; letterbox(rgbImage, resizedImage, inputImageShape, cv::Scalar(114, 114, 114), isDynamicInputShape, false, true, 32); if (resizedImage.empty()) { _logger.LogError("YOLOOD::preprocessing", "Letterbox operation failed", __FILE__, __LINE__); return; } // Update tensor shape inputTensorShape = { 1, 3, resizedImage.rows, resizedImage.cols }; // Normalize to [0, 1] (FIX: Use 1.0f for float division) cv::Mat floatImage; resizedImage.convertTo(floatImage, CV_32FC3, 1.0f / 255.0f); // Convert HWC to CHW const size_t channelSize = floatImage.rows * floatImage.cols; blob.resize(channelSize * 3); std::vector channels(3); cv::split(floatImage, channels); for (int c = 0; c < 3; ++c) { std::memcpy(blob.data() + c * channelSize, channels[c].data, channelSize * sizeof(float)); } } catch (const std::exception& e) { _logger.LogFatal("YOLOOD::preprocessing", e.what(), __FILE__, __LINE__); } } cv::Mat YOLOOD::preprocessv11(const cv::Mat& image,std::vector& blob,std::vector& inputTensorShape) { try { // Validation if (image.empty()) { _logger.LogError("YOLOOD::preprocessv11", "Empty image provided", __FILE__, __LINE__); return cv::Mat(); } // Convert grayscale to BGR if needed cv::Mat processedImage; if (image.channels() == 1) { cv::cvtColor(image, processedImage, cv::COLOR_GRAY2BGR); } else { processedImage = image; // Shallow copy - safe! 
} // Resize with letterbox cv::Mat resizedImage; letterbox(processedImage, resizedImage, inputImageShape, cv::Scalar(114, 114, 114), isDynamicInputShape, false, true, 32); if (resizedImage.empty()) { _logger.LogError("YOLOOD::preprocessv11", "Letterbox operation failed", __FILE__, __LINE__); return cv::Mat(); } // Convert BGR to RGB cv::Mat rgbImage; cv::cvtColor(resizedImage, rgbImage, cv::COLOR_BGR2RGB); // Normalize to [0, 1] cv::Mat floatImage; rgbImage.convertTo(floatImage, CV_32FC3, 1.0f / 255.0f); // Update input tensor shape const int height = floatImage.rows; const int width = floatImage.cols; const int channels = floatImage.channels(); inputTensorShape = { 1, channels, height, width }; // Allocate blob const size_t totalSize = height * width * channels; blob.resize(totalSize); // Convert HWC to CHW efficiently std::vector rgbChannels(channels); cv::split(floatImage, rgbChannels); // Copy channels to blob in CHW order const size_t channelSize = height * width; for (int c = 0; c < channels; ++c) { std::memcpy(blob.data() + c * channelSize, rgbChannels[c].data, channelSize * sizeof(float)); } return floatImage; // Return for visualization if needed } catch (const std::exception& e) { _logger.LogFatal("YOLOOD::preprocessv11", e.what(), __FILE__, __LINE__); return cv::Mat(); } } std::vector YOLOOD::postprocessing(const cv::Size& resizedImageShape,const cv::Size& originalImageShape,std::vector& outputTensors,const float& confThreshold,const float& iouThreshold) { try { // Get raw output pointer (NO COPY!) 
const float* rawOutput = outputTensors[0].GetTensorData(); std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const int numClasses = static_cast(outputShape[2]) - 5; const int numDetections = static_cast(outputShape[1]); const int stride = static_cast(outputShape[2]); // Pre-allocate (assume ~10% detections pass threshold) const size_t estimatedDetections = std::min(numDetections / 10, 1000); std::vector boxes; std::vector confs; std::vector classIds; boxes.reserve(estimatedDetections); confs.reserve(estimatedDetections); classIds.reserve(estimatedDetections); // Parse detections using raw pointer (NO COPY!) for (int i = 0; i < numDetections; ++i) { const float* detection = rawOutput + i * stride; const float objectness = detection[4]; if (objectness <= confThreshold) { continue; } // Get best class float maxClassScore = 0.0f; int bestClassId = 0; for (int c = 0; c < numClasses; ++c) { const float classScore = detection[5 + c]; if (classScore > maxClassScore) { maxClassScore = classScore; bestClassId = c; } } const float confidence = objectness * maxClassScore; if (confidence <= confThreshold) { continue; } // Parse bounding box (center format -> corner format) const int centerX = static_cast(detection[0]); const int centerY = static_cast(detection[1]); const int width = static_cast(detection[2]); const int height = static_cast(detection[3]); const int left = centerX - width / 2; const int top = centerY - height / 2; boxes.emplace_back(left, top, width, height); confs.push_back(confidence); classIds.push_back(bestClassId); } // NMS std::vector indices; cv::dnn::NMSBoxes(boxes, confs, confThreshold, iouThreshold, indices); // Build final detections std::vector detections; detections.reserve(indices.size()); const int numClasses_safe = static_cast(_classes.size()); for (int idx : indices) { Object det; // Scale coordinates det.box = boxes[idx]; scaleCoords(resizedImageShape, det.box, originalImageShape); det.confidence = confs[idx]; 
det.classId = classIds[idx]; // Set class name safely if (!_classes.empty()) { det.className = (det.classId < numClasses_safe) ? _classes[det.classId] : _classes[numClasses_safe - 1]; } else { det.className = "Unknown"; } detections.push_back(std::move(det)); } return detections; } catch (const std::exception& e) { _logger.LogFatal("YOLOOD::postprocessing", e.what(), __FILE__, __LINE__); return {}; } } BoundingBox YOLOOD::scaleCoordsv11(const cv::Size& imageShape,BoundingBox coords,const cv::Size& imageOriginalShape,bool p_Clip) { // Calculate scale factor const float gain = std::min( static_cast(imageShape.width) / imageOriginalShape.width, static_cast(imageShape.height) / imageOriginalShape.height ); const float invGain = 1.0f / gain; // Calculate padding (once per image, but recalculated here) const float padX = (imageShape.width - imageOriginalShape.width * gain) * 0.5f; const float padY = (imageShape.height - imageOriginalShape.height * gain) * 0.5f; // Scale coordinates BoundingBox result; result.x = static_cast((coords.x - padX) * invGain + 0.5f); result.y = static_cast((coords.y - padY) * invGain + 0.5f); result.width = static_cast(coords.width * invGain + 0.5f); result.height = static_cast(coords.height * invGain + 0.5f); // Clamp to image bounds if requested if (p_Clip) { result.x = clamp(result.x, 0, imageOriginalShape.width); result.y = clamp(result.y, 0, imageOriginalShape.height); result.width = clamp(result.width, 0, imageOriginalShape.width - result.x); result.height = clamp(result.height, 0, imageOriginalShape.height - result.y); } return result; } std::vector YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,const std::vector& outputTensors,float confThreshold,float iouThreshold) { try { // Get raw output const float* rawOutput = outputTensors[0].GetTensorData(); const std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const size_t numFeatures = outputShape[1]; const size_t 
numDetections = outputShape[2]; const int numClasses = static_cast(numFeatures) - 4; // Early exit if (numDetections == 0 || numClasses <= 0) { return {}; } // Calculate scaling parameters ONCE const float gain = std::min( static_cast(resizedImageShape.width) / originalImageSize.width, static_cast(resizedImageShape.height) / originalImageSize.height ); const float invGain = 1.0f / gain; const float padX = (resizedImageShape.width - originalImageSize.width * gain) * 0.5f; const float padY = (resizedImageShape.height - originalImageSize.height * gain) * 0.5f; const int maxX = originalImageSize.width; const int maxY = originalImageSize.height; // Pre-allocate (estimate ~10% pass threshold) const size_t estimatedCount = std::min(numDetections / 10, size_t(1000)); std::vector boxes; std::vector nmsBoxes; std::vector confs; std::vector classIds; boxes.reserve(estimatedCount); nmsBoxes.reserve(estimatedCount); confs.reserve(estimatedCount); classIds.reserve(estimatedCount); // Parse detections for (size_t d = 0; d < numDetections; ++d) { // Extract coordinates const float centerX = rawOutput[0 * numDetections + d]; const float centerY = rawOutput[1 * numDetections + d]; const float width = rawOutput[2 * numDetections + d]; const float height = rawOutput[3 * numDetections + d]; // Find best class int classId = 0; float maxScore = rawOutput[4 * numDetections + d]; for (int c = 1; c < numClasses; ++c) { const float score = rawOutput[(4 + c) * numDetections + d]; if (score > maxScore) { maxScore = score; classId = c; } } // Threshold check if (maxScore <= confThreshold) { continue; } // Convert to corner format and scale inline const float left = (centerX - width * 0.5f - padX) * invGain; const float top = (centerY - height * 0.5f - padY) * invGain; const float scaledWidth = width * invGain; const float scaledHeight = height * invGain; // Round and clamp to original image bounds const int x = clamp(static_cast(left + 0.5f), 0, maxX); const int y = clamp(static_cast(top + 
0.5f), 0, maxY); const int w = clamp(static_cast(scaledWidth + 0.5f), 0, maxX - x); const int h = clamp(static_cast(scaledHeight + 0.5f), 0, maxY - y); // Store scaled box boxes.emplace_back(x, y, w, h); // Create NMS box with class offset (class-specific NMS) const int nmsOffset = classId * 7680; nmsBoxes.emplace_back(x + nmsOffset, y + nmsOffset, w, h); confs.push_back(maxScore); classIds.push_back(classId); } // Apply class-specific NMS std::vector indices; cv::dnn::NMSBoxes(nmsBoxes, confs, confThreshold, iouThreshold, indices); // Build final detections std::vector detections; detections.reserve(indices.size()); const int numClassNames = static_cast(_classes.size()); for (int idx : indices) { Object det; // Use original scaled box (not NMS box) det.box.x = boxes[idx].x; det.box.y = boxes[idx].y; det.box.width = boxes[idx].width; det.box.height = boxes[idx].height; det.confidence = confs[idx]; det.classId = classIds[idx]; // Set class name if (!_classes.empty() && det.classId < numClassNames) { det.className = _classes[det.classId]; } else { det.className = _classes.empty() ? 
"Unknown" : _classes[numClassNames - 1]; } // Generate normalized polygon det.polygon = ANSUtilityHelper::RectToNormalizedPolygon( det.box, m_imgWidth, m_imgHeight ); detections.emplace_back(std::move(det)); } return detections; } catch (const std::exception& e) { _logger.LogFatal("YOLOOD::postprocessv11", e.what(), __FILE__, __LINE__); return {}; } } std::vector YOLOOD::detect(cv::Mat& image, const float& confThreshold, const float& iouThreshold) { if (image.empty()) { this->_logger.LogFatal("YOLOOD::detect", "Error: Empty image provided to detector", __FILE__, __LINE__); return {}; } try { // Step 1: Preprocessing - reuse member buffer to avoid repeated allocations std::vector inputTensorShape; inputTensorShape.reserve(4); this->_blob.clear(); // Clear but keep capacity this->preprocessing(image, this->_blob, inputTensorShape); // Step 2: Validate input tensor shape if (inputTensorShape.size() != 4 || inputTensorShape[2] <= 0 || inputTensorShape[3] <= 0) { this->_logger.LogFatal("YOLOOD::detect", "Invalid input tensor shape!", __FILE__, __LINE__); return {}; } const size_t inputTensorSize = vectorProduct(inputTensorShape); if (this->_blob.size() != inputTensorSize) { this->_logger.LogFatal("YOLOOD::detect", "Mismatch in input tensor size!", __FILE__, __LINE__); return {}; } // Step 3: Create ONNX Tensor - static MemoryInfo avoids repeated creation static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu( OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault ); std::array inputTensors = { Ort::Value::CreateTensor( memoryInfo, this->_blob.data(), this->_blob.size(), inputTensorShape.data(), inputTensorShape.size() ) }; // Step 4: Run ONNX Inference std::vector outputTensors = this->session.Run( Ort::RunOptions{ nullptr }, inputNodeNames.data(), inputTensors.data(), inputNodeNames.size(), outputNodeNames.data(), outputNodeNames.size() ); // Step 5: Postprocessing - return directly for RVO const cv::Size resizedShape( 
static_cast(inputTensorShape[3]), static_cast(inputTensorShape[2]) ); return this->postprocessing(resizedShape, image.size(), outputTensors, confThreshold, iouThreshold); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::detect", e.what(), __FILE__, __LINE__); return {}; } } std::vector YOLOOD::detectv11(const cv::Mat& image, float confThreshold, float iouThreshold) { if (image.empty()) { this->_logger.LogFatal("YOLOOD::detectv11", "Error: Empty image provided to detector", __FILE__, __LINE__); return {}; } try { // Define input tensor shape (batch, channels, height, width) std::vector inputTensorShape = { 1, 3, inputImageShape.height, inputImageShape.width }; // Preprocess using reusable member buffer this->_blob.clear(); cv::Mat preprocessedImage = preprocessv11(image, this->_blob, inputTensorShape); // Validate blob size const size_t inputTensorSize = vectorProduct(inputTensorShape); if (this->_blob.size() != inputTensorSize) { this->_logger.LogFatal("YOLOOD::detectv11", "Error: Blob size mismatch with expected input tensor size", __FILE__, __LINE__); return {}; } // Static memory info - created once, reused across calls static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu( OrtArenaAllocator, OrtMemTypeDefault ); // Create input tensor Ort::Value inputTensor = Ort::Value::CreateTensor( memoryInfo, this->_blob.data(), inputTensorSize, inputTensorShape.data(), inputTensorShape.size() ); // Run inference std::vector outputTensors = this->session.Run( Ort::RunOptions{ nullptr }, inputNodeNames.data(), &inputTensor, inputNodeNames.size(), outputNodeNames.data(), outputNodeNames.size() ); // Postprocess and return directly for RVO const cv::Size resizedImageShape( static_cast(inputTensorShape[3]), static_cast(inputTensorShape[2]) ); return postprocessv11(image.size(), resizedImageShape, outputTensors, confThreshold, iouThreshold); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::detectv11", e.what(), __FILE__, 
__LINE__); return {}; } } // YOLOOD bool YOLOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) { std::lock_guard lock(_mutex); try { if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) { return false; } if (FileExist(_modelFilePath)) { optimizedModelFolder = GetParentFolder(_modelFilePath); this->_logger.LogDebug("YOLOOD::OptimizeModel", "This model is optimized. No need other optimization.", __FILE__, __LINE__); return true; } else { optimizedModelFolder = ""; this->_logger.LogFatal("YOLOOD::OptimizeModel", "This model is not exist. Please check the model path again.", __FILE__, __LINE__); return false; } } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::OptimizeModel", e.what(), __FILE__, __LINE__); return false; } } bool YOLOOD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) { std::lock_guard lock(_mutex); try { bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword); if (!result) return false; _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION; _modelConfig.inpHeight = 640; _modelConfig.inpWidth = 640; if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5; if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5; _letterBoxForSquare = true; // 0. Check if the configuration file exist if (FileExist(_modelConfigFile)) { ModelType modelType; std::vector inputShape; _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape); if (inputShape.size() == 2) { if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0]; if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1]; } _modelConfig.modelType = modelType; if (_modelConfig.modelType == ModelType::YOLOV4) { _isDarkNet = true; _modelFilePath = CreateFilePath(_modelFolder, "train_last.weights"); this->_logger.LogDebug("YOLOOD::Initialize. 
Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV5) { _isDarkNet = false; _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV8) { _isDarkNet = false; _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__); return false; } if (!FileExist(_modelFilePath)) { this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } else {// This is old version of model zip file std::string weightfile = CreateFilePath(_modelFolder, "train_last.weights"); if (FileExist(weightfile)) { // This is the darknet Yolov4 _modelFilePath = weightfile; _classFilePath = CreateFilePath(_modelFolder, "classes.names"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); _modelConfig.modelType = ModelType::YOLOV4; _isDarkNet = true; _modelConfig.inpHeight = 416; _modelConfig.inpWidth = 416; } else { _isDarkNet = false; std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx"); if (std::filesystem::exists(onnxfile)) { _modelFilePath = onnxfile; _classFilePath = CreateFilePath(_modelFolder, "classes.names"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } //std::ifstream isValidFileName(_classFilePath); if (FileExist(_classFilePath)) { this->_logger.LogDebug("YOLOOD::Initialize. 
Load classes from file", _classFilePath, __FILE__, __LINE__); LoadClassesFromFile(); } else { this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__); LoadClassesFromString(); } } _isInitialized = true; return true; } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::LoadModel", e.what(), __FILE__, __LINE__); return false; } } bool YOLOOD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) { std::lock_guard lock(_mutex); try { bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap); if (!result) return false; std::string _modelName = modelName; if (_modelName.empty()) { _modelName = "train_last"; } std::string weightFileName = _modelName + ".weights"; _modelConfig = modelConfig; _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION; _modelConfig.inpHeight = 640; _modelConfig.inpWidth = 640; if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5; if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5; _letterBoxForSquare = true; // 0. Check if the configuration file exist if (FileExist(_modelConfigFile)) { ModelType modelType; std::vector inputShape; _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape); if (inputShape.size() == 2) { if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0]; if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1]; } _modelConfig.modelType = modelType; if (_modelConfig.modelType == ModelType::YOLOV4) { _isDarkNet = true; _modelFilePath = CreateFilePath(_modelFolder, weightFileName); this->_logger.LogDebug("YOLOOD::Initialize. 
Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV5) { _isDarkNet = false; weightFileName = _modelName + ".onnx"; _modelFilePath = CreateFilePath(_modelFolder, weightFileName); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV8) { _isDarkNet = false; weightFileName = _modelName + ".onnx"; _modelFilePath = CreateFilePath(_modelFolder, weightFileName); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__); return false; } if (!FileExist(_modelFilePath)) { this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } else {// This is old version of model zip file weightFileName = _modelName + ".weights"; std::string weightfile = CreateFilePath(_modelFolder, weightFileName); if (FileExist(weightfile)) { // This is the darknet Yolov4 _modelFilePath = weightfile; _classFilePath = CreateFilePath(_modelFolder, className); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); _modelConfig.modelType = ModelType::YOLOV4; _isDarkNet = true; _modelConfig.inpHeight = 416; _modelConfig.inpWidth = 416; } else { _isDarkNet = false; weightFileName = _modelName + ".onnx"; std::string onnxfile = CreateFilePath(_modelFolder, weightFileName); if (std::filesystem::exists(onnxfile)) { // This is the yovoV5 or yolov8 format _modelFilePath = onnxfile; _classFilePath = CreateFilePath(_modelFolder, className); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. 
Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } std::ifstream isValidFileName(_classFilePath); if (!isValidFileName) { this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__); LoadClassesFromString(); } else { this->_logger.LogDebug("YOLOOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__); LoadClassesFromFile(); } } // 1. Load labelMap and engine labelMap.clear(); if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes); if (_isDarkNet) { LoadDarknetNetwork(); } else { #ifdef USEOPENCVDNN if (_modelConfig.modelType == ModelType::YOLOV8) LoadOnnxNetwork(); else loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available #else loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available #endif } _isInitialized = true; return true; } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::LoadModel", e.what(), __FILE__, __LINE__); return false; } } bool YOLOOD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) { std::lock_guard lock(_mutex); try { bool result = ANSODBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap); if (!result) return false; // Parsing for YOLO only here _modelConfig = modelConfig; _modelConfig.detectionType = ANSCENTER::DetectionType::DETECTION; _modelConfig.inpHeight = 640; _modelConfig.inpWidth = 640; if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5; if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5; _letterBoxForSquare = true; // 0. 
Check if the configuration file exist if (FileExist(_modelConfigFile)) { ModelType modelType; std::vector inputShape; _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile, modelType, inputShape); if (inputShape.size() == 2) { if (inputShape[0] > 0)_modelConfig.inpHeight = inputShape[0]; if (inputShape[1] > 0)_modelConfig.inpWidth = inputShape[1]; } _modelConfig.modelType = modelType; if (_modelConfig.modelType == ModelType::YOLOV4) { _isDarkNet = true; _modelFilePath = CreateFilePath(_modelFolder, "train_last.weights"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV5) { _isDarkNet = false; _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV5 weight", _modelFilePath, __FILE__, __LINE__); } else if (_modelConfig.modelType == ModelType::YOLOV8) { _isDarkNet = false; _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. Model Type is not valid. Please check the model config file", _modelConfigFile, __FILE__, __LINE__); return false; } if (!FileExist(_modelFilePath)) { this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } else {// This is old version of model zip file std::string weightfile = CreateFilePath(_modelFolder, "train_last.weights"); if (FileExist(weightfile)) { // This is the darknet Yolov4 _modelFilePath = weightfile; _classFilePath = CreateFilePath(_modelFolder, "classes.names"); this->_logger.LogDebug("YOLOOD::Initialize. 
Loading YoloV4 weight", _modelFilePath, __FILE__, __LINE__); _modelConfig.modelType = ModelType::YOLOV4; _isDarkNet = true; _modelConfig.inpHeight = 416; _modelConfig.inpWidth = 416; } else { _isDarkNet = false; std::string onnxfile = CreateFilePath(_modelFolder, "train_last.onnx"); if (std::filesystem::exists(onnxfile)) { _modelFilePath = onnxfile; _classFilePath = CreateFilePath(_modelFolder, "classes.names"); this->_logger.LogDebug("YOLOOD::Initialize. Loading YoloV8/Yolov5 weight", _modelFilePath, __FILE__, __LINE__); } else { this->_logger.LogError("YOLOOD::Initialize. Model file is not exist", _modelFilePath, __FILE__, __LINE__); return false; } } if (FileExist(_classFilePath)) { this->_logger.LogDebug("YOLOOD::Initialize. Load classes from file", _classFilePath, __FILE__, __LINE__); LoadClassesFromFile(); } else { this->_logger.LogDebug("YOLOOD::Initialize. Load classes from string", _classFilePath, __FILE__, __LINE__); LoadClassesFromString(); } } // 1. Load labelMap and engine labelMap.clear(); if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes); if (_isDarkNet) { LoadDarknetNetwork(); } else { #ifdef USEOPENCVDNN if (_modelConfig.modelType == ModelType::YOLOV8) LoadOnnxNetwork(); else loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available #else loadYoloModel(_modelFilePath, true, cv::Size(_modelConfig.inpWidth, _modelConfig.inpHeight));//Assume that GPU is available #endif } _isInitialized = true; return true; } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::Initialize", e.what(), __FILE__, __LINE__); return false; } } std::vector YOLOOD::RunInference(const cv::Mat& input) { return RunInference(input, "CustomCam"); } std::vector YOLOOD::RunInference(const cv::Mat& input, const std::string& camera_id) { std::lock_guard lock(_mutex); // Early validation - before try block if (!_licenseValid) { this->_logger.LogError("YOLOOD::RunInference", "Invalid 
License", __FILE__, __LINE__); return {}; } if (!_isInitialized) { this->_logger.LogError("YOLOOD::RunInference", "Model is not initialized", __FILE__, __LINE__); return {}; } if (input.empty() || input.cols < 10 || input.rows < 10) { return {}; } try { // --- NV12 fast path: try to get full-res BGR from GPU NV12 frame --- cv::Mat inferenceImage = input; float bgrScaleX = 1.0f, bgrScaleY = 1.0f; { auto* gpuData = tl_currentGpuFrame(); if (gpuData && gpuData->width > 0 && gpuData->height > 0) { if (gpuData->cpuYPlane && gpuData->cpuUvPlane && gpuData->cpuYLinesize >= gpuData->width && gpuData->cpuUvLinesize >= gpuData->width) { const int fw = gpuData->width; const int fh = gpuData->height; // NV12 requires even dimensions if ((fw % 2) == 0 && (fh % 2) == 0) { try { cv::Mat yPlane(fh, fw, CV_8UC1, gpuData->cpuYPlane, static_cast(gpuData->cpuYLinesize)); cv::Mat uvPlane(fh / 2, fw / 2, CV_8UC2, gpuData->cpuUvPlane, static_cast(gpuData->cpuUvLinesize)); cv::Mat fullResBGR; cv::cvtColorTwoPlane(yPlane, uvPlane, fullResBGR, cv::COLOR_YUV2BGR_NV12); if (!fullResBGR.empty()) { bgrScaleX = static_cast(input.cols) / fullResBGR.cols; bgrScaleY = static_cast(input.rows) / fullResBGR.rows; inferenceImage = fullResBGR; } } catch (...) 
{ /* NV12 conversion failed — fall back to input */ } } } } } std::vector ret; if (_isDarkNet) { ret = RunInferenceFromDarkNet(inferenceImage, camera_id); } else if (_modelConfig.modelType == ModelType::YOLOV5) { ret = RunInferenceFromYoloV5(inferenceImage, camera_id); } else { #ifdef USEOPENCVDNN ret = RunInferenceFromONNX(inferenceImage, camera_id); #else ret = RunInferenceFromYoloV8(inferenceImage, camera_id); #endif } // --- Rescale coordinates from full-res back to display-res --- if (bgrScaleX != 1.0f || bgrScaleY != 1.0f) { for (auto& obj : ret) { obj.box.x = static_cast(obj.box.x * bgrScaleX); obj.box.y = static_cast(obj.box.y * bgrScaleY); obj.box.width = static_cast(obj.box.width * bgrScaleX); obj.box.height = static_cast(obj.box.height * bgrScaleY); for (size_t k = 0; k < obj.kps.size(); k += 2) { obj.kps[k] *= bgrScaleX; // x if (k + 1 < obj.kps.size()) obj.kps[k + 1] *= bgrScaleY; // y } for (auto& pt : obj.polygon) { pt.x *= bgrScaleX; pt.y *= bgrScaleY; } } } if (_trackerEnabled) { ret = ApplyTracking(ret, camera_id); if (_stabilizationEnabled) ret = StabilizeDetections(ret, camera_id); } return ret; } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInference", e.what(), __FILE__, __LINE__); return {}; } } YOLOOD::~YOLOOD() { try { this->_logger.LogDebug("YOLOOD::~YOLOOD()", "Release YOLOOD ", __FILE__, __LINE__); } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::~YOLOOD()", e.what(), __FILE__, __LINE__); } } bool YOLOOD::Destroy() { try { _net.reset(); return true; } catch (std::exception& e) { this->_logger.LogFatal("YOLOOD::Destroy()", e.what(), __FILE__, __LINE__); return false; } } std::vector YOLOOD::RunInferenceFromYoloV5(const cv::Mat& input, const std::string& camera_id) { // Early validation - no need for try block if (input.empty() || input.data == nullptr) { return {}; } if (input.cols < 10 || input.rows < 10) { return {}; } try { // Color conversion only if needed cv::Mat frame; if (input.channels() == 1) 
{ cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR); } else { frame = input; // Shallow copy (shares data, no allocation) } return detect(frame, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV5", e.what(), __FILE__, __LINE__); return {}; } } std::vector YOLOOD::RunInferenceFromYoloV8(const cv::Mat& input, const std::string& camera_id) { // Early validation - before try block if (input.empty() || input.data == nullptr) { return {}; } if (input.cols < 10 || input.rows < 10) { return {}; } try { // Color conversion using reusable buffer cv::Mat* modelInputPtr; if (input.channels() == 1) { cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR); modelInputPtr = &this->_frameBuffer; } else { modelInputPtr = const_cast(&input); } cv::Mat& modelInput = *modelInputPtr; // Model shape and letterbox const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight); if (_letterBoxForSquare && modelShape.width == modelShape.height) { modelInput = ANSUtilityHelper::FormatToSquare(modelInput); } // Create blob cv::Mat blob; cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false); // Create input tensor - static MemoryInfo static const Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); const std::array inputShape = { 1, blob.size[1], blob.size[2], blob.size[3] }; Ort::Value inputTensor = Ort::Value::CreateTensor( memoryInfo, reinterpret_cast(blob.data), blob.total(), inputShape.data(), inputShape.size() ); // Run inference auto outputTensors = session.Run( Ort::RunOptions{ nullptr }, inputNodeNames.data(), &inputTensor, inputNodeNames.size(), outputNodeNames.data(), outputNodeNames.size() ); // Parse output const float* rawOutput = outputTensors[0].GetTensorData(); const std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const int dimensions = 
static_cast(outputShape[1]); // 4 + num_classes const int rows = static_cast(outputShape[2]); // 8400 const int numClasses = dimensions - 4; const int maxWidth = modelInput.cols; const int maxHeight = modelInput.rows; cv::Mat outputMat(dimensions, rows, CV_32F, const_cast(rawOutput)); cv::transpose(outputMat, outputMat); float* data = reinterpret_cast(outputMat.data); const float x_factor = static_cast(modelInput.cols) / modelShape.width; const float y_factor = static_cast(modelInput.rows) / modelShape.height; // Pre-allocate vectors std::vector class_ids; std::vector confidences; std::vector boxes; const size_t estimatedDetections = std::min(static_cast(rows / 10), size_t{ 100 }); class_ids.reserve(estimatedDetections); confidences.reserve(estimatedDetections); boxes.reserve(estimatedDetections); const float scoreThreshold = _modelConfig.detectionScoreThreshold; for (int i = 0; i < rows; ++i) { float* classes_scores = data + 4; cv::Mat scores(1, numClasses, CV_32FC1, classes_scores); cv::Point class_id; double maxClassScore; cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id); if (maxClassScore >= scoreThreshold) { confidences.push_back(static_cast(maxClassScore)); class_ids.push_back(class_id.x); const float x = data[0]; const float y = data[1]; const float w = data[2]; const float h = data[3]; int left = static_cast((x - 0.5f * w) * x_factor); int top = static_cast((y - 0.5f * h) * y_factor); int width = static_cast(w * x_factor); int height = static_cast(h * y_factor); // Clamp to image bounds left = std::max(0, left); top = std::max(0, top); width = std::min(maxWidth - left - 5, width); height = std::min(maxHeight - top - 5, height); boxes.emplace_back(left, top, width, height); } data += dimensions; } // NMS std::vector nms_result; cv::dnn::NMSBoxes(boxes, confidences, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, nms_result); // Build final detections std::vector detections; detections.reserve(nms_result.size()); const 
size_t classNameSize = _classes.size(); const int inputCols = input.cols; const int inputRows = input.rows; for (const int idx : nms_result) { if (confidences[idx] > scoreThreshold) { Object result; result.classId = class_ids[idx]; result.confidence = confidences[idx]; // Safe class name lookup if (classNameSize > 0) { const size_t safeIdx = static_cast(result.classId) < classNameSize ? static_cast(result.classId) : classNameSize - 1; result.className = _classes[safeIdx]; } else { result.className = "Unknown"; } result.box = boxes[idx]; result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(result.box, inputCols, inputRows); result.cameraId = camera_id; detections.push_back(std::move(result)); } } return detections; } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV8", e.what(), __FILE__, __LINE__); return {}; } } std::vector YOLOOD::RunInferenceFromYoloV11(const cv::Mat& input, [[maybe_unused]] const std::string& camera_id) { // Early validation - before try block if (input.empty() || input.data == nullptr) { this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11", "Input image is empty or null", __FILE__, __LINE__); return {}; } if (input.cols < 10 || input.rows < 10) { this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11", "Input image too small: " + std::to_string(input.cols) + "x" + std::to_string(input.rows), __FILE__, __LINE__); return {}; } const int channels = input.channels(); if (channels != 1 && channels != 3) { this->_logger.LogError("YOLOOD::RunInferenceFromYoloV11", "Unsupported channel count: " + std::to_string(channels), __FILE__, __LINE__); return {}; } try { // Color conversion if needed if (channels == 1) { cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR); return detectv11(this->_frameBuffer, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold); } return detectv11(input, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold); } catch (const cv::Exception& 
e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV11", "OpenCV error: " + std::string(e.what()), __FILE__, __LINE__); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromYoloV11", "Standard error: " + std::string(e.what()), __FILE__, __LINE__); } return {}; } std::vector YOLOOD::RunInferenceFromONNX(const cv::Mat& input, const std::string& camera_id) { // Early validation - before try block if (input.empty() || input.data == nullptr) { return {}; } if (input.cols < 10 || input.rows < 10) { return {}; } try { // Color conversion using reusable buffer cv::Mat* modelInputPtr; if (input.channels() == 1) { cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR); modelInputPtr = &this->_frameBuffer; } else { modelInputPtr = const_cast(&input); // Safe: we don't modify it } cv::Mat& modelInput = *modelInputPtr; // Model shape const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight); if (_letterBoxForSquare && modelShape.width == modelShape.height) { modelInput = ANSUtilityHelper::FormatToSquare(modelInput); } // Blob creation and inference cv::Mat blob; cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false); _net->setInput(blob); std::vector outputs; _net->forward(outputs, _net->getUnconnectedOutLayersNames()); // Parse output dimensions const int rows = outputs[0].size[2]; const int dimensions = outputs[0].size[1]; const int maxWidth = modelInput.cols; const int maxHeight = modelInput.rows; outputs[0] = outputs[0].reshape(1, dimensions); cv::transpose(outputs[0], outputs[0]); float* data = reinterpret_cast(outputs[0].data); const float x_factor = static_cast(modelInput.cols) / modelShape.width; const float y_factor = static_cast(modelInput.rows) / modelShape.height; // Pre-allocate with estimated capacity std::vector class_ids; std::vector confidences; std::vector boxes; const size_t estimatedDetections = std::min(static_cast(rows / 10), size_t{ 100 }); 
class_ids.reserve(estimatedDetections); confidences.reserve(estimatedDetections); boxes.reserve(estimatedDetections); const size_t numClasses = _classes.size(); const float scoreThreshold = _modelConfig.detectionScoreThreshold; for (int i = 0; i < rows; ++i) { float* classes_scores = data + 4; cv::Mat scores(1, static_cast(numClasses), CV_32FC1, classes_scores); cv::Point class_id; double maxClassScore; cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id); if (maxClassScore >= scoreThreshold) { confidences.push_back(static_cast(maxClassScore)); class_ids.push_back(class_id.x); const float x = data[0]; const float y = data[1]; const float w = data[2]; const float h = data[3]; int left = static_cast((x - 0.5f * w) * x_factor); int top = static_cast((y - 0.5f * h) * y_factor); int width = static_cast(w * x_factor); int height = static_cast(h * y_factor); // Clamp to image bounds left = std::max(0, left); top = std::max(0, top); width = std::min(maxWidth - left - 5, width); height = std::min(maxHeight - top - 5, height); boxes.emplace_back(left, top, width, height); } data += dimensions; } // NMS std::vector nms_result; cv::dnn::NMSBoxes(boxes, confidences, _modelConfig.modelConfThreshold, _modelConfig.modelMNSThreshold, nms_result); // Build final detections std::vector detections; detections.reserve(nms_result.size()); for (const int idx : nms_result) { if (confidences[idx] > scoreThreshold) { Object result; result.classId = class_ids[idx]; result.confidence = confidences[idx]; result.className = _classes[result.classId]; result.box = boxes[idx]; result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(result.box, input.cols, input.rows); result.cameraId = camera_id; detections.push_back(std::move(result)); } } return detections; } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromONNX", e.what(), __FILE__, __LINE__); return {}; } } std::vector YOLOOD::RunInferenceFromDarkNet(const cv::Mat& input, const std::string& 
camera_id) { // Early validation - before try block if (input.empty() || input.data == nullptr) { return {}; } if (input.cols < 10 || input.rows < 10) { return {}; } try { // Color conversion using reusable buffer cv::Mat* modelInputPtr; if (input.channels() == 1) { cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR); modelInputPtr = &this->_frameBuffer; } else { modelInputPtr = const_cast(&input); } cv::Mat& modelInput = *modelInputPtr; // Model shape const cv::Size2f modelShape(_modelConfig.inpWidth, _modelConfig.inpHeight); if (_letterBoxForSquare && modelShape.width == modelShape.height) { modelInput = ANSUtilityHelper::FormatToSquare(modelInput); } // Setup detection model cv::dnn::DetectionModel model(*_net); model.setInputParams(1.0 / 255.0, cv::Size(static_cast(_modelConfig.inpHeight), static_cast(_modelConfig.inpHeight)), cv::Scalar(), true); // Run detection std::vector classIds; std::vector scores; std::vector boxes; model.detect(modelInput, classIds, scores, boxes, _modelConfig.detectionScoreThreshold, _modelConfig.modelMNSThreshold); // Build results std::vector detections; detections.reserve(classIds.size()); const float scoreThreshold = _modelConfig.detectionScoreThreshold; const int maxWidth = modelInput.cols; const int maxHeight = modelInput.rows; const int inputCols = input.cols; const int inputRows = input.rows; for (size_t i = 0; i < classIds.size(); ++i) { if (scores[i] > scoreThreshold) { Object result; result.classId = classIds[i]; result.confidence = scores[i]; result.className = _classes[result.classId]; // Clamp box to image bounds cv::Rect& box = boxes[i]; box.x = std::max(0, box.x); box.y = std::max(0, box.y); box.width = std::min(maxWidth - box.x - 5, box.width); box.height = std::min(maxHeight - box.y - 5, box.height); result.box = box; result.polygon = ANSUtilityHelper::RectToNormalizedPolygon(box, inputCols, inputRows); result.cameraId = camera_id; detections.push_back(std::move(result)); } } return detections; } catch (const 
std::exception& e) { this->_logger.LogFatal("YOLOOD::RunInferenceFromDarkNet", e.what(), __FILE__, __LINE__); return {}; } } void YOLOOD::SetEngineType() { try { EngineType engineType = ANSLicenseHelper::CheckHardwareInformation(); switch (engineType) { case EngineType::NVIDIA_GPU: // NVIDIA CUDA this->_logger.LogDebug("YOLOOD::SetEngineType", "Running on CUDA GPU", __FILE__, __LINE__); _net->setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); _net->setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); break; default: // Normal CPU this->_logger.LogDebug("YOLOOD::SetEngineType", "Running on CPU", __FILE__, __LINE__); _net->setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); _net->setPreferableTarget(cv::dnn::DNN_TARGET_CPU); break; } } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::SetEngineType()", e.what(), __FILE__, __LINE__); } } void YOLOOD::LoadOnnxNetwork() { //For Yolov5 and Yolov8 try { _net = std::make_shared(cv::dnn::readNetFromONNX(_modelFilePath)); _isDarkNet = false; _modelConfig.inpHeight = 640; _modelConfig.inpWidth = 640; SetEngineType(); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::LoadOnnxNetwork()", e.what(), __FILE__, __LINE__); } } void YOLOOD::LoadDarknetNetwork() { try { // Retrieve the cfg file std::string modelCfgFile = CreateFilePath(_modelFolder, "test.cfg");//It is always test.cfg _net = std::make_shared(cv::dnn::readNetFromDarknet(modelCfgFile, _modelFilePath)); _modelConfig.inpHeight = 416; _modelConfig.inpWidth = 416; _isDarkNet = true; SetEngineType(); } catch (const std::exception& e) { this->_logger.LogFatal("YOLOOD::LoadDarknetNetwork()", e.what(), __FILE__, __LINE__); } } }