#ifndef ANSONNXYOLO_H
#define ANSONNXYOLO_H
#pragma once
#include "ANSEngineCommon.h"
#include "ONNXEngine.h"

// NOTE(review): in this copy of the header the template argument lists look
// stripped (e.g. "std::vector detect(...)", "std::vector> detectBatch(...)",
// "std::unique_ptr m_ortEngine"). The declarations below are reproduced
// token-for-token; the element types must be restored from the authoritative
// source before this header can compile. Both an include guard and
// #pragma once are present; either one alone would be sufficient.

namespace ANSCENTER {

    // ====================================================================
    // ONNXYOLO — Ultralytics YOLO inference via ONNX Runtime
    //
    // Compatible with ALL Ultralytics YOLO tasks and versions
    // (v8, v9, v10, v11, v26+) by auto-detecting the task type
    // from output tensor shapes at inference time.
    //
    // Supported tasks:
    //   - Detection     : [B,300,6] end2end      or [B, nc+4, N] legacy
    //   - OBB           : [B,300,7] end2end      or [B, nc+5, N] legacy
    //   - Segmentation  : [B,300,38]+protos      or [B, nc+36, N]+protos
    //   - Pose          : [B,300,6+nk*3]         or [B, nc+4+nk*3, N] legacy
    //   - Classification: [B, nc]
    //
    // Preprocessing follows the exact Ultralytics LetterBox transform:
    //   - center=True, stride=32, pad_value=114
    //   - Ultralytics-compatible -0.1/+0.1 rounding for deterministic padding
    //   - BGR→RGB, /255.0 normalisation, HWC→CHW
    //
    // Derives publicly from BasicOrtHandler (declared outside this header,
    // presumably in ONNXEngine.h — confirm) and overrides its transform()
    // and transformBatch() hooks.
    // ====================================================================
    class ONNXENGINE_API ONNXYOLO : public BasicOrtHandler {
    public:
        /// Construct from an ONNX model path.
        /// @param _onnx_path    Path to the ONNX model file.
        /// @param _num_threads  Thread count (defaults to 1); presumably
        ///                      forwarded to the ORT session — confirm in .cpp.
        explicit ONNXYOLO(const std::string& _onnx_path, unsigned int _num_threads = 1);

        /// Same as above, with an explicit EngineType selector. EngineType is
        /// declared elsewhere; its semantics are not visible from this header.
        explicit ONNXYOLO(const std::string& _onnx_path, EngineType engineType, unsigned int _num_threads = 1);

        /// Defaulted destructor. The `override` specifier requires the
        /// BasicOrtHandler destructor to be virtual.
        ~ONNXYOLO() override = default;

        /// Run inference on a single image.
        /// Auto-detects the task type (detect/segment/obb/pose/classify)
        /// from the ONNX model's output tensor shapes.
        /// @param image          Input image.
        /// @param classNames     Class label list.
        /// @param confThreshold  Defaults to 0.25f.
        /// @param iouThreshold   Defaults to 0.45f.
        /// @param numKPS         Defaults to 0; presumably the keypoint count
        ///                       for pose models — confirm in .cpp.
        std::vector detect(const cv::Mat& image,
            const std::vector& classNames,
            float confThreshold = 0.25f,
            float iouThreshold = 0.45f,
            int numKPS = 0);

        /// True after detect() if the last inference was classification.
        /// Public data member, initialised to false.
        bool lastWasClassification = false;

        /// Run batched inference on multiple images in a single ONNX session call.
        /// Falls back to sequential detect() if the model has fixed batch=1.
        /// Takes the same thresholds and defaults as detect().
        std::vector> detectBatch(
            const std::vector& images,
            const std::vector& classNames,
            float confThreshold = 0.25f,
            float iouThreshold = 0.45f,
            int numKPS = 0);

        /// True after detectBatch() if the batch was classification.
        /// Public data member, initialised to false.
        bool lastBatchWasClassification = false;

        /// Override the input image shape for dynamic-input models.
        /// Call after construction when the model config specifies a
        /// different resolution than the default 640x640.
        /// Stores cv::Size(width, height) in inputImageShape; the values are
        /// not validated here.
        void setInputShape(int width, int height) { inputImageShape = cv::Size(width, height); }

        /// True if the ONNX model has dynamic spatial dimensions.
        /// Returns the cached isDynamicInputShape flag.
        bool hasDynamicInputShape() const { return isDynamicInputShape; }

    private:
        // BasicOrtHandler overrides that turn input image(s) into an
        // Ort::Value. Presumably these apply the preprocessing described in
        // the class banner — confirm in the .cpp.
        Ort::Value transform(const cv::Mat& mat) override;
        Ort::Value transformBatch(const std::vector& images) override;

        // ── Ultralytics-compatible letterbox ────────────────────────────
        // Writes the result to outImage. Defaults: padding colour
        // (114, 114, 114), scaleUp enabled, stride 32 — matching the
        // pad_value/stride listed in the class banner.
        void letterBox(const cv::Mat& image, cv::Mat& outImage,
            const cv::Size& newShape,
            const cv::Scalar& color = cv::Scalar(114, 114, 114),
            bool scaleUp = true,
            int stride = 32);

        // ── Detection postprocess ───────────────────────────────────────
        // Throughout the postprocess family, the *EndToEnd variants take only
        // a confidence threshold, while the *Legacy variants additionally take
        // iouThreshold and a maxDet cap (default 300). Presumably the legacy
        // path performs NMS itself — confirm in the .cpp.
        std::vector postprocessEndToEnd(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold);

        std::vector postprocessLegacy(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold, float iouThreshold,
            int maxDet = 300);

        // ── OBB postprocess ─────────────────────────────────────────────
        std::vector postprocessOBBEndToEnd(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold);

        std::vector postprocessOBBLegacy(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold, float iouThreshold,
            int maxDet = 300);

        // ── Segmentation postprocess ────────────────────────────────────
        std::vector postprocessSegEndToEnd(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold);

        std::vector postprocessSegLegacy(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold, float iouThreshold,
            int maxDet = 300);

        // ── Pose postprocess ────────────────────────────────────────────
        // Both pose variants take numKPS explicitly (no default here).
        std::vector postprocessPoseEndToEnd(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold, int numKPS);

        std::vector postprocessPoseLegacy(
            const cv::Size& originalImageSize,
            const cv::Size& resizedImageShape,
            const std::vector& outputTensors,
            const std::vector& classNames,
            float confThreshold, float iouThreshold,
            int numKPS, int maxDet = 300);

        // ── Classification postprocess ──────────────────────────────────
        // Takes no thresholds; only the output tensors, class names and the
        // image size.
        std::vector postprocessClassify(
            const std::vector& outputTensors,
            const std::vector& classNames,
            const cv::Size& imageSize);

        // ── OBB NMS helpers (Prob-IoU based) ────────────────────────────
        // Oriented box with five float fields. NOTE(review): whether x/y is
        // the box centre and the unit of `angle` are not established by this
        // header — confirm in the .cpp.
        struct OrientedBox {
            float x, y, width, height, angle;
        };

        // Writes three values derived from `box` into out1..out3.
        static void getCovarianceComponents(const OrientedBox& box,
            float& out1, float& out2, float& out3);

        // eps defaults to 1e-7f; presumably a numerical-stability term —
        // confirm in the .cpp.
        static std::vector> batchProbiou(
            const std::vector& obb1,
            const std::vector& obb2,
            float eps = 1e-7f);

        // The parameter name suggests the boxes are expected pre-sorted —
        // TODO confirm ordering requirement against the implementation.
        static std::vector nmsRotatedImpl(
            const std::vector& sortedBoxes,
            float iouThreshold);

        static std::vector nmsRotated(
            const std::vector& boxes,
            const std::vector& scores,
            float iouThreshold);

        static std::vector OBBToPoints(const OrientedBox& obb);

        // ── Batch output slicing helper ────────────────────────────────
        // Produces an Ort::Value for entry `batchIndex` of `batchTensor`.
        // NOTE(review): whether the result copies or aliases the batch
        // tensor's memory is not visible here — check lifetime in the .cpp.
        static Ort::Value sliceBatchOutput(
            const Ort::Value& batchTensor,
            int64_t batchIndex,
            const std::vector& fullShape,
            Ort::MemoryInfo& memInfo);

        // Cached model input shape
        // (overwritten by setInputShape()).
        cv::Size inputImageShape;
        // Backing flag for hasDynamicInputShape(); starts as false.
        bool isDynamicInputShape{ false };
    };

    // ====================================================================
    // ANSONNXYOLO — ANSODBase wrapper for Ultralytics YOLO ONNX
    //
    // Compatible with all Ultralytics YOLO tasks and versions.
    // Implements all required ANSODBase interfaces.
    //
    // NOTE(review): Initialize, LoadModel, LoadModelFromFolder, OptimizeModel
    // and RunInferencesBatch are marked `override`; both RunInference
    // overloads, Destroy() and the destructor are not. Confirm against
    // ANSODBase whether that is intentional.
    // ====================================================================
    class ANSENGINE_API ANSONNXYOLO : public ANSODBase {
    public:
        /// ANSODBase override. labelMap is a non-const reference, so it is
        /// presumably an output — confirm in the .cpp.
        bool Initialize(std::string licenseKey, ModelConfig modelConfig,
            const std::string& modelZipFilePath,
            const std::string& modelZipPassword,
            std::string& labelMap) override;

        /// ANSODBase override taking the model archive path and its password.
        bool LoadModel(const std::string& modelZipFilePath,
            const std::string& modelZipPassword) override;

        /// ANSODBase override that takes a model folder instead of a zip
        /// archive; labelMap is again a non-const reference.
        bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
            std::string modelName, std::string className,
            const std::string& modelFolder,
            std::string& labelMap) override;

        /// ANSODBase override. optimizedModelFolder is a non-const reference,
        /// presumably receiving the output location — confirm in the .cpp.
        bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;

        /// Single-image inference, without and with a camera identifier.
        std::vector RunInference(const cv::Mat& input);
        std::vector RunInference(const cv::Mat& input, const std::string& camera_id);

        /// Batched inference over several inputs (ANSODBase override).
        std::vector> RunInferencesBatch(
            const std::vector& inputs,
            const std::string& camera_id) override;

        bool Destroy();
        ~ANSONNXYOLO();

    private:
        std::string _modelFilePath;
        bool _modelLoadValid{ false };

        // Filter thresholds
        // Initial values equal the defaults on ONNXYOLO::detect()
        // (0.25f / 0.45f) and the maxDet default (300); whether they are
        // forwarded there is not visible from this header.
        float PROBABILITY_THRESHOLD{ 0.25f };
        float NMS_THRESHOLD{ 0.45f };
        int TOP_K{ 300 };

        // Pose estimation
        int NUM_KPS{ 0 };
        float KPS_THRESHOLD{ 0.5f };

        // ONNX Runtime inference engine
        std::unique_ptr m_ortEngine;

        // Internal detection pipeline
        std::vector DetectObjects(const cv::Mat& inputImage,
            const std::string& camera_id);

        // Internal batch detection pipeline
        std::vector> DetectObjectsBatch(
            const std::vector& inputImages,
            const std::string& camera_id);

        // Initialise ORT engine from the resolved model path
        bool InitOrtEngine();
    };
}
#endif