#ifndef ANSONNXYOLO_H
#define ANSONNXYOLO_H
#pragma once
#include "ANSEngineCommon.h"
#include "ONNXEngine.h"
namespace ANSCENTER {
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
// ONNXYOLO — Ultralytics YOLO inference via ONNX Runtime
|
|
|
|
|
//
|
|
|
|
|
// Compatible with ALL Ultralytics YOLO tasks and versions
|
|
|
|
|
// (v8, v9, v10, v11, v26+) by auto-detecting the task type
|
|
|
|
|
// from output tensor shapes at inference time.
|
|
|
|
|
//
|
|
|
|
|
// Supported tasks:
|
|
|
|
|
// - Detection : [B,300,6] end2end or [B, nc+4, N] legacy
|
|
|
|
|
// - OBB : [B,300,7] end2end or [B, nc+5, N] legacy
|
|
|
|
|
// - Segmentation: [B,300,38]+protos or [B, nc+36, N]+protos
|
|
|
|
|
// - Pose : [B,300,6+nk*3] or [B, nc+4+nk*3, N] legacy
|
|
|
|
|
// - Classification: [B, nc]
|
|
|
|
|
//
|
|
|
|
|
// Preprocessing follows the exact Ultralytics LetterBox transform:
|
|
|
|
|
// - center=True, stride=32, pad_value=114
|
|
|
|
|
// - Ultralytics-compatible -0.1/+0.1 rounding for deterministic padding
|
|
|
|
|
// - BGR→RGB, /255.0 normalisation, HWC→CHW
|
|
|
|
|
// ====================================================================
|
|
|
|
|
class ONNXENGINE_API ONNXYOLO : public BasicOrtHandler
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
explicit ONNXYOLO(const std::string& _onnx_path,
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
explicit ONNXYOLO(const std::string& _onnx_path,
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
~ONNXYOLO() override = default;
|
|
|
|
|
|
|
|
|
|
/// Run inference on a single image.
|
|
|
|
|
/// Auto-detects the task type (detect/segment/obb/pose/classify)
|
|
|
|
|
/// from the ONNX model's output tensor shapes.
|
|
|
|
|
std::vector<Object> detect(const cv::Mat& image,
|
|
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold = 0.25f,
|
|
|
|
|
float iouThreshold = 0.45f,
|
|
|
|
|
int numKPS = 0);
|
|
|
|
|
|
|
|
|
|
/// True after detect() if the last inference was classification.
|
|
|
|
|
bool lastWasClassification = false;
|
|
|
|
|
|
|
|
|
|
/// Run batched inference on multiple images in a single ONNX session call.
|
|
|
|
|
/// Falls back to sequential detect() if the model has fixed batch=1.
|
|
|
|
|
std::vector<std::vector<Object>> detectBatch(
|
|
|
|
|
const std::vector<cv::Mat>& images,
|
|
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold = 0.25f,
|
|
|
|
|
float iouThreshold = 0.45f,
|
|
|
|
|
int numKPS = 0);
|
|
|
|
|
|
|
|
|
|
/// True after detectBatch() if the batch was classification.
|
|
|
|
|
bool lastBatchWasClassification = false;
|
|
|
|
|
|
|
|
|
|
/// Override the input image shape for dynamic-input models.
|
|
|
|
|
/// Call after construction when the model config specifies a
|
|
|
|
|
/// different resolution than the default 640x640.
|
|
|
|
|
void setInputShape(int width, int height) {
|
|
|
|
|
inputImageShape = cv::Size(width, height);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// True if the ONNX model has dynamic spatial dimensions.
|
|
|
|
|
bool hasDynamicInputShape() const { return isDynamicInputShape; }
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
|
|
|
|
// ── Ultralytics-compatible letterbox ────────────────────────────
|
|
|
|
|
void letterBox(const cv::Mat& image, cv::Mat& outImage,
|
|
|
|
|
const cv::Size& newShape,
|
|
|
|
|
const cv::Scalar& color = cv::Scalar(114, 114, 114),
|
|
|
|
|
bool scaleUp = true,
|
|
|
|
|
int stride = 32);
|
|
|
|
|
|
|
|
|
|
// ── Detection postprocess ───────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── OBB postprocess ─────────────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessOBBEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessOBBLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Segmentation postprocess ────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessSegEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessSegLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Pose postprocess ────────────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessPoseEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, int numKPS);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessPoseLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int numKPS, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Classification postprocess ──────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessClassify(
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
const cv::Size& imageSize);
|
|
|
|
|
|
|
|
|
|
// ── OBB NMS helpers (Prob-IoU based) ────────────────────────────
|
|
|
|
|
struct OrientedBox {
|
|
|
|
|
float x, y, width, height, angle;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static void getCovarianceComponents(const OrientedBox& box,
|
|
|
|
|
float& out1, float& out2, float& out3);
|
|
|
|
|
static std::vector<std::vector<float>> batchProbiou(
|
|
|
|
|
const std::vector<OrientedBox>& obb1,
|
|
|
|
|
const std::vector<OrientedBox>& obb2, float eps = 1e-7f);
|
|
|
|
|
static std::vector<int> nmsRotatedImpl(
|
|
|
|
|
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold);
|
|
|
|
|
static std::vector<int> nmsRotated(
|
|
|
|
|
const std::vector<OrientedBox>& boxes,
|
|
|
|
|
const std::vector<float>& scores, float iouThreshold);
|
|
|
|
|
static std::vector<cv::Point2f> OBBToPoints(const OrientedBox& obb);
|
|
|
|
|
|
|
|
|
|
// ── Batch output slicing helper ────────────────────────────────
|
|
|
|
|
static Ort::Value sliceBatchOutput(
|
2026-04-08 13:45:52 +10:00
|
|
|
Ort::Value& batchTensor,
|
2026-03-28 16:54:11 +11:00
|
|
|
int64_t batchIndex,
|
|
|
|
|
const std::vector<int64_t>& fullShape,
|
|
|
|
|
Ort::MemoryInfo& memInfo);
|
|
|
|
|
|
|
|
|
|
// Cached model input shape
|
|
|
|
|
cv::Size inputImageShape;
|
|
|
|
|
bool isDynamicInputShape{ false };
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
// ANSONNXYOLO — ANSODBase wrapper for Ultralytics YOLO ONNX
|
|
|
|
|
//
|
|
|
|
|
// Compatible with all Ultralytics YOLO tasks and versions.
|
|
|
|
|
// Implements all required ANSODBase interfaces.
|
|
|
|
|
// ====================================================================
|
|
|
|
|
class ANSENGINE_API ANSONNXYOLO : public ANSODBase {
|
|
|
|
|
public:
|
|
|
|
|
bool Initialize(std::string licenseKey, ModelConfig modelConfig,
|
|
|
|
|
const std::string& modelZipFilePath,
|
|
|
|
|
const std::string& modelZipPassword,
|
|
|
|
|
std::string& labelMap) override;
|
|
|
|
|
|
|
|
|
|
bool LoadModel(const std::string& modelZipFilePath,
|
|
|
|
|
const std::string& modelZipPassword) override;
|
|
|
|
|
|
|
|
|
|
bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
|
|
|
|
|
std::string modelName, std::string className,
|
|
|
|
|
const std::string& modelFolder,
|
|
|
|
|
std::string& labelMap) override;
|
|
|
|
|
|
|
|
|
|
bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;
|
|
|
|
|
|
|
|
|
|
std::vector<Object> RunInference(const cv::Mat& input);
|
|
|
|
|
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<Object>> RunInferencesBatch(
|
|
|
|
|
const std::vector<cv::Mat>& inputs,
|
|
|
|
|
const std::string& camera_id) override;
|
|
|
|
|
|
|
|
|
|
bool Destroy();
|
|
|
|
|
~ANSONNXYOLO();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
std::string _modelFilePath;
|
|
|
|
|
bool _modelLoadValid{ false };
|
|
|
|
|
|
|
|
|
|
// Filter thresholds
|
|
|
|
|
float PROBABILITY_THRESHOLD{ 0.25f };
|
|
|
|
|
float NMS_THRESHOLD{ 0.45f };
|
|
|
|
|
int TOP_K{ 300 };
|
|
|
|
|
|
|
|
|
|
// Pose estimation
|
|
|
|
|
int NUM_KPS{ 0 };
|
|
|
|
|
float KPS_THRESHOLD{ 0.5f };
|
|
|
|
|
|
|
|
|
|
// ONNX Runtime inference engine
|
|
|
|
|
std::unique_ptr<ONNXYOLO> m_ortEngine;
|
|
|
|
|
|
2026-04-09 08:09:02 +10:00
|
|
|
// DML device-lost recovery: when DirectML's GPU device is removed
|
|
|
|
|
// (HRESULT 887A0005), the session is permanently broken. We detect
|
|
|
|
|
// this once, attempt a CPU-fallback recreation, and suppress further
|
|
|
|
|
// error-log flooding.
|
|
|
|
|
bool _dmlDeviceLost{ false };
|
|
|
|
|
|
2026-03-28 16:54:11 +11:00
|
|
|
// Internal detection pipeline
|
|
|
|
|
std::vector<Object> DetectObjects(const cv::Mat& inputImage,
|
|
|
|
|
const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
// Internal batch detection pipeline
|
|
|
|
|
std::vector<std::vector<Object>> DetectObjectsBatch(
|
|
|
|
|
const std::vector<cv::Mat>& inputImages,
|
|
|
|
|
const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
// Initialise ORT engine from the resolved model path
|
|
|
|
|
bool InitOrtEngine();
|
2026-04-08 13:45:52 +10:00
|
|
|
public:
|
|
|
|
|
// Initialise ORT engine with explicit engine type override (e.g. CPU fallback for AMD iGPUs)
|
|
|
|
|
bool InitOrtEngine(ANSCENTER::EngineType engineType);
|
2026-03-28 16:54:11 +11:00
|
|
|
};
|
|
|
|
|
}
#endif