#ifndef ANSONNXYOLO_H
#define ANSONNXYOLO_H
#pragma once
#include "ANSEngineCommon.h"
#include "ONNXEngine.h"
namespace ANSCENTER {
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
// ONNXYOLO — Ultralytics YOLO inference via ONNX Runtime
|
|
|
|
|
//
|
|
|
|
|
// Compatible with ALL Ultralytics YOLO tasks and versions
|
|
|
|
|
// (v8, v9, v10, v11, v26+) by auto-detecting the task type
|
|
|
|
|
// from output tensor shapes at inference time.
|
|
|
|
|
//
|
|
|
|
|
// Supported tasks:
|
|
|
|
|
// - Detection : [B,300,6] end2end or [B, nc+4, N] legacy
|
|
|
|
|
// - OBB : [B,300,7] end2end or [B, nc+5, N] legacy
|
|
|
|
|
// - Segmentation: [B,300,38]+protos or [B, nc+36, N]+protos
|
|
|
|
|
// - Pose : [B,300,6+nk*3] or [B, nc+4+nk*3, N] legacy
|
|
|
|
|
// - Classification: [B, nc]
|
|
|
|
|
//
|
|
|
|
|
// Preprocessing follows the exact Ultralytics LetterBox transform:
|
|
|
|
|
// - center=True, stride=32, pad_value=114
|
|
|
|
|
// - Ultralytics-compatible -0.1/+0.1 rounding for deterministic padding
|
|
|
|
|
// - BGR→RGB, /255.0 normalisation, HWC→CHW
|
|
|
|
|
// ====================================================================
|
|
|
|
|
class ONNXENGINE_API ONNXYOLO : public BasicOrtHandler
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
explicit ONNXYOLO(const std::string& _onnx_path,
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
explicit ONNXYOLO(const std::string& _onnx_path,
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
~ONNXYOLO() override = default;
|
|
|
|
|
|
|
|
|
|
/// Run inference on a single image.
|
|
|
|
|
/// Auto-detects the task type (detect/segment/obb/pose/classify)
|
|
|
|
|
/// from the ONNX model's output tensor shapes.
|
|
|
|
|
std::vector<Object> detect(const cv::Mat& image,
|
|
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold = 0.25f,
|
|
|
|
|
float iouThreshold = 0.45f,
|
|
|
|
|
int numKPS = 0);
|
|
|
|
|
|
|
|
|
|
/// True after detect() if the last inference was classification.
|
|
|
|
|
bool lastWasClassification = false;
|
|
|
|
|
|
|
|
|
|
/// Run batched inference on multiple images in a single ONNX session call.
|
|
|
|
|
/// Falls back to sequential detect() if the model has fixed batch=1.
|
|
|
|
|
std::vector<std::vector<Object>> detectBatch(
|
|
|
|
|
const std::vector<cv::Mat>& images,
|
|
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold = 0.25f,
|
|
|
|
|
float iouThreshold = 0.45f,
|
|
|
|
|
int numKPS = 0);
|
|
|
|
|
|
|
|
|
|
/// True after detectBatch() if the batch was classification.
|
|
|
|
|
bool lastBatchWasClassification = false;
|
|
|
|
|
|
|
|
|
|
/// Override the input image shape for dynamic-input models.
|
|
|
|
|
/// Call after construction when the model config specifies a
|
|
|
|
|
/// different resolution than the default 640x640.
|
|
|
|
|
void setInputShape(int width, int height) {
|
|
|
|
|
inputImageShape = cv::Size(width, height);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// True if the ONNX model has dynamic spatial dimensions.
|
|
|
|
|
bool hasDynamicInputShape() const { return isDynamicInputShape; }
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
|
|
|
|
// ── Ultralytics-compatible letterbox ────────────────────────────
|
|
|
|
|
void letterBox(const cv::Mat& image, cv::Mat& outImage,
|
|
|
|
|
const cv::Size& newShape,
|
|
|
|
|
const cv::Scalar& color = cv::Scalar(114, 114, 114),
|
|
|
|
|
bool scaleUp = true,
|
|
|
|
|
int stride = 32);
|
|
|
|
|
|
|
|
|
|
// ── Detection postprocess ───────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── OBB postprocess ─────────────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessOBBEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessOBBLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Segmentation postprocess ────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessSegEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames, float confThreshold);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessSegLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Pose postprocess ────────────────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessPoseEndToEnd(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, int numKPS);
|
|
|
|
|
|
|
|
|
|
std::vector<Object> postprocessPoseLegacy(
|
|
|
|
|
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
float confThreshold, float iouThreshold, int numKPS, int maxDet = 300);
|
|
|
|
|
|
|
|
|
|
// ── Classification postprocess ──────────────────────────────────
|
|
|
|
|
std::vector<Object> postprocessClassify(
|
2026-04-08 13:45:52 +10:00
|
|
|
std::vector<Ort::Value>& outputTensors,
|
2026-03-28 16:54:11 +11:00
|
|
|
const std::vector<std::string>& classNames,
|
|
|
|
|
const cv::Size& imageSize);
|
|
|
|
|
|
|
|
|
|
// ── OBB NMS helpers (Prob-IoU based) ────────────────────────────
|
|
|
|
|
struct OrientedBox {
|
|
|
|
|
float x, y, width, height, angle;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static void getCovarianceComponents(const OrientedBox& box,
|
|
|
|
|
float& out1, float& out2, float& out3);
|
|
|
|
|
static std::vector<std::vector<float>> batchProbiou(
|
|
|
|
|
const std::vector<OrientedBox>& obb1,
|
|
|
|
|
const std::vector<OrientedBox>& obb2, float eps = 1e-7f);
|
|
|
|
|
static std::vector<int> nmsRotatedImpl(
|
|
|
|
|
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold);
|
|
|
|
|
static std::vector<int> nmsRotated(
|
|
|
|
|
const std::vector<OrientedBox>& boxes,
|
|
|
|
|
const std::vector<float>& scores, float iouThreshold);
|
|
|
|
|
static std::vector<cv::Point2f> OBBToPoints(const OrientedBox& obb);
|
|
|
|
|
|
|
|
|
|
// ── Batch output slicing helper ────────────────────────────────
|
|
|
|
|
static Ort::Value sliceBatchOutput(
|
2026-04-08 13:45:52 +10:00
|
|
|
Ort::Value& batchTensor,
|
2026-03-28 16:54:11 +11:00
|
|
|
int64_t batchIndex,
|
|
|
|
|
const std::vector<int64_t>& fullShape,
|
|
|
|
|
Ort::MemoryInfo& memInfo);
|
|
|
|
|
|
|
|
|
|
// Cached model input shape
|
|
|
|
|
cv::Size inputImageShape;
|
|
|
|
|
bool isDynamicInputShape{ false };
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
// ANSONNXYOLO — ANSODBase wrapper for Ultralytics YOLO ONNX
|
|
|
|
|
//
|
|
|
|
|
// Compatible with all Ultralytics YOLO tasks and versions.
|
|
|
|
|
// Implements all required ANSODBase interfaces.
|
|
|
|
|
// ====================================================================
|
|
|
|
|
class ANSENGINE_API ANSONNXYOLO : public ANSODBase {
|
|
|
|
|
public:
|
|
|
|
|
bool Initialize(std::string licenseKey, ModelConfig modelConfig,
|
|
|
|
|
const std::string& modelZipFilePath,
|
|
|
|
|
const std::string& modelZipPassword,
|
|
|
|
|
std::string& labelMap) override;
|
|
|
|
|
|
|
|
|
|
bool LoadModel(const std::string& modelZipFilePath,
|
|
|
|
|
const std::string& modelZipPassword) override;
|
|
|
|
|
|
|
|
|
|
bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
|
|
|
|
|
std::string modelName, std::string className,
|
|
|
|
|
const std::string& modelFolder,
|
|
|
|
|
std::string& labelMap) override;
|
|
|
|
|
|
|
|
|
|
bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;
|
|
|
|
|
|
|
|
|
|
std::vector<Object> RunInference(const cv::Mat& input);
|
|
|
|
|
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<Object>> RunInferencesBatch(
|
|
|
|
|
const std::vector<cv::Mat>& inputs,
|
|
|
|
|
const std::string& camera_id) override;
|
|
|
|
|
|
|
|
|
|
bool Destroy();
|
|
|
|
|
~ANSONNXYOLO();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
std::string _modelFilePath;
|
|
|
|
|
bool _modelLoadValid{ false };
|
|
|
|
|
|
|
|
|
|
// Filter thresholds
|
|
|
|
|
float PROBABILITY_THRESHOLD{ 0.25f };
|
|
|
|
|
float NMS_THRESHOLD{ 0.45f };
|
|
|
|
|
int TOP_K{ 300 };
|
|
|
|
|
|
|
|
|
|
// Pose estimation
|
|
|
|
|
int NUM_KPS{ 0 };
|
|
|
|
|
float KPS_THRESHOLD{ 0.5f };
|
|
|
|
|
|
|
|
|
|
// ONNX Runtime inference engine
|
|
|
|
|
std::unique_ptr<ONNXYOLO> m_ortEngine;
|
|
|
|
|
|
2026-04-09 08:09:02 +10:00
|
|
|
// DML device-lost recovery: when DirectML's GPU device is removed
|
|
|
|
|
// (HRESULT 887A0005), the session is permanently broken. We detect
|
|
|
|
|
// this once, attempt a CPU-fallback recreation, and suppress further
|
|
|
|
|
// error-log flooding.
|
|
|
|
|
bool _dmlDeviceLost{ false };
|
|
|
|
|
|
2026-03-28 16:54:11 +11:00
|
|
|
// Internal detection pipeline
|
|
|
|
|
std::vector<Object> DetectObjects(const cv::Mat& inputImage,
|
|
|
|
|
const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
// Internal batch detection pipeline
|
|
|
|
|
std::vector<std::vector<Object>> DetectObjectsBatch(
|
|
|
|
|
const std::vector<cv::Mat>& inputImages,
|
|
|
|
|
const std::string& camera_id);
|
|
|
|
|
|
|
|
|
|
// Initialise ORT engine from the resolved model path
|
|
|
|
|
bool InitOrtEngine();
|
2026-04-08 13:45:52 +10:00
|
|
|
public:
|
|
|
|
|
// Initialise ORT engine with explicit engine type override (e.g. CPU fallback for AMD iGPUs)
|
|
|
|
|
bool InitOrtEngine(ANSCENTER::EngineType engineType);
|
2026-03-28 16:54:11 +11:00
|
|
|
};
|
|
|
|
|
}
#endif