// Source: ANSCORE/modules/ANSODEngine/ANSONNXYOLO.h (239 lines, 11 KiB, C++)

#ifndef ANSONNXYOLO_H
#define ANSONNXYOLO_H
#pragma once
#include "ANSEngineCommon.h"
#include "ONNXEngine.h"
namespace ANSCENTER {
// ====================================================================
// ONNXYOLO — Ultralytics YOLO inference via ONNX Runtime
//
// Compatible with ALL Ultralytics YOLO tasks and versions
// (v8, v9, v10, v11, v26+) by auto-detecting the task type
// from output tensor shapes at inference time.
//
// Supported tasks:
// - Detection : [B,300,6] end2end or [B, nc+4, N] legacy
// - OBB : [B,300,7] end2end or [B, nc+5, N] legacy
// - Segmentation: [B,300,38]+protos or [B, nc+36, N]+protos
// - Pose : [B,300,6+nk*3] or [B, nc+4+nk*3, N] legacy
// - Classification: [B, nc]
//
// Preprocessing follows the exact Ultralytics LetterBox transform:
// - center=True, stride=32, pad_value=114
// - Ultralytics-compatible -0.1/+0.1 rounding for deterministic padding
// - BGR→RGB, /255.0 normalisation, HWC→CHW
// ====================================================================
/// ONNX Runtime inference wrapper for Ultralytics YOLO models.
/// Owns preprocessing (letterbox + normalisation), the session call
/// (via BasicOrtHandler), and task-specific postprocessing. The task
/// (detect / OBB / segment / pose / classify) is inferred from the
/// output tensor shapes at inference time, not configured up front.
class ONNXENGINE_API ONNXYOLO : public BasicOrtHandler
{
public:
/// @param _onnx_path   path to the .onnx model file
/// @param _num_threads intra-op thread count forwarded to the session
explicit ONNXYOLO(const std::string& _onnx_path,
unsigned int _num_threads = 1);
/// Overload selecting the execution backend explicitly
/// (EngineType is declared in ANSEngineCommon.h — e.g. CPU/GPU/DML).
explicit ONNXYOLO(const std::string& _onnx_path,
EngineType engineType,
unsigned int _num_threads = 1);
~ONNXYOLO() override = default;
/// Run inference on a single image.
/// Auto-detects the task type (detect/segment/obb/pose/classify)
/// from the ONNX model's output tensor shapes.
/// @param image         input image (BGR, per the class-header notes)
/// @param classNames    label strings indexed by class id
/// @param confThreshold minimum confidence to keep a candidate
/// @param iouThreshold  NMS IoU threshold (legacy heads only)
/// @param numKPS        keypoint count for pose models; 0 otherwise
/// @return one Object per surviving detection (empty on no hits)
std::vector<Object> detect(const cv::Mat& image,
const std::vector<std::string>& classNames,
float confThreshold = 0.25f,
float iouThreshold = 0.45f,
int numKPS = 0);
/// True after detect() if the last inference was classification.
/// NOTE(review): public mutable flag — callers must read it
/// immediately after detect(); not safe across concurrent calls.
bool lastWasClassification = false;
/// Run batched inference on multiple images in a single ONNX session call.
/// Falls back to sequential detect() if the model has fixed batch=1.
/// @return one result vector per input image, in input order
std::vector<std::vector<Object>> detectBatch(
const std::vector<cv::Mat>& images,
const std::vector<std::string>& classNames,
float confThreshold = 0.25f,
float iouThreshold = 0.45f,
int numKPS = 0);
/// True after detectBatch() if the batch was classification.
/// Same single-reader caveat as lastWasClassification above.
bool lastBatchWasClassification = false;
/// Override the input image shape for dynamic-input models.
/// Call after construction when the model config specifies a
/// different resolution than the default 640x640.
void setInputShape(int width, int height) {
inputImageShape = cv::Size(width, height);
}
/// True if the ONNX model has dynamic spatial dimensions.
bool hasDynamicInputShape() const { return isDynamicInputShape; }
private:
/// BasicOrtHandler hook: convert one preprocessed image into the
/// session's input Ort::Value (letterbox + BGR→RGB + /255 + CHW,
/// per the class-header notes; exact steps live in the .cpp).
Ort::Value transform(const cv::Mat& mat) override;
/// Batched counterpart of transform(): packs N images into one
/// [N,C,H,W] input tensor.
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
// ── Ultralytics-compatible letterbox ────────────────────────────
/// Resize `image` into `outImage` at `newShape` preserving aspect
/// ratio, padding with `color` (114 grey) to a multiple of `stride`.
/// scaleUp=false forbids enlarging small images (Ultralytics default
/// behaviour when scaleup is disabled).
void letterBox(const cv::Mat& image, cv::Mat& outImage,
const cv::Size& newShape,
const cv::Scalar& color = cv::Scalar(114, 114, 114),
bool scaleUp = true,
int stride = 32);
// ── Detection postprocess ───────────────────────────────────────
// End-to-end heads ([B,300,6]) are already NMS-ed in-graph, so only
// a confidence filter + coordinate un-letterboxing is needed.
std::vector<Object> postprocessEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
// Legacy heads ([B, nc+4, N]) need decoding + host-side NMS; maxDet
// caps the survivors (300 matches Ultralytics' default max_det).
std::vector<Object> postprocessLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── OBB postprocess ─────────────────────────────────────────────
// Same end2end/legacy split as detection, with rotated boxes and
// Prob-IoU NMS (helpers below) for the legacy path.
std::vector<Object> postprocessOBBEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
std::vector<Object> postprocessOBBLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── Segmentation postprocess ────────────────────────────────────
// Expects a second "protos" output tensor alongside the detections
// (see the shape table in the class-header comment).
std::vector<Object> postprocessSegEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
std::vector<Object> postprocessSegLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── Pose postprocess ────────────────────────────────────────────
// numKPS keypoints, 3 channels each (x, y, score) per the header
// shape notes ([B,300,6+nk*3]).
std::vector<Object> postprocessPoseEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, int numKPS);
std::vector<Object> postprocessPoseLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int numKPS, int maxDet = 300);
// ── Classification postprocess ──────────────────────────────────
// [B, nc] logits/probabilities → single best-class Object; no
// geometry, so only the original imageSize is needed.
std::vector<Object> postprocessClassify(
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
const cv::Size& imageSize);
// ── OBB NMS helpers (Prob-IoU based) ────────────────────────────
// Rotated box in center/size/angle form. NOTE(review): angle units
// (radians vs degrees) are not visible from this header — confirm
// against the .cpp before reusing OrientedBox elsewhere.
struct OrientedBox {
float x, y, width, height, angle;
};
// Covariance-matrix components (a, b, c) of a rotated box, the
// intermediate quantities Prob-IoU is computed from.
static void getCovarianceComponents(const OrientedBox& box,
float& out1, float& out2, float& out3);
// Pairwise probabilistic IoU matrix: result[i][j] = ProbIoU of
// obb1[i] vs obb2[j]; eps guards divisions/logs near zero.
static std::vector<std::vector<float>> batchProbiou(
const std::vector<OrientedBox>& obb1,
const std::vector<OrientedBox>& obb2, float eps = 1e-7f);
// Core greedy suppression over boxes already sorted by score;
// returns indices (into sortedBoxes) of the kept boxes.
static std::vector<int> nmsRotatedImpl(
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold);
// Convenience wrapper: sorts by `scores` then delegates to
// nmsRotatedImpl; returned indices refer to the original `boxes`.
static std::vector<int> nmsRotated(
const std::vector<OrientedBox>& boxes,
const std::vector<float>& scores, float iouThreshold);
// Four corner points of an oriented box, for drawing/contours.
static std::vector<cv::Point2f> OBBToPoints(const OrientedBox& obb);
// ── Batch output slicing helper ────────────────────────────────
// Extract batch element `batchIndex` from a batched output tensor
// so the single-image postprocessors can be reused per image.
// NOTE(review): whether this copies or aliases the batch buffer is
// not visible here — check the .cpp before holding the result past
// the lifetime of `batchTensor`.
static Ort::Value sliceBatchOutput(
Ort::Value& batchTensor,
int64_t batchIndex,
const std::vector<int64_t>& fullShape,
Ort::MemoryInfo& memInfo);
// Cached model input resolution (default 640x640 per setInputShape's
// contract); read by the preprocessing path.
cv::Size inputImageShape;
// Set when the model declares dynamic spatial dims; exposed via
// hasDynamicInputShape().
bool isDynamicInputShape{ false };
};
// ====================================================================
// ANSONNXYOLO — ANSODBase wrapper for Ultralytics YOLO ONNX
//
// Compatible with all Ultralytics YOLO tasks and versions.
// Implements all required ANSODBase interfaces.
// ====================================================================
/// ANSODBase-conforming facade over ONNXYOLO: handles model loading
/// (from zip or folder), threshold configuration, and DirectML
/// device-lost recovery, delegating actual inference to m_ortEngine.
class ANSENGINE_API ANSONNXYOLO : public ANSODBase {
public:
/// Full initialisation: license check, config, encrypted model zip,
/// and label-map output (semantics defined by ANSODBase — this
/// header only declares the override).
bool Initialize(std::string licenseKey, ModelConfig modelConfig,
const std::string& modelZipFilePath,
const std::string& modelZipPassword,
std::string& labelMap) override;
/// (Re)load the model from a password-protected zip archive.
bool LoadModel(const std::string& modelZipFilePath,
const std::string& modelZipPassword) override;
/// Load an unpacked model directly from a folder on disk.
bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
std::string modelName, std::string className,
const std::string& modelFolder,
std::string& labelMap) override;
/// Produce an optimized model (fp16 optionally); the output folder
/// path is returned through optimizedModelFolder.
bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;
/// Single-image inference; overload without a camera id.
/// NOTE(review): these two overloads are not marked override —
/// presumably non-virtual convenience entry points; confirm the
/// base-class signatures if that is unintended.
std::vector<Object> RunInference(const cv::Mat& input);
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
/// Batched inference over several frames from one camera.
std::vector<std::vector<Object>> RunInferencesBatch(
const std::vector<cv::Mat>& inputs,
const std::string& camera_id) override;
/// Tear down the engine/session. NOTE(review): not marked override
/// and the destructor lacks override too — verify ANSODBase has a
/// virtual destructor so delete-through-base is safe.
bool Destroy();
~ANSONNXYOLO();
private:
// Resolved path of the .onnx file after extraction/loading.
std::string _modelFilePath;
// Guards inference: stays false until a model loads successfully.
bool _modelLoadValid{ false };
// Filter thresholds (defaults mirror ONNXYOLO::detect's defaults)
float PROBABILITY_THRESHOLD{ 0.25f };   // confidence cutoff
float NMS_THRESHOLD{ 0.45f };           // IoU threshold for NMS
int TOP_K{ 300 };                       // max detections kept
// Pose estimation
int NUM_KPS{ 0 };                       // keypoints per object; 0 = not pose
float KPS_THRESHOLD{ 0.5f };            // per-keypoint score cutoff
// ONNX Runtime inference engine
std::unique_ptr<ONNXYOLO> m_ortEngine;
// DML device-lost recovery: when DirectML's GPU device is removed
// (HRESULT 887A0005), the session is permanently broken. We detect
// this once, attempt a CPU-fallback recreation, and suppress further
// error-log flooding.
bool _dmlDeviceLost{ false };
// Internal detection pipeline
std::vector<Object> DetectObjects(const cv::Mat& inputImage,
const std::string& camera_id);
// Internal batch detection pipeline
std::vector<std::vector<Object>> DetectObjectsBatch(
const std::vector<cv::Mat>& inputImages,
const std::string& camera_id);
// Initialise ORT engine from the resolved model path
bool InitOrtEngine();
public:
// Initialise ORT engine with explicit engine type override (e.g. CPU fallback for AMD iGPUs)
bool InitOrtEngine(ANSCENTER::EngineType engineType);
};
}
#endif