// File: ANSCORE/modules/ANSODEngine/ANSRTYOLO.h
// (167 lines, 7.6 KiB, C++)
#ifndef ANSRTYOLO_H
#define ANSRTYOLO_H
#pragma once
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
#include "engine/EnginePoolManager.h"
namespace ANSCENTER {
// ====================================================================
// ANSRTYOLO — TensorRT-based Ultralytics YOLO inference
//
// Compatible with ALL Ultralytics YOLO tasks and versions
// (v8, v9, v10, v11, v26+) by auto-detecting the task type
// from output tensor shapes at inference time.
//
// Supported tasks:
// - Detection : legacy [B, nc+4, N] or end2end [B, 300, 6]
// - OBB : legacy [B, nc+5, N] or end2end [B, 300, 7]
// - Segmentation: legacy [B, nc+36, N]+protos or end2end [B, 300, 38]+protos
// - Pose : legacy [B, nc+4+nk*3, N] or end2end [B, 300, 6+nk*3]
// - Classification: [B, nc]
//
// Uses Engine<float> with GPU preprocessing (cv::cuda::GpuMat),
// supports multi-GPU pool via SetMaxSlotsPerGpu.
// ====================================================================
class ANSENGINE_API ANSRTYOLO : public ANSODBase {
public:
// Full setup: validates the license, applies modelConfig, loads the model
// from a password-protected zip, and returns the label map via labelMap.
bool Initialize(std::string licenseKey, ModelConfig modelConfig,
const std::string& modelZipFilePath,
const std::string& modelZipPassword,
std::string& labelMap) override;
// Loads (or reloads) the model from an encrypted zip archive only;
// assumes Initialize-level state (license/config) is already in place.
bool LoadModel(const std::string& modelZipFilePath,
const std::string& modelZipPassword) override;
// Loads a model from an unpacked folder instead of a zip archive.
bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
std::string modelName, std::string className,
const std::string& modelFolder,
std::string& labelMap) override;
// Builds/serializes an optimized TensorRT engine (optionally FP16) and
// reports the output location through optimizedModelFolder.
bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;
// Single-image inference. The camera_id overload tags results per stream;
// NOTE(review): these two are not marked override — confirm whether the
// base class declares matching virtuals.
std::vector<Object> RunInference(const cv::Mat& input);
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
// Batched inference: one result vector per input image, in input order.
std::vector<std::vector<Object>> RunInferencesBatch(
const std::vector<cv::Mat>& inputs, const std::string& camera_id) override;
// Releases engine/pool resources; also lacks override — verify base contract.
bool Destroy();
// NOTE(review): relies on ANSODBase declaring a virtual destructor for safe
// delete-through-base — confirm in the base header.
~ANSRTYOLO();
private:
std::string _modelFilePath;
bool _modelLoadValid{ false };        // true once a model is successfully loaded
bool _fp16{ false };                  // engine built with FP16 precision
bool _isFixedBatch{ false };          // engine compiled with a fixed batch size
int m_maxSlotsPerGpu{ 1 }; // 1 = one slot per GPU, multi-GPU round-robin (no elastic)
void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }
// NV12 fast-path helper (shared with ANSYOLOV12RTOD, ANSYOLOV10RTOD)
NV12PreprocessHelper m_nv12Helper;
// Per-call image geometry for concurrent inference
struct ImageMetadata {
float ratio = 1.f;      // letterbox scale factor applied during preprocessing
float imgWidth = 0.f;   // original image width in pixels
float imgHeight = 0.f;  // original image height in pixels
};
// Same geometry, one entry per image, for the batched pipeline.
struct BatchMetadata {
std::vector<int> imgHeights;
std::vector<int> imgWidths;
std::vector<float> ratios;
};
// ── GPU Preprocessing ────────────────────────────────────────────
// Convert/resize input on the GPU into engine-ready tensors; the outer
// vector groups inputs per engine binding, the inner per batch element.
std::vector<std::vector<cv::cuda::GpuMat>> Preprocess(
const cv::Mat& inputImage, ImageMetadata& outMeta);
std::vector<std::vector<cv::cuda::GpuMat>> PreprocessBatch(
const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata);
// ── Detection pipeline ───────────────────────────────────────────
// End-to-end single-image pipeline: Preprocess → engine run → task-specific
// postprocess (task auto-detected from output tensor shape, see class banner).
std::vector<Object> DetectObjects(const cv::Mat& inputImage,
const std::string& camera_id);
std::vector<std::vector<Object>> DetectObjectsBatch(
const std::vector<cv::Mat>& inputImages, const std::string& camera_id);
// ── Task-specific postprocessors (legacy format) ─────────────────
// Each decodes a raw output tensor (flattened to float) into Objects,
// mapping boxes back to original-image coordinates via meta.
std::vector<Object> PostprocessDetection(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessOBB(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
// Segmentation takes multiple tensors (detections + mask prototypes).
std::vector<Object> PostprocessSegmentation(
std::vector<std::vector<float>>& featureVectors,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessPose(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessClassify(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
// ── End2end postprocessors ───────────────────────────────────────
// Variants for models exported with NMS baked in (fixed top-300 outputs);
// no host-side NMS is needed for these.
std::vector<Object> PostprocessDetectionE2E(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessOBBE2E(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessSegE2E(
std::vector<std::vector<float>>& featureVectors,
const std::string& camera_id, const ImageMetadata& meta);
std::vector<Object> PostprocessPoseE2E(
std::vector<float>& featureVector,
const std::string& camera_id, const ImageMetadata& meta);
// ── OBB NMS helpers (Prob-IoU based) ─────────────────────────────
// Rotated box as center (x, y), size (width, height), rotation angle.
// NOTE(review): angle units (radians vs degrees) not visible here — confirm
// against the postprocessor implementation.
struct OrientedBox {
float x, y, width, height, angle;
};
// Covariance-matrix components of a rotated box, used by Prob-IoU.
static void getCovarianceComponents(const OrientedBox& box,
float& out1, float& out2, float& out3);
// Pairwise Prob-IoU matrix between two box sets; eps guards divisions.
static std::vector<std::vector<float>> batchProbiou(
const std::vector<OrientedBox>& obb1,
const std::vector<OrientedBox>& obb2, float eps = 1e-7f);
// NMS core over boxes pre-sorted by score; returns kept indices.
static std::vector<int> nmsRotatedImpl(
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold);
// Convenience wrapper: sorts by score, then delegates to nmsRotatedImpl.
static std::vector<int> nmsRotated(
const std::vector<OrientedBox>& boxes,
const std::vector<float>& scores, float iouThreshold);
// Converts an oriented box to its 4 corner points for drawing/export.
static std::vector<cv::Point2f> OBBToPoints(const OrientedBox& obb);
// ── TensorRT engine (shared across tasks using same model) ──────
std::shared_ptr<Engine<float>> m_trtEngine = nullptr;
EnginePoolManager<float>::PoolKey m_poolKey; // key for release
bool m_usingSharedPool = false;              // engine obtained from the shared pool
// Preprocessing constants: YOLO expects [0,1] normalized input
const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };
const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };
const bool NORMALIZE = true;
ANSCENTER::Options m_options;
// Filter thresholds
float PROBABILITY_THRESHOLD{ 0.25f }; // minimum confidence to keep a detection
float NMS_THRESHOLD{ 0.45f };         // IoU threshold for host-side NMS
int TOP_K{ 300 };                     // max detections kept after filtering
// Segmentation constants
int SEG_CHANNELS{ 32 };  // mask prototype channels (the 32 in [B, nc+36, N])
int SEG_H{ 160 };        // prototype mask height
int SEG_W{ 160 };        // prototype mask width
float SEGMENTATION_THRESHOLD{ 0.5f }; // mask binarization cutoff
// Pose estimation constants
int NUM_KPS{ 0 };            // keypoints per object; presumably set from model shape at load — verify
float KPS_THRESHOLD{ 0.5f }; // minimum keypoint confidence
};
} // namespace ANSCENTER
#endif