// ANSRTYOLO.h — C++ header (TensorRT-based Ultralytics YOLO inference wrapper)
#ifndef ANSRTYOLO_H
#define ANSRTYOLO_H

#pragma once

// Include order preserved from the original file; project headers may
// depend on it (ANSEngineCommon.h likely pulls in shared types first).
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
#include "engine/EnginePoolManager.h"
namespace ANSCENTER {
|
|
|
|
// ====================================================================
|
|
// ANSRTYOLO — TensorRT-based Ultralytics YOLO inference
|
|
//
|
|
// Compatible with ALL Ultralytics YOLO tasks and versions
|
|
// (v8, v9, v10, v11, v26+) by auto-detecting the task type
|
|
// from output tensor shapes at inference time.
|
|
//
|
|
// Supported tasks:
|
|
// - Detection : legacy [B, nc+4, N] or end2end [B, 300, 6]
|
|
// - OBB : legacy [B, nc+5, N] or end2end [B, 300, 7]
|
|
// - Segmentation: legacy [B, nc+36, N]+protos or end2end [B, 300, 38]+protos
|
|
// - Pose : legacy [B, nc+4+nk*3, N] or end2end [B, 300, 6+nk*3]
|
|
// - Classification: [B, nc]
|
|
//
|
|
// Uses Engine<float> with GPU preprocessing (cv::cuda::GpuMat),
|
|
// supports multi-GPU pool via SetMaxSlotsPerGpu.
|
|
// ====================================================================
|
|
class ANSENGINE_API ANSRTYOLO : public ANSODBase {
|
|
public:
|
|
bool Initialize(std::string licenseKey, ModelConfig modelConfig,
|
|
const std::string& modelZipFilePath,
|
|
const std::string& modelZipPassword,
|
|
std::string& labelMap) override;
|
|
|
|
bool LoadModel(const std::string& modelZipFilePath,
|
|
const std::string& modelZipPassword) override;
|
|
|
|
bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
|
|
std::string modelName, std::string className,
|
|
const std::string& modelFolder,
|
|
std::string& labelMap) override;
|
|
|
|
bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;
|
|
|
|
std::vector<Object> RunInference(const cv::Mat& input);
|
|
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
|
|
std::vector<std::vector<Object>> RunInferencesBatch(
|
|
const std::vector<cv::Mat>& inputs, const std::string& camera_id) override;
|
|
|
|
bool Destroy();
|
|
~ANSRTYOLO();
|
|
|
|
private:
|
|
std::string _modelFilePath;
|
|
bool _modelLoadValid{ false };
|
|
bool _fp16{ false };
|
|
bool _isFixedBatch{ false };
|
|
int m_maxSlotsPerGpu{ 1 }; // 1 = one slot per GPU, multi-GPU round-robin (no elastic)
|
|
void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }
|
|
|
|
// NV12 fast-path helper (shared with ANSYOLOV12RTOD, ANSYOLOV10RTOD)
|
|
NV12PreprocessHelper m_nv12Helper;
|
|
|
|
// Per-call image geometry for concurrent inference
|
|
struct ImageMetadata {
|
|
float ratio = 1.f;
|
|
float imgWidth = 0.f;
|
|
float imgHeight = 0.f;
|
|
};
|
|
|
|
struct BatchMetadata {
|
|
std::vector<int> imgHeights;
|
|
std::vector<int> imgWidths;
|
|
std::vector<float> ratios;
|
|
};
|
|
|
|
// ── GPU Preprocessing ────────────────────────────────────────────
|
|
std::vector<std::vector<cv::cuda::GpuMat>> Preprocess(
|
|
const cv::Mat& inputImage, ImageMetadata& outMeta);
|
|
std::vector<std::vector<cv::cuda::GpuMat>> PreprocessBatch(
|
|
const std::vector<cv::Mat>& inputImages, BatchMetadata& outMetadata);
|
|
|
|
|
|
// ── Detection pipeline ───────────────────────────────────────────
|
|
std::vector<Object> DetectObjects(const cv::Mat& inputImage,
|
|
const std::string& camera_id);
|
|
std::vector<std::vector<Object>> DetectObjectsBatch(
|
|
const std::vector<cv::Mat>& inputImages, const std::string& camera_id);
|
|
|
|
// ── Task-specific postprocessors (legacy format) ─────────────────
|
|
std::vector<Object> PostprocessDetection(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessOBB(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessSegmentation(
|
|
std::vector<std::vector<float>>& featureVectors,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessPose(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessClassify(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
|
|
// ── End2end postprocessors ───────────────────────────────────────
|
|
std::vector<Object> PostprocessDetectionE2E(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessOBBE2E(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessSegE2E(
|
|
std::vector<std::vector<float>>& featureVectors,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
std::vector<Object> PostprocessPoseE2E(
|
|
std::vector<float>& featureVector,
|
|
const std::string& camera_id, const ImageMetadata& meta);
|
|
|
|
// ── OBB NMS helpers (Prob-IoU based) ─────────────────────────────
|
|
struct OrientedBox {
|
|
float x, y, width, height, angle;
|
|
};
|
|
static void getCovarianceComponents(const OrientedBox& box,
|
|
float& out1, float& out2, float& out3);
|
|
static std::vector<std::vector<float>> batchProbiou(
|
|
const std::vector<OrientedBox>& obb1,
|
|
const std::vector<OrientedBox>& obb2, float eps = 1e-7f);
|
|
static std::vector<int> nmsRotatedImpl(
|
|
const std::vector<OrientedBox>& sortedBoxes, float iouThreshold);
|
|
static std::vector<int> nmsRotated(
|
|
const std::vector<OrientedBox>& boxes,
|
|
const std::vector<float>& scores, float iouThreshold);
|
|
static std::vector<cv::Point2f> OBBToPoints(const OrientedBox& obb);
|
|
|
|
// ── TensorRT engine (shared across tasks using same model) ──────
|
|
std::shared_ptr<Engine<float>> m_trtEngine = nullptr;
|
|
EnginePoolManager<float>::PoolKey m_poolKey; // key for release
|
|
bool m_usingSharedPool = false;
|
|
|
|
// Preprocessing constants: YOLO expects [0,1] normalized input
|
|
const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };
|
|
const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };
|
|
const bool NORMALIZE = true;
|
|
|
|
ANSCENTER::Options m_options;
|
|
|
|
// Filter thresholds
|
|
float PROBABILITY_THRESHOLD{ 0.25f };
|
|
float NMS_THRESHOLD{ 0.45f };
|
|
int TOP_K{ 300 };
|
|
|
|
// Segmentation constants
|
|
int SEG_CHANNELS{ 32 };
|
|
int SEG_H{ 160 };
|
|
int SEG_W{ 160 };
|
|
float SEGMENTATION_THRESHOLD{ 0.5f };
|
|
|
|
// Pose estimation constants
|
|
int NUM_KPS{ 0 };
|
|
float KPS_THRESHOLD{ 0.5f };
|
|
};
|
|
|
|
} // namespace ANSCENTER
#endif // ANSRTYOLO_H