#ifndef ANSRTYOLO_H
#define ANSRTYOLO_H
// NOTE(review): a classic include guard and `#pragma once` are both present;
// either one alone is normally sufficient.
#pragma once
// NOTE(review): no standard-library headers are included directly here even
// though std::string, std::vector, std::array and std::shared_ptr are used;
// presumably they arrive transitively through the project headers — confirm.
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
#include "engine/EnginePoolManager.h"

namespace ANSCENTER {

    // ====================================================================
    // ANSRTYOLO — TensorRT-based Ultralytics YOLO inference
    //
    // Compatible with ALL Ultralytics YOLO tasks and versions
    // (v8, v9, v10, v11, v26+) by auto-detecting the task type
    // from output tensor shapes at inference time.
    //
    // Supported tasks:
    //   - Detection     : legacy [B, nc+4, N]        or end2end [B, 300, 6]
    //   - OBB           : legacy [B, nc+5, N]        or end2end [B, 300, 7]
    //   - Segmentation  : legacy [B, nc+36, N]+protos or end2end [B, 300, 38]+protos
    //   - Pose          : legacy [B, nc+4+nk*3, N]   or end2end [B, 300, 6+nk*3]
    //   - Classification: [B, nc]
    //
    // Uses Engine with GPU preprocessing (cv::cuda::GpuMat),
    // supports multi-GPU pool via SetMaxSlotsPerGpu.
    //
    // NOTE(review): in this view of the file the template argument lists
    // appear to be missing (e.g. `std::vector` with no element type, or a
    // stray `>` as in `std::vector>`). The declarations below are kept
    // exactly as found; restore the arguments from the authoritative copy
    // of this header rather than guessing them.
    // ====================================================================
    class ANSENGINE_API ANSRTYOLO : public ANSODBase {
    public:
        // ── Model lifecycle (overrides of the ANSODBase interface) ──────
        // Each returns bool; presumably true on success — confirm against
        // the implementation / base-class contract.
        bool Initialize(std::string licenseKey, ModelConfig modelConfig,
            const std::string& modelZipFilePath,
            const std::string& modelZipPassword,
            std::string& labelMap) override;
        bool LoadModel(const std::string& modelZipFilePath,
            const std::string& modelZipPassword) override;
        bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
            std::string modelName, std::string className,
            const std::string& modelFolder,
            std::string& labelMap) override;
        bool OptimizeModel(bool fp16, std::string& optimizedModelFolder) override;

        // ── Inference entry points ──────────────────────────────────────
        // NOTE(review): unlike RunInferencesBatch, the two RunInference
        // overloads are not marked `override`; confirm whether ANSODBase
        // declares them virtual and, if so, whether `override` is missing.
        std::vector RunInference(const cv::Mat& input);
        std::vector RunInference(const cv::Mat& input, const std::string& camera_id);
        std::vector> RunInferencesBatch(
            const std::vector& inputs, const std::string& camera_id) override;

        // NOTE(review): Destroy() and the destructor carry no `override` /
        // `virtual` here; confirm how they relate to ANSODBase.
        bool Destroy();
        ~ANSRTYOLO();

    private:
        // ── Model state ─────────────────────────────────────────────────
        std::string _modelFilePath;
        bool _modelLoadValid{ false };
        bool _fp16{ false };
        bool _isFixedBatch{ false };
        int m_maxSlotsPerGpu{ 1 }; // 1 = one slot per GPU, multi-GPU round-robin (no elastic)

        // Stores the requested slot count in m_maxSlotsPerGpu; nothing else
        // happens at call time.
        // NOTE(review): this override sits in the private section, so it is
        // only reachable through the base-class interface — confirm that is
        // intended, given the class comment advertises it as the way to
        // enable the multi-GPU pool.
        void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }

        // NV12 fast-path helper (shared with ANSYOLOV12RTOD, ANSYOLOV10RTOD)
        NV12PreprocessHelper m_nv12Helper;

        // Per-call image geometry for concurrent inference.
        // Defaults: ratio 1, width/height 0.
        // NOTE(review): presumably `ratio` is the resize scale applied
        // during preprocessing and the sizes are in pixels — confirm.
        struct ImageMetadata {
            float ratio = 1.f;
            float imgWidth = 0.f;
            float imgHeight = 0.f;
        };

        // Batch counterpart of ImageMetadata, stored as three parallel lists.
        // NOTE(review): presumably index-aligned with the input batch — confirm.
        struct BatchMetadata {
            std::vector imgHeights;
            std::vector imgWidths;
            std::vector ratios;
        };

        // ── GPU Preprocessing ───────────────────────────────────────────
        // Both variants fill the supplied metadata object (non-const
        // reference out-parameter) in addition to their return value.
        std::vector> Preprocess(
            const cv::Mat& inputImage, ImageMetadata& outMeta);
        std::vector> PreprocessBatch(
            const std::vector& inputImages, BatchMetadata& outMetadata);

        // ── Detection pipeline ──────────────────────────────────────────
        std::vector DetectObjects(const cv::Mat& inputImage,
            const std::string& camera_id);
        std::vector> DetectObjectsBatch(
            const std::vector& inputImages, const std::string& camera_id);

        // ── Task-specific postprocessors (legacy format) ────────────────
        // All take the raw feature data by non-const reference, the camera
        // id, and the per-call ImageMetadata. Only the segmentation variant
        // takes `featureVectors` (plural).
        std::vector PostprocessDetection(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessOBB(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessSegmentation(
            std::vector>& featureVectors,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessPose(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessClassify(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);

        // ── End2end postprocessors ──────────────────────────────────────
        // Same parameter layout as the legacy postprocessors above.
        std::vector PostprocessDetectionE2E(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessOBBE2E(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessSegE2E(
            std::vector>& featureVectors,
            const std::string& camera_id, const ImageMetadata& meta);
        std::vector PostprocessPoseE2E(
            std::vector& featureVector,
            const std::string& camera_id, const ImageMetadata& meta);

        // ── OBB NMS helpers (Prob-IoU based) ────────────────────────────
        // All helpers are static: they take their inputs as arguments and
        // do not touch instance state.
        // NOTE(review): the unit of `angle` (radians vs degrees) and whether
        // x/y is the box centre are not visible in this header — confirm.
        struct OrientedBox {
            float x, y, width, height, angle;
        };
        // Writes three values derived from `box` into out1..out3.
        static void getCovarianceComponents(const OrientedBox& box,
            float& out1, float& out2, float& out3);
        // `eps` defaults to 1e-7f.
        static std::vector> batchProbiou(
            const std::vector& obb1, const std::vector& obb2,
            float eps = 1e-7f);
        static std::vector nmsRotatedImpl(
            const std::vector& sortedBoxes, float iouThreshold);
        static std::vector nmsRotated(
            const std::vector& boxes, const std::vector& scores,
            float iouThreshold);
        static std::vector OBBToPoints(const OrientedBox& obb);

        // ── TensorRT engine (shared across tasks using same model) ──────
        std::shared_ptr> m_trtEngine = nullptr;
        EnginePoolManager::PoolKey m_poolKey; // key for release
        bool m_usingSharedPool = false;

        // Preprocessing constants: YOLO expects [0,1] normalized input
        // NOTE(review): these are const *non-static* data members, which
        // makes the implicitly-declared copy-assignment operator deleted;
        // confirm that is acceptable for this class.
        const std::array SUB_VALS{ 0.f, 0.f, 0.f };
        const std::array DIV_VALS{ 1.f, 1.f, 1.f };
        const bool NORMALIZE = true;

        ANSCENTER::Options m_options;

        // Filter thresholds (in-class defaults shown; not const, so they
        // may be changed elsewhere in the implementation)
        float PROBABILITY_THRESHOLD{ 0.25f };
        float NMS_THRESHOLD{ 0.45f };
        int TOP_K{ 300 };

        // Segmentation constants
        int SEG_CHANNELS{ 32 };
        int SEG_H{ 160 };
        int SEG_W{ 160 };
        float SEGMENTATION_THRESHOLD{ 0.5f };

        // Pose estimation constants
        int NUM_KPS{ 0 };
        float KPS_THRESHOLD{ 0.5f };
    };

} // namespace ANSCENTER
#endif // ANSRTYOLO_H