#ifndef ANSYOLOV10RTOD_H #define ANSYOLOV10RTOD_H #pragma once #include "ANSEngineCommon.h" #include "engine.h" #include "NV12PreprocessHelper.h" namespace ANSCENTER { class ANSENGINE_API ANSYOLOV10RTOD :public ANSODBase { public: virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) override; virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)override; virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap)override; virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder); std::vector RunInference(const cv::Mat& input); std::vector RunInference(const cv::Mat& input, const std::string& camera_id); virtual std::vector> RunInferencesBatch( const std::vector& inputs, const std::string& camera_id) override; bool Destroy(); ~ANSYOLOV10RTOD(); private: std::string _modelFilePath; bool _modelLoadValid; bool _fp16{ false }; int m_maxSlotsPerGpu{ 1 }; // 1 = one slot per GPU, multi-GPU round-robin (no elastic) void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; } ANSCENTER::Options m_options; private: // Per-call batch metadata — passed by out-param to avoid shared state struct BatchMetadata { std::vector imgHeights; std::vector imgWidths; std::vector ratios; }; // Per-call single-image metadata struct ImageMetadata { float ratio = 1.f; float imgWidth = 0.f; float imgHeight = 0.f; }; // Preprocess the input; fills outMeta with per-call image geometry std::vector> Preprocess(const cv::Mat& inputImage, ImageMetadata& outMeta); // Postprocess the output std::vector Postprocess(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta); // Postprocess the output for segmentation model std::vector PostProcessSegmentation(std::vector>& featureVectors, const std::string& camera_id, const ImageMetadata& meta); // Postprocess the output for pose model std::vector PostProcessPose(std::vector& featureVector, const std::string& camera_id, const ImageMetadata& meta); std::vector DetectObjects(const cv::Mat& inputImage, const std::string& camera_id); std::vector> DetectObjectsBatch(const std::vector& inputImages, const std::string& camera_id); std::vector> PreprocessBatch( const std::vector& inputImages, BatchMetadata& outMetadata); std::vector PostprocessBatch(std::vector& featureVector, const std::string& camera_id, size_t batchIdx, const BatchMetadata& metadata); NV12PreprocessHelper m_nv12Helper; std::unique_ptr> m_trtEngine = nullptr; // Used for image preprocessing // YoloV8 model expects values between [0.f, 1.f] so we use the following params const std::array SUB_VALS{ 0.f, 0.f, 0.f }; const std::array DIV_VALS{ 1.f, 1.f, 1.f }; const bool NORMALIZE = true; // Filter thresholds float PROBABILITY_THRESHOLD; float NMS_THRESHOLD; int TOP_K; // Segmentation constants int SEG_CHANNELS; int SEG_H; int SEG_W; float SEGMENTATION_THRESHOLD; // Pose estimation constant int NUM_KPS; float KPS_THRESHOLD; }; } #endif