// File: ANSCORE/modules/ANSODEngine/ANSTENSORRTSEG.h
// (93 lines, 4.4 KiB, C++)
#ifndef ANSTENSORRTSEG_H
#define ANSTENSORRTSEG_H
#pragma once
#include "ANSEngineCommon.h"
#include "engine.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h"
namespace ANSCENTER {
// TensorRT class
// TensorRT class — TensorRT-backed segmentation inference engine.
// Declaration only: loads a model (from a password-protected zip or a folder),
// optionally builds an FP16-optimized engine, and runs single-image or batched
// inference returning detected Objects. Implementations live in the .cpp.
class ANSENGINE_API TENSORRTSEG :public ANSODBase {
public:
// Initializes the engine from a password-protected model zip; on success
// labelMap receives the class-label mapping.
virtual bool Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, std::string& labelMap) override;
// Loads the model from a password-protected zip archive.
virtual bool LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword)override;
// Loads a model directly from an unpacked folder; labelMap receives the labels.
virtual bool LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap)override;
// Builds an optimized engine (FP16 when fp16 is true) into optimizedModelFolder.
// NOTE(review): unlike the three methods above this one is not marked
// `override` — confirm whether ANSODBase declares it virtual.
virtual bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);
// Single-image inference; the second overload threads a camera identifier
// through to post-processing.
std::vector<Object> RunInference(const cv::Mat& input);
std::vector<Object> RunInference(const cv::Mat& input, const std::string& camera_id);
// Batched inference: one result vector per input image.
virtual std::vector<std::vector<Object>> RunInferencesBatch(
const std::vector<cv::Mat>& inputs, const std::string& camera_id) override;
// Releases engine resources; returns success.
bool Destroy();
~TENSORRTSEG();
private:
std::string _modelFilePath;   // path of the currently loaded model
// NOTE(review): _modelLoadValid has no in-class initializer — confirm it is
// assigned before any inference path reads it.
bool _modelLoadValid;
bool _fp16{ false };          // whether the FP16-optimized engine is requested
int m_maxSlotsPerGpu{ 1 }; // 1 = one slot per GPU, multi-GPU round-robin (no elastic)
void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }
// NV12 fast-path helper
NV12PreprocessHelper m_nv12Helper;
private:
// Per-call batch metadata — passed by out-param to avoid shared state
struct BatchMetadata {
std::vector<int> imgHeights;   // original height per batch image
std::vector<int> imgWidths;    // original width per batch image
std::vector<float> ratios;     // per-image resize ratio from preprocessing
};
// Per-call single-image metadata
// NOTE(review): stores geometry as float while BatchMetadata uses int —
// presumably intentional for ratio math; verify against the .cpp.
struct ImageMetadata {
float ratio = 1.f;
float imgWidth = 0.f;
float imgHeight = 0.f;
};
// Preprocess the input; fills outMeta with per-call image geometry
std::vector<std::vector<cv::cuda::GpuMat>> Preprocess(const cv::Mat& inputImage,
ImageMetadata& outMeta);
// Postprocess the output for segmentation model
std::vector<Object> PostProcessSegmentation(std::vector<std::vector<float>>& featureVectors,
const std::string& camera_id,
const ImageMetadata& meta);
// Full single-image pipeline (preprocess, engine run, postprocess).
std::vector<Object> DetectObjects(const cv::Mat& inputImage, const std::string& camera_id);
// Batched counterpart of DetectObjects.
std::vector<std::vector<Object>> DetectObjectsBatch(const std::vector<cv::Mat>& inputImages,
const std::string& camera_id);
// Batched preprocessing; fills outMetadata with per-image geometry.
std::vector<std::vector<cv::cuda::GpuMat>> PreprocessBatch(const std::vector<cv::Mat>& inputImages,
BatchMetadata& outMetadata);
// Postprocess the batch entry selected by batchIdx using the recorded metadata.
std::vector<Object> PostProcessSegmentationBatch(std::vector<std::vector<float>>& featureVectors,
const std::string& camera_id,
size_t batchIdx,
const BatchMetadata& metadata);
// Owning handle to the underlying TensorRT engine wrapper.
std::unique_ptr<Engine<float>> m_trtEngine = nullptr;
// Converts a binary mask (restricted to boundingBox) into polygon points;
// judging by the parameter names, simplificationEpsilon controls contour
// simplification and contours under minContourArea are discarded — confirm
// against the implementation.
std::vector<cv::Point2f> maskToPolygon(const cv::Mat& binaryMask,
const cv::Rect& boundingBox,
float simplificationEpsilon = 2.0f,
int minContourArea = 10);
// Used for image preprocessing
// YoloV8 model expects values between [0.f, 1.f] so we use the following params
const std::array<float, 3> SUB_VALS{ 0.f, 0.f, 0.f };   // per-channel value subtracted
const std::array<float, 3> DIV_VALS{ 1.f, 1.f, 1.f };   // per-channel divisor
const bool NORMALIZE = true;                            // scale pixels into [0, 1]
ANSCENTER::Options m_options;
// Filter thresholds
// NOTE(review): none of the members below have in-class initializers —
// confirm they are all assigned during Initialize/LoadModel before inference.
float PROBABILITY_THRESHOLD;
float NMS_THRESHOLD;
int TOP_K;
// Segmentation constants
int SEG_CHANNELS;
int SEG_H;
int SEG_W;
float SEGMENTATION_THRESHOLD;
// Pose estimation constant
int NUM_KPS;
float KPS_THRESHOLD;
};
}
#endif