#pragma once #ifndef ONNXEngine_H #define ONNXEngine_H #include #include #include #include #include #include #include "onnxruntime_cxx_api.h" #include "opencv2/opencv.hpp" #include "EPLoader.h" // brings in EngineType via ANSLicenseHelper #define LITEORT_CHAR wchar_t #ifdef ENGINE_EXPORTS #define ONNXENGINE_API __declspec(dllexport) #else #define ONNXENGINE_API __declspec(dllimport) #endif namespace ANSCENTER { // ==================================================================== // types // ==================================================================== namespace types { template static inline void __assert_type() { static_assert( std::is_standard_layout_v<_T1> && std::is_trivially_copyable_v<_T1> && std::is_standard_layout_v<_T2> && std::is_trivially_copyable_v<_T2> && std::is_floating_point<_T2>::value && (std::is_integral<_T1>::value || std::is_floating_point<_T1>::value), "not support type."); } template struct BoundingBoxType { typedef T1 value_type; typedef T2 score_type; value_type x1, y1, x2, y2; score_type score; const char* label_text; unsigned int label; bool flag; template BoundingBoxType convert_type() const; template value_type iou_of(const BoundingBoxType& other) const; value_type width() const; value_type height() const; value_type area() const; ::cv::Rect rect() const; ::cv::Point2i tl() const; ::cv::Point2i rb() const; BoundingBoxType() : x1(0), y1(0), x2(0), y2(0), score(0), label_text(nullptr), label(0), flag(false) { types::__assert_type(); } }; template class BoundingBoxType; template class BoundingBoxType; template class BoundingBoxType; typedef BoundingBoxType Boxi; typedef BoundingBoxType Boxf; typedef BoundingBoxType Boxd; typedef struct LandmarksType { std::vector points; bool flag; LandmarksType() : flag(false) {} } Landmarks; typedef Landmarks Landmarks2D; typedef struct Landmarks3DType { std::vector points; bool flag; Landmarks3DType() : flag(false) {} } Landmarks3D; typedef struct BoxfWithLandmarksType { Boxf box; Landmarks landmarks; bool flag; BoxfWithLandmarksType() : flag(false) {} } BoxfWithLandmarks; typedef struct EulerAnglesType { float yaw, pitch, roll; bool flag; EulerAnglesType() : flag(false) {} } EulerAngles; typedef struct EmotionsType { float score; unsigned int label; const char* text; bool flag; EmotionsType() : flag(false) {} } Emotions; typedef struct AgeType { float age; unsigned int age_interval[2]; float interval_prob; bool flag; AgeType() : flag(false) {} } Age; typedef struct GenderType { float score; unsigned int label; const char* text; bool flag; GenderType() : flag(false) {} } Gender; typedef struct FaceContentType { std::vector embedding; unsigned int dim; bool flag; FaceContentType() : flag(false) {} } FaceContent; typedef struct SegmentContentType { cv::Mat class_mat; cv::Mat color_mat; std::unordered_map names_map; bool flag; SegmentContentType() : flag(false) {} } SegmentContent; typedef struct MattingContentType { cv::Mat fgr_mat; cv::Mat pha_mat; cv::Mat merge_mat; bool flag; MattingContentType() : flag(false) {} } MattingContent; typedef struct SegmentationMaskContentType { cv::Mat mask; bool flag; SegmentationMaskContentType() : flag(false) {} } SegmentationMaskContent; typedef struct ImageNetContentType { std::vector scores; std::vector texts; std::vector labels; bool flag; ImageNetContentType() : flag(false) {} } ImageNetContent; typedef ImageNetContent ClassificationContent; typedef struct StyleContentType { cv::Mat mat; bool flag; StyleContentType() : flag(false) {} } StyleContent; typedef struct SuperResolutionContentType { cv::Mat mat; bool flag; SuperResolutionContentType() : flag(false) {} } SuperResolutionContent; typedef struct FaceParsingContentType { cv::Mat label; cv::Mat merge; bool flag; FaceParsingContentType() : flag(false) {} } FaceParsingContent; typedef SegmentationMaskContent HairSegContent; typedef SegmentationMaskContent HeadSegContent; typedef SegmentationMaskContent FaceHairSegContent; typedef SegmentationMaskContent PortraitSegContent; } // namespace types // ==================================================================== // utils // ==================================================================== namespace utils { namespace transform { enum { CHW = 0, HWC = 1 }; Ort::Value create_tensor( const cv::Mat& mat, const std::vector& tensor_dims, const Ort::MemoryInfo& memory_info_handler, std::vector& tensor_value_handler, unsigned int data_format = CHW); Ort::Value create_tensor_batch( const std::vector& batch_mats, const std::vector& tensor_dims, const Ort::MemoryInfo& memory_info_handler, std::vector& tensor_value_handler, unsigned int data_format = CHW); Ort::Value create_video_tensor_5d( const std::deque& frames, const std::vector& tensor_dims, const Ort::MemoryInfo& memory_info_handler, std::vector& tensor_value_handler); cv::Mat normalize(const cv::Mat& mat, float mean, float scale); cv::Mat normalize(const cv::Mat& mat, const float mean[3], const float scale[3]); void normalize(const cv::Mat& inmat, cv::Mat& outmat, float mean, float scale); void normalize_inplace(cv::Mat& mat_inplace, float mean, float scale); void normalize_inplace(cv::Mat& mat_inplace, const float mean[3], const float scale[3]); } // namespace transform } // namespace utils // ==================================================================== // Helpers // ==================================================================== inline static std::string OrtCompatiableGetInputName( size_t index, OrtAllocator* allocator, Ort::Session* ort_session) { return std::string(ort_session->GetInputNameAllocated(index, allocator).get()); } inline static std::string OrtCompatiableGetOutputName( size_t index, OrtAllocator* allocator, Ort::Session* ort_session) { return std::string(ort_session->GetOutputNameAllocated(index, allocator).get()); } // ==================================================================== // BasicOrtHandler // ==================================================================== class ONNXENGINE_API BasicOrtHandler { protected: const char* input_name = nullptr; std::vector input_node_names; std::vector input_node_names_; std::vector input_node_dims; std::size_t input_tensor_size = 1; std::vector input_values_handler; std::vector output_node_names; std::vector output_node_names_; std::vector> output_node_dims; int num_outputs = 1; Ort::Env* ort_env = nullptr; // ← pointer, no in-class init Ort::Session* ort_session = nullptr; Ort::MemoryInfo* memory_info_handler = nullptr; std::wstring onnx_path_w; // ← owns the wstring storage const LITEORT_CHAR* onnx_path = nullptr; // ← points into onnx_path_w const char* log_id = nullptr; protected: const unsigned int num_threads; EngineType m_engineType; protected: // Default: hardware auto-detection via ANSLicenseHelper through EPLoader explicit BasicOrtHandler(const std::string& _onnx_path, unsigned int _num_threads = 1); // Explicit engine override per-session explicit BasicOrtHandler(const std::string& _onnx_path, EngineType engineType, unsigned int _num_threads = 1); virtual ~BasicOrtHandler(); BasicOrtHandler(const BasicOrtHandler&) = delete; BasicOrtHandler& operator=(const BasicOrtHandler&) = delete; public: // Resolved EP type (after EPLoader fallback). Subclasses use this // to branch on actual EP at inference time. EngineType getEngineType() const { return m_engineType; } private: void initialize_handler(); protected: virtual Ort::Value transform(const cv::Mat& mat) = 0; virtual Ort::Value transformBatch(const std::vector& images) = 0; // EP-specific session option builders bool TryAppendCUDA(Ort::SessionOptions& opts); bool TryAppendDirectML(Ort::SessionOptions& opts); bool TryAppendOpenVINO(Ort::SessionOptions& opts); }; // ==================================================================== // SCRFD — face detection // ==================================================================== class SCRFD : public BasicOrtHandler { public: explicit SCRFD(const std::string& _onnx_path,unsigned int _num_threads = 1); explicit SCRFD(const std::string& _onnx_path,EngineType engineType,unsigned int _num_threads = 1); ~SCRFD() override = default; void detect(const cv::Mat& mat, std::vector& detected_boxes_kps, float score_threshold = 0.3f, float iou_threshold = 0.45f, unsigned int topk = 400); private: typedef struct { float cx, cy, stride; } SCRFDPoint; typedef struct { float ratio; int dw, dh; bool flag; } SCRFDScaleParams; const float mean_vals[3] = { 127.5f, 127.5f, 127.5f }; const float scale_vals[3] = { 1.f / 128.f, 1.f / 128.f, 1.f / 128.f }; unsigned int fmc = 3; bool use_kps = false; unsigned int num_anchors = 2; std::vector feat_stride_fpn = { 8, 16, 32 }; std::unordered_map> center_points; bool center_points_is_update = false; static constexpr unsigned int nms_pre = 1000; static constexpr unsigned int max_nms = 30000; Ort::Value transform(const cv::Mat& mat_rs) override; Ort::Value transformBatch(const std::vector& images) override; void initial_context(); void resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs, int target_height, int target_width, SCRFDScaleParams& scale_params); void generate_points(int target_height, int target_width); void generate_bboxes_kps(const SCRFDScaleParams& scale_params, std::vector& bbox_kps_collection, std::vector& output_tensors, float score_threshold, float img_height, float img_width); void generate_bboxes_single_stride( const SCRFDScaleParams& scale_params, Ort::Value& score_pred, Ort::Value& bbox_pred, unsigned int stride, float score_threshold, float img_height, float img_width, std::vector& bbox_kps_collection); void generate_bboxes_kps_single_stride( const SCRFDScaleParams& scale_params, Ort::Value& score_pred, Ort::Value& bbox_pred, Ort::Value& kps_pred, unsigned int stride, float score_threshold, float img_height, float img_width, std::vector& bbox_kps_collection); void nms_bboxes_kps(std::vector& input, std::vector& output, float iou_threshold, unsigned int topk); }; // ==================================================================== // GlintArcFace — face recognition // ==================================================================== class GlintArcFace : public BasicOrtHandler { public: explicit GlintArcFace(const std::string& _onnx_path, unsigned int _num_threads = 1) : BasicOrtHandler(_onnx_path, _num_threads) { } explicit GlintArcFace(const std::string& _onnx_path, EngineType engineType, unsigned int _num_threads = 1) : BasicOrtHandler(_onnx_path, engineType, _num_threads) { } ~GlintArcFace() override = default; void detect(const cv::Mat& mat, types::FaceContent& face_content); void detectBatch(const std::vector& images, std::vector& face_contents); private: static constexpr float mean_val = 127.5f; static constexpr float scale_val = 1.f / 127.5f; Ort::Value transform(const cv::Mat& mat) override; Ort::Value transformBatch(const std::vector& images) override; }; // ==================================================================== // GlintCosFace — face recognition // ==================================================================== class GlintCosFace : public BasicOrtHandler { public: explicit GlintCosFace(const std::string& _onnx_path, unsigned int _num_threads = 1) : BasicOrtHandler(_onnx_path, _num_threads) { } explicit GlintCosFace(const std::string& _onnx_path, EngineType engineType, unsigned int _num_threads = 1) : BasicOrtHandler(_onnx_path, engineType, _num_threads) { } ~GlintCosFace() override = default; void detect(const cv::Mat& mat, types::FaceContent& face_content); void detectBatch(const std::vector& images, std::vector& face_contents); private: static constexpr float mean_val = 127.5f; static constexpr float scale_val = 1.f / 127.5f; Ort::Value transform(const cv::Mat& mat) override; Ort::Value transformBatch(const std::vector& images) override; }; // ==================================================================== // MOVINET — action recognition // ==================================================================== class MOVINET : public BasicOrtHandler { public: explicit MOVINET(const std::string& _onnx_path, unsigned int _num_threads = 1); explicit MOVINET(const std::string& _onnx_path, int _temporal, int _width, int _height, int _channels = 3, unsigned int _num_threads = 1); explicit MOVINET(const std::string& _onnx_path, EngineType engineType, unsigned int _num_threads = 1); explicit MOVINET(const std::string& _onnx_path, EngineType engineType, int _temporal, int _width, int _height, int _channels = 3, unsigned int _num_threads = 1); ~MOVINET() override = default; void inference(const std::deque& frames, std::pair& out_result); private: struct InputConfig { int temporal = 16; int width = 172; int height = 172; int channels = 3; } input_params; struct OutputConfig { int num_classes = 2; } output_params; std::string _MoviNetInputName; std::string _MoviNetOutputName; std::vector input_tensor_values; void init_io_names(); Ort::Value transform(const std::deque& frames); std::pair post_processing(const float* pOutput); // Required by BasicOrtHandler pure virtuals Ort::Value transform(const cv::Mat& mat) override; Ort::Value transformBatch(const std::vector& images) override; }; // ==================================================================== // BoundingBoxType template implementations // ==================================================================== template template inline ANSCENTER::types::BoundingBoxType ANSCENTER::types::BoundingBoxType::convert_type() const { types::__assert_type(); types::__assert_type(); BoundingBoxType other; other.x1 = static_cast(x1); other.y1 = static_cast(y1); other.x2 = static_cast(x2); other.y2 = static_cast(y2); other.score = static_cast(score); other.label_text = label_text; other.label = label; other.flag = flag; return other; } } // namespace ANSCENTER #endif // ONNXEngine_H