// ANSCORE/engines/ONNXEngine/ONNXEngine.h
// ONNX Runtime inference engine: result types, tensor helpers,
// BasicOrtHandler base class, and model-specific handlers (C++).
#pragma once
#ifndef ONNXEngine_H
#define ONNXEngine_H
#include <string>
#include <vector>
#include <iostream>
#include <typeinfo>
#include <deque>
#include <unordered_map>
#include "onnxruntime_cxx_api.h"
#include "opencv2/opencv.hpp"
#include "EPLoader.h" // brings in EngineType via ANSLicenseHelper
#define LITEORT_CHAR wchar_t
#ifdef ENGINE_EXPORTS
#define ONNXENGINE_API __declspec(dllexport)
#else
#define ONNXENGINE_API __declspec(dllimport)
#endif
namespace ANSCENTER {
// ====================================================================
// types
// ====================================================================
namespace types {
// Compile-time guard shared by all box types: the coordinate type must be
// a trivially-copyable arithmetic type and the score type a trivially-
// copyable floating-point type. Instantiating with anything else fails
// the build with a clear message instead of producing subtle UB later.
// NOTE(review): the leading double underscore is a reserved identifier;
// renaming would touch callers outside this block, so it is kept.
template<typename T1 = float, typename T2 = float>
static inline void __assert_type()
{
    // Use the C++17 _v variable templates consistently (the original
    // mixed _v and ::value forms in the same expression).
    static_assert(
        std::is_standard_layout_v<T1> && std::is_trivially_copyable_v<T1>
        && std::is_standard_layout_v<T2> && std::is_trivially_copyable_v<T2>
        && std::is_floating_point_v<T2>
        && (std::is_integral_v<T1> || std::is_floating_point_v<T1>),
        "not support type.");
}
// Generic detection box. T1 is the coordinate type (integral or floating
// point), T2 the score type (floating point); combinations are checked by
// types::__assert_type in the constructor.
template<typename T1 = float, typename T2 = float>
struct BoundingBoxType
{
typedef T1 value_type;
typedef T2 score_type;
// Corner coordinates: (x1, y1) top-left, (x2, y2) bottom-right.
value_type x1, y1, x2, y2;
// Detection confidence.
score_type score;
// Non-owning label text; lifetime is managed by whoever populated it.
const char* label_text;
// Numeric class id.
unsigned int label;
// True once this box holds a valid detection.
bool flag;
// Copy this box into another instantiation, casting coords and score.
template<typename O1, typename O2 = score_type>
BoundingBoxType<O1, O2> convert_type() const;
// Intersection-over-union against another box (defined out of line).
template<typename O1, typename O2 = score_type>
value_type iou_of(const BoundingBoxType<O1, O2>& other) const;
value_type width() const;
value_type height() const;
value_type area() const;
// OpenCV views of the box (defined out of line).
::cv::Rect rect() const;
::cv::Point2i tl() const;
::cv::Point2i rb() const;
// Zero-initializes everything; flag starts false ("no detection yet").
BoundingBoxType() :
x1(0), y1(0), x2(0), y2(0),
score(0), label_text(nullptr), label(0), flag(false)
{
// Reject unsupported T1/T2 combinations at compile time.
types::__assert_type<value_type, score_type>();
}
};
// Explicit instantiations for the box variants used across the engine.
template class BoundingBoxType<int, float>;
template class BoundingBoxType<float, float>;
template class BoundingBoxType<double, double>;
// Convenience aliases: integer-, float- and double-coordinate boxes.
typedef BoundingBoxType<int, float> Boxi;
typedef BoundingBoxType<float, float> Boxf;
typedef BoundingBoxType<double, double> Boxd;
// 2D landmark set; flag marks whether points were populated.
typedef struct LandmarksType {
std::vector<cv::Point2f> points;
bool flag;
LandmarksType() : flag(false) {}
} Landmarks;
typedef Landmarks Landmarks2D;
// 3D landmark variant.
typedef struct Landmarks3DType {
std::vector<cv::Point3f> points;
bool flag;
Landmarks3DType() : flag(false) {}
} Landmarks3D;
// Face detection result: bounding box plus its landmark set
// (produced by SCRFD::detect).
typedef struct BoxfWithLandmarksType {
Boxf box;
Landmarks landmarks;
bool flag;
BoxfWithLandmarksType() : flag(false) {}
} BoxfWithLandmarks;
// Head-pose angles; flag marks whether the struct was populated.
typedef struct EulerAnglesType {
    float yaw, pitch, roll;
    bool flag;
    // Zero-initialize all members (previously only flag was set, so
    // reading yaw/pitch/roll before detection was undefined behavior).
    EulerAnglesType() : yaw(0.f), pitch(0.f), roll(0.f), flag(false) {}
} EulerAngles;
// Emotion classification result.
typedef struct EmotionsType {
    float score;        // confidence of the predicted class
    unsigned int label; // numeric class id
    const char* text;   // non-owning label text (nullptr until set)
    bool flag;
    // Previously text was left uninitialized (a dangling pointer);
    // initialize every member so default instances are safe to read.
    EmotionsType() : score(0.f), label(0), text(nullptr), flag(false) {}
} Emotions;
// Age estimation result.
typedef struct AgeType {
    float age;                    // point estimate
    unsigned int age_interval[2]; // [low, high] bracket
    float interval_prob;          // confidence for the bracket
    bool flag;
    AgeType() : age(0.f), age_interval{ 0, 0 }, interval_prob(0.f), flag(false) {}
} Age;
// Gender classification result.
typedef struct GenderType {
    float score;
    unsigned int label;
    const char* text;   // non-owning label text (nullptr until set)
    bool flag;
    GenderType() : score(0.f), label(0), text(nullptr), flag(false) {}
} Gender;
// Face embedding of dimensionality `dim` (filled by the recognizers).
typedef struct FaceContentType {
    std::vector<float> embedding;
    unsigned int dim;
    bool flag;
    FaceContentType() : dim(0), flag(false) {}
} FaceContent;
// Semantic segmentation result: per-pixel class ids, a colorized
// visualization, and an id -> class-name map.
typedef struct SegmentContentType {
cv::Mat class_mat;
cv::Mat color_mat;
std::unordered_map<int, std::string> names_map;
bool flag;
SegmentContentType() : flag(false) {}
} SegmentContent;
// Matting result: foreground, alpha, and merged/composited images.
typedef struct MattingContentType {
cv::Mat fgr_mat;
cv::Mat pha_mat;
cv::Mat merge_mat;
bool flag;
MattingContentType() : flag(false) {}
} MattingContent;
// Generic segmentation mask result.
typedef struct SegmentationMaskContentType {
cv::Mat mask;
bool flag;
SegmentationMaskContentType() : flag(false) {}
} SegmentationMaskContent;
// Classification result: parallel score/text/label arrays, one entry
// per returned class. texts entries are non-owning pointers.
typedef struct ImageNetContentType {
std::vector<float> scores;
std::vector<const char*> texts;
std::vector<unsigned int> labels;
bool flag;
ImageNetContentType() : flag(false) {}
} ImageNetContent;
typedef ImageNetContent ClassificationContent;
// Style-transfer output image.
typedef struct StyleContentType {
cv::Mat mat;
bool flag;
StyleContentType() : flag(false) {}
} StyleContent;
// Super-resolution output image.
typedef struct SuperResolutionContentType {
cv::Mat mat;
bool flag;
SuperResolutionContentType() : flag(false) {}
} SuperResolutionContent;
// Face-parsing result: label map plus a merged visualization.
typedef struct FaceParsingContentType {
cv::Mat label;
cv::Mat merge;
bool flag;
FaceParsingContentType() : flag(false) {}
} FaceParsingContent;
// Task-specific aliases over the generic mask result.
typedef SegmentationMaskContent HairSegContent;
typedef SegmentationMaskContent HeadSegContent;
typedef SegmentationMaskContent FaceHairSegContent;
typedef SegmentationMaskContent PortraitSegContent;
} // namespace types
// ====================================================================
// utils
// ====================================================================
namespace utils {
namespace transform {
// Tensor memory layout: channel-first (CHW) or channel-last (HWC).
enum { CHW = 0, HWC = 1 };
// Build an Ort tensor from one image. Pixel data is copied into
// tensor_value_handler, which must outlive the returned Ort::Value
// (Ort tensors reference external buffers; see implementation).
Ort::Value create_tensor(
const cv::Mat& mat,
const std::vector<int64_t>& tensor_dims,
const Ort::MemoryInfo& memory_info_handler,
std::vector<float>& tensor_value_handler,
unsigned int data_format = CHW);
// Batched variant: packs several images into one tensor.
Ort::Value create_tensor_batch(
const std::vector<cv::Mat>& batch_mats,
const std::vector<int64_t>& tensor_dims,
const Ort::MemoryInfo& memory_info_handler,
std::vector<float>& tensor_value_handler,
unsigned int data_format = CHW);
// 5D video tensor from a frame queue (used by MOVINET).
Ort::Value create_video_tensor_5d(
const std::deque<cv::Mat>& frames,
const std::vector<int64_t>& tensor_dims,
const Ort::MemoryInfo& memory_info_handler,
std::vector<float>& tensor_value_handler);
// Pixel normalization helpers: out = (in - mean) * scale, with scalar
// or per-channel (3-element) parameters, copying or in-place.
cv::Mat normalize(const cv::Mat& mat, float mean, float scale);
cv::Mat normalize(const cv::Mat& mat, const float mean[3], const float scale[3]);
void normalize(const cv::Mat& inmat, cv::Mat& outmat, float mean, float scale);
void normalize_inplace(cv::Mat& mat_inplace, float mean, float scale);
void normalize_inplace(cv::Mat& mat_inplace, const float mean[3], const float scale[3]);
} // namespace transform
} // namespace utils
// ====================================================================
// Helpers
// ====================================================================
inline static std::string OrtCompatiableGetInputName(
size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
{
return std::string(ort_session->GetInputNameAllocated(index, allocator).get());
}
inline static std::string OrtCompatiableGetOutputName(
size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
{
return std::string(ort_session->GetOutputNameAllocated(index, allocator).get());
}
// ====================================================================
// BasicOrtHandler
// ====================================================================
// Base class owning one ONNX Runtime session and its I/O metadata.
// Subclasses implement transform()/transformBatch() to turn cv::Mat
// images into input tensors. Non-copyable (owns raw Ort::* pointers
// released in the destructor).
class ONNXENGINE_API BasicOrtHandler
{
protected:
// Input metadata. The const char* vectors presumably point into the
// paired std::string vectors that own the storage — TODO confirm in
// the .cpp.
const char* input_name = nullptr;
std::vector<const char*> input_node_names;
std::vector<std::string> input_node_names_;
std::vector<int64_t> input_node_dims;
std::size_t input_tensor_size = 1;
std::vector<float> input_values_handler;
// Output metadata (same owning/non-owning pairing as inputs).
std::vector<const char*> output_node_names;
std::vector<std::string> output_node_names_;
std::vector<std::vector<int64_t>> output_node_dims;
int num_outputs = 1;
// Ort objects are heap-allocated in initialize_handler and owned here.
Ort::Env* ort_env = nullptr; // ← pointer, no in-class init
Ort::Session* ort_session = nullptr;
Ort::MemoryInfo* memory_info_handler = nullptr;
std::wstring onnx_path_w; // ← owns the wstring storage
const LITEORT_CHAR* onnx_path = nullptr; // ← points into onnx_path_w
const char* log_id = nullptr;
protected:
const unsigned int num_threads;
// Execution-provider type actually in use for this session.
EngineType m_engineType;
protected:
// Default: hardware auto-detection via ANSLicenseHelper through EPLoader
explicit BasicOrtHandler(const std::string& _onnx_path,
unsigned int _num_threads = 1);
// Explicit engine override per-session
explicit BasicOrtHandler(const std::string& _onnx_path,
EngineType engineType,
unsigned int _num_threads = 1);
virtual ~BasicOrtHandler();
// Non-copyable: raw-owning Ort pointers must not be shared.
BasicOrtHandler(const BasicOrtHandler&) = delete;
BasicOrtHandler& operator=(const BasicOrtHandler&) = delete;
public:
// Resolved EP type (after EPLoader fallback). Subclasses use this
// to branch on actual EP at inference time.
EngineType getEngineType() const { return m_engineType; }
private:
// Creates the Ort environment/session and caches I/O metadata.
void initialize_handler();
protected:
// Convert one image / a batch of images into the model input tensor.
virtual Ort::Value transform(const cv::Mat& mat) = 0;
virtual Ort::Value transformBatch(const std::vector<cv::Mat>& images) = 0;
// EP-specific session option builders; each returns whether the
// execution provider could be appended to the options.
bool TryAppendCUDA(Ort::SessionOptions& opts);
bool TryAppendDirectML(Ort::SessionOptions& opts);
bool TryAppendOpenVINO(Ort::SessionOptions& opts);
};
// ====================================================================
// SCRFD — face detection
// ====================================================================
// SCRFD face detector: FPN-style multi-stride model producing boxes
// and (when use_kps is true) facial landmarks.
class SCRFD : public BasicOrtHandler
{
public:
explicit SCRFD(const std::string& _onnx_path,unsigned int _num_threads = 1);
explicit SCRFD(const std::string& _onnx_path,EngineType engineType,unsigned int _num_threads = 1);
~SCRFD() override = default;
// Detect faces in `mat`. Candidates below score_threshold are
// dropped, overlaps are suppressed at iou_threshold, and at most
// topk boxes are returned.
void detect(const cv::Mat& mat,
std::vector<types::BoxfWithLandmarks>& detected_boxes_kps,
float score_threshold = 0.3f,
float iou_threshold = 0.45f,
unsigned int topk = 400);
private:
// Anchor center (cx, cy) together with its feature-map stride.
typedef struct { float cx, cy, stride; } SCRFDPoint;
// Letterbox parameters produced by resize_unscale: scale ratio plus
// horizontal/vertical padding.
typedef struct { float ratio; int dw, dh; bool flag; } SCRFDScaleParams;
// Per-channel normalization constants used by preprocessing.
const float mean_vals[3] = { 127.5f, 127.5f, 127.5f };
const float scale_vals[3] = { 1.f / 128.f, 1.f / 128.f, 1.f / 128.f };
// Number of feature maps (matches feat_stride_fpn.size()).
unsigned int fmc = 3;
// Whether the loaded model has a landmark (keypoint) branch.
bool use_kps = false;
// Anchors per spatial location.
unsigned int num_anchors = 2;
std::vector<int> feat_stride_fpn = { 8, 16, 32 };
// Cached anchor centers per stride; regenerated when stale
// (center_points_is_update tracks validity).
std::unordered_map<int, std::vector<SCRFDPoint>> center_points;
bool center_points_is_update = false;
// Candidate caps applied before/during NMS.
static constexpr unsigned int nms_pre = 1000;
static constexpr unsigned int max_nms = 30000;
Ort::Value transform(const cv::Mat& mat_rs) override;
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
void initial_context();
// Aspect-preserving resize with padding to the target size.
void resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
int target_height, int target_width,
SCRFDScaleParams& scale_params);
// (Re)build anchor centers for the given input size.
void generate_points(int target_height, int target_width);
// Decode raw output tensors into boxes/landmarks mapped back to the
// original image coordinates via scale_params.
void generate_bboxes_kps(const SCRFDScaleParams& scale_params,
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection,
std::vector<Ort::Value>& output_tensors,
float score_threshold,
float img_height, float img_width);
// Per-stride decoding, boxes only.
void generate_bboxes_single_stride(
const SCRFDScaleParams& scale_params,
Ort::Value& score_pred, Ort::Value& bbox_pred,
unsigned int stride, float score_threshold,
float img_height, float img_width,
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
// Per-stride decoding, boxes plus landmarks.
void generate_bboxes_kps_single_stride(
const SCRFDScaleParams& scale_params,
Ort::Value& score_pred, Ort::Value& bbox_pred, Ort::Value& kps_pred,
unsigned int stride, float score_threshold,
float img_height, float img_width,
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
// Non-maximum suppression over box+landmark candidates.
void nms_bboxes_kps(std::vector<types::BoxfWithLandmarks>& input,
std::vector<types::BoxfWithLandmarks>& output,
float iou_threshold, unsigned int topk);
};
// ====================================================================
// GlintArcFace — face recognition
// ====================================================================
// ArcFace-style face recognition model: maps an aligned face crop to a
// FaceContent embedding vector.
class GlintArcFace : public BasicOrtHandler
{
public:
// Auto-detected execution provider.
explicit GlintArcFace(const std::string& _onnx_path,
unsigned int _num_threads = 1)
: BasicOrtHandler(_onnx_path, _num_threads)
{
}
// Explicit execution-provider override.
explicit GlintArcFace(const std::string& _onnx_path,
EngineType engineType,
unsigned int _num_threads = 1)
: BasicOrtHandler(_onnx_path, engineType, _num_threads) {
}
~GlintArcFace() override = default;
// Extract the embedding for one face crop.
void detect(const cv::Mat& mat, types::FaceContent& face_content);
// Batched variant: one FaceContent per input image.
void detectBatch(const std::vector<cv::Mat>& images,
std::vector<types::FaceContent>& face_contents);
private:
// Preprocessing: (pixel - 127.5) / 127.5, i.e. scale to [-1, 1].
static constexpr float mean_val = 127.5f;
static constexpr float scale_val = 1.f / 127.5f;
Ort::Value transform(const cv::Mat& mat) override;
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
};
// ====================================================================
// GlintCosFace — face recognition
// ====================================================================
// CosFace-style face recognition model; same interface and
// preprocessing constants as GlintArcFace.
class GlintCosFace : public BasicOrtHandler
{
public:
// Auto-detected execution provider.
explicit GlintCosFace(const std::string& _onnx_path,
unsigned int _num_threads = 1)
: BasicOrtHandler(_onnx_path, _num_threads)
{
}
// Explicit execution-provider override.
explicit GlintCosFace(const std::string& _onnx_path,
EngineType engineType,
unsigned int _num_threads = 1)
: BasicOrtHandler(_onnx_path, engineType, _num_threads)
{
}
~GlintCosFace() override = default;
// Extract the embedding for one face crop.
void detect(const cv::Mat& mat, types::FaceContent& face_content);
// Batched variant: one FaceContent per input image.
void detectBatch(const std::vector<cv::Mat>& images,
std::vector<types::FaceContent>& face_contents);
private:
// Preprocessing: (pixel - 127.5) / 127.5, i.e. scale to [-1, 1].
static constexpr float mean_val = 127.5f;
static constexpr float scale_val = 1.f / 127.5f;
Ort::Value transform(const cv::Mat& mat) override;
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
};
// ====================================================================
// MOVINET — action recognition
// ====================================================================
// MoViNet-style video action recognizer: consumes a queue of frames
// and produces a {class index, score} pair.
class MOVINET : public BasicOrtHandler
{
public:
explicit MOVINET(const std::string& _onnx_path,
unsigned int _num_threads = 1);
// Override the default input geometry (frame window, size, channels).
explicit MOVINET(const std::string& _onnx_path,
int _temporal, int _width, int _height, int _channels = 3,
unsigned int _num_threads = 1);
explicit MOVINET(const std::string& _onnx_path,
EngineType engineType,
unsigned int _num_threads = 1);
explicit MOVINET(const std::string& _onnx_path,
EngineType engineType,
int _temporal, int _width, int _height, int _channels = 3,
unsigned int _num_threads = 1);
~MOVINET() override = default;
// Run inference over `frames` (presumably input_params.temporal
// frames — TODO confirm) and write {class index, score}.
void inference(const std::deque<cv::Mat>& frames,
std::pair<int, float>& out_result);
private:
// Model input geometry; defaults: 16 frames of 172x172 RGB.
struct InputConfig {
int temporal = 16;
int width = 172;
int height = 172;
int channels = 3;
} input_params;
struct OutputConfig {
int num_classes = 2;
} output_params;
// Cached I/O tensor names, filled by init_io_names().
std::string _MoviNetInputName;
std::string _MoviNetOutputName;
// Reused staging buffer for the input tensor data.
std::vector<float> input_tensor_values;
void init_io_names();
// Build the 5D video tensor from the frame queue (layout defined by
// create_video_tensor_5d in the implementation).
Ort::Value transform(const std::deque<cv::Mat>& frames);
// Argmax + score over the raw output logits/probabilities.
std::pair<int, float> post_processing(const float* pOutput);
// Required by BasicOrtHandler pure virtuals
Ort::Value transform(const cv::Mat& mat) override;
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
};
// ====================================================================
// BoundingBoxType template implementations
// ====================================================================
// Copy this box into a BoundingBoxType<O1, O2>, casting the corner
// coordinates to O1 and the score to O2. The label pointer, label id
// and validity flag are carried over unchanged.
template<typename T1, typename T2>
template<typename O1, typename O2>
inline ANSCENTER::types::BoundingBoxType<O1, O2>
ANSCENTER::types::BoundingBoxType<T1, T2>::convert_type() const
{
    // Validate both the destination and the source type pairs.
    types::__assert_type<O1, O2>();
    types::__assert_type<value_type, score_type>();
    BoundingBoxType<O1, O2> converted;
    converted.x1 = static_cast<O1>(x1);
    converted.y1 = static_cast<O1>(y1);
    converted.x2 = static_cast<O1>(x2);
    converted.y2 = static_cast<O1>(y2);
    converted.score = static_cast<O2>(score);
    converted.label_text = label_text;
    converted.label = label;
    converted.flag = flag;
    return converted;
}
} // namespace ANSCENTER
#endif // ONNXEngine_H