2026-03-28 16:54:11 +11:00
|
|
|
|
#pragma once
|
|
|
|
|
|
#ifndef ONNXEngine_H
|
|
|
|
|
|
#define ONNXEngine_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <cstdint>
#include <deque>
#include <iostream>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <unordered_map>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"
#include "opencv2/opencv.hpp"

#include "EPLoader.h" // brings in EngineType via ANSLicenseHelper
|
|
|
|
|
|
|
|
|
|
|
|
// Character type behind the `onnx_path` pointer kept by BasicOrtHandler.
#define LITEORT_CHAR wchar_t

// Linkage decoration for exported classes: the translation units that build
// the engine define ENGINE_EXPORTS and export the symbols; every other
// includer imports them.
#if defined(ENGINE_EXPORTS)
#  define ONNXENGINE_API __declspec(dllexport)
#else
#  define ONNXENGINE_API __declspec(dllimport)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
namespace ANSCENTER {
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// types
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
namespace types {
|
|
|
|
|
|
|
|
|
|
|
|
/// Compile-time guard for the coordinate/score types of a BoundingBoxType.
///
/// Instantiating this function is a hard compile error unless
///   - both types are standard-layout and trivially copyable,
///   - ScoreT is a floating-point type, and
///   - ValueT is an integral or floating-point type.
/// The function has no runtime effect.
///
/// @tparam ValueT  coordinate type to validate (defaults to float)
/// @tparam ScoreT  score type to validate (defaults to float)
///
/// NOTE(review): the function name itself starts with a double underscore,
/// which is reserved for the implementation; it is kept only because callers
/// reference it by this name.
template<typename ValueT = float, typename ScoreT = float>
static inline void __assert_type()
{
    static_assert(
        std::is_standard_layout_v<ValueT> && std::is_trivially_copyable_v<ValueT>
        && std::is_standard_layout_v<ScoreT> && std::is_trivially_copyable_v<ScoreT>
        && std::is_floating_point_v<ScoreT>
        && (std::is_integral_v<ValueT> || std::is_floating_point_v<ValueT>),
        "not support type.");
}
|
|
|
|
|
|
|
|
|
|
|
|
template<typename T1 = float, typename T2 = float>
|
|
|
|
|
|
struct BoundingBoxType
|
|
|
|
|
|
{
|
|
|
|
|
|
typedef T1 value_type;
|
|
|
|
|
|
typedef T2 score_type;
|
|
|
|
|
|
|
|
|
|
|
|
value_type x1, y1, x2, y2;
|
|
|
|
|
|
score_type score;
|
|
|
|
|
|
const char* label_text;
|
|
|
|
|
|
unsigned int label;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
|
|
|
|
|
|
template<typename O1, typename O2 = score_type>
|
|
|
|
|
|
BoundingBoxType<O1, O2> convert_type() const;
|
|
|
|
|
|
|
|
|
|
|
|
template<typename O1, typename O2 = score_type>
|
|
|
|
|
|
value_type iou_of(const BoundingBoxType<O1, O2>& other) const;
|
|
|
|
|
|
|
|
|
|
|
|
value_type width() const;
|
|
|
|
|
|
value_type height() const;
|
|
|
|
|
|
value_type area() const;
|
|
|
|
|
|
::cv::Rect rect() const;
|
|
|
|
|
|
::cv::Point2i tl() const;
|
|
|
|
|
|
::cv::Point2i rb() const;
|
|
|
|
|
|
|
|
|
|
|
|
BoundingBoxType() :
|
|
|
|
|
|
x1(0), y1(0), x2(0), y2(0),
|
|
|
|
|
|
score(0), label_text(nullptr), label(0), flag(false)
|
|
|
|
|
|
{
|
|
|
|
|
|
types::__assert_type<value_type, score_type>();
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template class BoundingBoxType<int, float>;
|
|
|
|
|
|
template class BoundingBoxType<float, float>;
|
|
|
|
|
|
template class BoundingBoxType<double, double>;
|
|
|
|
|
|
|
|
|
|
|
|
typedef BoundingBoxType<int, float> Boxi;
|
|
|
|
|
|
typedef BoundingBoxType<float, float> Boxf;
|
|
|
|
|
|
typedef BoundingBoxType<double, double> Boxd;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct LandmarksType {
|
|
|
|
|
|
std::vector<cv::Point2f> points;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
LandmarksType() : flag(false) {}
|
|
|
|
|
|
} Landmarks;
|
|
|
|
|
|
|
|
|
|
|
|
typedef Landmarks Landmarks2D;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct Landmarks3DType {
|
|
|
|
|
|
std::vector<cv::Point3f> points;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
Landmarks3DType() : flag(false) {}
|
|
|
|
|
|
} Landmarks3D;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct BoxfWithLandmarksType {
|
|
|
|
|
|
Boxf box;
|
|
|
|
|
|
Landmarks landmarks;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
BoxfWithLandmarksType() : flag(false) {}
|
|
|
|
|
|
} BoxfWithLandmarks;
|
|
|
|
|
|
|
|
|
|
|
|
// Three rotation angles plus a flag.
// Every member now has a defined initial value; previously only `flag` was
// set, so reading yaw/pitch/roll from a fresh object was undefined behaviour.
// The angle unit is not established in this header.
struct EulerAnglesType {
    float yaw = 0.f;
    float pitch = 0.f;
    float roll = 0.f;
    bool flag = false;  // presumably marks the result as populated — TODO confirm
    EulerAnglesType() {}
};
using EulerAngles = EulerAnglesType;
|
|
|
|
|
|
|
|
|
|
|
|
// Classification-style result: score, numeric label, label text and a flag.
// Every member now has a defined initial value; previously only `flag` was
// set, leaving score/label/text indeterminate (and `text` a wild pointer).
struct EmotionsType {
    float score = 0.f;
    unsigned int label = 0;
    const char* text = nullptr;  // non-owning; null until a producer assigns it
    bool flag = false;           // presumably marks the result as populated — TODO confirm
    EmotionsType() {}
};
using Emotions = EmotionsType;
|
|
|
|
|
|
|
|
|
|
|
|
// Age result: a scalar age, a two-element interval, a probability for that
// interval and a flag.
// Every member now has a defined initial value; previously only `flag` was
// set, so the numeric fields were indeterminate on a fresh object.
struct AgeType {
    float age = 0.f;
    unsigned int age_interval[2] = { 0u, 0u };  // presumably [low, high] — TODO confirm
    float interval_prob = 0.f;
    bool flag = false;  // presumably marks the result as populated — TODO confirm
    AgeType() {}
};
using Age = AgeType;
|
|
|
|
|
|
|
|
|
|
|
|
// Classification-style result: score, numeric label, label text and a flag.
// Every member now has a defined initial value; previously only `flag` was
// set, leaving score/label/text indeterminate (and `text` a wild pointer).
struct GenderType {
    float score = 0.f;
    unsigned int label = 0;
    const char* text = nullptr;  // non-owning; null until a producer assigns it
    bool flag = false;           // presumably marks the result as populated — TODO confirm
    GenderType() {}
};
using Gender = GenderType;
|
|
|
|
|
|
|
|
|
|
|
|
// Face embedding result: the embedding values, a dimension field and a flag.
// `dim` now starts at 0; previously it was left indeterminate by the
// constructor while `embedding` started empty.
struct FaceContentType {
    std::vector<float> embedding;
    unsigned int dim = 0;  // presumably embedding.size() once filled — TODO confirm
    bool flag = false;     // presumably marks the result as populated — TODO confirm
    FaceContentType() {}
};
using FaceContent = FaceContentType;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct SegmentContentType {
|
|
|
|
|
|
cv::Mat class_mat;
|
|
|
|
|
|
cv::Mat color_mat;
|
|
|
|
|
|
std::unordered_map<int, std::string> names_map;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
SegmentContentType() : flag(false) {}
|
|
|
|
|
|
} SegmentContent;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct MattingContentType {
|
|
|
|
|
|
cv::Mat fgr_mat;
|
|
|
|
|
|
cv::Mat pha_mat;
|
|
|
|
|
|
cv::Mat merge_mat;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
MattingContentType() : flag(false) {}
|
|
|
|
|
|
} MattingContent;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct SegmentationMaskContentType {
|
|
|
|
|
|
cv::Mat mask;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
SegmentationMaskContentType() : flag(false) {}
|
|
|
|
|
|
} SegmentationMaskContent;
|
|
|
|
|
|
|
|
|
|
|
|
// Classification result held as three parallel-looking vectors plus a flag
// that starts out false.
// NOTE(review): nothing in this header enforces that the three vectors have
// equal length — confirm against the producers.
struct ImageNetContentType {
    std::vector<float> scores;
    std::vector<const char*> texts;  // non-owning pointers
    std::vector<unsigned int> labels;
    bool flag = false;  // presumably marks the result as populated — TODO confirm
    ImageNetContentType() {}
};
using ImageNetContent = ImageNetContentType;

// Second name for the same result type.
using ClassificationContent = ImageNetContent;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct StyleContentType {
|
|
|
|
|
|
cv::Mat mat;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
StyleContentType() : flag(false) {}
|
|
|
|
|
|
} StyleContent;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct SuperResolutionContentType {
|
|
|
|
|
|
cv::Mat mat;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
SuperResolutionContentType() : flag(false) {}
|
|
|
|
|
|
} SuperResolutionContent;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct FaceParsingContentType {
|
|
|
|
|
|
cv::Mat label;
|
|
|
|
|
|
cv::Mat merge;
|
|
|
|
|
|
bool flag;
|
|
|
|
|
|
FaceParsingContentType() : flag(false) {}
|
|
|
|
|
|
} FaceParsingContent;
|
|
|
|
|
|
|
|
|
|
|
|
// Task-specific names for the same single-mask result type.
using HairSegContent     = SegmentationMaskContent;
using HeadSegContent     = SegmentationMaskContent;
using FaceHairSegContent = SegmentationMaskContent;
using PortraitSegContent = SegmentationMaskContent;
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace types
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// utils
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
namespace utils {
|
|
|
|
|
|
namespace transform {
|
|
|
|
|
|
|
|
|
|
|
|
enum { CHW = 0, HWC = 1 };
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value create_tensor(
|
|
|
|
|
|
const cv::Mat& mat,
|
|
|
|
|
|
const std::vector<int64_t>& tensor_dims,
|
|
|
|
|
|
const Ort::MemoryInfo& memory_info_handler,
|
|
|
|
|
|
std::vector<float>& tensor_value_handler,
|
|
|
|
|
|
unsigned int data_format = CHW);
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value create_tensor_batch(
|
|
|
|
|
|
const std::vector<cv::Mat>& batch_mats,
|
|
|
|
|
|
const std::vector<int64_t>& tensor_dims,
|
|
|
|
|
|
const Ort::MemoryInfo& memory_info_handler,
|
|
|
|
|
|
std::vector<float>& tensor_value_handler,
|
|
|
|
|
|
unsigned int data_format = CHW);
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value create_video_tensor_5d(
|
|
|
|
|
|
const std::deque<cv::Mat>& frames,
|
|
|
|
|
|
const std::vector<int64_t>& tensor_dims,
|
|
|
|
|
|
const Ort::MemoryInfo& memory_info_handler,
|
|
|
|
|
|
std::vector<float>& tensor_value_handler);
|
|
|
|
|
|
|
|
|
|
|
|
cv::Mat normalize(const cv::Mat& mat, float mean, float scale);
|
|
|
|
|
|
cv::Mat normalize(const cv::Mat& mat, const float mean[3], const float scale[3]);
|
|
|
|
|
|
void normalize(const cv::Mat& inmat, cv::Mat& outmat, float mean, float scale);
|
|
|
|
|
|
void normalize_inplace(cv::Mat& mat_inplace, float mean, float scale);
|
|
|
|
|
|
void normalize_inplace(cv::Mat& mat_inplace, const float mean[3], const float scale[3]);
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace transform
|
|
|
|
|
|
} // namespace utils
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// Helpers
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
inline static std::string OrtCompatiableGetInputName(
|
|
|
|
|
|
size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
|
|
|
|
|
|
{
|
|
|
|
|
|
return std::string(ort_session->GetInputNameAllocated(index, allocator).get());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline static std::string OrtCompatiableGetOutputName(
|
|
|
|
|
|
size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
|
|
|
|
|
|
{
|
|
|
|
|
|
return std::string(ort_session->GetOutputNameAllocated(index, allocator).get());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-14 20:30:21 +10:00
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// High-perf options for OCR sub-models that need TRT EP and full
|
|
|
|
|
|
// cuDNN workspace. Default-constructed = identical to the legacy
|
|
|
|
|
|
// behavior (CUDA EP only, minimal cuDNN workspace).
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
struct OrtHandlerOptions {
    // Attach the TensorRT EP ahead of the CUDA EP (NVIDIA only). If TRT EP
    // creation or session creation fails, the CUDA EP is used instead.
    // Built engines are cached on disk so later loads are fast.
    bool preferTensorRT{ false };

    // Give cuDNN its largest convolution workspace so it can choose fast
    // algorithms (Winograd, implicit-precomp-GEMM with big workspaces).
    // Off by default: some deployments share VRAM with TRT engines and need
    // the minimal-workspace mode to avoid running out of memory.
    bool useMaxCudnnWorkspace{ false };

    // Directory for cached TRT engines; only consulted when preferTensorRT
    // is set. Empty selects the default, %TEMP%/ANSCENTER/TRTEngineCache.
    std::string trtEngineCacheDir;

    // Build TRT engines in FP16. Recommended for inference; has no effect
    // unless preferTensorRT is set.
    bool trtFP16{ true };

    // Dynamic-shape profile for the TRT EP. With a profile, TRT builds ONE
    // engine covering every input shape in [min..max] rather than rebuilding
    // for each new shape — important for models that see many
    // (batch_size, spatial) combinations at runtime.
    //
    // Format: "input_name:d0xd1xd2xd3[,input2:...]"
    //   e.g.  "x:1x3x48x320" for batch=1, C=3, H=48, W=320
    //
    // Set all three together. An empty min string means "no profile", i.e.
    // the static-shape-per-unique-input mode.
    std::string trtProfileMinShapes;
    std::string trtProfileOptShapes;
    std::string trtProfileMaxShapes;
};
|
|
|
|
|
|
|
2026-03-28 16:54:11 +11:00
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// BasicOrtHandler
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
class ONNXENGINE_API BasicOrtHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
protected:
|
|
|
|
|
|
|
|
|
|
|
|
const char* input_name = nullptr;
|
|
|
|
|
|
std::vector<const char*> input_node_names;
|
|
|
|
|
|
std::vector<std::string> input_node_names_;
|
|
|
|
|
|
std::vector<int64_t> input_node_dims;
|
|
|
|
|
|
std::size_t input_tensor_size = 1;
|
|
|
|
|
|
std::vector<float> input_values_handler;
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<const char*> output_node_names;
|
|
|
|
|
|
std::vector<std::string> output_node_names_;
|
|
|
|
|
|
std::vector<std::vector<int64_t>> output_node_dims;
|
|
|
|
|
|
int num_outputs = 1;
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Env* ort_env = nullptr; // ← pointer, no in-class init
|
|
|
|
|
|
Ort::Session* ort_session = nullptr;
|
|
|
|
|
|
Ort::MemoryInfo* memory_info_handler = nullptr;
|
|
|
|
|
|
|
|
|
|
|
|
std::wstring onnx_path_w; // ← owns the wstring storage
|
|
|
|
|
|
const LITEORT_CHAR* onnx_path = nullptr; // ← points into onnx_path_w
|
|
|
|
|
|
const char* log_id = nullptr;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
|
const unsigned int num_threads;
|
|
|
|
|
|
EngineType m_engineType;
|
|
|
|
|
|
|
2026-04-14 20:30:21 +10:00
|
|
|
|
// Per-session high-perf options. Default = legacy behavior.
|
|
|
|
|
|
OrtHandlerOptions m_handlerOptions;
|
|
|
|
|
|
|
2026-03-28 16:54:11 +11:00
|
|
|
|
protected:
|
|
|
|
|
|
// Default: hardware auto-detection via ANSLicenseHelper through EPLoader
|
|
|
|
|
|
explicit BasicOrtHandler(const std::string& _onnx_path,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
// Explicit engine override per-session
|
|
|
|
|
|
explicit BasicOrtHandler(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
2026-04-14 20:30:21 +10:00
|
|
|
|
// Engine override + per-session high-perf options (TRT EP, max
|
|
|
|
|
|
// cuDNN workspace, etc.). Used by OCR sub-models that need
|
|
|
|
|
|
// shape-stable, high-throughput inference.
|
|
|
|
|
|
explicit BasicOrtHandler(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
const OrtHandlerOptions& options,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
// Auto-detect engine via EPLoader, but with high-perf options.
|
|
|
|
|
|
explicit BasicOrtHandler(const std::string& _onnx_path,
|
|
|
|
|
|
const OrtHandlerOptions& options,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
2026-03-28 16:54:11 +11:00
|
|
|
|
virtual ~BasicOrtHandler();
|
|
|
|
|
|
|
|
|
|
|
|
BasicOrtHandler(const BasicOrtHandler&) = delete;
|
|
|
|
|
|
BasicOrtHandler& operator=(const BasicOrtHandler&) = delete;
|
2026-04-10 17:13:47 +10:00
|
|
|
|
public:
|
|
|
|
|
|
// Resolved EP type (after EPLoader fallback). Subclasses use this
|
|
|
|
|
|
// to branch on actual EP at inference time.
|
|
|
|
|
|
EngineType getEngineType() const { return m_engineType; }
|
2026-04-14 20:30:21 +10:00
|
|
|
|
|
|
|
|
|
|
// Spin up a tiny CPU-only ORT session just long enough to read
|
|
|
|
|
|
// the name of the model's first input, then tear it down. Used
|
|
|
|
|
|
// by callers that need to build TRT profile-shape strings
|
|
|
|
|
|
// (which require the input name) BEFORE the real session is
|
|
|
|
|
|
// created. Returns an empty string on failure.
|
|
|
|
|
|
static std::string QueryModelInputName(const std::string& onnxPath);
|
2026-03-28 16:54:11 +11:00
|
|
|
|
private:
|
|
|
|
|
|
void initialize_handler();
|
|
|
|
|
|
protected:
|
|
|
|
|
|
virtual Ort::Value transform(const cv::Mat& mat) = 0;
|
|
|
|
|
|
virtual Ort::Value transformBatch(const std::vector<cv::Mat>& images) = 0;
|
|
|
|
|
|
|
|
|
|
|
|
// EP-specific session option builders
|
|
|
|
|
|
bool TryAppendCUDA(Ort::SessionOptions& opts);
|
2026-04-14 20:30:21 +10:00
|
|
|
|
bool TryAppendTensorRT(Ort::SessionOptions& opts);
|
2026-03-28 16:54:11 +11:00
|
|
|
|
bool TryAppendDirectML(Ort::SessionOptions& opts);
|
|
|
|
|
|
bool TryAppendOpenVINO(Ort::SessionOptions& opts);
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// SCRFD — face detection
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
class SCRFD : public BasicOrtHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
public:
|
|
|
|
|
|
explicit SCRFD(const std::string& _onnx_path,unsigned int _num_threads = 1);
|
|
|
|
|
|
explicit SCRFD(const std::string& _onnx_path,EngineType engineType,unsigned int _num_threads = 1);
|
|
|
|
|
|
~SCRFD() override = default;
|
|
|
|
|
|
|
|
|
|
|
|
void detect(const cv::Mat& mat,
|
|
|
|
|
|
std::vector<types::BoxfWithLandmarks>& detected_boxes_kps,
|
|
|
|
|
|
float score_threshold = 0.3f,
|
|
|
|
|
|
float iou_threshold = 0.45f,
|
|
|
|
|
|
unsigned int topk = 400);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
typedef struct { float cx, cy, stride; } SCRFDPoint;
|
|
|
|
|
|
typedef struct { float ratio; int dw, dh; bool flag; } SCRFDScaleParams;
|
|
|
|
|
|
|
|
|
|
|
|
const float mean_vals[3] = { 127.5f, 127.5f, 127.5f };
|
|
|
|
|
|
const float scale_vals[3] = { 1.f / 128.f, 1.f / 128.f, 1.f / 128.f };
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int fmc = 3;
|
|
|
|
|
|
bool use_kps = false;
|
|
|
|
|
|
unsigned int num_anchors = 2;
|
|
|
|
|
|
std::vector<int> feat_stride_fpn = { 8, 16, 32 };
|
|
|
|
|
|
std::unordered_map<int, std::vector<SCRFDPoint>> center_points;
|
|
|
|
|
|
bool center_points_is_update = false;
|
|
|
|
|
|
|
|
|
|
|
|
static constexpr unsigned int nms_pre = 1000;
|
|
|
|
|
|
static constexpr unsigned int max_nms = 30000;
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat_rs) override;
|
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
|
|
|
|
|
|
void initial_context();
|
|
|
|
|
|
void resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
|
|
|
|
|
|
int target_height, int target_width,
|
|
|
|
|
|
SCRFDScaleParams& scale_params);
|
|
|
|
|
|
void generate_points(int target_height, int target_width);
|
|
|
|
|
|
|
|
|
|
|
|
void generate_bboxes_kps(const SCRFDScaleParams& scale_params,
|
|
|
|
|
|
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection,
|
|
|
|
|
|
std::vector<Ort::Value>& output_tensors,
|
|
|
|
|
|
float score_threshold,
|
|
|
|
|
|
float img_height, float img_width);
|
|
|
|
|
|
|
|
|
|
|
|
void generate_bboxes_single_stride(
|
|
|
|
|
|
const SCRFDScaleParams& scale_params,
|
|
|
|
|
|
Ort::Value& score_pred, Ort::Value& bbox_pred,
|
|
|
|
|
|
unsigned int stride, float score_threshold,
|
|
|
|
|
|
float img_height, float img_width,
|
|
|
|
|
|
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
|
|
|
|
|
|
|
|
|
|
|
|
void generate_bboxes_kps_single_stride(
|
|
|
|
|
|
const SCRFDScaleParams& scale_params,
|
|
|
|
|
|
Ort::Value& score_pred, Ort::Value& bbox_pred, Ort::Value& kps_pred,
|
|
|
|
|
|
unsigned int stride, float score_threshold,
|
|
|
|
|
|
float img_height, float img_width,
|
|
|
|
|
|
std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
|
|
|
|
|
|
|
|
|
|
|
|
void nms_bboxes_kps(std::vector<types::BoxfWithLandmarks>& input,
|
|
|
|
|
|
std::vector<types::BoxfWithLandmarks>& output,
|
|
|
|
|
|
float iou_threshold, unsigned int topk);
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// GlintArcFace — face recognition
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
class GlintArcFace : public BasicOrtHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
public:
|
|
|
|
|
|
explicit GlintArcFace(const std::string& _onnx_path,
|
|
|
|
|
|
unsigned int _num_threads = 1)
|
|
|
|
|
|
: BasicOrtHandler(_onnx_path, _num_threads)
|
|
|
|
|
|
{
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
explicit GlintArcFace(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
unsigned int _num_threads = 1)
|
|
|
|
|
|
: BasicOrtHandler(_onnx_path, engineType, _num_threads) {
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
~GlintArcFace() override = default;
|
|
|
|
|
|
|
|
|
|
|
|
void detect(const cv::Mat& mat, types::FaceContent& face_content);
|
|
|
|
|
|
void detectBatch(const std::vector<cv::Mat>& images,
|
|
|
|
|
|
std::vector<types::FaceContent>& face_contents);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
static constexpr float mean_val = 127.5f;
|
|
|
|
|
|
static constexpr float scale_val = 1.f / 127.5f;
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// GlintCosFace — face recognition
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
class GlintCosFace : public BasicOrtHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
public:
|
|
|
|
|
|
explicit GlintCosFace(const std::string& _onnx_path,
|
|
|
|
|
|
unsigned int _num_threads = 1)
|
|
|
|
|
|
: BasicOrtHandler(_onnx_path, _num_threads)
|
|
|
|
|
|
{
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
explicit GlintCosFace(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
unsigned int _num_threads = 1)
|
|
|
|
|
|
: BasicOrtHandler(_onnx_path, engineType, _num_threads)
|
|
|
|
|
|
{
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
~GlintCosFace() override = default;
|
|
|
|
|
|
|
|
|
|
|
|
void detect(const cv::Mat& mat, types::FaceContent& face_content);
|
|
|
|
|
|
void detectBatch(const std::vector<cv::Mat>& images,
|
|
|
|
|
|
std::vector<types::FaceContent>& face_contents);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
static constexpr float mean_val = 127.5f;
|
|
|
|
|
|
static constexpr float scale_val = 1.f / 127.5f;
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// MOVINET — action recognition
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
class MOVINET : public BasicOrtHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
public:
|
|
|
|
|
|
explicit MOVINET(const std::string& _onnx_path,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
explicit MOVINET(const std::string& _onnx_path,
|
|
|
|
|
|
int _temporal, int _width, int _height, int _channels = 3,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
explicit MOVINET(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
explicit MOVINET(const std::string& _onnx_path,
|
|
|
|
|
|
EngineType engineType,
|
|
|
|
|
|
int _temporal, int _width, int _height, int _channels = 3,
|
|
|
|
|
|
unsigned int _num_threads = 1);
|
|
|
|
|
|
|
|
|
|
|
|
~MOVINET() override = default;
|
|
|
|
|
|
|
|
|
|
|
|
void inference(const std::deque<cv::Mat>& frames,
|
|
|
|
|
|
std::pair<int, float>& out_result);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
struct InputConfig {
|
|
|
|
|
|
int temporal = 16;
|
|
|
|
|
|
int width = 172;
|
|
|
|
|
|
int height = 172;
|
|
|
|
|
|
int channels = 3;
|
|
|
|
|
|
} input_params;
|
|
|
|
|
|
|
|
|
|
|
|
struct OutputConfig {
|
|
|
|
|
|
int num_classes = 2;
|
|
|
|
|
|
} output_params;
|
|
|
|
|
|
|
|
|
|
|
|
std::string _MoviNetInputName;
|
|
|
|
|
|
std::string _MoviNetOutputName;
|
|
|
|
|
|
std::vector<float> input_tensor_values;
|
|
|
|
|
|
|
|
|
|
|
|
void init_io_names();
|
|
|
|
|
|
|
|
|
|
|
|
Ort::Value transform(const std::deque<cv::Mat>& frames);
|
|
|
|
|
|
std::pair<int, float> post_processing(const float* pOutput);
|
|
|
|
|
|
|
|
|
|
|
|
// Required by BasicOrtHandler pure virtuals
|
|
|
|
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
|
|
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
// BoundingBoxType template implementations
|
|
|
|
|
|
// ====================================================================
|
|
|
|
|
|
template<typename T1, typename T2>
|
|
|
|
|
|
template<typename O1, typename O2>
|
|
|
|
|
|
inline ANSCENTER::types::BoundingBoxType<O1, O2>
|
|
|
|
|
|
ANSCENTER::types::BoundingBoxType<T1, T2>::convert_type() const
|
|
|
|
|
|
{
|
|
|
|
|
|
types::__assert_type<O1, O2>();
|
|
|
|
|
|
types::__assert_type<value_type, score_type>();
|
|
|
|
|
|
BoundingBoxType<O1, O2> other;
|
|
|
|
|
|
other.x1 = static_cast<O1>(x1);
|
|
|
|
|
|
other.y1 = static_cast<O1>(y1);
|
|
|
|
|
|
other.x2 = static_cast<O1>(x2);
|
|
|
|
|
|
other.y2 = static_cast<O1>(y2);
|
|
|
|
|
|
other.score = static_cast<O2>(score);
|
|
|
|
|
|
other.label_text = label_text;
|
|
|
|
|
|
other.label = label;
|
|
|
|
|
|
other.flag = flag;
|
|
|
|
|
|
return other;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace ANSCENTER
|
|
|
|
|
|
|
|
|
|
|
|
#endif // ONNXEngine_H
|