Initial setup for CLion

2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions
--- a/engines/ONNXEngine/ONNXEngine.h
+++ b/engines/ONNXEngine/ONNXEngine.h
@@ -0,0 +1,518 @@
+#pragma once
+#ifndef ONNXEngine_H
+#define ONNXEngine_H
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <typeinfo>
+#include <deque>
+#include <unordered_map>
+
+#include "onnxruntime_cxx_api.h"
+#include "opencv2/opencv.hpp"
+#include "EPLoader.h"           // brings in EngineType via ANSLicenseHelper
+
+// Character type of the model path pointer stored in BasicOrtHandler::onnx_path.
+// NOTE(review): fixed to wchar_t; presumably this matches ONNX Runtime's path
+// character type on Windows builds only - confirm before building elsewhere.
+#define LITEORT_CHAR wchar_t
+
+// Import/export decoration for classes declared in this header:
+// dllexport when ENGINE_EXPORTS is defined, dllimport otherwise.
+// NOTE(review): __declspec(dllexport/dllimport) is a Windows-toolchain
+// extension and there is no fallback branch for other compilers here.
+#ifdef ENGINE_EXPORTS
+#define ONNXENGINE_API __declspec(dllexport)
+#else
+#define ONNXENGINE_API __declspec(dllimport)
+#endif
+
+namespace ANSCENTER {
+
+    // ====================================================================
+    // types
+    // ====================================================================
+    namespace types {
+
+        /// Compile-time guard for the two type parameters used by BoundingBoxType.
+        ///
+        /// Instantiating this function fails to compile unless:
+        ///  - both types are standard-layout and trivially copyable,
+        ///  - the first (coordinate) type is integral or floating point,
+        ///  - the second (score) type is floating point.
+        /// It has no runtime effect.
+        ///
+        /// NOTE(review): the traits used here come from <type_traits>, which this
+        /// header does not include directly; it currently arrives transitively.
+        template<typename CoordT = float, typename ScoreT = float>
+        static inline void __assert_type()
+        {
+            constexpr bool coord_is_plain =
+                std::is_standard_layout_v<CoordT> && std::is_trivially_copyable_v<CoordT>;
+            constexpr bool score_is_plain =
+                std::is_standard_layout_v<ScoreT> && std::is_trivially_copyable_v<ScoreT>;
+
+            static_assert(
+                coord_is_plain && score_is_plain
+                && std::is_floating_point_v<ScoreT>
+                && std::is_arithmetic_v<CoordT>,
+                "not support type.");
+        }
+
+        /// Box described by two corner points (x1, y1) and (x2, y2), together with
+        /// a score, a numeric label, an optional label string and a flag.
+        ///
+        /// @tparam T1 coordinate type, exposed as value_type
+        /// @tparam T2 score type, exposed as score_type
+        ///
+        /// A default-constructed box has all coordinates and the score set to 0,
+        /// label_text == nullptr, label == 0 and flag == false.
+        template<typename T1 = float, typename T2 = float>
+        struct BoundingBoxType
+        {
+            using value_type = T1;
+            using score_type = T2;
+
+            value_type   x1 = 0;
+            value_type   y1 = 0;
+            value_type   x2 = 0;
+            value_type   y2 = 0;
+            score_type   score = 0;
+            // Never freed by this struct (it has no destructor); whoever sets it
+            // must keep the string alive.
+            const char*  label_text = nullptr;
+            unsigned int label = 0;
+            // NOTE(review): presumably marks the box as valid/filled - confirm with the producers.
+            bool         flag = false;
+
+            /// Copy of this box with coordinates cast to O1 and score cast to O2.
+            template<typename O1, typename O2 = score_type>
+            BoundingBoxType<O1, O2> convert_type() const;
+
+            /// Declared only; the definition is not part of this header.
+            template<typename O1, typename O2 = score_type>
+            value_type iou_of(const BoundingBoxType<O1, O2>& other) const;
+
+            // Geometry helpers - declared only; definitions are not part of this header.
+            value_type    width()  const;
+            value_type    height() const;
+            value_type    area()   const;
+            ::cv::Rect    rect()   const;
+            ::cv::Point2i tl()     const;
+            ::cv::Point2i rb()     const;
+
+            /// Members take their default initializers above; the body only
+            /// enforces the type constraints at compile time.
+            BoundingBoxType()
+            {
+                types::__assert_type<value_type, score_type>();
+            }
+        };
+
+        // Explicit instantiations for the three box flavours aliased below.
+        // NOTE(review): these are explicit instantiation *definitions* placed in a
+        // header. The standard permits at most one such definition per
+        // specialization in a program, so if this header is included from several
+        // translation units consider `extern template` here plus one definition
+        // in a source file.
+        template struct BoundingBoxType<int, float>;
+        template struct BoundingBoxType<float, float>;
+        template struct BoundingBoxType<double, double>;
+
+        using Boxi = BoundingBoxType<int, float>;       // int coordinates, float score
+        using Boxf = BoundingBoxType<float, float>;     // float coordinates, float score
+        using Boxd = BoundingBoxType<double, double>;   // double coordinates, double score
+
+        /// List of 2-D points plus a flag; the list starts empty and flag starts false.
+        struct LandmarksType {
+            std::vector<cv::Point2f> points;
+            bool flag = false;
+            LandmarksType() {}
+        };
+        using Landmarks = LandmarksType;
+
+        /// Alternative name for Landmarks.
+        using Landmarks2D = Landmarks;
+
+        /// List of 3-D points plus a flag; the list starts empty and flag starts false.
+        struct Landmarks3DType {
+            std::vector<cv::Point3f> points;
+            bool flag = false;
+            Landmarks3DType() {}
+        };
+        using Landmarks3D = Landmarks3DType;
+
+        /// A float box bundled with its landmark points.
+        /// Both members are default-constructed and flag starts false.
+        struct BoxfWithLandmarksType {
+            Boxf      box;
+            Landmarks landmarks;
+            bool      flag = false;
+            BoxfWithLandmarksType() {}
+        };
+        using BoxfWithLandmarks = BoxfWithLandmarksType;
+
+        /// Three rotation angles (yaw, pitch, roll) plus a flag.
+        /// After default construction every angle is 0 and flag is false.
+        /// NOTE(review): the angle unit (degrees vs radians) and the meaning of
+        /// `flag` are not established in this header - confirm with the producer.
+        typedef struct EulerAnglesType {
+            float yaw, pitch, roll;
+            bool  flag;
+            // Initialise every member: the angles were previously left
+            // indeterminate, so reading them before assignment was undefined.
+            EulerAnglesType() : yaw(0.f), pitch(0.f), roll(0.f), flag(false) {}
+        } EulerAngles;
+
+        /// Emotion result: score, numeric label, label string and a flag.
+        /// After default construction score == 0, label == 0, text == nullptr
+        /// and flag == false.
+        typedef struct EmotionsType {
+            float        score;
+            unsigned int label;
+            // Never freed by this struct; check for nullptr before use.
+            const char* text;
+            bool         flag;
+            // Initialise every member: score/label/text were previously left
+            // indeterminate, so a default-constructed object held a wild pointer.
+            EmotionsType() : score(0.f), label(0u), text(nullptr), flag(false) {}
+        } Emotions;
+
+        /// Age result: a scalar age, a two-element interval, the interval's
+        /// probability and a flag.
+        /// After default construction all numeric members are 0 and flag is false.
+        typedef struct AgeType {
+            float        age;
+            unsigned int age_interval[2];
+            float        interval_prob;
+            bool         flag;
+            // Initialise every member: the numeric fields were previously left
+            // indeterminate, so reading them before assignment was undefined.
+            AgeType() : age(0.f), age_interval{ 0u, 0u }, interval_prob(0.f), flag(false) {}
+        } Age;
+
+        /// Gender result: score, numeric label, label string and a flag.
+        /// After default construction score == 0, label == 0, text == nullptr
+        /// and flag == false.
+        typedef struct GenderType {
+            float        score;
+            unsigned int label;
+            // Never freed by this struct; check for nullptr before use.
+            const char* text;
+            bool         flag;
+            // Initialise every member: score/label/text were previously left
+            // indeterminate, so a default-constructed object held a wild pointer.
+            GenderType() : score(0.f), label(0u), text(nullptr), flag(false) {}
+        } Gender;
+
+        /// Face embedding vector with its dimension and a flag.
+        /// After default construction the embedding is empty, dim == 0 and
+        /// flag == false.
+        /// NOTE(review): presumably dim mirrors embedding.size() once filled -
+        /// confirm with the code that populates it.
+        typedef struct FaceContentType {
+            std::vector<float> embedding;
+            unsigned int       dim;
+            bool               flag;
+            // Initialise dim as well: it was previously left indeterminate.
+            FaceContentType() : dim(0u), flag(false) {}
+        } FaceContent;
+
+        /// Segmentation result: two matrices and an int-to-name map, plus a flag.
+        /// All containers start empty and flag starts false.
+        struct SegmentContentType {
+            cv::Mat class_mat;
+            cv::Mat color_mat;
+            std::unordered_map<int, std::string> names_map;
+            bool flag = false;
+            SegmentContentType() {}
+        };
+        using SegmentContent = SegmentContentType;
+
+        /// Matting result: three matrices (fgr, pha, merge) plus a flag.
+        /// The matrices start empty and flag starts false.
+        struct MattingContentType {
+            cv::Mat fgr_mat;
+            cv::Mat pha_mat;
+            cv::Mat merge_mat;
+            bool flag = false;
+            MattingContentType() {}
+        };
+        using MattingContent = MattingContentType;
+
+        /// Single mask matrix plus a flag; the mask starts empty and flag starts false.
+        struct SegmentationMaskContentType {
+            cv::Mat mask;
+            bool flag = false;
+            SegmentationMaskContentType() {}
+        };
+        using SegmentationMaskContent = SegmentationMaskContentType;
+
+        /// Classification result held as three vectors (scores, texts, labels)
+        /// plus a flag. The vectors start empty and flag starts false.
+        /// NOTE(review): presumably the three vectors are index-aligned - confirm
+        /// with the code that fills them.
+        struct ImageNetContentType {
+            std::vector<float>        scores;
+            std::vector<const char*>  texts;   // strings are not freed by this struct
+            std::vector<unsigned int> labels;
+            bool flag = false;
+            ImageNetContentType() {}
+        };
+        using ImageNetContent = ImageNetContentType;
+
+        /// Alternative name for ImageNetContent.
+        using ClassificationContent = ImageNetContent;
+
+        /// Single output matrix plus a flag; the matrix starts empty and flag starts false.
+        struct StyleContentType {
+            cv::Mat mat;
+            bool flag = false;
+            StyleContentType() {}
+        };
+        using StyleContent = StyleContentType;
+
+        /// Single output matrix plus a flag; the matrix starts empty and flag starts false.
+        struct SuperResolutionContentType {
+            cv::Mat mat;
+            bool flag = false;
+            SuperResolutionContentType() {}
+        };
+        using SuperResolutionContent = SuperResolutionContentType;
+
+        /// Face-parsing result: a label matrix and a merge matrix plus a flag.
+        /// The matrices start empty and flag starts false.
+        struct FaceParsingContentType {
+            cv::Mat label;
+            cv::Mat merge;
+            bool flag = false;
+            FaceParsingContentType() {}
+        };
+        using FaceParsingContent = FaceParsingContentType;
+
+        // Task-specific names that all refer to the same SegmentationMaskContent type.
+        using HairSegContent     = SegmentationMaskContent;
+        using HeadSegContent     = SegmentationMaskContent;
+        using FaceHairSegContent = SegmentationMaskContent;
+        using PortraitSegContent = SegmentationMaskContent;
+
+    } // namespace types
+
+    // ====================================================================
+    // utils
+    // ====================================================================
+    // Tensor-building and normalisation helpers. Only declarations live here;
+    // the definitions are not part of this header.
+    namespace utils {
+        namespace transform {
+
+            // Values accepted by the data_format parameters below.
+            // NOTE(review): presumably channel-first (CHW) vs channel-last (HWC)
+            // tensor layout - confirm against the definitions.
+            enum { CHW = 0, HWC = 1 };
+
+            // Builds an Ort::Value from one image.
+            //   mat                  - source image
+            //   tensor_dims          - dimensions for the resulting tensor
+            //   memory_info_handler  - ONNX Runtime memory info used for creation
+            //   tensor_value_handler - caller-supplied float buffer, passed by
+            //                          non-const reference
+            //   data_format          - CHW (default) or HWC
+            // NOTE(review): presumably the returned tensor refers to
+            // tensor_value_handler's storage, so that buffer would have to
+            // outlive the tensor - confirm in the implementation.
+            Ort::Value create_tensor(
+                const cv::Mat& mat,
+                const std::vector<int64_t>& tensor_dims,
+                const Ort::MemoryInfo& memory_info_handler,
+                std::vector<float>& tensor_value_handler,
+                unsigned int data_format = CHW);
+
+            // Same parameters as create_tensor, but takes a vector of images.
+            Ort::Value create_tensor_batch(
+                const std::vector<cv::Mat>& batch_mats,
+                const std::vector<int64_t>& tensor_dims,
+                const Ort::MemoryInfo& memory_info_handler,
+                std::vector<float>& tensor_value_handler,
+                unsigned int data_format = CHW);
+
+            // Builds an Ort::Value from a deque of frames; takes no data_format.
+            // NOTE(review): the name suggests a 5-D tensor; the exact axis order
+            // is not visible here - confirm in the implementation.
+            Ort::Value create_video_tensor_5d(
+                const std::deque<cv::Mat>& frames,
+                const std::vector<int64_t>& tensor_dims,
+                const Ort::MemoryInfo& memory_info_handler,
+                std::vector<float>& tensor_value_handler);
+
+            // Normalisation overloads: returning a new matrix, writing into an
+            // output matrix, or modifying the argument in place. The array
+            // overloads take three mean and three scale values.
+            // NOTE(review): presumably computes (x - mean) * scale - confirm in
+            // the implementation.
+            cv::Mat normalize(const cv::Mat& mat, float mean, float scale);
+            cv::Mat normalize(const cv::Mat& mat, const float mean[3], const float scale[3]);
+            void    normalize(const cv::Mat& inmat, cv::Mat& outmat, float mean, float scale);
+            void    normalize_inplace(cv::Mat& mat_inplace, float mean, float scale);
+            void    normalize_inplace(cv::Mat& mat_inplace, const float mean[3], const float scale[3]);
+
+        } // namespace transform
+    } // namespace utils
+
+    // ====================================================================
+    // Helpers
+    // ====================================================================
+    /// Returns the name of the session input at `index` as an owning std::string.
+    ///
+    /// @param index        zero-based input index
+    /// @param allocator    allocator handed to ONNX Runtime for the temporary name
+    /// @param ort_session  session to query; dereferenced without a null check
+    inline static std::string OrtCompatiableGetInputName(
+        size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
+    {
+        // The allocated name is released when `allocated_name` goes out of
+        // scope, so copy it into the returned string first.
+        auto allocated_name = ort_session->GetInputNameAllocated(index, allocator);
+        return std::string(allocated_name.get());
+    }
+
+    /// Returns the name of the session output at `index` as an owning std::string.
+    ///
+    /// @param index        zero-based output index
+    /// @param allocator    allocator handed to ONNX Runtime for the temporary name
+    /// @param ort_session  session to query; dereferenced without a null check
+    inline static std::string OrtCompatiableGetOutputName(
+        size_t index, OrtAllocator* allocator, Ort::Session* ort_session)
+    {
+        // The allocated name is released when `allocated_name` goes out of
+        // scope, so copy it into the returned string first.
+        auto allocated_name = ort_session->GetOutputNameAllocated(index, allocator);
+        return std::string(allocated_name.get());
+    }
+
+    // ====================================================================
+    // BasicOrtHandler
+    // ====================================================================
+    // Abstract base for the model wrappers in this header. It holds the ONNX
+    // Runtime objects and the input/output node metadata, and requires derived
+    // classes to implement transform() and transformBatch().
+    // Construction and destruction are protected, so it is only usable through
+    // a derived class; copying is disabled.
+    class ONNXENGINE_API BasicOrtHandler
+    {
+    protected:
+
+        // Input-side metadata.
+        // NOTE(review): presumably the const char* entries point into the
+        // std::string storage of input_node_names_ - confirm in the .cpp.
+        const char* input_name = nullptr;
+        std::vector<const char*> input_node_names;
+        std::vector<std::string> input_node_names_;
+        std::vector<int64_t>     input_node_dims;
+        std::size_t              input_tensor_size = 1;
+        std::vector<float>       input_values_handler;
+
+        // Output-side metadata (same name/name_ pairing as the inputs).
+        std::vector<const char*>              output_node_names;
+        std::vector<std::string>              output_node_names_;
+        std::vector<std::vector<int64_t>>     output_node_dims;
+        int                 num_outputs = 1;
+
+        // ONNX Runtime objects held as raw pointers, all defaulting to nullptr.
+        // NOTE(review): presumably created in the constructor and released in
+        // the destructor defined outside this header - confirm ownership.
+        Ort::Env* ort_env = nullptr;  // raw pointer, defaults to nullptr
+        Ort::Session* ort_session = nullptr;
+        Ort::MemoryInfo* memory_info_handler = nullptr;
+
+        std::wstring        onnx_path_w;          // owns the wide-string model path
+        const LITEORT_CHAR* onnx_path = nullptr;  // points into onnx_path_w
+        const char* log_id = nullptr;
+
+
+    protected:
+        // const member: it has to be set in the constructor's initializer list.
+        const unsigned int num_threads;
+        EngineType m_engineType;
+
+    protected:
+        // Default: hardware auto-detection via ANSLicenseHelper through EPLoader
+        explicit BasicOrtHandler(const std::string& _onnx_path,
+            unsigned int _num_threads = 1);
+
+        // Explicit engine override per-session
+        explicit BasicOrtHandler(const std::string& _onnx_path,
+            EngineType engineType,
+            unsigned int _num_threads = 1);
+
+        virtual ~BasicOrtHandler();
+
+        // Non-copyable: the class holds raw pointers to runtime objects.
+        BasicOrtHandler(const BasicOrtHandler&) = delete;
+        BasicOrtHandler& operator=(const BasicOrtHandler&) = delete;
+    private:
+        // NOTE(review): presumably shared set-up invoked by both constructors -
+        // confirm in the .cpp.
+        void initialize_handler();
+    protected:
+        // Model-specific conversion of one image / a batch of images into an
+        // input tensor; must be provided by every derived class.
+        virtual Ort::Value transform(const cv::Mat& mat) = 0;
+        virtual Ort::Value transformBatch(const std::vector<cv::Mat>& images) = 0;
+
+        // EP-specific session option builders
+        // NOTE(review): the bool result presumably reports whether the
+        // execution provider was appended - confirm in the .cpp.
+        bool TryAppendCUDA(Ort::SessionOptions& opts);
+        bool TryAppendDirectML(Ort::SessionOptions& opts);
+        bool TryAppendOpenVINO(Ort::SessionOptions& opts);
+    };
+
+    // ====================================================================
+    // SCRFD — face detection
+    // ====================================================================
+    // SCRFD face detector built on BasicOrtHandler. The public entry point is
+    // detect(); everything else is private plumbing declared here and defined
+    // outside this header.
+    // NOTE(review): unlike BasicOrtHandler this class is not marked
+    // ONNXENGINE_API - confirm whether it is meant to be used across the DLL
+    // boundary.
+    class SCRFD : public BasicOrtHandler
+    {
+    public:
+        // Both constructors mirror the two BasicOrtHandler constructors
+        // (auto-detected engine vs explicit EngineType).
+        explicit SCRFD(const std::string& _onnx_path,unsigned int _num_threads = 1);
+        explicit SCRFD(const std::string& _onnx_path,EngineType engineType,unsigned int _num_threads = 1);
+        ~SCRFD() override = default;
+
+        // Runs detection on `mat` and reports results through
+        // `detected_boxes_kps`.
+        //   score_threshold - default 0.3
+        //   iou_threshold   - default 0.45
+        //   topk            - default 400
+        // NOTE(review): presumably scores below score_threshold are dropped,
+        // iou_threshold drives NMS and topk caps the result count - confirm in
+        // the .cpp.
+        void detect(const cv::Mat& mat,
+            std::vector<types::BoxfWithLandmarks>& detected_boxes_kps,
+            float score_threshold = 0.3f,
+            float iou_threshold = 0.45f,
+            unsigned int topk = 400);
+
+    private:
+        typedef struct { float cx, cy, stride; } SCRFDPoint;
+        typedef struct { float ratio; int dw, dh; bool flag; } SCRFDScaleParams;
+
+        // Per-channel normalisation constants: mean 127.5 and scale 1/128.
+        const float mean_vals[3] = { 127.5f, 127.5f, 127.5f };
+        const float scale_vals[3] = { 1.f / 128.f, 1.f / 128.f, 1.f / 128.f };
+
+        // Model-structure defaults.
+        // NOTE(review): presumably adjusted by initial_context() from the
+        // loaded model - confirm in the .cpp.
+        unsigned int fmc = 3;
+        bool         use_kps = false;
+        unsigned int num_anchors = 2;
+        std::vector<int> feat_stride_fpn = { 8, 16, 32 };
+        // NOTE(review): presumably keyed by stride and filled by
+        // generate_points() - confirm in the .cpp.
+        std::unordered_map<int, std::vector<SCRFDPoint>> center_points;
+        bool center_points_is_update = false;
+
+        static constexpr unsigned int nms_pre = 1000;
+        static constexpr unsigned int max_nms = 30000;
+
+        // Implementations of the BasicOrtHandler pure virtuals.
+        Ort::Value transform(const cv::Mat& mat_rs) override;
+        Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
+
+        void initial_context();
+        // Resizes `mat` into `mat_rs` for the given target size and records the
+        // applied scaling in `scale_params`.
+        // NOTE(review): presumably an aspect-preserving resize with padding
+        // (ratio, dw, dh) - confirm in the .cpp.
+        void resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
+            int target_height, int target_width,
+            SCRFDScaleParams& scale_params);
+        void generate_points(int target_height, int target_width);
+
+        // Decoding of the output tensors into boxes (and landmarks), appended
+        // to `bbox_kps_collection`.
+        void generate_bboxes_kps(const SCRFDScaleParams& scale_params,
+            std::vector<types::BoxfWithLandmarks>& bbox_kps_collection,
+            std::vector<Ort::Value>& output_tensors,
+            float score_threshold,
+            float img_height, float img_width);
+
+        // Per-stride variant that takes score and box predictions only.
+        void generate_bboxes_single_stride(
+            const SCRFDScaleParams& scale_params,
+            Ort::Value& score_pred, Ort::Value& bbox_pred,
+            unsigned int stride, float score_threshold,
+            float img_height, float img_width,
+            std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
+
+        // Per-stride variant that additionally takes keypoint predictions.
+        void generate_bboxes_kps_single_stride(
+            const SCRFDScaleParams& scale_params,
+            Ort::Value& score_pred, Ort::Value& bbox_pred, Ort::Value& kps_pred,
+            unsigned int stride, float score_threshold,
+            float img_height, float img_width,
+            std::vector<types::BoxfWithLandmarks>& bbox_kps_collection);
+
+        // Non-maximum suppression from `input` into `output`; `input` is taken
+        // by non-const reference.
+        // NOTE(review): presumably `input` gets sorted in place - confirm in
+        // the .cpp.
+        void nms_bboxes_kps(std::vector<types::BoxfWithLandmarks>& input,
+            std::vector<types::BoxfWithLandmarks>& output,
+            float iou_threshold, unsigned int topk);
+    };
+
+    // ====================================================================
+    // GlintArcFace — face recognition
+    // ====================================================================
+    /// ArcFace face-recognition wrapper built on BasicOrtHandler.
+    /// detect()/detectBatch() report results as types::FaceContent; their
+    /// definitions and those of the transform overrides are outside this header.
+    class GlintArcFace : public BasicOrtHandler
+    {
+    public:
+        /// Engine chosen by the base class default (auto-detection).
+        explicit GlintArcFace(const std::string& _onnx_path, unsigned int _num_threads = 1)
+            : BasicOrtHandler(_onnx_path, _num_threads) {}
+
+        /// Engine selected explicitly by the caller.
+        explicit GlintArcFace(const std::string& _onnx_path, EngineType engineType,
+            unsigned int _num_threads = 1)
+            : BasicOrtHandler(_onnx_path, engineType, _num_threads) {}
+
+        ~GlintArcFace() override = default;
+
+        /// Processes one image and reports the result through `face_content`.
+        void detect(const cv::Mat& mat, types::FaceContent& face_content);
+
+        /// Processes several images and reports results through `face_contents`.
+        void detectBatch(const std::vector<cv::Mat>& images,
+            std::vector<types::FaceContent>& face_contents);
+
+    private:
+        // Normalisation constants: mean 127.5 and scale 1/127.5.
+        // NOTE(review): presumably applied as (x - mean) * scale inside
+        // transform() - confirm in the .cpp.
+        static constexpr float mean_val = 127.5f;
+        static constexpr float scale_val = 1.f / 127.5f;
+
+        // Implementations of the BasicOrtHandler pure virtuals.
+        Ort::Value transform(const cv::Mat& mat) override;
+        Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
+    };
+
+    // ====================================================================
+    // GlintCosFace — face recognition
+    // ====================================================================
+    /// CosFace face-recognition wrapper built on BasicOrtHandler.
+    /// detect()/detectBatch() report results as types::FaceContent; their
+    /// definitions and those of the transform overrides are outside this header.
+    class GlintCosFace : public BasicOrtHandler
+    {
+    public:
+        /// Engine chosen by the base class default (auto-detection).
+        explicit GlintCosFace(const std::string& _onnx_path, unsigned int _num_threads = 1)
+            : BasicOrtHandler(_onnx_path, _num_threads) {}
+
+        /// Engine selected explicitly by the caller.
+        explicit GlintCosFace(const std::string& _onnx_path, EngineType engineType,
+            unsigned int _num_threads = 1)
+            : BasicOrtHandler(_onnx_path, engineType, _num_threads) {}
+
+        ~GlintCosFace() override = default;
+
+        /// Processes one image and reports the result through `face_content`.
+        void detect(const cv::Mat& mat, types::FaceContent& face_content);
+
+        /// Processes several images and reports results through `face_contents`.
+        void detectBatch(const std::vector<cv::Mat>& images,
+            std::vector<types::FaceContent>& face_contents);
+
+    private:
+        // Normalisation constants: mean 127.5 and scale 1/127.5.
+        // NOTE(review): presumably applied as (x - mean) * scale inside
+        // transform() - confirm in the .cpp.
+        static constexpr float mean_val = 127.5f;
+        static constexpr float scale_val = 1.f / 127.5f;
+
+        // Implementations of the BasicOrtHandler pure virtuals.
+        Ort::Value transform(const cv::Mat& mat) override;
+        Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
+    };
+
+    // ====================================================================
+    // MOVINET — action recognition
+    // ====================================================================
+    // MoViNet action-recognition wrapper built on BasicOrtHandler. It consumes
+    // a deque of frames and reports an (int, float) pair. All member functions
+    // are defined outside this header.
+    class MOVINET : public BasicOrtHandler
+    {
+    public:
+        // Four constructors: with or without an explicit EngineType, and with
+        // or without explicit input dimensions.
+        // NOTE(review): presumably the dimension-less overloads keep the
+        // InputConfig defaults below - confirm in the .cpp.
+        explicit MOVINET(const std::string& _onnx_path,
+            unsigned int _num_threads = 1);
+
+        explicit MOVINET(const std::string& _onnx_path,
+            int _temporal, int _width, int _height, int _channels = 3,
+            unsigned int _num_threads = 1);
+
+        explicit MOVINET(const std::string& _onnx_path,
+            EngineType engineType,
+            unsigned int _num_threads = 1);
+
+        explicit MOVINET(const std::string& _onnx_path,
+            EngineType engineType,
+            int _temporal, int _width, int _height, int _channels = 3,
+            unsigned int _num_threads = 1);
+
+        ~MOVINET() override = default;
+
+        // Runs the model on `frames` and writes the outcome to `out_result`.
+        // NOTE(review): presumably the pair is (class index, score), matching
+        // post_processing() - confirm in the .cpp.
+        void inference(const std::deque<cv::Mat>& frames,
+            std::pair<int, float>& out_result);
+
+    private:
+        // Input geometry defaults: 16 frames of 172x172 with 3 channels.
+        struct InputConfig {
+            int temporal = 16;
+            int width = 172;
+            int height = 172;
+            int channels = 3;
+        } input_params;
+
+        // Output defaults: 2 classes.
+        struct OutputConfig {
+            int num_classes = 2;
+        } output_params;
+
+        std::string          _MoviNetInputName;
+        std::string          _MoviNetOutputName;
+        std::vector<float>   input_tensor_values;
+
+        // NOTE(review): presumably fills the two name strings above from the
+        // session - confirm in the .cpp.
+        void init_io_names();
+
+        // Frame-sequence overload; this is a separate overload, not an override
+        // of a base-class virtual.
+        Ort::Value transform(const std::deque<cv::Mat>& frames);
+        std::pair<int, float> post_processing(const float* pOutput);
+
+        // Required by BasicOrtHandler pure virtuals
+        Ort::Value transform(const cv::Mat& mat) override;
+        Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
+    };
+
+    // ====================================================================
+    // BoundingBoxType template implementations
+    // ====================================================================
+    /// Returns a copy of this box whose coordinates are static_cast to O1 and
+    /// whose score is static_cast to O2. label_text, label and flag are copied
+    /// unchanged, so the returned box shares the same label_text pointer.
+    /// Both the source and the target type pairs are checked at compile time.
+    template<typename T1, typename T2>
+    template<typename O1, typename O2>
+    inline ANSCENTER::types::BoundingBoxType<O1, O2>
+        ANSCENTER::types::BoundingBoxType<T1, T2>::convert_type() const
+    {
+        types::__assert_type<value_type, score_type>();
+        types::__assert_type<O1, O2>();
+
+        BoundingBoxType<O1, O2> converted;
+
+        // Metadata is carried over as-is.
+        converted.flag = flag;
+        converted.label = label;
+        converted.label_text = label_text;
+
+        // Numeric fields are converted to the target types.
+        converted.score = static_cast<O2>(score);
+        converted.x1 = static_cast<O1>(x1);
+        converted.y1 = static_cast<O1>(y1);
+        converted.x2 = static_cast<O1>(x2);
+        converted.y2 = static_cast<O1>(y2);
+
+        return converted;
+    }
+
+} // namespace ANSCENTER
+
+#endif // ONNXEngine_H