Initial setup for CLion

This commit is contained in:
2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions

View File

@@ -0,0 +1,217 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "openvino/openvino.hpp"
/**
 * @brief Detection result augmented with an action classification
 */
struct DetectedAction {
    /** @brief Bounding box of the detection */
    cv::Rect rect;
    /** @brief Predicted action class label */
    int label;
    /** @brief Confidence of the detection */
    float detection_conf;
    /** @brief Confidence of the predicted action */
    float action_conf;

    /**
     * @brief Constructor
     * @param rect Bounding box of the detection
     * @param label Action class label
     * @param detection_conf Detection confidence
     * @param action_conf Action confidence
     */
    DetectedAction(const cv::Rect& rect,
                   int label,
                   float detection_conf,
                   float action_conf)
        : rect(rect),
          label(label),
          detection_conf(detection_conf),
          action_conf(action_conf) {}
};

using DetectedActions = std::vector<DetectedAction>;
/**
 * @brief Description of a single SSD detection head
 */
struct SSDHead {
    /** @brief Step size of the head */
    int step;
    /** @brief Anchor sizes used by this head */
    std::vector<cv::Size2f> anchors;

    /**
     * @brief Constructor
     * @param step Step size of the head
     * @param anchors Anchor sizes of the head
     */
    SSDHead(int step, const std::vector<cv::Size2f>& anchors)
        : step(step),
          anchors(anchors) {}
};

using SSDHeads = std::vector<SSDHead>;
/**
 * @brief Config for the Action Detection model
 */
struct ActionDetectorConfig : public CnnConfig {
/**
 * @brief Constructor
 * @param path_to_model Path to the model description
 * @param model_type Model type tag
 */
explicit ActionDetectorConfig(const std::string& path_to_model, const std::string& model_type)
: CnnConfig(path_to_model, model_type) {}
/** @brief Name of output blob with location info (old network version) */
std::string old_loc_blob_name{"mbox_loc1/out/conv/flat"};
/** @brief Name of output blob with detection confidence info (old network version) */
std::string old_det_conf_blob_name{"mbox_main_conf/out/conv/flat/softmax/flat"};
/** @brief Prefix of name of output blob with action confidence info (old network version) */
std::string old_action_conf_blob_name_prefix{"out/anchor"};
/** @brief Name of output blob with priorbox info (old network version) */
std::string old_priorbox_blob_name{"mbox/priorbox"};
/** @brief Name of output blob with location info (new network version) */
std::string new_loc_blob_name{"ActionNet/out_detection_loc"};
/** @brief Name of output blob with detection confidence info (new network version) */
std::string new_det_conf_blob_name{"ActionNet/out_detection_conf"};
/** @brief Prefix of name of output blob with action confidence info (new network version) */
std::string new_action_conf_blob_name_prefix{"ActionNet/action_heads/out_head_"};
/** @brief Suffix of name of output blob with action confidence info (new network version) */
std::string new_action_conf_blob_name_suffix{"_anchor_"};
/** @brief Scale parameter for Soft-NMS algorithm */
float nms_sigma = 0.6f;
/** @brief Threshold for detected objects */
float detection_confidence_threshold = 0.4f;
/** @brief Threshold for recognized actions */
float action_confidence_threshold = 0.75f;
/** @brief Scale of action logits for the old network version */
float old_action_scale = 3.f;
/** @brief Scale of action logits for the new network version */
float new_action_scale = 16.f;
/** @brief Default action class label */
int default_action_id = 0;
/** @brief Number of top-score bboxes in output */
size_t keep_top_k = 200;
/** @brief Number of SSD anchors for the old network version */
std::vector<int> old_anchors{4};
/** @brief Number of SSD anchors for the new network version */
std::vector<int> new_anchors{1, 4};
/** @brief Number of actions to detect */
size_t num_action_classes = 3;
/** @brief Async execution flag */
bool is_async = true;
/** @brief SSD bbox encoding variances */
float variances[4]{0.1f, 0.1f, 0.2f, 0.2f};
/** @brief SSD detection heads (step size + anchor sizes) for the new network version */
SSDHeads new_det_heads{{8, {{26.17863728f, 58.670372f}}},
{16, {{35.36f, 81.829632f},
{45.8114572f, 107.651852f},
{63.31491832f, 142.595732f},
{93.5070856f, 201.107692f}}}};
};
/**
 * @brief Detector that localizes persons and classifies their actions
 */
class ActionDetection : public AsyncDetection<DetectedAction>, public BaseCnnDetection {
public:
/**
 * @brief Constructor
 * @param config Action detector configuration
 */
explicit ActionDetection(const ActionDetectorConfig& config);
/** @brief Submits the prepared inference request */
void submitRequest() override;
/** @brief Prepares the given frame for inference */
void enqueue(const cv::Mat& frame) override;
/** @brief Waits for the inference request to finish */
void wait() override { BaseCnnDetection::wait(); }
/** @brief Retrieves detected actions for the processed frame */
DetectedActions fetchResults() override;
private:
/** @brief Detector configuration */
ActionDetectorConfig m_config;
/** @brief Compiled model */
ov::CompiledModel m_model;
/** @brief Model layout */
ov::Layout m_modelLayout;
/** @brief Name of the model input */
std::string m_input_name;
/** @brief Output tensors keyed by tensor name */
std::map<std::string, ov::Tensor> m_outputs;
/** @brief Number of frames currently enqueued */
int m_enqueued_frames = 0;
/** @brief Input frame width */
float m_width = 0;
/** @brief Input frame height */
float m_height = 0;
/** @brief True when the new network version is used (see new_* config fields) */
bool m_new_model = false;
/** @brief Anchor index ranges per SSD head (NOTE(review): inferred from name — confirm in implementation) */
std::vector<int> m_head_ranges;
/** @brief Step size of each SSD head */
std::vector<int> m_head_step_sizes;
/** @brief Feature-map size of each SSD head */
std::vector<cv::Size> m_head_blob_sizes;
/** @brief Maps head/anchor indices to global anchor indices */
std::vector<std::vector<int>> m_glob_anchor_map;
/** @brief Output tensor name per global anchor */
std::vector<std::string> m_glob_anchor_names;
/** @brief Total number of global anchors */
int m_num_glob_anchors = 0;
/** @brief Network input size (WxH) */
cv::Size m_network_input_size;
/** @brief Number of candidate detections (NOTE(review): inferred from name — confirm) */
int m_num_candidates;
/** @brief Whether action classification is binary (NOTE(review): inferred from name — confirm) */
bool m_binary_task;
/**
 * @brief BBox in normalized form (each coordinate is in range [0;1]).
 */
struct NormalizedBBox {
float xmin;
float ymin;
float xmax;
float ymax;
};
typedef std::vector<NormalizedBBox> NormalizedBBoxes;
/**
 * @brief Translates the detections from the network outputs
 *
 * @param loc Location buffer
 * @param main_conf Detection conf buffer
 * @param priorboxes Priorboxes buffer
 * @param add_conf Action conf buffers
 * @param frame_size Size of input image (WxH)
 * @return Detected objects
 */
DetectedActions GetDetections(const cv::Mat& loc,
const cv::Mat& main_conf,
const cv::Mat& priorboxes,
const std::vector<cv::Mat>& add_conf,
const cv::Size& frame_size) const;
/**
 * @brief Translates an input buffer to a normalized BBox
 *
 * @param data Input buffer
 * @param inverse Presumably swaps the coordinate order in the record — TODO confirm in implementation
 * @return BBox
 */
inline NormalizedBBox
ParseBBoxRecord(const float* data, bool inverse) const;
/**
 * @brief Generates an SSD prior box for the given feature-map position and anchor
 *
 * @param pos Position in the head's feature map
 * @param step Step size of the head
 * @param anchor Anchor size
 * @param blob_size Size of the head's feature map
 * @return Prior box in normalized form
 */
inline NormalizedBBox
GeneratePriorBox(int pos, int step, const cv::Size2f& anchor, const cv::Size& blob_size) const;
/**
 * @brief Translates input blobs in SSD format to bbox in CV_Rect
 *
 * @param prior_bbox Prior boxes in SSD format
 * @param variances Variances of prior boxes in SSD format
 * @param encoded_bbox BBox to decode
 * @param frame_size Size of input image (WxH)
 * @return BBox in CV_Rect format
 */
cv::Rect ConvertToRect(const NormalizedBBox& prior_bbox,
const NormalizedBBox& variances,
const NormalizedBBox& encoded_bbox,
const cv::Size& frame_size) const;
/**
 * @brief Carry out Soft Non-Maximum Suppression algorithm under detected actions
 *
 * @param detections Detected actions
 * @param sigma Scale parameter
 * @param top_k Number of top-score bboxes
 * @param min_det_conf Minimum detection confidence
 * @param out_indices Out indices of valid detections
 */
void SoftNonMaxSuppression(const DetectedActions& detections,
const float sigma,
size_t top_k,
const float min_det_conf,
std::vector<int>* out_indices) const;
};

View File

@@ -0,0 +1,50 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
/**
 * @brief Action label (index of an action class)
 */
using Action = int;

/**
 * @brief Event on a single frame with its action label
 */
struct FrameEvent {
    /** @brief Frame index */
    int frame_id;
    /** @brief Action label */
    Action action;

    /**
     * @brief Constructor
     * @param frame_id Frame index
     * @param action Action label
     */
    FrameEvent(int frame_id, Action action)
        : frame_id(frame_id),
          action(action) {}
};

using FrameEventsTrack = std::vector<FrameEvent>;

/**
 * @brief Run of consecutive frames that share the same action
 */
struct RangeEvent {
    /** @brief First frame index of the range */
    int begin_frame_id;
    /** @brief Index one past the last valid frame */
    int end_frame_id;
    /** @brief Action label */
    Action action;

    /**
     * @brief Constructor
     * @param begin_frame_id First frame of the range
     * @param end_frame_id One past the last frame of the range
     * @param action Action label
     */
    RangeEvent(int begin_frame_id, int end_frame_id, Action action)
        : begin_frame_id(begin_frame_id),
          end_frame_id(end_frame_id),
          action(action) {}
};

using RangeEventsTrack = std::vector<RangeEvent>;

/** @brief Role of the tracked person for action recognition */
enum ActionsType { STUDENT, TEACHER, TOP_K };

View File

@@ -0,0 +1,145 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <functional>
#include "openvino/openvino.hpp"
#include "utils/ocv_common.hpp"
/**
 * @brief Base configuration for a CNN model
 */
struct CnnConfig {
    /**
     * @brief Constructor
     * @param path_to_model Path to the model description
     * @param model_type Model type tag (optional)
     */
    explicit CnnConfig(const std::string& path_to_model, const std::string& model_type = "")
        : m_path_to_model(path_to_model),
          m_model_type(model_type) {}

    /** @brief Path to model description */
    std::string m_path_to_model;
    /** @brief Model type */
    std::string m_model_type;
    /** @brief Maximal size of batch */
    int m_max_batch_size{1};
    /** @brief OpenVINO Core instance */
    ov::Core m_core;
    /** @brief Device name */
    std::string m_deviceName;
};
/**
 * @brief Base class of model
 */
class CnnDLSDKBase {
public:
using Config = CnnConfig;
/**
 * @brief Constructor
 * @param config Model configuration
 */
explicit CnnDLSDKBase(const Config& config);
/**
 * @brief Destructor
 */
~CnnDLSDKBase() {}
/**
 * @brief Loads network
 */
void Load();
protected:
/**
 * @brief Run model in batch mode
 *
 * @param frames Vector of input images
 * @param results_fetcher Callback to fetch inference results
 */
void InferBatch(const std::vector<cv::Mat>& frames,
const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& results_fetcher);
/** @brief Config */
Config m_config;
/** @brief Model inputs info */
ov::OutputVector m_inInfo;
/** @brief Model outputs info */
ov::OutputVector m_outInfo_;
/** @brief Model layout */
ov::Layout m_desired_layout;
/** @brief Model input shape */
ov::Shape m_modelShape;
/** @brief Compiled model */
ov::CompiledModel m_compiled_model;
/** @brief Inference request */
ov::InferRequest m_infer_request;
/** @brief Input tensor */
ov::Tensor m_in_tensor;
/** @brief Names of output tensors */
std::vector<std::string> m_output_tensors_names;
};
/**
 * @brief Model that produces a feature vector for an input image
 */
class VectorCNN : public CnnDLSDKBase {
public:
/**
 * @brief Constructor
 * @param config Model configuration
 */
explicit VectorCNN(const CnnConfig& config);
/**
 * @brief Runs inference on a single image and writes the result into *vector
 * @param image Input image
 * @param vector Output buffer for the result
 * @param outp_shape Desired output shape (empty by default — presumably keeps the model's shape; confirm in implementation)
 */
void Compute(const cv::Mat& image,
cv::Mat* vector, cv::Size outp_shape = cv::Size());
/**
 * @brief Batch variant: one output per input image
 * @param images Input images
 * @param vectors Output buffers, one per image
 * @param outp_shape Desired output shape (empty by default)
 */
void Compute(const std::vector<cv::Mat>& images,
std::vector<cv::Mat>* vectors, cv::Size outp_shape = cv::Size());
/** @brief Returns the maximal batch size */
int maxBatchSize() const;
};
/**
 * @brief Interface for algorithms that process frames, possibly asynchronously
 */
class AsyncAlgorithm {
public:
    /** @brief Virtual destructor for safe deletion through the interface (defaulted per C.80) */
    virtual ~AsyncAlgorithm() = default;
    /** @brief Queues a frame for processing */
    virtual void enqueue(const cv::Mat& frame) = 0;
    /** @brief Submits the queued work for execution */
    virtual void submitRequest() = 0;
    /** @brief Blocks until the submitted work completes */
    virtual void wait() = 0;
};
/**
 * @brief Asynchronous detection interface producing results of type T
 */
template <typename T>
class AsyncDetection : public AsyncAlgorithm {
public:
/** @brief Retrieves the detection results */
virtual std::vector<T> fetchResults() = 0;
};
/**
 * @brief No-op detection stub: accepts frames and never reports results
 */
template <typename T>
class NullDetection : public AsyncDetection<T> {
public:
    /** @brief Ignores the frame */
    void enqueue(const cv::Mat&) override {}
    /** @brief Does nothing */
    void submitRequest() override {}
    /** @brief Returns immediately */
    void wait() override {}
    /** @brief Always returns an empty result set */
    std::vector<T> fetchResults() override {
        return {};
    }
};
/**
 * @brief Base helper that drives a single inference request in sync or async mode
 */
class BaseCnnDetection : public AsyncAlgorithm {
protected:
    /** @brief Inference request shared with the derived detector */
    std::shared_ptr<ov::InferRequest> m_request;
    /** @brief Whether requests are submitted asynchronously */
    const bool m_isAsync;
    /** @brief Human-readable detector name */
    std::string m_detectorName;

public:
    /**
     * @brief Constructor
     * @param isAsync Enables asynchronous submission
     */
    explicit BaseCnnDetection(bool isAsync = false) : m_isAsync(isAsync) {}

    /** @brief Starts the request: async when enabled, blocking otherwise; no-op without a request */
    void submitRequest() override {
        if (!m_request) {
            return;
        }
        if (m_isAsync) {
            m_request->start_async();
        } else {
            m_request->infer();
        }
    }

    /** @brief Waits for an in-flight async request; no-op in sync mode or without a request */
    void wait() override {
        if (m_request && m_isAsync) {
            m_request->wait();
        }
    }
};

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "openvino/openvino.hpp"
#include "cnn.hpp"
namespace detection {
/**
 * @brief Detection result: bounding box plus confidence score
 */
struct DetectedObject {
    /** @brief Bounding box of the detection */
    cv::Rect rect;
    /** @brief Detection confidence (defaults to -1.0f) */
    float confidence;

    /**
     * @brief Constructor
     * @param rect Bounding box
     * @param confidence Confidence score
     */
    explicit DetectedObject(const cv::Rect& rect = cv::Rect(), float confidence = -1.0f)
        : rect(rect),
          confidence(confidence) {}
};

using DetectedObjects = std::vector<DetectedObject>;
/**
 * @brief Config for the face detection model
 */
struct DetectorConfig : public CnnConfig {
/**
 * @brief Constructor
 * @param path_to_model Path to the model description
 */
explicit DetectorConfig(const std::string& path_to_model) :
CnnConfig(path_to_model) {}
/** @brief Minimal confidence for a detection to be reported */
float confidence_threshold{0.6f};
/** @brief Horizontal bbox enlargement factor (NOTE(review): inferred from name — confirm in implementation) */
float increase_scale_x{1.15f};
/** @brief Vertical bbox enlargement factor (NOTE(review): inferred from name — confirm in implementation) */
float increase_scale_y{1.15f};
/** @brief Async execution flag */
bool is_async = true;
/** @brief Network input height */
int input_h = 600;
/** @brief Network input width */
int input_w = 600;
};
/**
 * @brief Face detection model wrapper
 */
class FaceDetection : public AsyncDetection<DetectedObject>, public BaseCnnDetection {
private:
/** @brief Detector configuration */
DetectorConfig m_config;
/** @brief Compiled model */
ov::CompiledModel m_model;
/** @brief Name of the model input */
std::string m_input_name;
/** @brief Name of the model output */
std::string m_output_name;
/** @brief Maximal number of detections in the output */
int m_max_detections_count = 0;
/** @brief Size of a single detection record in the output (NOTE(review): inferred from name — confirm) */
int m_object_size = 0;
/** @brief Number of frames currently enqueued */
int m_enqueued_frames = 0;
/** @brief Input frame width */
float m_width = 0;
/** @brief Input frame height */
float m_height = 0;
public:
/**
 * @brief Constructor
 * @param config Detector configuration
 */
explicit FaceDetection(const DetectorConfig& config);
/** @brief Submits the prepared inference request */
void submitRequest() override;
/** @brief Prepares the given frame for inference */
void enqueue(const cv::Mat& frame) override;
/** @brief Waits for the inference request to finish */
void wait() override { BaseCnnDetection::wait(); }
/** @brief Retrieves detected faces for the processed frame */
DetectedObjects fetchResults() override;
};
} // namespace detection

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "detector.hpp"
/**
 * @brief Outcome of registering an identity in the gallery
 */
enum class RegistrationStatus {
    SUCCESS,
    FAILURE_LOW_QUALITY,
    FAILURE_NOT_DETECTED,
};

/**
 * @brief Gallery entry: a labeled identity with its face embeddings
 */
struct GalleryObject {
    /** @brief Embedding vectors of the identity */
    std::vector<cv::Mat> embeddings;
    /** @brief Identity label */
    std::string label;
    /** @brief Identity id */
    int id;

    /**
     * @brief Constructor
     * @param embeddings Embedding vectors
     * @param label Identity label
     * @param id Identity id
     */
    GalleryObject(const std::vector<cv::Mat>& embeddings, const std::string& label, int id)
        : embeddings(embeddings),
          label(label),
          id(id) {}
};
/**
 * @brief Gallery of known identities and their face embeddings
 */
class EmbeddingsGallery {
public:
/** @brief Label returned for identities that are not in the gallery */
static const char unknown_label[];
/** @brief Id returned for identities that are not in the gallery */
static const int unknown_id;
/**
 * @brief Constructor
 * @param ids_list Gallery identities list (NOTE(review): presumably a path to a list file — confirm format in implementation)
 * @param threshold Re-identification distance threshold
 * @param min_size_fr Minimal face size for registration
 * @param crop_gallery Whether to crop gallery images by detected faces
 * @param detector_config Config of the face detector used during registration
 * @param landmarks_det Landmarks detection model
 * @param image_reid Re-identification model
 * @param use_greedy_matcher Use the greedy matcher instead of the default one
 */
EmbeddingsGallery(const std::string& ids_list, double threshold, int min_size_fr,
bool crop_gallery, const detection::DetectorConfig& detector_config,
VectorCNN& landmarks_det,
VectorCNN& image_reid,
bool use_greedy_matcher=false);
/** @brief Returns the number of registered identities */
size_t size() const;
/** @brief Matches embeddings against the gallery; returns one id per embedding */
std::vector<int> GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const;
/** @brief Returns the label of the identity with the given id */
std::string GetLabelByID(int id) const;
/** @brief Returns labels indexed by identity id */
std::vector<std::string> GetIDToLabelMap() const;
/** @brief Checks whether the given label exists in the gallery */
bool LabelExists(const std::string& label) const;
private:
/** @brief Registers a single identity and produces its embedding */
RegistrationStatus RegisterIdentity(const std::string& identity_label,
const cv::Mat& image,
int min_size_fr,
bool crop_gallery,
detection::FaceDetection& detector,
VectorCNN& landmarks_det,
VectorCNN& image_reid,
cv::Mat& embedding);
/** @brief Maps internal indices to identity ids */
std::vector<int> idx_to_id;
/** @brief Re-identification distance threshold */
double reid_threshold;
/** @brief Registered identities */
std::vector<GalleryObject> identities;
/** @brief Greedy matcher flag */
bool use_greedy_matcher;
};
/**
 * @brief Aligns face images using their facial landmarks
 *
 * @param face_images Face images (presumably modified in place — confirm in implementation)
 * @param landmarks_vec Landmarks for each face image
 */
void AlignFaces(std::vector<cv::Mat>* face_images,
std::vector<cv::Mat>* landmarks_vec);

View File

@@ -0,0 +1,62 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
#include <opencv2/opencv.hpp>
#include <utils/slog.hpp>
#include "actions.hpp"
#include "tracker/tracker.hpp"
/**
 * @brief Writes per-frame detection and action records to log streams
 */
class DetectionsLogger {
private:
/** @brief Whether logs are written */
bool m_write_logs;
/** @brief Stream for the action statistics log */
std::ofstream m_act_stat_log_stream;
/** @brief Storage for the action detection log */
cv::FileStorage m_act_det_log_stream;
/** @brief Destination for text logging */
slog::LogStream& m_log_stream;
public:
/**
 * @brief Constructor
 * @param stream Log stream for text output
 * @param enabled Enables writing logs
 * @param act_stat_log_file Path of the action statistics log file
 * @param act_det_log_file Path of the action detection log file
 */
explicit DetectionsLogger(slog::LogStream& stream, bool enabled,
const std::string& act_stat_log_file,
const std::string& act_det_log_file);
~DetectionsLogger();
/** @brief Starts a record for the next frame */
void CreateNextFrameRecord(const std::string& path, const int frame_idx,
const size_t width, const size_t height);
/** @brief Adds a recognized face with its action to the current frame record */
void AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action);
/** @brief Adds a person with its action to the current frame record */
void AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id);
/** @brief Adds a tracked detection to the current frame record */
void AddDetectionToFrame(const TrackedObject& object, const int frame_idx);
/** @brief Closes the current frame record */
void FinalizeFrameRecord();
/** @brief Dumps per-frame detections for the processed video */
void DumpDetections(const std::string& video_path,
const cv::Size frame_size,
const size_t num_frames,
const std::vector<Track>& face_tracks,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& action_idx_to_label,
const std::vector<std::string>& person_id_to_label,
const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps);
/** @brief Dumps aggregated event ranges per tracked object */
void DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
const std::vector<std::string>& action_idx_to_label,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& person_id_to_label);
};
// Assertion helpers built on OpenCV's CV_Assert.
// Arguments are parenthesized in the expansion so that operands containing
// low-precedence operators (e.g. SCR_CHECK_EQ(x & mask, mask)) are compared
// as written instead of being re-grouped by operator precedence
// (previously `CV_Assert(actual op expected)` parsed `x & 2 == 2` as `x & (2 == 2)`).
// The trailing semicolons inside the macros are kept for call-site compatibility.
#define SCR_CHECK(cond) CV_Assert((cond));
#define SCR_CHECK_BINARY(actual, expected, op) \
    CV_Assert((actual) op (expected));
#define SCR_CHECK_EQ(actual, expected) SCR_CHECK_BINARY(actual, expected, ==)
#define SCR_CHECK_NE(actual, expected) SCR_CHECK_BINARY(actual, expected, !=)
#define SCR_CHECK_LT(actual, expected) SCR_CHECK_BINARY(actual, expected, <)
#define SCR_CHECK_GT(actual, expected) SCR_CHECK_BINARY(actual, expected, >)
#define SCR_CHECK_LE(actual, expected) SCR_CHECK_BINARY(actual, expected, <=)
#define SCR_CHECK_GE(actual, expected) SCR_CHECK_BINARY(actual, expected, >=)