Initial setup for CLion

This commit is contained in:
2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions

View File

@@ -0,0 +1,217 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "openvino/openvino.hpp"
/**
 * @brief Detection result augmented with an action classification
 */
struct DetectedAction {
    /** @brief Bounding box of the detection */
    cv::Rect rect;
    /** @brief Predicted action class label */
    int label;
    /** @brief Confidence of the detection */
    float detection_conf;
    /** @brief Confidence of the predicted action */
    float action_conf;

    /**
     * @brief Constructor
     * @param rect Bounding box of the detection
     * @param label Action class label
     * @param detection_conf Detection confidence
     * @param action_conf Action confidence
     */
    DetectedAction(const cv::Rect& rect,
                   int label,
                   float detection_conf,
                   float action_conf)
        : rect(rect),
          label(label),
          detection_conf(detection_conf),
          action_conf(action_conf) {}
};

using DetectedActions = std::vector<DetectedAction>;
/**
 * @brief Description of a single SSD detection head
 */
struct SSDHead {
    /** @brief Step size of the head */
    int step;
    /** @brief Anchor sizes used by this head */
    std::vector<cv::Size2f> anchors;

    /**
     * @brief Constructor
     * @param step Step size of the head
     * @param anchors Anchor sizes of the head
     */
    SSDHead(int step, const std::vector<cv::Size2f>& anchors)
        : step(step),
          anchors(anchors) {}
};

using SSDHeads = std::vector<SSDHead>;
/**
 * @brief Config for the Action Detection model
 */
struct ActionDetectorConfig : public CnnConfig {
/**
 * @brief Constructor
 * @param path_to_model Path to the model description
 * @param model_type Model type tag
 */
explicit ActionDetectorConfig(const std::string& path_to_model, const std::string& model_type)
: CnnConfig(path_to_model, model_type) {}
/** @brief Name of output blob with location info (old network version) */
std::string old_loc_blob_name{"mbox_loc1/out/conv/flat"};
/** @brief Name of output blob with detection confidence info (old network version) */
std::string old_det_conf_blob_name{"mbox_main_conf/out/conv/flat/softmax/flat"};
/** @brief Prefix of name of output blob with action confidence info (old network version) */
std::string old_action_conf_blob_name_prefix{"out/anchor"};
/** @brief Name of output blob with priorbox info (old network version) */
std::string old_priorbox_blob_name{"mbox/priorbox"};
/** @brief Name of output blob with location info (new network version) */
std::string new_loc_blob_name{"ActionNet/out_detection_loc"};
/** @brief Name of output blob with detection confidence info (new network version) */
std::string new_det_conf_blob_name{"ActionNet/out_detection_conf"};
/** @brief Prefix of name of output blob with action confidence info (new network version) */
std::string new_action_conf_blob_name_prefix{"ActionNet/action_heads/out_head_"};
/** @brief Suffix of name of output blob with action confidence info (new network version) */
std::string new_action_conf_blob_name_suffix{"_anchor_"};
/** @brief Scale parameter for Soft-NMS algorithm */
float nms_sigma = 0.6f;
/** @brief Threshold for detected objects */
float detection_confidence_threshold = 0.4f;
/** @brief Threshold for recognized actions */
float action_confidence_threshold = 0.75f;
/** @brief Scale of action logits for the old network version */
float old_action_scale = 3.f;
/** @brief Scale of action logits for the new network version */
float new_action_scale = 16.f;
/** @brief Default action class label */
int default_action_id = 0;
/** @brief Number of top-score bboxes in output */
size_t keep_top_k = 200;
/** @brief Number of SSD anchors for the old network version */
std::vector<int> old_anchors{4};
/** @brief Number of SSD anchors for the new network version */
std::vector<int> new_anchors{1, 4};
/** @brief Number of actions to detect */
size_t num_action_classes = 3;
/** @brief Async execution flag */
bool is_async = true;
/** @brief SSD bbox encoding variances */
float variances[4]{0.1f, 0.1f, 0.2f, 0.2f};
/** @brief SSD detection heads (step size + anchor sizes) for the new network version */
SSDHeads new_det_heads{{8, {{26.17863728f, 58.670372f}}},
{16, {{35.36f, 81.829632f},
{45.8114572f, 107.651852f},
{63.31491832f, 142.595732f},
{93.5070856f, 201.107692f}}}};
};
/**
 * @brief Detector that localizes persons and classifies their actions
 */
class ActionDetection : public AsyncDetection<DetectedAction>, public BaseCnnDetection {
public:
/**
 * @brief Constructor
 * @param config Action detector configuration
 */
explicit ActionDetection(const ActionDetectorConfig& config);
/** @brief Submits the prepared inference request */
void submitRequest() override;
/** @brief Prepares the given frame for inference */
void enqueue(const cv::Mat& frame) override;
/** @brief Waits for the inference request to finish */
void wait() override { BaseCnnDetection::wait(); }
/** @brief Retrieves detected actions for the processed frame */
DetectedActions fetchResults() override;
private:
/** @brief Detector configuration */
ActionDetectorConfig m_config;
/** @brief Compiled model */
ov::CompiledModel m_model;
/** @brief Model layout */
ov::Layout m_modelLayout;
/** @brief Name of the model input */
std::string m_input_name;
/** @brief Output tensors keyed by tensor name */
std::map<std::string, ov::Tensor> m_outputs;
/** @brief Number of frames currently enqueued */
int m_enqueued_frames = 0;
/** @brief Input frame width */
float m_width = 0;
/** @brief Input frame height */
float m_height = 0;
/** @brief True when the new network version is used (see new_* config fields) */
bool m_new_model = false;
/** @brief Anchor index ranges per SSD head (NOTE(review): inferred from name — confirm in implementation) */
std::vector<int> m_head_ranges;
/** @brief Step size of each SSD head */
std::vector<int> m_head_step_sizes;
/** @brief Feature-map size of each SSD head */
std::vector<cv::Size> m_head_blob_sizes;
/** @brief Maps head/anchor indices to global anchor indices */
std::vector<std::vector<int>> m_glob_anchor_map;
/** @brief Output tensor name per global anchor */
std::vector<std::string> m_glob_anchor_names;
/** @brief Total number of global anchors */
int m_num_glob_anchors = 0;
/** @brief Network input size (WxH) */
cv::Size m_network_input_size;
/** @brief Number of candidate detections (NOTE(review): inferred from name — confirm) */
int m_num_candidates;
/** @brief Whether action classification is binary (NOTE(review): inferred from name — confirm) */
bool m_binary_task;
/**
 * @brief BBox in normalized form (each coordinate is in range [0;1]).
 */
struct NormalizedBBox {
float xmin;
float ymin;
float xmax;
float ymax;
};
typedef std::vector<NormalizedBBox> NormalizedBBoxes;
/**
 * @brief Translates the detections from the network outputs
 *
 * @param loc Location buffer
 * @param main_conf Detection conf buffer
 * @param priorboxes Priorboxes buffer
 * @param add_conf Action conf buffers
 * @param frame_size Size of input image (WxH)
 * @return Detected objects
 */
DetectedActions GetDetections(const cv::Mat& loc,
const cv::Mat& main_conf,
const cv::Mat& priorboxes,
const std::vector<cv::Mat>& add_conf,
const cv::Size& frame_size) const;
/**
 * @brief Translates an input buffer to a normalized BBox
 *
 * @param data Input buffer
 * @param inverse Presumably swaps the coordinate order in the record — TODO confirm in implementation
 * @return BBox
 */
inline NormalizedBBox
ParseBBoxRecord(const float* data, bool inverse) const;
/**
 * @brief Generates an SSD prior box for the given feature-map position and anchor
 *
 * @param pos Position in the head's feature map
 * @param step Step size of the head
 * @param anchor Anchor size
 * @param blob_size Size of the head's feature map
 * @return Prior box in normalized form
 */
inline NormalizedBBox
GeneratePriorBox(int pos, int step, const cv::Size2f& anchor, const cv::Size& blob_size) const;
/**
 * @brief Translates input blobs in SSD format to bbox in CV_Rect
 *
 * @param prior_bbox Prior boxes in SSD format
 * @param variances Variances of prior boxes in SSD format
 * @param encoded_bbox BBox to decode
 * @param frame_size Size of input image (WxH)
 * @return BBox in CV_Rect format
 */
cv::Rect ConvertToRect(const NormalizedBBox& prior_bbox,
const NormalizedBBox& variances,
const NormalizedBBox& encoded_bbox,
const cv::Size& frame_size) const;
/**
 * @brief Carry out Soft Non-Maximum Suppression algorithm under detected actions
 *
 * @param detections Detected actions
 * @param sigma Scale parameter
 * @param top_k Number of top-score bboxes
 * @param min_det_conf Minimum detection confidence
 * @param out_indices Out indices of valid detections
 */
void SoftNonMaxSuppression(const DetectedActions& detections,
const float sigma,
size_t top_k,
const float min_det_conf,
std::vector<int>* out_indices) const;
};

View File

@@ -0,0 +1,50 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
/**
 * @brief Action label (index of an action class)
 */
using Action = int;

/**
 * @brief Event on a single frame with its action label
 */
struct FrameEvent {
    /** @brief Frame index */
    int frame_id;
    /** @brief Action label */
    Action action;

    /**
     * @brief Constructor
     * @param frame_id Frame index
     * @param action Action label
     */
    FrameEvent(int frame_id, Action action)
        : frame_id(frame_id),
          action(action) {}
};

using FrameEventsTrack = std::vector<FrameEvent>;

/**
 * @brief Run of consecutive frames that share the same action
 */
struct RangeEvent {
    /** @brief First frame index of the range */
    int begin_frame_id;
    /** @brief Index one past the last valid frame */
    int end_frame_id;
    /** @brief Action label */
    Action action;

    /**
     * @brief Constructor
     * @param begin_frame_id First frame of the range
     * @param end_frame_id One past the last frame of the range
     * @param action Action label
     */
    RangeEvent(int begin_frame_id, int end_frame_id, Action action)
        : begin_frame_id(begin_frame_id),
          end_frame_id(end_frame_id),
          action(action) {}
};

using RangeEventsTrack = std::vector<RangeEvent>;

/** @brief Role of the tracked person for action recognition */
enum ActionsType { STUDENT, TEACHER, TOP_K };

View File

@@ -0,0 +1,145 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <functional>
#include "openvino/openvino.hpp"
#include "utils/ocv_common.hpp"
/**
 * @brief Base configuration for a CNN model
 */
struct CnnConfig {
    /**
     * @brief Constructor
     * @param path_to_model Path to the model description
     * @param model_type Model type tag (optional)
     */
    explicit CnnConfig(const std::string& path_to_model, const std::string& model_type = "")
        : m_path_to_model(path_to_model),
          m_model_type(model_type) {}

    /** @brief Path to model description */
    std::string m_path_to_model;
    /** @brief Model type */
    std::string m_model_type;
    /** @brief Maximal size of batch */
    int m_max_batch_size{1};
    /** @brief OpenVINO Core instance */
    ov::Core m_core;
    /** @brief Device name */
    std::string m_deviceName;
};
/**
 * @brief Base class of model
 */
class CnnDLSDKBase {
public:
using Config = CnnConfig;
/**
 * @brief Constructor
 * @param config Model configuration
 */
explicit CnnDLSDKBase(const Config& config);
/**
 * @brief Destructor
 */
~CnnDLSDKBase() {}
/**
 * @brief Loads network
 */
void Load();
protected:
/**
 * @brief Run model in batch mode
 *
 * @param frames Vector of input images
 * @param results_fetcher Callback to fetch inference results
 */
void InferBatch(const std::vector<cv::Mat>& frames,
const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& results_fetcher);
/** @brief Config */
Config m_config;
/** @brief Model inputs info */
ov::OutputVector m_inInfo;
/** @brief Model outputs info */
ov::OutputVector m_outInfo_;
/** @brief Model layout */
ov::Layout m_desired_layout;
/** @brief Model input shape */
ov::Shape m_modelShape;
/** @brief Compiled model */
ov::CompiledModel m_compiled_model;
/** @brief Inference request */
ov::InferRequest m_infer_request;
/** @brief Input tensor */
ov::Tensor m_in_tensor;
/** @brief Names of output tensors */
std::vector<std::string> m_output_tensors_names;
};
/**
 * @brief Model that produces a feature vector for an input image
 */
class VectorCNN : public CnnDLSDKBase {
public:
/**
 * @brief Constructor
 * @param config Model configuration
 */
explicit VectorCNN(const CnnConfig& config);
/**
 * @brief Runs inference on a single image and writes the result into *vector
 * @param image Input image
 * @param vector Output buffer for the result
 * @param outp_shape Desired output shape (empty by default — presumably keeps the model's shape; confirm in implementation)
 */
void Compute(const cv::Mat& image,
cv::Mat* vector, cv::Size outp_shape = cv::Size());
/**
 * @brief Batch variant: one output per input image
 * @param images Input images
 * @param vectors Output buffers, one per image
 * @param outp_shape Desired output shape (empty by default)
 */
void Compute(const std::vector<cv::Mat>& images,
std::vector<cv::Mat>* vectors, cv::Size outp_shape = cv::Size());
/** @brief Returns the maximal batch size */
int maxBatchSize() const;
};
/**
 * @brief Interface for algorithms that process frames, possibly asynchronously
 */
class AsyncAlgorithm {
public:
    /** @brief Virtual destructor for safe deletion through the interface (defaulted per C.80) */
    virtual ~AsyncAlgorithm() = default;
    /** @brief Queues a frame for processing */
    virtual void enqueue(const cv::Mat& frame) = 0;
    /** @brief Submits the queued work for execution */
    virtual void submitRequest() = 0;
    /** @brief Blocks until the submitted work completes */
    virtual void wait() = 0;
};
/**
 * @brief Asynchronous detection interface producing results of type T
 */
template <typename T>
class AsyncDetection : public AsyncAlgorithm {
public:
/** @brief Retrieves the detection results */
virtual std::vector<T> fetchResults() = 0;
};
/**
 * @brief No-op detection stub: accepts frames and never reports results
 */
template <typename T>
class NullDetection : public AsyncDetection<T> {
public:
    /** @brief Ignores the frame */
    void enqueue(const cv::Mat&) override {}
    /** @brief Does nothing */
    void submitRequest() override {}
    /** @brief Returns immediately */
    void wait() override {}
    /** @brief Always returns an empty result set */
    std::vector<T> fetchResults() override {
        return {};
    }
};
/**
 * @brief Base helper that drives a single inference request in sync or async mode
 */
class BaseCnnDetection : public AsyncAlgorithm {
protected:
    /** @brief Inference request shared with the derived detector */
    std::shared_ptr<ov::InferRequest> m_request;
    /** @brief Whether requests are submitted asynchronously */
    const bool m_isAsync;
    /** @brief Human-readable detector name */
    std::string m_detectorName;

public:
    /**
     * @brief Constructor
     * @param isAsync Enables asynchronous submission
     */
    explicit BaseCnnDetection(bool isAsync = false) : m_isAsync(isAsync) {}

    /** @brief Starts the request: async when enabled, blocking otherwise; no-op without a request */
    void submitRequest() override {
        if (!m_request) {
            return;
        }
        if (m_isAsync) {
            m_request->start_async();
        } else {
            m_request->infer();
        }
    }

    /** @brief Waits for an in-flight async request; no-op in sync mode or without a request */
    void wait() override {
        if (m_request && m_isAsync) {
            m_request->wait();
        }
    }
};

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "openvino/openvino.hpp"
#include "cnn.hpp"
namespace detection {
/**
 * @brief Detection result: bounding box plus confidence score
 */
struct DetectedObject {
    /** @brief Bounding box of the detection */
    cv::Rect rect;
    /** @brief Detection confidence (defaults to -1.0f) */
    float confidence;

    /**
     * @brief Constructor
     * @param rect Bounding box
     * @param confidence Confidence score
     */
    explicit DetectedObject(const cv::Rect& rect = cv::Rect(), float confidence = -1.0f)
        : rect(rect),
          confidence(confidence) {}
};

using DetectedObjects = std::vector<DetectedObject>;
/**
 * @brief Config for the face detection model
 */
struct DetectorConfig : public CnnConfig {
/**
 * @brief Constructor
 * @param path_to_model Path to the model description
 */
explicit DetectorConfig(const std::string& path_to_model) :
CnnConfig(path_to_model) {}
/** @brief Minimal confidence for a detection to be reported */
float confidence_threshold{0.6f};
/** @brief Horizontal bbox enlargement factor (NOTE(review): inferred from name — confirm in implementation) */
float increase_scale_x{1.15f};
/** @brief Vertical bbox enlargement factor (NOTE(review): inferred from name — confirm in implementation) */
float increase_scale_y{1.15f};
/** @brief Async execution flag */
bool is_async = true;
/** @brief Network input height */
int input_h = 600;
/** @brief Network input width */
int input_w = 600;
};
/**
 * @brief Face detection model wrapper
 */
class FaceDetection : public AsyncDetection<DetectedObject>, public BaseCnnDetection {
private:
/** @brief Detector configuration */
DetectorConfig m_config;
/** @brief Compiled model */
ov::CompiledModel m_model;
/** @brief Name of the model input */
std::string m_input_name;
/** @brief Name of the model output */
std::string m_output_name;
/** @brief Maximal number of detections in the output */
int m_max_detections_count = 0;
/** @brief Size of a single detection record in the output (NOTE(review): inferred from name — confirm) */
int m_object_size = 0;
/** @brief Number of frames currently enqueued */
int m_enqueued_frames = 0;
/** @brief Input frame width */
float m_width = 0;
/** @brief Input frame height */
float m_height = 0;
public:
/**
 * @brief Constructor
 * @param config Detector configuration
 */
explicit FaceDetection(const DetectorConfig& config);
/** @brief Submits the prepared inference request */
void submitRequest() override;
/** @brief Prepares the given frame for inference */
void enqueue(const cv::Mat& frame) override;
/** @brief Waits for the inference request to finish */
void wait() override { BaseCnnDetection::wait(); }
/** @brief Retrieves detected faces for the processed frame */
DetectedObjects fetchResults() override;
};
} // namespace detection

View File

@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "detector.hpp"
/**
 * @brief Outcome of registering an identity in the gallery
 */
enum class RegistrationStatus {
    SUCCESS,
    FAILURE_LOW_QUALITY,
    FAILURE_NOT_DETECTED,
};

/**
 * @brief Gallery entry: a labeled identity with its face embeddings
 */
struct GalleryObject {
    /** @brief Embedding vectors of the identity */
    std::vector<cv::Mat> embeddings;
    /** @brief Identity label */
    std::string label;
    /** @brief Identity id */
    int id;

    /**
     * @brief Constructor
     * @param embeddings Embedding vectors
     * @param label Identity label
     * @param id Identity id
     */
    GalleryObject(const std::vector<cv::Mat>& embeddings, const std::string& label, int id)
        : embeddings(embeddings),
          label(label),
          id(id) {}
};
/**
 * @brief Gallery of known identities and their face embeddings
 */
class EmbeddingsGallery {
public:
/** @brief Label returned for identities that are not in the gallery */
static const char unknown_label[];
/** @brief Id returned for identities that are not in the gallery */
static const int unknown_id;
/**
 * @brief Constructor
 * @param ids_list Gallery identities list (NOTE(review): presumably a path to a list file — confirm format in implementation)
 * @param threshold Re-identification distance threshold
 * @param min_size_fr Minimal face size for registration
 * @param crop_gallery Whether to crop gallery images by detected faces
 * @param detector_config Config of the face detector used during registration
 * @param landmarks_det Landmarks detection model
 * @param image_reid Re-identification model
 * @param use_greedy_matcher Use the greedy matcher instead of the default one
 */
EmbeddingsGallery(const std::string& ids_list, double threshold, int min_size_fr,
bool crop_gallery, const detection::DetectorConfig& detector_config,
VectorCNN& landmarks_det,
VectorCNN& image_reid,
bool use_greedy_matcher=false);
/** @brief Returns the number of registered identities */
size_t size() const;
/** @brief Matches embeddings against the gallery; returns one id per embedding */
std::vector<int> GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const;
/** @brief Returns the label of the identity with the given id */
std::string GetLabelByID(int id) const;
/** @brief Returns labels indexed by identity id */
std::vector<std::string> GetIDToLabelMap() const;
/** @brief Checks whether the given label exists in the gallery */
bool LabelExists(const std::string& label) const;
private:
/** @brief Registers a single identity and produces its embedding */
RegistrationStatus RegisterIdentity(const std::string& identity_label,
const cv::Mat& image,
int min_size_fr,
bool crop_gallery,
detection::FaceDetection& detector,
VectorCNN& landmarks_det,
VectorCNN& image_reid,
cv::Mat& embedding);
/** @brief Maps internal indices to identity ids */
std::vector<int> idx_to_id;
/** @brief Re-identification distance threshold */
double reid_threshold;
/** @brief Registered identities */
std::vector<GalleryObject> identities;
/** @brief Greedy matcher flag */
bool use_greedy_matcher;
};
/**
 * @brief Aligns face images using their facial landmarks
 *
 * @param face_images Face images (presumably modified in place — confirm in implementation)
 * @param landmarks_vec Landmarks for each face image
 */
void AlignFaces(std::vector<cv::Mat>* face_images,
std::vector<cv::Mat>* landmarks_vec);

View File

@@ -0,0 +1,62 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
#include <opencv2/opencv.hpp>
#include <utils/slog.hpp>
#include "actions.hpp"
#include "tracker/tracker.hpp"
/**
 * @brief Writes per-frame detection and action records to log streams
 */
class DetectionsLogger {
private:
/** @brief Whether logs are written */
bool m_write_logs;
/** @brief Stream for the action statistics log */
std::ofstream m_act_stat_log_stream;
/** @brief Storage for the action detection log */
cv::FileStorage m_act_det_log_stream;
/** @brief Destination for text logging */
slog::LogStream& m_log_stream;
public:
/**
 * @brief Constructor
 * @param stream Log stream for text output
 * @param enabled Enables writing logs
 * @param act_stat_log_file Path of the action statistics log file
 * @param act_det_log_file Path of the action detection log file
 */
explicit DetectionsLogger(slog::LogStream& stream, bool enabled,
const std::string& act_stat_log_file,
const std::string& act_det_log_file);
~DetectionsLogger();
/** @brief Starts a record for the next frame */
void CreateNextFrameRecord(const std::string& path, const int frame_idx,
const size_t width, const size_t height);
/** @brief Adds a recognized face with its action to the current frame record */
void AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action);
/** @brief Adds a person with its action to the current frame record */
void AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id);
/** @brief Adds a tracked detection to the current frame record */
void AddDetectionToFrame(const TrackedObject& object, const int frame_idx);
/** @brief Closes the current frame record */
void FinalizeFrameRecord();
/** @brief Dumps per-frame detections for the processed video */
void DumpDetections(const std::string& video_path,
const cv::Size frame_size,
const size_t num_frames,
const std::vector<Track>& face_tracks,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& action_idx_to_label,
const std::vector<std::string>& person_id_to_label,
const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps);
/** @brief Dumps aggregated event ranges per tracked object */
void DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
const std::vector<std::string>& action_idx_to_label,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& person_id_to_label);
};
// Assertion helpers built on OpenCV's CV_Assert.
// Arguments are parenthesized in the expansion so that operands containing
// low-precedence operators (e.g. SCR_CHECK_EQ(x & mask, mask)) are compared
// as written instead of being re-grouped by operator precedence
// (previously `CV_Assert(actual op expected)` parsed `x & 2 == 2` as `x & (2 == 2)`).
// The trailing semicolons inside the macros are kept for call-site compatibility.
#define SCR_CHECK(cond) CV_Assert((cond));
#define SCR_CHECK_BINARY(actual, expected, op) \
    CV_Assert((actual) op (expected));
#define SCR_CHECK_EQ(actual, expected) SCR_CHECK_BINARY(actual, expected, ==)
#define SCR_CHECK_NE(actual, expected) SCR_CHECK_BINARY(actual, expected, !=)
#define SCR_CHECK_LT(actual, expected) SCR_CHECK_BINARY(actual, expected, <)
#define SCR_CHECK_GT(actual, expected) SCR_CHECK_BINARY(actual, expected, >)
#define SCR_CHECK_LE(actual, expected) SCR_CHECK_BINARY(actual, expected, <=)
#define SCR_CHECK_GE(actual, expected) SCR_CHECK_BINARY(actual, expected, >=)