// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
|
||
|
|
|
||
|
|
#pragma once
|
||
|
|
|
||
|
|
#include <map>
|
||
|
|
#include <string>
|
||
|
|
#include <vector>
|
||
|
|
|
||
|
|
#include <opencv2/core/core.hpp>
|
||
|
|
|
||
|
|
#include "cnn.hpp"
|
||
|
|
|
||
|
|
#include "openvino/openvino.hpp"
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief Class for detection with action info
|
||
|
|
*/
|
||
|
|
struct DetectedAction {
|
||
|
|
/** @brief BBox of detection */
|
||
|
|
cv::Rect rect;
|
||
|
|
/** @brief Action label */
|
||
|
|
int label;
|
||
|
|
/** @brief Confidence of detection */
|
||
|
|
float detection_conf;
|
||
|
|
/** @brief Confidence of predicted action */
|
||
|
|
float action_conf;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief Constructor
|
||
|
|
*/
|
||
|
|
DetectedAction(const cv::Rect& rect, int label,
|
||
|
|
float detection_conf, float action_conf)
|
||
|
|
: rect(rect), label(label), detection_conf(detection_conf),
|
||
|
|
action_conf(action_conf) {}
|
||
|
|
};
|
||
|
|
using DetectedActions = std::vector<DetectedAction>;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief Class to store SSD-based head info
|
||
|
|
*/
|
||
|
|
struct SSDHead {
|
||
|
|
/** @brief Step size for the head */
|
||
|
|
int step;
|
||
|
|
/** @brief Vector of anchors */
|
||
|
|
std::vector<cv::Size2f> anchors;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief Constructor
|
||
|
|
*/
|
||
|
|
SSDHead(int step, const std::vector<cv::Size2f>& anchors) : step(step), anchors(anchors) {}
|
||
|
|
};
|
||
|
|
using SSDHeads = std::vector<SSDHead>;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief Config for the Action Detection model
|
||
|
|
*/
|
||
|
|
struct ActionDetectorConfig : public CnnConfig {
|
||
|
|
explicit ActionDetectorConfig(const std::string& path_to_model, const std::string& model_type)
|
||
|
|
: CnnConfig(path_to_model, model_type) {}
|
||
|
|
|
||
|
|
/** @brief Name of output blob with location info */
|
||
|
|
std::string old_loc_blob_name{"mbox_loc1/out/conv/flat"};
|
||
|
|
/** @brief Name of output blob with detection confidence info */
|
||
|
|
std::string old_det_conf_blob_name{"mbox_main_conf/out/conv/flat/softmax/flat"};
|
||
|
|
/** @brief Prefix of name of output blob with action confidence info */
|
||
|
|
std::string old_action_conf_blob_name_prefix{"out/anchor"};
|
||
|
|
/** @brief Name of output blob with priorbox info */
|
||
|
|
std::string old_priorbox_blob_name{"mbox/priorbox"};
|
||
|
|
|
||
|
|
/** @brief Name of output blob with location info */
|
||
|
|
std::string new_loc_blob_name{"ActionNet/out_detection_loc"};
|
||
|
|
/** @brief Name of output blob with detection confidence info */
|
||
|
|
std::string new_det_conf_blob_name{"ActionNet/out_detection_conf"};
|
||
|
|
/** @brief Prefix of name of output blob with action confidence info */
|
||
|
|
std::string new_action_conf_blob_name_prefix{"ActionNet/action_heads/out_head_"};
|
||
|
|
/** @brief Suffix of name of output blob with action confidence info */
|
||
|
|
std::string new_action_conf_blob_name_suffix{"_anchor_"};
|
||
|
|
|
||
|
|
/** @brief Scale parameter for Soft-NMS algorithm */
|
||
|
|
float nms_sigma = 0.6f;
|
||
|
|
/** @brief Threshold for detected objects */
|
||
|
|
float detection_confidence_threshold = 0.4f;
|
||
|
|
/** @brief Threshold for recognized actions */
|
||
|
|
float action_confidence_threshold = 0.75f;
|
||
|
|
/** @brief Scale of action logits for the old network version */
|
||
|
|
float old_action_scale = 3.f;
|
||
|
|
/** @brief Scale of action logits for the new network version */
|
||
|
|
float new_action_scale = 16.f;
|
||
|
|
/** @brief Default action class label */
|
||
|
|
int default_action_id = 0;
|
||
|
|
/** @brief Number of top-score bboxes in output */
|
||
|
|
size_t keep_top_k = 200;
|
||
|
|
/** @brief Number of SSD anchors for the old network version */
|
||
|
|
std::vector<int> old_anchors{4};
|
||
|
|
/** @brief Number of SSD anchors for the new network version */
|
||
|
|
std::vector<int> new_anchors{1, 4};
|
||
|
|
/** @brief Number of actions to detect */
|
||
|
|
size_t num_action_classes = 3;
|
||
|
|
/** @brief Async execution flag */
|
||
|
|
bool is_async = true;
|
||
|
|
/** @brief SSD bbox encoding variances */
|
||
|
|
float variances[4]{0.1f, 0.1f, 0.2f, 0.2f};
|
||
|
|
SSDHeads new_det_heads{{8, {{26.17863728f, 58.670372f}}},
|
||
|
|
{16, {{35.36f, 81.829632f},
|
||
|
|
{45.8114572f, 107.651852f},
|
||
|
|
{63.31491832f, 142.595732f},
|
||
|
|
{93.5070856f, 201.107692f}}}};
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
/**
 * @brief Action detector deriving from AsyncDetection<DetectedAction> and BaseCnnDetection
 */
class ActionDetection : public AsyncDetection<DetectedAction>, public BaseCnnDetection {
public:
    /**
     * @brief Constructor
     *
     * @param config Configuration of the detector
     */
    explicit ActionDetection(const ActionDetectorConfig& config);

    void submitRequest() override;
    void enqueue(const cv::Mat& frame) override;
    /** @brief Forwards to BaseCnnDetection::wait() */
    void wait() override { BaseCnnDetection::wait(); }
    DetectedActions fetchResults() override;

private:
    ActionDetectorConfig m_config;
    ov::CompiledModel m_model;
    ov::Layout m_modelLayout;
    std::string m_input_name;
    std::map<std::string, ov::Tensor> m_outputs;

    int m_enqueued_frames = 0;
    float m_width = 0;
    float m_height = 0;
    bool m_new_model = false;
    std::vector<int> m_head_ranges;
    std::vector<int> m_head_step_sizes;
    std::vector<cv::Size> m_head_blob_sizes;
    std::vector<std::vector<int>> m_glob_anchor_map;
    std::vector<std::string> m_glob_anchor_names;
    int m_num_glob_anchors = 0;
    cv::Size m_network_input_size;
    // Default-initialized like the other scalar members so that they never hold
    // indeterminate values; the constructor (defined outside this header) is
    // expected to assign the real values - TODO confirm.
    int m_num_candidates = 0;
    bool m_binary_task = false;

    /**
     * @brief BBox in normalized form (each coordinate is in range [0;1]).
     */
    struct NormalizedBBox {
        float xmin;
        float ymin;
        float xmax;
        float ymax;
    };
    using NormalizedBBoxes = std::vector<NormalizedBBox>;

    /**
     * @brief Translates the detections from the network outputs
     *
     * @param loc Location buffer
     * @param main_conf Detection conf buffer
     * @param priorboxes Priorboxes buffer
     * @param add_conf Action conf buffers
     * @param frame_size Size of input image (WxH)
     * @return Detected objects
     */
    DetectedActions GetDetections(const cv::Mat& loc,
                                  const cv::Mat& main_conf,
                                  const cv::Mat& priorboxes,
                                  const std::vector<cv::Mat>& add_conf,
                                  const cv::Size& frame_size) const;

    /**
     * @brief Translate input buffer to BBox
     *
     * @param data Input buffer
     * @param inverse NOTE(review): presumably selects an alternative coordinate
     *                order in the buffer - confirm against the definition
     * @return BBox
     */
    inline NormalizedBBox
    ParseBBoxRecord(const float* data, bool inverse) const;

    /**
     * @brief Generates a prior box
     *
     * NOTE(review): parameter meanings below are inferred from their names and
     * types - confirm against the definition.
     *
     * @param pos Position index
     * @param step Step size of the head
     * @param anchor Anchor size
     * @param blob_size Size of the head blob
     * @return BBox
     */
    inline NormalizedBBox
    GeneratePriorBox(int pos, int step, const cv::Size2f& anchor, const cv::Size& blob_size) const;

    /**
     * @brief Translates input blobs in SSD format to bbox in CV_Rect
     *
     * @param prior_bbox Prior boxes in SSD format
     * @param variances Variances of prior boxes in SSD format
     * @param encoded_bbox BBox to decode
     * @param frame_size Size of input image (WxH)
     * @return BBox in CV_Rect format
     */
    cv::Rect ConvertToRect(const NormalizedBBox& prior_bbox,
                           const NormalizedBBox& variances,
                           const NormalizedBBox& encoded_bbox,
                           const cv::Size& frame_size) const;

    /**
     * @brief Carry out Soft Non-Maximum Suppression algorithm under detected actions
     *
     * @param detections Detected actions
     * @param sigma Scale parameter
     * @param top_k Number of top-score bboxes
     * @param min_det_conf Minimum detection confidence
     * @param out_indices Out indices of valid detections
     */
    void SoftNonMaxSuppression(const DetectedActions& detections,
                               const float sigma,
                               size_t top_k,
                               const float min_det_conf,
                               std::vector<int>* out_indices) const;
};
|