Files

218 lines
7.4 KiB
C++
Raw Permalink Normal View History

2026-03-28 16:54:11 +11:00
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include "cnn.hpp"
#include "openvino/openvino.hpp"
/**
* @brief Class for detection with action info
*/
struct DetectedAction {
/** @brief BBox of detection */
cv::Rect rect;
/** @brief Action label */
int label;
/** @brief Confidence of detection */
float detection_conf;
/** @brief Confidence of predicted action */
float action_conf;
/**
* @brief Constructor
*/
DetectedAction(const cv::Rect& rect, int label,
float detection_conf, float action_conf)
: rect(rect), label(label), detection_conf(detection_conf),
action_conf(action_conf) {}
};
using DetectedActions = std::vector<DetectedAction>;
/**
 * @brief Description of a single SSD-based head
 */
struct SSDHead {
    /** @brief Step size of the head */
    int step;
    /** @brief Anchors of the head */
    std::vector<cv::Size2f> anchors;

    /**
     * @brief Creates a head description from a step size and a set of anchors
     *
     * @param step Step size of the head
     * @param anchors Anchors of the head; the vector is copied into the new object
     */
    SSDHead(int step, const std::vector<cv::Size2f>& anchors)
        : step(step),
          anchors(anchors) {}
};

/** @brief Sequence of SSD head descriptions */
using SSDHeads = std::vector<SSDHead>;
/**
 * @brief Settings of the Action Detection model
 *
 * Extends CnnConfig with output blob names, thresholds and SSD parameters.
 * Fields prefixed with `old_` and `new_` hold the values for the old and the
 * new network version respectively.
 */
struct ActionDetectorConfig : public CnnConfig {
    /**
     * @brief Forwards the model path and the model type to CnnConfig
     *
     * @param path_to_model Path to the model
     * @param model_type Type of the model
     */
    explicit ActionDetectorConfig(const std::string& path_to_model, const std::string& model_type)
        : CnnConfig(path_to_model, model_type) {}

    // ---- Output blob names, old network version ----

    /** @brief Output blob holding location info */
    std::string old_loc_blob_name = "mbox_loc1/out/conv/flat";
    /** @brief Output blob holding detection confidence info */
    std::string old_det_conf_blob_name = "mbox_main_conf/out/conv/flat/softmax/flat";
    /** @brief Name prefix of the output blobs holding action confidence info */
    std::string old_action_conf_blob_name_prefix = "out/anchor";
    /** @brief Output blob holding priorbox info */
    std::string old_priorbox_blob_name = "mbox/priorbox";

    // ---- Output blob names, new network version ----

    /** @brief Output blob holding location info */
    std::string new_loc_blob_name = "ActionNet/out_detection_loc";
    /** @brief Output blob holding detection confidence info */
    std::string new_det_conf_blob_name = "ActionNet/out_detection_conf";
    /** @brief Name prefix of the output blobs holding action confidence info */
    std::string new_action_conf_blob_name_prefix = "ActionNet/action_heads/out_head_";
    /** @brief Name suffix of the output blobs holding action confidence info */
    std::string new_action_conf_blob_name_suffix = "_anchor_";

    // ---- Post-processing parameters ----

    /** @brief Scale parameter of the Soft-NMS algorithm */
    float nms_sigma{0.6f};
    /** @brief Confidence threshold for detected objects */
    float detection_confidence_threshold{0.4f};
    /** @brief Confidence threshold for recognized actions */
    float action_confidence_threshold{0.75f};
    /** @brief Scale of action logits, old network version */
    float old_action_scale{3.f};
    /** @brief Scale of action logits, new network version */
    float new_action_scale{16.f};
    /** @brief Action class label used by default */
    int default_action_id{0};
    /** @brief Number of top-score bboxes kept in the output */
    size_t keep_top_k{200};
    /** @brief Number of SSD anchors, old network version */
    std::vector<int> old_anchors = {4};
    /** @brief Number of SSD anchors, new network version */
    std::vector<int> new_anchors = {1, 4};
    /** @brief Number of action classes to detect */
    size_t num_action_classes{3};
    /** @brief Flag of asynchronous execution */
    bool is_async{true};
    /** @brief Variances of the SSD bbox encoding */
    float variances[4] = {0.1f, 0.1f, 0.2f, 0.2f};
    /**
     * @brief SSD heads, each given as a step size followed by its anchor sizes
     *        (presumably for the new network version, going by the `new_` prefix)
     */
    SSDHeads new_det_heads = {
        {8, {{26.17863728f, 58.670372f}}},
        {16,
         {{35.36f, 81.829632f},
          {45.8114572f, 107.651852f},
          {63.31491832f, 142.595732f},
          {93.5070856f, 201.107692f}}}};
};
/**
 * @brief Action detector built on top of AsyncDetection and BaseCnnDetection
 *
 * Only the interface is declared here; apart from wait(), the member
 * functions are defined elsewhere.
 */
class ActionDetection : public AsyncDetection<DetectedAction>, public BaseCnnDetection {
public:
    /**
     * @brief Constructor
     *
     * @param config Configuration of the Action Detection model
     */
    explicit ActionDetection(const ActionDetectorConfig& config);

    void submitRequest() override;
    void enqueue(const cv::Mat& frame) override;
    /** @brief Delegates to BaseCnnDetection::wait() */
    void wait() override { BaseCnnDetection::wait(); }
    DetectedActions fetchResults() override;

private:
    ActionDetectorConfig m_config;
    ov::CompiledModel m_model;
    ov::Layout m_modelLayout;
    std::string m_input_name;
    std::map<std::string, ov::Tensor> m_outputs;

    int m_enqueued_frames = 0;
    float m_width = 0;
    float m_height = 0;
    bool m_new_model = false;
    std::vector<int> m_head_ranges;
    std::vector<int> m_head_step_sizes;
    std::vector<cv::Size> m_head_blob_sizes;
    std::vector<std::vector<int>> m_glob_anchor_map;
    std::vector<std::string> m_glob_anchor_names;
    int m_num_glob_anchors = 0;
    cv::Size m_network_input_size;
    // The two members below are given in-class initializers like the other
    // members so they never hold an indeterminate value; a constructor
    // initializer or assignment still takes precedence.
    int m_num_candidates = 0;
    bool m_binary_task = false;

    /**
     * @brief BBox in normalized form (each coordinate is in range [0;1]).
     */
    struct NormalizedBBox {
        float xmin;
        float ymin;
        float xmax;
        float ymax;
    };
    using NormalizedBBoxes = std::vector<NormalizedBBox>;

    /**
     * @brief Translates the detections from the network outputs
     *
     * @param loc Location buffer
     * @param main_conf Detection conf buffer
     * @param priorboxes Priorboxes buffer
     * @param add_conf Action conf buffers
     * @param frame_size Size of input image (WxH)
     * @return Detected objects
     */
    DetectedActions GetDetections(const cv::Mat& loc,
                                  const cv::Mat& main_conf,
                                  const cv::Mat& priorboxes,
                                  const std::vector<cv::Mat>& add_conf,
                                  const cv::Size& frame_size) const;

    /**
     * @brief Translate input buffer to BBox
     *
     * @param data Input buffer
     * @param inverse Parsing mode flag (presumably selects the coordinate
     *        order of the record - confirm against the implementation)
     * @return BBox
     */
    inline NormalizedBBox
    ParseBBoxRecord(const float* data, bool inverse) const;

    /**
     * @brief Generates a prior box
     *
     * NOTE(review): parameter meanings below are taken from the parameter
     * names only - confirm against the implementation.
     *
     * @param pos Position of the prior box
     * @param step Step size of the head
     * @param anchor Anchor size
     * @param blob_size Size of the head blob
     * @return BBox
     */
    inline NormalizedBBox
    GeneratePriorBox(int pos, int step, const cv::Size2f& anchor, const cv::Size& blob_size) const;

    /**
     * @brief Translates input blobs in SSD format to bbox in CV_Rect
     *
     * @param prior_bbox Prior boxes in SSD format
     * @param variances Variances of prior boxes in SSD format
     * @param encoded_bbox BBox to decode
     * @param frame_size Size of input image (WxH)
     * @return BBox in CV_Rect format
     */
    cv::Rect ConvertToRect(const NormalizedBBox& prior_bbox,
                           const NormalizedBBox& variances,
                           const NormalizedBBox& encoded_bbox,
                           const cv::Size& frame_size) const;

    /**
     * @brief Carry out Soft Non-Maximum Suppression algorithm under detected actions
     *
     * @param detections Detected actions
     * @param sigma Scale parameter
     * @param top_k Number of top-score bboxes
     * @param min_det_conf Minimum detection confidence
     * @param out_indices Out indices of valid detections
     */
    void SoftNonMaxSuppression(const DetectedActions& detections,
                               const float sigma,
                               size_t top_k,
                               const float min_det_conf,
                               std::vector<int>* out_indices) const;
};