Initial setup for CLion

This commit is contained in:
2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions

View File

@@ -0,0 +1,418 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <limits>
#include <numeric>
#include <utility>
#include <vector>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "action_detector.hpp"
#define SSD_LOCATION_RECORD_SIZE 4
#define SSD_PRIORBOX_RECORD_SIZE 4
#define NUM_DETECTION_CLASSES 2
#define POSITIVE_DETECTION_IDX 1
// Comparator ranking (score, payload) pairs from highest to lowest score;
// suitable as a predicate for std::sort and friends.
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
    // Strict weak ordering on the score only; payloads are ignored.
    return pair2.first < pair1.first;
}
// Builds the person/action detector: reads the model, sets up u8/NHWC
// preprocessing, compiles it, and precomputes the per-head anchor lookup
// tables used later when decoding the SSD-style outputs.
// Throws std::runtime_error if the model does not have exactly one input
// and std::logic_error if the number of action classes predicted by the
// model differs from the configured one.
ActionDetection::ActionDetection(const ActionDetectorConfig& config) :
BaseCnnDetection(config.is_async), m_config(config) {
m_detectorName = "action detector";
// Frames are fed as u8 NHWC tensors; the PrePostProcessor below converts
// them to the model's own layout and f32.
ov::Layout desiredLayout = {"NHWC"};
slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
logBasicModelInfo(model);
ov::OutputVector inputInfo = model->inputs();
if (inputInfo.size() != 1) {
throw std::runtime_error("Action Detection network should have only one input");
}
m_input_name = model->input().get_any_name();
// Fall back to NCHW when the model carries no layout metadata.
m_modelLayout = ov::layout::get_layout(model->input());
if (m_modelLayout.empty())
m_modelLayout = {"NCHW"};
m_network_input_size.height = model->input().get_shape()[ov::layout::height_idx(m_modelLayout)];
m_network_input_size.width = model->input().get_shape()[ov::layout::width_idx(m_modelLayout)];
// The "new" model generation is recognized by the presence of an output
// named new_loc_blob_name; blob naming and anchor layout differ between
// the two generations.
m_new_model = false;
ov::OutputVector outputs = model->outputs();
auto cmp = [&](const ov::Output<ov::Node>& output) { return output.get_any_name() == m_config.new_loc_blob_name; };
auto it = std::find_if(outputs.begin(), outputs.end(), cmp);
if (it != outputs.end())
m_new_model = true;
ov::preprocess::PrePostProcessor ppp(model);
ppp.input().tensor()
.set_element_type(ov::element::u8)
.set_layout(desiredLayout);
ppp.input().preprocess()
.convert_layout(m_modelLayout)
.convert_element_type(ov::element::f32);
ppp.input().model().set_layout(m_modelLayout);
model = ppp.build();
slog::info << "PrePostProcessor configuration:" << slog::endl;
slog::info << ppp << slog::endl;
ov::set_batch(model, m_config.m_max_batch_size);
m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_config.m_model_type);
// Precompute, per detection head: the [begin, end) candidate index range
// (m_head_ranges), a flat anchor index for every (head, anchor) pair
// (m_glob_anchor_map), and the per-head confidence stride
// (m_head_step_sizes).
const auto& head_anchors = m_new_model ? m_config.new_anchors : m_config.old_anchors;
const int num_heads = head_anchors.size();
m_head_ranges.resize(num_heads + 1);
m_glob_anchor_map.resize(num_heads);
m_head_step_sizes.resize(num_heads);
m_head_ranges[0] = 0;
int head_shift = 0;
for (int head_id = 0; head_id < num_heads; ++head_id) {
m_glob_anchor_map[head_id].resize(head_anchors[head_id]);
// NOTE(review): anchor_height/anchor_width stay uninitialized when a
// head has zero anchors, in which case m_head_blob_sizes would receive
// garbage below. Presumably every head has at least one anchor — confirm.
int anchor_height, anchor_width;
for (int anchor_id = 0; anchor_id < head_anchors[head_id]; ++anchor_id) {
// Per-anchor action-confidence blob names differ between generations.
const auto glob_anchor_name = m_new_model ?
m_config.new_action_conf_blob_name_prefix + std::to_string(head_id + 1) +
m_config.new_action_conf_blob_name_suffix + std::to_string(anchor_id + 1) :
m_config.old_action_conf_blob_name_prefix + std::to_string(anchor_id + 1);
m_glob_anchor_names.push_back(glob_anchor_name);
const auto anchor_dims = m_model.output(glob_anchor_name).get_shape();
anchor_height = m_new_model ? anchor_dims[ov::layout::height_idx(m_modelLayout)] : anchor_dims[1];
anchor_width = m_new_model ? anchor_dims[ov::layout::width_idx(m_modelLayout)] : anchor_dims[2];
std::size_t num_action_classes = m_new_model ? anchor_dims[ov::layout::channels_idx(m_modelLayout)] : anchor_dims[3];
if (num_action_classes != m_config.num_action_classes) {
throw std::logic_error("The number of specified actions and the number of actions predicted by "
"the Person/Action Detection Retail model must match");
}
const int anchor_size = anchor_height * anchor_width;
head_shift += anchor_size;
m_head_step_sizes[head_id] = m_new_model ? anchor_size : 1;
m_glob_anchor_map[head_id][anchor_id] = m_num_glob_anchors++;
}
m_head_ranges[head_id + 1] = head_shift;
m_head_blob_sizes.emplace_back(anchor_width, anchor_height);
}
m_num_candidates = head_shift;
// With exactly two action classes the task is binary; class 0 is then
// treated as background when picking the best action label.
m_binary_task = m_config.num_action_classes == 2;
}
// Starts inference for the staged frame; a no-op when enqueue() was not
// called since the previous submission.
void ActionDetection::submitRequest() {
    if (m_enqueued_frames == 0) {
        return;
    }
    m_enqueued_frames = 0;
    BaseCnnDetection::submitRequest();
}
// Stages one frame for inference: resizes it into the request's input
// tensor and remembers the original frame size for result scaling.
void ActionDetection::enqueue(const cv::Mat& frame) {
    // Lazily create the infer request on first use.
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }
    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);
    resize2tensor(frame, m_request->get_tensor(m_input_name));
    m_enqueued_frames = 1;
}
// Wraps the finished request's raw output tensors in cv::Mat headers
// (no copies; the mats alias tensor memory and are only valid while the
// request holds it) and decodes them into detected actions scaled to the
// size of the frame passed to enqueue().
DetectedActions ActionDetection::fetchResults() {
// Location and detection-confidence blob names differ per generation.
const auto loc_blob_name = m_new_model ? m_config.new_loc_blob_name : m_config.old_loc_blob_name;
const auto det_conf_blob_name = m_new_model ? m_config.new_det_conf_blob_name : m_config.old_det_conf_blob_name;
ov::Shape loc_out_shape = m_model.output(loc_blob_name).get_shape();
const cv::Mat loc_out(loc_out_shape[0],
loc_out_shape[1],
CV_32F,
m_request->get_tensor(loc_blob_name).data<float>());
ov::Shape conf_out_shape = m_model.output(det_conf_blob_name).get_shape();
const cv::Mat main_conf_out(conf_out_shape[0],
conf_out_shape[1],
CV_32F,
m_request->get_tensor(det_conf_blob_name).data<float>());
// One confidence map per global anchor, in m_glob_anchor_names order.
std::vector<cv::Mat> add_conf_out;
for (int glob_anchor_id = 0; glob_anchor_id < m_num_glob_anchors; ++glob_anchor_id) {
const auto& blob_name = m_glob_anchor_names[glob_anchor_id];
ov::Shape shape = m_request->get_tensor(blob_name).get_shape();
add_conf_out.emplace_back(shape[ov::layout::height_idx(m_modelLayout)],
shape[ov::layout::width_idx(m_modelLayout)],
CV_32F,
m_request->get_tensor(blob_name).data());
}
// Parse detections
DetectedActions result;
if (m_new_model) {
// New models have no priorbox output: priors are generated on the fly
// inside GetDetections, so an empty mat is passed.
const cv::Mat priorbox_out;
result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
} else {
// Old models ship prior boxes (and variances) as a dedicated output.
ov::Shape old_priorbox_shape = m_model.output(m_config.old_priorbox_blob_name).get_shape();
const cv::Mat priorbox_out((int)old_priorbox_shape[0], (int)old_priorbox_shape[1],
CV_32F,
m_request->get_tensor(m_config.old_priorbox_blob_name).data());
result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
}
return result;
}
// Reads one 4-float bbox record. When `inverse` is set the coordinate
// pairs are stored (y, x) instead of (x, y) and are swapped on read.
inline ActionDetection::NormalizedBBox
ActionDetection::ParseBBoxRecord(const float* data, bool inverse) const {
    NormalizedBBox bbox;
    if (inverse) {
        bbox.xmin = data[1];
        bbox.ymin = data[0];
        bbox.xmax = data[3];
        bbox.ymax = data[2];
    } else {
        bbox.xmin = data[0];
        bbox.ymin = data[1];
        bbox.xmax = data[2];
        bbox.ymax = data[3];
    }
    return bbox;
}
// Reconstructs the prior (anchor) box for flat cell index `pos` of a
// head's feature map, normalized to the network input size.
inline ActionDetection::NormalizedBBox
ActionDetection::GeneratePriorBox(int pos, int step, const cv::Size2f& anchor,
                                  const cv::Size& blob_size) const {
    // Recover the feature-map cell and its center in input-image pixels.
    const int row = pos / blob_size.width;
    const int col = pos - row * blob_size.width;
    const float center_x = (col + 0.5f) * static_cast<float>(step);
    const float center_y = (row + 0.5f) * static_cast<float>(step);
    // Place an anchor-sized box around the center and normalize to [0,1].
    const float input_w = static_cast<float>(m_network_input_size.width);
    const float input_h = static_cast<float>(m_network_input_size.height);
    NormalizedBBox bbox;
    bbox.xmin = (center_x - 0.5f * anchor.width) / input_w;
    bbox.ymin = (center_y - 0.5f * anchor.height) / input_h;
    bbox.xmax = (center_x + 0.5f * anchor.width) / input_w;
    bbox.ymax = (center_y + 0.5f * anchor.height) / input_h;
    return bbox;
}
// Decodes an SSD-encoded bbox against its prior box and variances and
// converts the result to a pixel-space cv::Rect.
cv::Rect ActionDetection::ConvertToRect(
    const NormalizedBBox& prior_bbox, const NormalizedBBox& variances,
    const NormalizedBBox& encoded_bbox, const cv::Size& frame_size) const {
    // Center/size representation of the prior box.
    const float pw = prior_bbox.xmax - prior_bbox.xmin;
    const float ph = prior_bbox.ymax - prior_bbox.ymin;
    const float pcx = 0.5f * (prior_bbox.xmin + prior_bbox.xmax);
    const float pcy = 0.5f * (prior_bbox.ymin + prior_bbox.ymax);
    // SSD decoding: center offsets are scaled by the variances; sizes are
    // decoded through the exponential.
    const float dcx = variances.xmin * encoded_bbox.xmin * pw + pcx;
    const float dcy = variances.ymin * encoded_bbox.ymin * ph + pcy;
    const float dw = static_cast<float>(exp(static_cast<float>(variances.xmax * encoded_bbox.xmax))) * pw;
    const float dh = static_cast<float>(exp(static_cast<float>(variances.ymax * encoded_bbox.ymax))) * ph;
    // Corner representation of the decoded box, still normalized.
    const float xmin = dcx - 0.5f * dw;
    const float ymin = dcy - 0.5f * dh;
    const float xmax = dcx + 0.5f * dw;
    const float ymax = dcy + 0.5f * dh;
    // Scale to absolute pixel coordinates.
    return cv::Rect(static_cast<int>(xmin * frame_size.width),
                    static_cast<int>(ymin * frame_size.height),
                    static_cast<int>((xmax - xmin) * frame_size.width),
                    static_cast<int>((ymax - ymin) * frame_size.height));
}
// Decodes raw SSD-style outputs into a list of detected actions.
//   loc        - encoded bbox regressions, one 4-float record per candidate.
//   main_conf  - person/background confidences, two per candidate.
//   priorboxes - prior boxes + variances (old models only; unused for new
//                models, which regenerate priors on the fly).
//   add_conf   - per-anchor action-confidence maps.
//   frame_size - original frame size used to scale boxes to pixels.
// Candidates must pass the detection-confidence threshold, receive an
// action label via a softmax over action confidences, and finally go
// through Soft-NMS.
DetectedActions ActionDetection::GetDetections(const cv::Mat& loc, const cv::Mat& main_conf,
const cv::Mat& priorboxes, const std::vector<cv::Mat>& add_conf,
const cv::Size& frame_size) const {
// Prepare input data buffers
const float* loc_data = reinterpret_cast<float*>(loc.data);
const float* det_conf_data = reinterpret_cast<float*>(main_conf.data);
const float* prior_data = m_new_model ? NULL : reinterpret_cast<float*>(priorboxes.data);
const int total_num_anchors = add_conf.size();
std::vector<float*> action_conf_data(total_num_anchors);
for (int i = 0; i < total_num_anchors; ++i) {
action_conf_data[i] = reinterpret_cast<float*>(add_conf[i].data);
}
// Variable to store all detection candidates
DetectedActions valid_detections;
// Iterate over all candidate bboxes
for (int p = 0; p < m_num_candidates; ++p) {
// Parse detection confidence from the SSD Detection output
const float detection_conf =
det_conf_data[p * NUM_DETECTION_CLASSES + POSITIVE_DETECTION_IDX];
// Skip low-confidence detections
if (detection_conf < m_config.detection_confidence_threshold) {
continue;
}
// Estimate the action head ID by scanning the precomputed per-head
// candidate ranges (m_head_ranges is cumulative, starting at 0).
int head_id = 0;
while (p >= m_head_ranges[head_id + 1]) {
++head_id;
}
const int head_p = p - m_head_ranges[head_id];
// Estimate the action anchor ID
const int head_num_anchors =
m_new_model ? m_config.new_anchors[head_id] : m_config.old_anchors[head_id];
const int anchor_id = head_p % head_num_anchors;
// Estimate the action label
const int glob_anchor_id = m_glob_anchor_map[head_id][anchor_id];
const float* anchor_conf_data = action_conf_data[glob_anchor_id];
// Index layout into the per-anchor confidence map differs between the
// two model generations (stride comes from m_head_step_sizes).
const int action_conf_idx_shift = m_new_model ?
head_p / head_num_anchors :
head_p / head_num_anchors * m_config.num_action_classes;
const int action_conf_step = m_head_step_sizes[head_id];
const float scale = m_new_model ? m_config.new_action_scale : m_config.old_action_scale;
// Softmax over action classes: track the max exponent (the winning
// label) and the normalizing sum. In the binary task, class 0 is
// background and never wins the argmax.
int action_label = -1;
float action_max_exp_value = 0.f;
float action_sum_exp_values = 0.f;
for (size_t c = 0; c < m_config.num_action_classes; ++c) {
float action_exp_value =
std::exp(scale * anchor_conf_data[action_conf_idx_shift + c * action_conf_step]);
action_sum_exp_values += action_exp_value;
if (action_exp_value > action_max_exp_value && ((c > 0 && m_binary_task) || !m_binary_task)) {
action_max_exp_value = action_exp_value;
action_label = c;
}
}
if (std::fabs(action_sum_exp_values) < std::numeric_limits<float>::epsilon()) {
throw std::logic_error("action_sum_exp_values can't be equal to 0");
}
// Estimate the action confidence
float action_conf = action_max_exp_value / action_sum_exp_values;
// Skip low-confidence actions: fall back to the configured default
// action with zero confidence instead of dropping the detection.
if (action_label < 0 || action_conf < m_config.action_confidence_threshold) {
action_label = m_config.default_action_id;
action_conf = 0.f;
}
// Parse bbox from the SSD Detection output. New models generate the
// prior on the fly; old models read it from the priorbox blob, whose
// second half (offset by m_num_candidates records) holds the variances.
const auto priorbox = m_new_model ?
GeneratePriorBox(head_p / head_num_anchors,
m_config.new_det_heads[head_id].step,
m_config.new_det_heads[head_id].anchors[anchor_id],
m_head_blob_sizes[head_id]) :
ParseBBoxRecord(prior_data + p * SSD_PRIORBOX_RECORD_SIZE, false);
const auto variance =
ParseBBoxRecord(m_new_model ?
m_config.variances :
prior_data + (m_num_candidates + p) * SSD_PRIORBOX_RECORD_SIZE, false);
const auto encoded_bbox =
ParseBBoxRecord(loc_data + p * SSD_LOCATION_RECORD_SIZE, m_new_model);
const auto det_rect = ConvertToRect(priorbox, variance, encoded_bbox, frame_size);
// Store detected action
valid_detections.emplace_back(det_rect, action_label, detection_conf, action_conf);
}
// Merge most overlapped detections
std::vector<int> out_det_indices;
SoftNonMaxSuppression(valid_detections, m_config.nms_sigma, m_config.keep_top_k,
m_config.detection_confidence_threshold, &out_det_indices);
DetectedActions detections;
for (size_t i = 0; i < out_det_indices.size(); ++i) {
detections.emplace_back(valid_detections[out_det_indices[i]]);
}
return detections;
}
// Greedy Soft-NMS with Gaussian re-scoring: repeatedly selects the
// highest-scoring of the top_k candidates, then decays the scores of the
// remaining candidates by exp(-iou^2 / sigma).
//   detections   - input candidates (not modified).
//   sigma        - Gaussian decay parameter.
//   top_k        - at most this many candidates are considered.
//   min_det_conf - candidates whose (decayed) score drops below this
//                  become unselectable.
//   out_indices  - output: indices into `detections`, best first.
void ActionDetection::SoftNonMaxSuppression(const DetectedActions& detections,
const float sigma, size_t top_k, const float min_det_conf,
std::vector<int>* out_indices) const {
// Store input bbox scores
std::vector<float> scores(detections.size());
for (size_t i = 0; i < detections.size(); ++i) {
scores[i] = detections[i].detection_conf;
}
top_k = std::min(top_k, scores.size());
// Select top-k score indices: nth_element partitions score_idx so its
// first top_k entries reference the best-scoring detections (unordered).
std::vector<size_t> score_idx(scores.size());
std::iota(score_idx.begin(), score_idx.end(), 0);
std::nth_element(score_idx.begin(), score_idx.begin() + top_k, score_idx.end(),
[&scores](size_t i1, size_t i2) {return scores[i1] > scores[i2];});
// Extract top-k score values
std::vector<float> top_scores(top_k);
for (size_t i = 0; i < top_scores.size(); ++i) {
top_scores[i] = scores[score_idx[i]];
}
// Carry out Soft Non-Maximum Suppression algorithm
out_indices->clear();
for (size_t step = 0; step < top_scores.size(); ++step) {
auto best_score_itr = std::max_element(top_scores.begin(), top_scores.end());
if (*best_score_itr < min_det_conf) {
break;
}
// Add current bbox to output list
const size_t local_anchor_idx = std::distance(top_scores.begin(), best_score_itr);
const int anchor_idx = score_idx[local_anchor_idx];
out_indices->emplace_back(anchor_idx);
// Zeroing the score removes the picked box from later iterations.
*best_score_itr = 0.f;
// Update top_scores of the rest bboxes
for (size_t local_reference_idx = 0; local_reference_idx < top_scores.size(); ++local_reference_idx) {
// Skip updating step for the low-confidence bbox
if (top_scores[local_reference_idx] < min_det_conf) {
continue;
}
// Calculate the Intersection over Union metric between two bboxes
const size_t reference_idx = score_idx[local_reference_idx];
const auto& rect1 = detections[anchor_idx].rect;
const auto& rect2 = detections[reference_idx].rect;
const auto intersection = rect1 & rect2;
float overlap = 0.f;
if (intersection.width > 0 && intersection.height > 0) {
const int intersection_area = intersection.area();
overlap = static_cast<float>(intersection_area) / static_cast<float>(rect1.area() + rect2.area() - intersection_area);
}
// Scale bbox score using the exponential rule
top_scores[local_reference_idx] *= std::exp(-overlap * overlap / sigma);
}
}
}

View File

@@ -0,0 +1,72 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <vector>
#include <limits>
#include <opencv2/imgproc.hpp>
#include "face_reid.hpp"
// Canonical face-chip size (in pixels) the reference landmarks below
// were defined on.
static const float h = 112.;
static const float w = 96.;
// reference landmarks points in the unit square [0,1]x[0,1]
// Five (x, y) pairs, interleaved; presumably the standard 5-point face
// layout (eyes, nose tip, mouth corners) — confirm against the model.
static const float ref_landmarks_normalized[] = {
30.2946f / w, 51.6963f / h, 65.5318f / w, 51.5014f / h, 48.0252f / w,
71.7366f / h, 33.5493f / w, 92.3655f / h, 62.7299f / w, 92.2041f / h};
// Computes a 2x3 similarity transform mapping the *src landmark points
// onto *dst (closed form, Umeyama-style): both point sets are centered
// and scaled to unit deviation, the rotation comes from the SVD of
// src^T * dst, and the translation re-aligns the means.
// NOTE: both *src and *dst are modified in place (centered and scaled).
cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst) {
cv::Mat col_mean_src;
reduce(*src, col_mean_src, 0, cv::REDUCE_AVG);
for (int i = 0; i < src->rows; i++) {
src->row(i) -= col_mean_src;
}
cv::Mat col_mean_dst;
reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG);
for (int i = 0; i < dst->rows; i++) {
dst->row(i) -= col_mean_dst;
}
// Clamp the deviations away from zero to avoid dividing by zero for
// degenerate (e.g. collinear) point sets.
cv::Scalar mean, dev_src, dev_dst;
cv::meanStdDev(*src, mean, dev_src);
dev_src(0) =
std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_src(0));
*src /= dev_src(0);
cv::meanStdDev(*dst, mean, dev_dst);
dev_dst(0) =
std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_dst(0));
*dst /= dev_dst(0);
// Optimal rotation from the SVD of the cross-covariance. This local `w`
// (singular values) shadows the file-level width constant `w`.
cv::Mat w, u, vt;
cv::SVD::compute((*src).t() * (*dst), w, u, vt);
cv::Mat r = (u * vt).t();
// Assemble [scale*R | t] into a 2x3 affine matrix.
cv::Mat m(2, 3, CV_32F);
m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0));
m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t());
return m;
}
// Warps each face crop so its five facial landmarks line up with the
// canonical reference positions (ref_landmarks_normalized).
//   face_images   - in/out: crops are warped in place.
//   landmarks_vec - in/out: one 5x2 CV_32F mat per face, normalized to
//                   [0,1]; scaled in place to pixels as a side effect.
void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
if (landmarks_vec->size() == 0) {
return;
}
CV_Assert(face_images->size() == landmarks_vec->size());
cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F);
for (size_t j = 0; j < face_images->size(); j++) {
// Scale both the reference and the detected landmarks from the unit
// square to this crop's pixel coordinates.
for (int i = 0; i < ref_landmarks.rows; i++) {
ref_landmarks.at<float>(i, 0) =
ref_landmarks_normalized[2 * i] * face_images->at(j).cols;
ref_landmarks.at<float>(i, 1) =
ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows;
landmarks_vec->at(j).at<float>(i, 0) *= face_images->at(j).cols;
landmarks_vec->at(j).at<float>(i, 1) *= face_images->at(j).rows;
}
// GetTransform maps reference -> detected landmarks; WARP_INVERSE_MAP
// applies the inverse, moving the face onto the reference layout.
cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j));
cv::warpAffine(face_images->at(j), face_images->at(j), m,
face_images->at(j).size(), cv::WARP_INVERSE_MAP);
}
}

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "cnn.hpp"
// Stores the configuration only; the model itself is loaded via Load().
CnnDLSDKBase::CnnDLSDKBase(const Config& config) : m_config(config) {}
// Reads, preprocesses and compiles the model, then creates one infer
// request with an input tensor pre-allocated at full batch capacity.
// NOTE(review): silently returns (leaving the object unusable) when the
// model does not have exactly one input — consider throwing instead.
void CnnDLSDKBase::Load() {
std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
ov::OutputVector inputs = model->inputs();
if (inputs.size() != 1) {
return;
}
// Frames arrive as u8 NHWC; preprocessing converts them to the model's
// own layout (NCHW fallback when no layout metadata is present) and f32.
m_desired_layout = {"NHWC"};
ov::Layout model_layout = ov::layout::get_layout(model->input());
if (model_layout.empty()) {
model_layout = {"NCHW"};
}
// Remember every output name so InferBatch can fetch all results.
ov::OutputVector outputs = model->outputs();
for (auto& item : outputs) {
const std::string name = item.get_any_name();
m_output_tensors_names.push_back(name);
}
ov::preprocess::PrePostProcessor ppp(model);
ppp.input().tensor()
.set_element_type(ov::element::u8)
.set_layout(m_desired_layout);
ppp.input().preprocess()
.convert_layout(model_layout)
.convert_element_type(ov::element::f32);
ppp.input().model().set_layout(model_layout);
model = ppp.build();
// Compile with a dynamic batch in [1, max_batch_size]; the single input
// tensor is allocated at capacity and sliced per call in InferBatch.
m_modelShape = model->input().get_shape();
m_modelShape[ov::layout::batch_idx(m_desired_layout)] = m_config.m_max_batch_size;
ov::set_batch(model, {1, m_config.m_max_batch_size});
m_compiled_model = m_config.m_core.compile_model(model, m_config.m_deviceName, { ov::hint::num_requests(1) });
m_infer_request = m_compiled_model.create_infer_request();
m_in_tensor = m_infer_request.get_input_tensor();
m_in_tensor.set_shape(m_modelShape);
}
// Runs one synchronous inference over a batch of frames.
// Each frame is resized into its ROI slice of the pre-allocated input
// tensor; all output tensors are collected by name and handed to
// fetch_results together with the actual batch size.
// NOTE(review): assumes frames.size() <= m_config.m_max_batch_size —
// larger batches would slice past the tensor; confirm at call sites.
void CnnDLSDKBase::InferBatch(const std::vector<cv::Mat>& frames,const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& fetch_results)
{
size_t num_imgs = frames.size();
size_t h = m_modelShape[ov::layout::height_idx(m_desired_layout)];
size_t w = m_modelShape[ov::layout::width_idx(m_desired_layout)];
size_t c = m_modelShape[ov::layout::channels_idx(m_desired_layout)];
// Fill each batch slot via a zero-copy ROI view into m_in_tensor.
for (size_t i = 0; i < num_imgs; i++) {
auto slice = ov::Tensor{ m_in_tensor, {i, 0, 0, 0}, {i + 1, h, w, c} };
resize2tensor(frames[i], slice);
}
// Submit only the first num_imgs slots of the capacity tensor.
m_infer_request.set_input_tensor(ov::Tensor{ m_in_tensor, { 0, 0, 0, 0 }, {num_imgs, h, w, c} });
m_infer_request.infer();
std::map<std::string, ov::Tensor> output_tensors;
for (const auto& output_tensor_name : m_output_tensors_names) {
output_tensors[output_tensor_name] = m_infer_request.get_tensor(output_tensor_name);
}
fetch_results(output_tensors, num_imgs);
}
// Loads and compiles the model immediately on construction.
VectorCNN::VectorCNN(const Config& config) : CnnDLSDKBase(config) {
Load();
}
// Computes the descriptor for a single frame by delegating to the
// batched overload with a one-element batch.
void VectorCNN::Compute(const cv::Mat& frame, cv::Mat* vector, cv::Size outp_shape) {
    std::vector<cv::Mat> results;
    Compute(std::vector<cv::Mat>{frame}, &results, outp_shape);
    *vector = results[0];
}
// Runs the network on a batch of images and appends one descriptor
// cv::Mat per image to *vectors (data is copied out of the output
// tensor, so the results outlive the request).
//   outp_shape - when non-empty, each descriptor is reshaped to have
//                outp_shape.height rows.
void VectorCNN::Compute(const std::vector<cv::Mat>& images, std::vector<cv::Mat>* vectors,
cv::Size outp_shape) {
if (images.empty()) {
return;
}
vectors->clear();
auto results_fetcher = [vectors, outp_shape](const std::map<std::string, ov::Tensor>& outputs, size_t batch_size) {
for (auto& output : outputs) {
ov::Tensor tensor = output.second;
ov::Shape shape = tensor.get_shape();
std::vector<int> tensor_sizes(shape.size(), 0);
for (size_t i = 0; i < tensor_sizes.size(); ++i) {
tensor_sizes[i] = shape[i];
}
// Zero-copy view over the whole output tensor; per-sample rows are
// sliced out of it below.
cv::Mat out_tensor(tensor_sizes, CV_32F, tensor.data<float>());
for (size_t b = 0; b < batch_size; b++) {
// One sample's descriptor: size[1] floats starting at row b.
cv::Mat tensor_wrapper(out_tensor.size[1], 1, CV_32F,
reinterpret_cast<void*>((out_tensor.ptr<float>(0) + b * out_tensor.size[1])));
vectors->emplace_back();
if (!isSizeEmpty(outp_shape))
tensor_wrapper = tensor_wrapper.reshape(1, outp_shape.height);
// copyTo detaches the result from the request-owned memory.
tensor_wrapper.copyTo(vectors->back());
}
}
};
InferBatch(images, results_fetcher);
}
// Maximum number of images a single Compute() call can process.
int VectorCNN::maxBatchSize() const {
return m_config.m_max_batch_size;
}

View File

@@ -0,0 +1,176 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <string>
#include <map>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "openvino/openvino.hpp"
#include "detector.hpp"
#define SSD_EMPTY_DETECTIONS_INDICATOR -1.0
using namespace detection;
namespace {
// Clips a rect to the image bounds, returning a rect with non-negative
// width and height.
cv::Rect TruncateToValidRect(const cv::Rect& rect, const cv::Size& size) {
    // Clamp both corners into the image independently.
    cv::Point tl = rect.tl();
    cv::Point br = rect.br();
    tl.x = std::min(std::max(tl.x, 0), size.width - 1);
    tl.y = std::min(std::max(tl.y, 0), size.height - 1);
    br.x = std::min(std::max(br.x, 0), size.width);
    br.y = std::min(std::max(br.y, 0), size.height);
    // Rebuild with clamped extents so width/height never go negative.
    const int w = std::max(br.x - tl.x, 0);
    const int h = std::max(br.y - tl.y, 0);
    return cv::Rect(tl.x, tl.y, w, h);
}
// Scales a rect about its center by (coeff_x, coeff_y), rounding the
// corners outwards so truncation never shrinks the result.
cv::Rect IncreaseRect(const cv::Rect& r, float coeff_x, float coeff_y) {
    const cv::Point2f tl = r.tl();
    const cv::Point2f br = r.br();
    const cv::Point2f center = (tl * 0.5f) + (br * 0.5f);
    // Half-extents scaled independently per axis.
    const cv::Point2f half = center - tl;
    const cv::Point2f scaled_half{half.x * coeff_x, half.y * coeff_y};
    const cv::Point2f new_tl = center - scaled_half;
    const cv::Point2f new_br = center + scaled_half;
    // floor/ceil round outwards in both directions.
    const cv::Point tl_int{static_cast<int>(std::floor(new_tl.x)), static_cast<int>(std::floor(new_tl.y))};
    const cv::Point br_int{static_cast<int>(std::ceil(new_br.x)), static_cast<int>(std::ceil(new_br.y))};
    return cv::Rect(tl_int, br_int);
}
} // namespace
// Loads the face detection model, reshapes its input to the configured
// size, sets up u8/NHWC preprocessing and compiles it.
// Throws std::runtime_error when the model does not have exactly one
// input and one output, or when the output shape is not the expected
// 4-dimensional SSD layout with 7 values per detection record.
FaceDetection::FaceDetection(const DetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "Face Detection";
    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);
    ov::Layout desiredLayout = {"NHWC"};
    ov::OutputVector inputs_info = model->inputs();
    if (inputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one input");
    }
    ov::OutputVector outputs_info = model->outputs();
    if (outputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one output");
    }
    ov::Output<ov::Node> input = model->input();
    m_input_name = input.get_any_name();
    // Fall back to NCHW when the model carries no layout metadata.
    ov::Layout modelLayout = ov::layout::get_layout(input);
    if (modelLayout.empty())
        modelLayout = {"NCHW"};
    // Override the spatial input size with the configured one.
    ov::Shape shape = input.get_shape();
    shape[ov::layout::height_idx(modelLayout)] = m_config.input_h;
    shape[ov::layout::width_idx(modelLayout)] = m_config.input_w;
    ov::Output<ov::Node> output = model->output();
    m_output_name = output.get_any_name();
    ov::Shape outputDims = output.get_shape();
    // Fix: validate the output rank BEFORE indexing dimensions 2 and 3.
    // The previous order read outputDims[2]/outputDims[3] first, which is
    // an out-of-range access for outputs with fewer than 4 dimensions.
    if (outputDims.size() != 4) {
        throw std::runtime_error("Face Detection network output should have 4 dimensions, but had " +
                                 std::to_string(outputDims.size()));
    }
    m_max_detections_count = outputDims[2];
    m_object_size = outputDims[3];
    if (m_object_size != 7) {
        throw std::runtime_error("Face Detection network output layer should have 7 as a last dimension");
    }
    std::map<std::string, ov::PartialShape> input_shapes;
    input_shapes[input.get_any_name()] = shape;
    model->reshape(input_shapes);
    // Accept u8 NHWC frames and convert to the model layout/precision.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(modelLayout);
    model = ppp.build();
    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;
    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_detectorName);
}
// Starts inference for the staged frame; a no-op when enqueue() was not
// called since the previous submission.
void FaceDetection::submitRequest() {
    if (m_enqueued_frames == 0) {
        return;
    }
    m_enqueued_frames = 0;
    BaseCnnDetection::submitRequest();
}
// Stages one frame for inference: resizes it into the request's input
// tensor and remembers the original frame size for result scaling.
void FaceDetection::enqueue(const cv::Mat& frame) {
    // Lazily create the infer request on first use.
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }
    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);
    resize2tensor(frame, m_request->get_tensor(m_input_name));
    m_enqueued_frames = 1;
}
// Parses the SSD output (N records of 7 floats each; record layout:
// image_id, label, confidence, x0, y0, x1, y1 with normalized
// coordinates) into detections scaled to the enqueued frame, enlarged by
// the configured scale factors and clipped back to the frame.
DetectedObjects FaceDetection::fetchResults() {
DetectedObjects results;
const float* data = m_request->get_tensor(m_output_name).data<float>();
for (int det_id = 0; det_id < m_max_detections_count; ++det_id) {
const int start_pos = det_id * m_object_size;
// An image id of -1 marks the end of the valid detections.
const float batchID = data[start_pos];
if (batchID == SSD_EMPTY_DETECTIONS_INDICATOR) {
break;
}
// Clamp everything to [0, 1] before scaling to pixel coordinates.
const float score = std::min(std::max(0.0f, data[start_pos + 2]), 1.0f);
const float x0 = std::min(std::max(0.0f, data[start_pos + 3]), 1.0f) * m_width;
const float y0 = std::min(std::max(0.0f, data[start_pos + 4]), 1.0f) * m_height;
const float x1 = std::min(std::max(0.0f, data[start_pos + 5]), 1.0f) * m_width;
const float y1 = std::min(std::max(0.0f, data[start_pos + 6]), 1.0f) * m_height;
DetectedObject object;
object.confidence = score;
object.rect = cv::Rect(cv::Point(static_cast<int>(round(static_cast<double>(x0))),
static_cast<int>(round(static_cast<double>(y0)))),
cv::Point(static_cast<int>(round(static_cast<double>(x1))),
static_cast<int>(round(static_cast<double>(y1)))));
// Grow the box by the configured factors, then clip it to the frame.
object.rect = TruncateToValidRect(IncreaseRect(object.rect,
m_config.increase_scale_x,
m_config.increase_scale_y),
cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
// Keep only confident, non-degenerate boxes.
if (object.confidence > m_config.confidence_threshold && object.rect.area() > 0) {
results.emplace_back(object);
}
}
return results;
}

View File

@@ -0,0 +1,166 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <iomanip>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <fstream>
#include "logger.hpp"
namespace {
const char unknown_label[] = "Unknown";
// Maps an index to its label; negative indices denote "no identity" and
// yield the shared "Unknown" label instead of indexing the vector.
std::string GetUnknownOrLabel(const std::vector<std::string>& labels, int idx) {
    if (idx < 0) {
        return unknown_label;
    }
    return labels.at(idx);
}
// Builds a "<file name>@<frame index>" key where the index is
// zero-padded to six digits and the file name is the path component
// after the last '/'.
std::string FrameIdxToString(const std::string& path, int frame_idx) {
    std::ostringstream padded;
    padded << std::setw(6) << std::setfill('0') << frame_idx;
    const std::string file_name = path.substr(path.rfind("/") + 1);
    return file_name + "@" + padded.str();
}
} // anonymous namespace
// stream            - destination for the human-readable text log.
// enabled           - turns the per-frame text logging on.
// act_stat_log_file - per-frame action statistics (CSV-like) file.
// act_det_log_file  - optional cv::FileStorage log of raw detections;
//                     when non-empty, a "data" sequence is opened here
//                     and closed in the destructor.
DetectionsLogger::DetectionsLogger(slog::LogStream& stream, bool enabled,
const std::string& act_stat_log_file,
const std::string& act_det_log_file) :
m_log_stream(stream) {
m_write_logs = enabled;
m_act_stat_log_stream.open(act_stat_log_file, std::fstream::out);
if (!act_det_log_file.empty()) {
m_act_det_log_stream.open(act_det_log_file, cv::FileStorage::WRITE);
m_act_det_log_stream << "data" << "[";
}
}
// Writes the header line that opens a frame's record in the text log;
// a no-op when logging is disabled.
void DetectionsLogger::CreateNextFrameRecord(
    const std::string& path, const int frame_idx, const size_t width, const size_t height) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Frame_name: " << path << "@" << frame_idx << " width: "
                 << width << " height: " << height << slog::endl;
}
// Logs one face detection for the current frame; the action part is
// omitted when empty. No-op when logging is disabled.
void DetectionsLogger::AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: face. Box: " << rect << " id: " << id;
    if (!action.empty()) {
        m_log_stream << " action: " << action;
    }
    m_log_stream << slog::endl;
}
// Logs one person detection for the current frame; the identity part is
// omitted when empty. No-op when logging is disabled.
void DetectionsLogger::AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: person. Box: " << rect << " action: " << action;
    if (!id.empty()) {
        m_log_stream << " id: " << id;
    }
    m_log_stream << slog::endl;
}
// Appends one raw detection record to the cv::FileStorage detection log;
// a no-op when that log was not opened.
void DetectionsLogger::AddDetectionToFrame(const TrackedObject& object, const int frame_idx) {
    if (!m_act_det_log_stream.isOpened()) {
        return;
    }
    m_act_det_log_stream << "{" << "frame_id" << frame_idx
                         << "det_conf" << object.confidence
                         << "label" << object.label
                         << "rect" << object.rect << "}";
}
// Emits the blank line that separates per-frame records in the text log.
void DetectionsLogger::FinalizeFrameRecord() {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << slog::endl;
}
// Writes per-frame logs for a whole processed video:
//  - the text log (via CreateNextFrameRecord / AddFaceToFrame), and
//  - the action-statistics stream: a CSV with one column per known
//    person and one row per frame holding that person's action label.
// face_tracks plus track_id_to_label_faces map tracked face objects to
// gallery identities; frame_face_obj_id_to_action_maps provides, per
// frame, the action label index for each tracked object id.
void DetectionsLogger::DumpDetections(const std::string& video_path,
const cv::Size frame_size,
const size_t num_frames,
const std::vector<Track>& face_tracks,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& action_idx_to_label,
const std::vector<std::string>& person_id_to_label,
const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps) {
// Regroup track objects by the frame they were observed in.
std::map<int, std::vector<const TrackedObject*>> frame_idx_to_face_track_objs;
for (const auto& tr : face_tracks) {
for (const auto& obj : tr.objects) {
frame_idx_to_face_track_objs[obj.frame_idx].emplace_back(&obj);
}
}
// Per-person action for the current frame; reset to "Unknown" each frame.
std::map<std::string, std::string> face_label_to_action;
for (const auto& label : person_id_to_label) {
face_label_to_action[label] = unknown_label;
}
// CSV header: frame_idx followed by one column per known person.
m_act_stat_log_stream << "frame_idx";
for (const auto& label : person_id_to_label) {
m_act_stat_log_stream << "," << label;
}
m_act_stat_log_stream << std::endl;
for (size_t i = 0; i < num_frames; i++) {
CreateNextFrameRecord(video_path, i, frame_size.width, frame_size.height);
const auto& frame_face_obj_id_to_action = frame_face_obj_id_to_action_maps.at(i);
for (auto& kv : face_label_to_action) {
kv.second = unknown_label;
}
// Resolve each tracked object in this frame to (identity, action).
for (const auto& p_obj : frame_idx_to_face_track_objs[i]) {
const auto& obj = *p_obj;
std::string action_label = unknown_label;
if (frame_face_obj_id_to_action.count(obj.object_id) > 0) {
action_label = GetUnknownOrLabel(action_idx_to_label, frame_face_obj_id_to_action.at(obj.object_id));
}
std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj.object_id));
face_label_to_action[face_label] = action_label;
AddFaceToFrame(obj.rect, face_label, action_label);
}
// One CSV row per frame with every person's current action.
m_act_stat_log_stream << FrameIdxToString(video_path, i);
for (const auto& label : person_id_to_label) {
m_act_stat_log_stream << "," << face_label_to_action[label];
}
m_act_stat_log_stream << std::endl;
FinalizeFrameRecord();
}
}
// Writes a per-person summary of action events (action label plus the
// frame range it spans) for every tracked object that was matched to a
// face identity; unmatched objects are skipped.
void DetectionsLogger::DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
const std::vector<std::string>& action_idx_to_label,
const std::map<int, int>& track_id_to_label_faces,
const std::vector<std::string>& person_id_to_label) {
for (const auto& tup : obj_id_to_events) {
const int obj_id = tup.first;
// Only report objects that were associated with a gallery identity.
if (track_id_to_label_faces.count(obj_id) > 0) {
const auto& events = tup.second;
std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj_id));
m_log_stream << "Person: " << face_label << slog::endl;
for (const auto& event : events) {
std::string action_label = GetUnknownOrLabel(action_idx_to_label, event.action);
m_log_stream << "   - " << action_label
<< ": from " << event.begin_frame_id
<< " to " << event.end_frame_id
<< " frames" << slog::endl;
}
}
}
}
// Destructor: terminates the action-detection log with a closing bracket.
// NOTE(review): presumably the matching "[" was emitted when the stream was
// opened, making the log a JSON-like array — confirm against the logger setup.
DetectionsLogger::~DetectionsLogger() {
    if (!m_act_det_log_stream.isOpened()) {
        return;
    }
    m_act_det_log_stream << "]";
}

View File

@@ -0,0 +1,186 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "face_reid.hpp"
#include "tracker/tracker.hpp"
namespace {

// Cosine distance between two re-id descriptors: 0 for identical direction,
// 1 for orthogonal vectors, up to 2 for opposite ones.
float ComputeReidDistance(const cv::Mat& descr1, const cv::Mat& descr2) {
    const float xy = static_cast<float>(descr1.dot(descr2));
    const float xx = static_cast<float>(descr1.dot(descr1));
    const float yy = static_cast<float>(descr2.dot(descr2));
    // Epsilon keeps the division well-defined for an all-zero descriptor.
    // std::sqrt (from <cmath>) is used instead of the unqualified C sqrt,
    // which relied on a transitive include of <math.h>.
    const float norm = std::sqrt(xx * yy) + 1e-6f;
    return 1.0f - xy / norm;
}

// Returns true when the file at `name` exists and is readable.
bool file_exists(const std::string& name) {
    std::ifstream f(name.c_str());
    return f.good();
}

// Platform-specific path separator character.
inline char separator() {
#ifdef _WIN32
    return '\\';
#else
    return '/';
#endif
}

// Directory part of `path` without a trailing separator; returns "./" (or
// ".\" on Windows) when `path` contains no separator at all.
std::string folder_name(const std::string& path) {
    const size_t found_pos = path.find_last_of(separator());
    if (found_pos != std::string::npos)
        return path.substr(0, found_pos);
    return std::string(".") + separator();
}

}  // namespace
// Display label used for probes that cannot be matched to any gallery identity.
const char EmbeddingsGallery::unknown_label[] = "Unknown";
// Numeric id paired with unknown_label; mirrors the tracker's "no label" index.
const int EmbeddingsGallery::unknown_id = TrackedObject::UNKNOWN_LABEL_IDX;
// Builds the face gallery from a FileStorage list (`ids_list`): each top-level
// node name is a person label and its entries are image paths (absolute, or
// relative to the list file's folder). Every image is optionally cropped to
// the detected face, embedded with `image_reid`, and the embeddings are
// collected per person. `threshold` is the re-id distance cutoff used later
// during matching; images smaller than `min_size_fr` in both dimensions are
// rejected as low quality.
EmbeddingsGallery::EmbeddingsGallery(const std::string& ids_list,
                                     double threshold, int min_size_fr,
                                     bool crop_gallery, const detection::DetectorConfig& detector_config,
                                     VectorCNN& landmarks_det,
                                     VectorCNN& image_reid,
                                     bool use_greedy_matcher) :
        reid_threshold(threshold), use_greedy_matcher(use_greedy_matcher) {
    if (ids_list.empty()) {
        return;
    }
    detection::FaceDetection detector(detector_config);
    cv::FileStorage fs(ids_list, cv::FileStorage::Mode::READ);
    cv::FileNode fn = fs.root();
    int id = 0;
    for (cv::FileNodeIterator fit = fn.begin(); fit != fn.end(); ++fit) {
        cv::FileNode item = *fit;
        std::string label = item.name();
        std::vector<cv::Mat> embeddings;
        // Please, note that the case when there are more than one image in gallery
        // for a person might not work properly with the current implementation
        // of the demo.
        // Remove this assert by your own risk.
        CV_Assert(item.size() == 1);
        for (size_t i = 0; i < item.size(); i++) {
            std::string path;
            if (file_exists(item[i].string())) {
                path = item[i].string();
            } else {
                // Fall back to resolving the path relative to the list file.
                path = folder_name(ids_list) + separator() + item[i].string();
            }
            cv::Mat image = cv::imread(path);
            CV_Assert(!image.empty());
            cv::Mat emb;
            RegistrationStatus status = RegisterIdentity(label, image, min_size_fr, crop_gallery, detector, landmarks_det, image_reid, emb);
            if (status == RegistrationStatus::SUCCESS) {
                embeddings.push_back(emb);
                // One idx_to_id entry per embedding: it maps a column of the
                // distance matrix built in GetIDsByEmbeddings back to the id.
                idx_to_id.push_back(id);
            }
        }
        // Create exactly ONE identity per person, after all of their images
        // are processed. The previous code emplaced an identity inside the
        // per-image loop, which would have produced duplicate identities (with
        // growing embedding lists) for multi-image gallery entries.
        if (!embeddings.empty()) {
            identities.emplace_back(embeddings, label, id);
            ++id;
        }
    }
}
// Matches each probe embedding to a gallery identity id. Builds the full
// probe-by-gallery cosine-distance matrix, solves the assignment with
// Kuhn-Munkres (or its greedy variant), and maps every probe to either the
// matched identity's id or unknown_id when unmatched / above the threshold.
std::vector<int> EmbeddingsGallery::GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const {
    // No probes, or an empty gallery: everything is unknown.
    if (embeddings.empty() || idx_to_id.empty())
        return std::vector<int>(embeddings.size(), unknown_id);

    // Rows are probe embeddings; columns are gallery embeddings flattened
    // across all identities (idx_to_id maps a column back to an identity id).
    const int num_rows = static_cast<int>(embeddings.size());
    const int num_cols = static_cast<int>(idx_to_id.size());
    cv::Mat distances(num_rows, num_cols, CV_32F);
    for (int row = 0; row < num_rows; ++row) {
        int col = 0;
        for (const auto& identity : identities) {
            for (const auto& reference_emb : identity.embeddings) {
                distances.at<float>(row, col) = ComputeReidDistance(embeddings[row], reference_emb);
                ++col;
            }
        }
    }

    KuhnMunkres matcher(use_greedy_matcher);
    const auto matched_idx = matcher.Solve(distances);
    std::vector<int> output_ids;
    int row = 0;
    for (const auto col_idx : matched_idx) {
        if (int(col_idx) == -1) {
            // Probe left unassigned by the solver.
            output_ids.push_back(unknown_id);
        } else if (distances.at<float>(row, col_idx) > reid_threshold) {
            // Assigned, but too far from the gallery embedding to trust.
            output_ids.push_back(unknown_id);
        } else {
            output_ids.push_back(idx_to_id[col_idx]);
        }
        ++row;
    }
    return output_ids;
}
// Returns the label of the identity with the given id, or unknown_label
// when the id is out of the gallery's range.
std::string EmbeddingsGallery::GetLabelByID(int id) const {
    if (id < 0 || id >= static_cast<int>(identities.size()))
        return unknown_label;
    return identities[id].label;
}
// Number of registered identities in the gallery.
size_t EmbeddingsGallery::size() const {
    return identities.size();
}
// Returns the labels of all identities, positioned so that the vector index
// equals the identity's id.
std::vector<std::string> EmbeddingsGallery::GetIDToLabelMap() const {
    std::vector<std::string> labels(identities.size());
    for (size_t idx = 0; idx < identities.size(); ++idx) {
        labels[idx] = identities[idx].label;
    }
    return labels;
}
// Returns true when an identity with the given label is already registered.
// The lambda captures `label` by const reference: the original by-value
// capture copied the std::string on every call for no benefit.
bool EmbeddingsGallery::LabelExists(const std::string& label) const {
    return identities.end() != std::find_if(identities.begin(), identities.end(),
                                            [&label](const GalleryObject& o) { return o.label == label; });
}
// Computes the re-id embedding for one gallery image. Optionally crops the
// image to the single detected face, rejects images that are too small,
// aligns the face using detected landmarks, and writes the descriptor into
// `embedding`. Note: `identity_label` is currently unused by this routine.
RegistrationStatus EmbeddingsGallery::RegisterIdentity(const std::string& identity_label,
                                                       const cv::Mat& image,
                                                       int min_size_fr, bool crop_gallery,
                                                       detection::FaceDetection& detector,
                                                       VectorCNN& landmarks_det,
                                                       VectorCNN& image_reid,
                                                       cv::Mat& embedding) {
    cv::Mat target = image;
    if (crop_gallery) {
        // Run the face detector synchronously on the whole gallery image.
        detector.enqueue(image);
        detector.submitRequest();
        detector.wait();
        detection::DetectedObjects faces = detector.fetchResults();
        if (faces.empty()) {
            return RegistrationStatus::FAILURE_NOT_DETECTED;
        }
        // Exactly one face per gallery image is expected.
        CV_Assert(faces.size() == 1);
        target = image(faces[0].rect);
    }
    // NOTE(review): rejects only when BOTH dimensions are below the minimum;
    // confirm that either-dimension-too-small is not the intended criterion.
    if ((target.rows < min_size_fr) && (target.cols < min_size_fr)) {
        return RegistrationStatus::FAILURE_LOW_QUALITY;
    }
    cv::Mat landmarks;
    // Five 2-D facial landmarks used for alignment.
    landmarks_det.Compute(target, &landmarks, cv::Size(2, 5));
    std::vector<cv::Mat> face_images = { target };
    std::vector<cv::Mat> face_landmarks = { landmarks };
    AlignFaces(&face_images, &face_landmarks);
    image_reid.Compute(face_images[0], &embedding);
    return RegistrationStatus::SUCCESS;
}