Initial setup for CLion
This commit is contained in:
418
engines/OpenVINOEngine/src/faceapp/action_detector.cpp
Normal file
418
engines/OpenVINOEngine/src/faceapp/action_detector.cpp
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
|
||||
#include "openvino/openvino.hpp"
|
||||
|
||||
#include "action_detector.hpp"
|
||||
|
||||
#define SSD_LOCATION_RECORD_SIZE 4
|
||||
#define SSD_PRIORBOX_RECORD_SIZE 4
|
||||
#define NUM_DETECTION_CLASSES 2
|
||||
#define POSITIVE_DETECTION_IDX 1
|
||||
|
||||
// Comparator placing higher-scored (score, payload) pairs first.
// Suitable for std::sort / std::stable_sort on SSD candidate lists.
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
    return pair2.first < pair1.first;
}
|
||||
|
||||
// Loads the person/action detection SSD model, attaches u8/NHWC preprocessing,
// compiles it for the configured device and precomputes the per-head anchor
// bookkeeping (m_head_ranges, m_glob_anchor_map, m_head_step_sizes, ...) that
// GetDetections() uses to decode the raw SSD outputs.
ActionDetection::ActionDetection(const ActionDetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "action detector";

    // Frames are fed as u8 HWC images; conversion to the model's layout / f32
    // is folded into the compiled graph by the PrePostProcessor below.
    ov::Layout desiredLayout = {"NHWC"};

    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);

    ov::OutputVector inputInfo = model->inputs();
    if (inputInfo.size() != 1) {
        throw std::runtime_error("Action Detection network should have only one input");
    }

    m_input_name = model->input().get_any_name();

    // Fall back to NCHW when the IR carries no layout information.
    m_modelLayout = ov::layout::get_layout(model->input());
    if (m_modelLayout.empty())
        m_modelLayout = {"NCHW"};

    m_network_input_size.height = model->input().get_shape()[ov::layout::height_idx(m_modelLayout)];
    m_network_input_size.width = model->input().get_shape()[ov::layout::width_idx(m_modelLayout)];

    // Distinguish the two supported model generations by probing for the
    // "new" localization output name; all decoding below branches on this.
    m_new_model = false;

    ov::OutputVector outputs = model->outputs();
    auto cmp = [&](const ov::Output<ov::Node>& output) { return output.get_any_name() == m_config.new_loc_blob_name; };
    auto it = std::find_if(outputs.begin(), outputs.end(), cmp);
    if (it != outputs.end())
        m_new_model = true;

    ov::preprocess::PrePostProcessor ppp(model);

    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(m_modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(m_modelLayout);

    model = ppp.build();

    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;

    ov::set_batch(model, m_config.m_max_batch_size);

    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_config.m_model_type);

    // Anchor bookkeeping: each detection head contributes a contiguous range
    // of candidate positions. m_head_ranges[i] is the first candidate index
    // of head i; m_head_ranges[num_heads] equals the total candidate count.
    const auto& head_anchors = m_new_model ? m_config.new_anchors : m_config.old_anchors;
    const int num_heads = head_anchors.size();

    m_head_ranges.resize(num_heads + 1);
    m_glob_anchor_map.resize(num_heads);
    m_head_step_sizes.resize(num_heads);

    m_head_ranges[0] = 0;
    int head_shift = 0;

    for (int head_id = 0; head_id < num_heads; ++head_id) {
        m_glob_anchor_map[head_id].resize(head_anchors[head_id]);
        // NOTE(review): deliberately assigned inside the anchor loop; if
        // head_anchors[head_id] were 0 the emplace_back below would read
        // indeterminate values — presumably every head has >= 1 anchor.
        // TODO confirm against the config defaults.
        int anchor_height, anchor_width;
        for (int anchor_id = 0; anchor_id < head_anchors[head_id]; ++anchor_id) {
            // Per-anchor action-confidence output names differ between the
            // two model generations.
            const auto glob_anchor_name = m_new_model ?
                m_config.new_action_conf_blob_name_prefix + std::to_string(head_id + 1) +
                m_config.new_action_conf_blob_name_suffix + std::to_string(anchor_id + 1) :
                m_config.old_action_conf_blob_name_prefix + std::to_string(anchor_id + 1);
            m_glob_anchor_names.push_back(glob_anchor_name);

            const auto anchor_dims = m_model.output(glob_anchor_name).get_shape();

            anchor_height = m_new_model ? anchor_dims[ov::layout::height_idx(m_modelLayout)] : anchor_dims[1];
            anchor_width = m_new_model ? anchor_dims[ov::layout::width_idx(m_modelLayout)] : anchor_dims[2];
            std::size_t num_action_classes = m_new_model ? anchor_dims[ov::layout::channels_idx(m_modelLayout)] : anchor_dims[3];
            if (num_action_classes != m_config.num_action_classes) {
                throw std::logic_error("The number of specified actions and the number of actions predicted by "
                                       "the Person/Action Detection Retail model must match");
            }

            const int anchor_size = anchor_height * anchor_width;
            head_shift += anchor_size;

            // New models store class scores plane-wise (stride = plane size);
            // old models store them interleaved (stride = 1).
            m_head_step_sizes[head_id] = m_new_model ? anchor_size : 1;
            m_glob_anchor_map[head_id][anchor_id] = m_num_glob_anchors++;
        }

        m_head_ranges[head_id + 1] = head_shift;
        m_head_blob_sizes.emplace_back(anchor_width, anchor_height);
    }

    m_num_candidates = head_shift;

    // With exactly two action classes the task degenerates to a binary one
    // and class 0 is skipped during the argmax in GetDetections().
    m_binary_task = m_config.num_action_classes == 2;
}
|
||||
|
||||
// Kicks off inference for the frame staged by enqueue(); does nothing when
// no frame is pending.
void ActionDetection::submitRequest() {
    if (m_enqueued_frames == 0)
        return;
    m_enqueued_frames = 0;

    BaseCnnDetection::submitRequest();
}
|
||||
|
||||
// Stages one frame for inference: lazily creates the infer request, records
// the frame size for later coordinate scaling, and resizes the image
// directly into the network's input tensor.
void ActionDetection::enqueue(const cv::Mat& frame) {
    if (!m_request)
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());

    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);

    auto input_tensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, input_tensor);

    m_enqueued_frames = 1;
}
|
||||
|
||||
// Wraps the raw output tensors of the finished request in cv::Mat headers
// (zero-copy views over tensor memory) and decodes them into detections.
DetectedActions ActionDetection::fetchResults() {
    const auto loc_blob_name = m_new_model ? m_config.new_loc_blob_name : m_config.old_loc_blob_name;
    const auto det_conf_blob_name = m_new_model ? m_config.new_det_conf_blob_name : m_config.old_det_conf_blob_name;

    // Bbox regression output.
    ov::Shape loc_out_shape = m_model.output(loc_blob_name).get_shape();
    const cv::Mat loc_out(loc_out_shape[0],
                          loc_out_shape[1],
                          CV_32F,
                          m_request->get_tensor(loc_blob_name).data<float>());

    // Person/background detection-confidence output.
    ov::Shape conf_out_shape = m_model.output(det_conf_blob_name).get_shape();
    const cv::Mat main_conf_out(conf_out_shape[0],
                                conf_out_shape[1],
                                CV_32F,
                                m_request->get_tensor(det_conf_blob_name).data<float>());

    // Per-anchor action-confidence outputs, in m_glob_anchor_names order
    // (matches m_glob_anchor_map built in the constructor).
    std::vector<cv::Mat> add_conf_out;
    for (int glob_anchor_id = 0; glob_anchor_id < m_num_glob_anchors; ++glob_anchor_id) {
        const auto& blob_name = m_glob_anchor_names[glob_anchor_id];
        ov::Shape shape = m_request->get_tensor(blob_name).get_shape();
        add_conf_out.emplace_back(shape[ov::layout::height_idx(m_modelLayout)],
                                  shape[ov::layout::width_idx(m_modelLayout)],
                                  CV_32F,
                                  m_request->get_tensor(blob_name).data());
    }

    // Parse detections
    DetectedActions result;
    if (m_new_model) {
        // New models generate prior boxes on the fly (GeneratePriorBox), so
        // an empty priorbox Mat is passed through.
        const cv::Mat priorbox_out;
        result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
                               cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
    } else {
        ov::Shape old_priorbox_shape = m_model.output(m_config.old_priorbox_blob_name).get_shape();
        const cv::Mat priorbox_out((int)old_priorbox_shape[0], (int)old_priorbox_shape[1],
                                   CV_32F,
                                   m_request->get_tensor(m_config.old_priorbox_blob_name).data());

        result = GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
                               cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
    }

    return result;
}
|
||||
|
||||
// Reads one 4-float SSD record [xmin, ymin, xmax, ymax] into a NormalizedBBox.
// When 'inverse' is set, the record's x/y components are swapped
// ([ymin, xmin, ymax, xmax]).
inline ActionDetection::NormalizedBBox
ActionDetection::ParseBBoxRecord(const float* data, bool inverse) const {
    const int x_idx = inverse ? 1 : 0;
    const int y_idx = inverse ? 0 : 1;

    NormalizedBBox bbox;
    bbox.xmin = data[x_idx];
    bbox.ymin = data[y_idx];
    bbox.xmax = data[x_idx + 2];
    bbox.ymax = data[y_idx + 2];
    return bbox;
}
|
||||
|
||||
// Reconstructs the SSD prior box for candidate position 'pos' of a detection
// head: an anchor of size 'anchor' (pixels) centered on feature-map cell
// (pos % width, pos / width) scaled by 'step' pixels per cell, with
// coordinates normalized by the network input resolution.
inline ActionDetection::NormalizedBBox
ActionDetection::GeneratePriorBox(int pos, int step, const cv::Size2f& anchor,
                                  const cv::Size& blob_size) const {
    const int row = pos / blob_size.width;
    const int col = pos % blob_size.width;

    // Cell center in network-input pixel coordinates.
    const float center_x = (col + 0.5f) * static_cast<float>(step);
    const float center_y = (row + 0.5f) * static_cast<float>(step);

    NormalizedBBox bbox;
    bbox.xmin = (center_x - 0.5f * anchor.width) / static_cast<float>(m_network_input_size.width);
    bbox.ymin = (center_y - 0.5f * anchor.height) / static_cast<float>(m_network_input_size.height);
    bbox.xmax = (center_x + 0.5f * anchor.width) / static_cast<float>(m_network_input_size.width);
    bbox.ymax = (center_y + 0.5f * anchor.height) / static_cast<float>(m_network_input_size.height);

    return bbox;
}
|
||||
|
||||
// Decodes an SSD-encoded bbox: applies the variance-scaled center offsets and
// log-space size deltas in 'encoded_bbox' to the prior box, then converts the
// result from normalized [0,1] coordinates to a pixel-space cv::Rect sized
// against 'frame_size'.
cv::Rect ActionDetection::ConvertToRect(
        const NormalizedBBox& prior_bbox, const NormalizedBBox& variances,
        const NormalizedBBox& encoded_bbox, const cv::Size& frame_size) const {
    // Convert prior bbox to center/size form
    const float prior_width = prior_bbox.xmax - prior_bbox.xmin;
    const float prior_height = prior_bbox.ymax - prior_bbox.ymin;
    const float prior_center_x = 0.5f * (prior_bbox.xmin + prior_bbox.xmax);
    const float prior_center_y = 0.5f * (prior_bbox.ymin + prior_bbox.ymax);

    // Decode bbox coordinates from the SSD format: offsets are relative to
    // the prior size, sizes are exponentiated deltas.
    const float decoded_bbox_center_x =
        variances.xmin * encoded_bbox.xmin * prior_width + prior_center_x;
    const float decoded_bbox_center_y =
        variances.ymin * encoded_bbox.ymin * prior_height + prior_center_y;
    const float decoded_bbox_width =
        static_cast<float>(exp(static_cast<float>(variances.xmax * encoded_bbox.xmax))) * prior_width;
    const float decoded_bbox_height =
        static_cast<float>(exp(static_cast<float>(variances.ymax * encoded_bbox.ymax))) * prior_height;

    // Create decoded bbox corners
    const float decoded_bbox_xmin = decoded_bbox_center_x - 0.5f * decoded_bbox_width;
    const float decoded_bbox_ymin = decoded_bbox_center_y - 0.5f * decoded_bbox_height;
    const float decoded_bbox_xmax = decoded_bbox_center_x + 0.5f * decoded_bbox_width;
    const float decoded_bbox_ymax = decoded_bbox_center_y + 0.5f * decoded_bbox_height;

    // Convert decoded bbox to a pixel-space CV_Rect
    return cv::Rect(static_cast<int>(decoded_bbox_xmin * frame_size.width),
                    static_cast<int>(decoded_bbox_ymin * frame_size.height),
                    static_cast<int>((decoded_bbox_xmax - decoded_bbox_xmin) * frame_size.width),
                    static_cast<int>((decoded_bbox_ymax - decoded_bbox_ymin) * frame_size.height));
}
|
||||
|
||||
// Decodes raw SSD outputs into detections: for every candidate position it
// thresholds the person confidence, soft-maxes the per-anchor action scores
// to pick an action label, reconstructs (new model) or reads (old model) the
// prior box, decodes the regressed bbox, and finally prunes overlapping
// candidates with Soft-NMS.
DetectedActions ActionDetection::GetDetections(const cv::Mat& loc, const cv::Mat& main_conf,
                                               const cv::Mat& priorboxes, const std::vector<cv::Mat>& add_conf,
                                               const cv::Size& frame_size) const {
    // Prepare input data buffers (raw views over the cv::Mat wrappers).
    const float* loc_data = reinterpret_cast<float*>(loc.data);
    const float* det_conf_data = reinterpret_cast<float*>(main_conf.data);
    // New models carry no priorbox tensor — boxes are generated on the fly.
    const float* prior_data = m_new_model ? NULL : reinterpret_cast<float*>(priorboxes.data);

    const int total_num_anchors = add_conf.size();
    std::vector<float*> action_conf_data(total_num_anchors);
    for (int i = 0; i < total_num_anchors; ++i) {
        action_conf_data[i] = reinterpret_cast<float*>(add_conf[i].data);
    }

    // Variable to store all detection candidates
    DetectedActions valid_detections;

    // Iterate over all candidate bboxes
    for (int p = 0; p < m_num_candidates; ++p) {
        // Parse detection confidence from the SSD Detection output
        // (two classes per candidate; index 1 is the "person" class).
        const float detection_conf =
            det_conf_data[p * NUM_DETECTION_CLASSES + POSITIVE_DETECTION_IDX];

        // Skip low-confidence detections
        if (detection_conf < m_config.detection_confidence_threshold) {
            continue;
        }

        // Estimate the action head ID by locating p in the per-head ranges
        // precomputed in the constructor.
        int head_id = 0;
        while (p >= m_head_ranges[head_id + 1]) {
            ++head_id;
        }
        const int head_p = p - m_head_ranges[head_id];

        // Estimate the action anchor ID
        const int head_num_anchors =
            m_new_model ? m_config.new_anchors[head_id] : m_config.old_anchors[head_id];
        const int anchor_id = head_p % head_num_anchors;

        // Estimate the action label: softmax over the per-class scores of the
        // matching anchor blob. The index stride differs between model
        // generations (plane-wise vs interleaved storage).
        const int glob_anchor_id = m_glob_anchor_map[head_id][anchor_id];
        const float* anchor_conf_data = action_conf_data[glob_anchor_id];
        const int action_conf_idx_shift = m_new_model ?
            head_p / head_num_anchors :
            head_p / head_num_anchors * m_config.num_action_classes;
        const int action_conf_step = m_head_step_sizes[head_id];
        const float scale = m_new_model ? m_config.new_action_scale : m_config.old_action_scale;
        int action_label = -1;
        float action_max_exp_value = 0.f;
        float action_sum_exp_values = 0.f;
        for (size_t c = 0; c < m_config.num_action_classes; ++c) {
            float action_exp_value =
                std::exp(scale * anchor_conf_data[action_conf_idx_shift + c * action_conf_step]);
            action_sum_exp_values += action_exp_value;
            // In the binary task class 0 is excluded from the argmax (it still
            // contributes to the softmax denominator).
            if (action_exp_value > action_max_exp_value && ((c > 0 && m_binary_task) || !m_binary_task)) {
                action_max_exp_value = action_exp_value;
                action_label = c;
            }
        }

        if (std::fabs(action_sum_exp_values) < std::numeric_limits<float>::epsilon()) {
            throw std::logic_error("action_sum_exp_values can't be equal to 0");
        }
        // Estimate the action confidence (softmax probability of the argmax).
        float action_conf = action_max_exp_value / action_sum_exp_values;

        // Low-confidence actions fall back to the configured default action.
        if (action_label < 0 || action_conf < m_config.action_confidence_threshold) {
            action_label = m_config.default_action_id;
            action_conf = 0.f;
        }

        // Parse bbox from the SSD Detection output
        const auto priorbox = m_new_model ?
            GeneratePriorBox(head_p / head_num_anchors,
                             m_config.new_det_heads[head_id].step,
                             m_config.new_det_heads[head_id].anchors[anchor_id],
                             m_head_blob_sizes[head_id]) :
            ParseBBoxRecord(prior_data + p * SSD_PRIORBOX_RECORD_SIZE, false);
        // Old models append variance records after the priorbox records.
        const auto variance =
            ParseBBoxRecord(m_new_model ?
                            m_config.variances :
                            prior_data + (m_num_candidates + p) * SSD_PRIORBOX_RECORD_SIZE, false);
        // New models store the location record with x/y swapped.
        const auto encoded_bbox =
            ParseBBoxRecord(loc_data + p * SSD_LOCATION_RECORD_SIZE, m_new_model);

        const auto det_rect = ConvertToRect(priorbox, variance, encoded_bbox, frame_size);

        // Store detected action
        valid_detections.emplace_back(det_rect, action_label, detection_conf, action_conf);
    }

    // Merge most overlapped detections
    std::vector<int> out_det_indices;
    SoftNonMaxSuppression(valid_detections, m_config.nms_sigma, m_config.keep_top_k,
                          m_config.detection_confidence_threshold, &out_det_indices);

    DetectedActions detections;
    for (size_t i = 0; i < out_det_indices.size(); ++i) {
        detections.emplace_back(valid_detections[out_det_indices[i]]);
    }
    return detections;
}
|
||||
|
||||
// Greedy Soft-NMS with Gaussian rescoring: repeatedly selects the
// highest-scoring remaining bbox among the top_k candidates and decays the
// scores of the rest by exp(-IoU^2 / sigma). Indices of the kept detections
// (into 'detections') are appended to 'out_indices' in selection order;
// selection stops when the best remaining score drops below min_det_conf.
void ActionDetection::SoftNonMaxSuppression(const DetectedActions& detections,
        const float sigma, size_t top_k, const float min_det_conf,
        std::vector<int>* out_indices) const {
    // Store input bbox scores
    std::vector<float> scores(detections.size());
    for (size_t i = 0; i < detections.size(); ++i) {
        scores[i] = detections[i].detection_conf;
    }

    top_k = std::min(top_k, scores.size());

    // Select top-k score indices: after nth_element, score_idx[0..top_k) hold
    // the indices of the top_k scores (in unspecified order).
    std::vector<size_t> score_idx(scores.size());
    std::iota(score_idx.begin(), score_idx.end(), 0);
    std::nth_element(score_idx.begin(), score_idx.begin() + top_k, score_idx.end(),
                     [&scores](size_t i1, size_t i2) {return scores[i1] > scores[i2];});

    // Extract top-k score values; these are mutated during suppression while
    // 'scores' keeps the originals.
    std::vector<float> top_scores(top_k);
    for (size_t i = 0; i < top_scores.size(); ++i) {
        top_scores[i] = scores[score_idx[i]];
    }

    // Carry out Soft Non-Maximum Suppression algorithm
    out_indices->clear();
    for (size_t step = 0; step < top_scores.size(); ++step) {
        auto best_score_itr = std::max_element(top_scores.begin(), top_scores.end());
        if (*best_score_itr < min_det_conf) {
            break;
        }

        // Add current bbox to output list; zero its score so it is never
        // selected again.
        const size_t local_anchor_idx = std::distance(top_scores.begin(), best_score_itr);
        const int anchor_idx = score_idx[local_anchor_idx];
        out_indices->emplace_back(anchor_idx);
        *best_score_itr = 0.f;

        // Update top_scores of the rest bboxes
        for (size_t local_reference_idx = 0; local_reference_idx < top_scores.size(); ++local_reference_idx) {
            // Skip updating step for the low-confidence bbox
            if (top_scores[local_reference_idx] < min_det_conf) {
                continue;
            }

            // Calculate the Intersection over Union metric between two bboxes
            const size_t reference_idx = score_idx[local_reference_idx];
            const auto& rect1 = detections[anchor_idx].rect;
            const auto& rect2 = detections[reference_idx].rect;
            const auto intersection = rect1 & rect2;
            float overlap = 0.f;
            if (intersection.width > 0 && intersection.height > 0) {
                const int intersection_area = intersection.area();
                overlap = static_cast<float>(intersection_area) / static_cast<float>(rect1.area() + rect2.area() - intersection_area);
            }

            // Scale bbox score using the exponential (Gaussian) rule
            top_scores[local_reference_idx] *= std::exp(-overlap * overlap / sigma);
        }
    }
}
|
||||
72
engines/OpenVINOEngine/src/faceapp/align_transform.cpp
Normal file
72
engines/OpenVINOEngine/src/faceapp/align_transform.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "face_reid.hpp"
|
||||
|
||||
// Canonical aligned-face template resolution (96x112) used to express the
// reference landmark positions.
static const float h = 112.;
static const float w = 96.;
// Reference 5-point landmark positions (presumably eyes, nose tip, mouth
// corners — TODO confirm ordering against the landmark model), normalized to
// the unit square [0,1]x[0,1] as interleaved (x, y) pairs.
static const float ref_landmarks_normalized[] = {
    30.2946f / w, 51.6963f / h, 65.5318f / w, 51.5014f / h, 48.0252f / w,
    71.7366f / h, 33.5493f / w, 92.3655f / h, 62.7299f / w, 92.2041f / h};
|
||||
|
||||
// Computes the 2x3 affine similarity transform mapping 'src' points onto
// 'dst' points (Umeyama-style): demean both sets, normalize by the scalar
// stddev, take the rotation from the SVD of the cross-covariance, then
// recombine scale and translation.
// NOTE: 'src' and 'dst' are modified in place (demeaned and rescaled).
cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst) {
    // Demean source points (column-wise mean).
    cv::Mat col_mean_src;
    reduce(*src, col_mean_src, 0, cv::REDUCE_AVG);
    for (int i = 0; i < src->rows; i++) {
        src->row(i) -= col_mean_src;
    }

    // Demean destination points.
    cv::Mat col_mean_dst;
    reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG);
    for (int i = 0; i < dst->rows; i++) {
        dst->row(i) -= col_mean_dst;
    }

    // Normalize both sets by their stddev, clamped away from zero to avoid
    // division by ~0 for degenerate (collapsed) point sets.
    cv::Scalar mean, dev_src, dev_dst;
    cv::meanStdDev(*src, mean, dev_src);
    dev_src(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_src(0));
    *src /= dev_src(0);
    cv::meanStdDev(*dst, mean, dev_dst);
    dev_dst(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_dst(0));
    *dst /= dev_dst(0);

    // Optimal rotation between the normalized sets via SVD of src^T * dst.
    // (Local 'w' here shadows the file-scope template-width constant.)
    cv::Mat w, u, vt;
    cv::SVD::compute((*src).t() * (*dst), w, u, vt);
    cv::Mat r = (u * vt).t();
    cv::Mat m(2, 3, CV_32F);
    // Linear part: rotation times the dst/src scale ratio.
    m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0));
    // Translation: aligns the (pre-demeaning) centroids.
    m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t());
    return m;
}
|
||||
|
||||
// Warps each face crop so that its detected 5-point landmarks line up with
// the canonical reference landmark positions (ref_landmarks_normalized).
// Both vectors are modified in place: landmarks are converted from
// normalized [0,1] to pixel coordinates, images are warped in-place.
void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
    if (landmarks_vec->size() == 0) {
        return;
    }
    CV_Assert(face_images->size() == landmarks_vec->size());
    cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F);

    for (size_t j = 0; j < face_images->size(); j++) {
        for (int i = 0; i < ref_landmarks.rows; i++) {
            // Reference points scaled to this crop's resolution.
            ref_landmarks.at<float>(i, 0) =
                ref_landmarks_normalized[2 * i] * face_images->at(j).cols;
            ref_landmarks.at<float>(i, 1) =
                ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows;
            // Detected landmarks arrive normalized; convert to pixels.
            landmarks_vec->at(j).at<float>(i, 0) *= face_images->at(j).cols;
            landmarks_vec->at(j).at<float>(i, 1) *= face_images->at(j).rows;
        }
        cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j));
        // 'm' maps reference -> detected, hence WARP_INVERSE_MAP to bring the
        // detected face onto the reference geometry.
        cv::warpAffine(face_images->at(j), face_images->at(j), m,
                       face_images->at(j).size(), cv::WARP_INVERSE_MAP);
    }
}
|
||||
111
engines/OpenVINOEngine/src/faceapp/cnn.cpp
Normal file
111
engines/OpenVINOEngine/src/faceapp/cnn.cpp
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#include "openvino/openvino.hpp"
|
||||
#include "cnn.hpp"
|
||||
|
||||
// Stores the configuration only; the model is loaded later via Load().
CnnDLSDKBase::CnnDLSDKBase(const Config& config) : m_config(config) {}
|
||||
|
||||
// Reads the model, attaches u8/NHWC preprocessing, compiles it with a
// dynamic batch in [1, max_batch_size], and creates a single reusable infer
// request whose input tensor is pre-shaped to the maximum batch.
void CnnDLSDKBase::Load() {
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    ov::OutputVector inputs = model->inputs();
    // NOTE(review): multi-input models are silently rejected here (the object
    // is left unloaded) instead of throwing like the detector classes do —
    // confirm callers tolerate an unloaded instance.
    if (inputs.size() != 1) {
        return;
    }
    m_desired_layout = {"NHWC"};
    // Fall back to NCHW when the IR carries no layout information.
    ov::Layout model_layout = ov::layout::get_layout(model->input());
    if (model_layout.empty()) {
        model_layout = {"NCHW"};
    }
    // Remember all output tensor names for later retrieval in InferBatch().
    ov::OutputVector outputs = model->outputs();
    for (auto& item : outputs) {
        const std::string name = item.get_any_name();
        m_output_tensors_names.push_back(name);
    }
    // Fold u8->f32 and layout conversion into the compiled graph.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(m_desired_layout);
    ppp.input().preprocess()
        .convert_layout(model_layout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(model_layout);

    model = ppp.build();
    // Allocate the input tensor for the maximum batch; InferBatch() slices it
    // per call to the actual number of images.
    m_modelShape = model->input().get_shape();
    m_modelShape[ov::layout::batch_idx(m_desired_layout)] = m_config.m_max_batch_size;
    ov::set_batch(model, {1, m_config.m_max_batch_size});
    m_compiled_model = m_config.m_core.compile_model(model, m_config.m_deviceName, { ov::hint::num_requests(1) });
    m_infer_request = m_compiled_model.create_infer_request();
    m_in_tensor = m_infer_request.get_input_tensor();
    m_in_tensor.set_shape(m_modelShape);
}
|
||||
|
||||
// Runs one inference over a batch of images: each frame is resized directly
// into its slice of the preallocated input tensor, the tensor is narrowed to
// the actual batch size, and the named output tensors are handed to
// 'fetch_results' together with that batch size.
// NOTE(review): assumes frames.size() <= m_config.m_max_batch_size — the ROI
// slicing below would exceed the tensor otherwise; confirm callers enforce it.
void CnnDLSDKBase::InferBatch(const std::vector<cv::Mat>& frames,const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& fetch_results)
{
    size_t num_imgs = frames.size();
    size_t h = m_modelShape[ov::layout::height_idx(m_desired_layout)];
    size_t w = m_modelShape[ov::layout::width_idx(m_desired_layout)];
    size_t c = m_modelShape[ov::layout::channels_idx(m_desired_layout)];
    for (size_t i = 0; i < num_imgs; i++) {
        // ROI tensor: a no-copy view over batch element i.
        auto slice = ov::Tensor{ m_in_tensor, {i, 0, 0, 0}, {i + 1, h, w, c} };
        resize2tensor(frames[i], slice);
    }
    // Narrow the input to the actual batch size before inferring.
    m_infer_request.set_input_tensor(ov::Tensor{ m_in_tensor, { 0, 0, 0, 0 }, {num_imgs, h, w, c} });
    m_infer_request.infer();

    std::map<std::string, ov::Tensor> output_tensors;
    for (const auto& output_tensor_name : m_output_tensors_names) {
        output_tensors[output_tensor_name] = m_infer_request.get_tensor(output_tensor_name);
    }

    fetch_results(output_tensors, num_imgs);
}
|
||||
|
||||
// Eagerly loads and compiles the embedding model on construction.
VectorCNN::VectorCNN(const Config& config) : CnnDLSDKBase(config) {
    Load();
}
|
||||
|
||||
// Single-image convenience overload: delegates to the batched Compute()
// with a one-element batch and unwraps the single result.
void VectorCNN::Compute(const cv::Mat& frame, cv::Mat* vector, cv::Size outp_shape) {
    std::vector<cv::Mat> results;
    Compute(std::vector<cv::Mat>{frame}, &results, outp_shape);
    *vector = results.front();
}
|
||||
|
||||
// Computes embedding vectors for a batch of images. For each output tensor,
// each batch row (size[1] floats) is copied out into its own cv::Mat; when
// 'outp_shape' is non-empty the vector is reshaped to that height first.
void VectorCNN::Compute(const std::vector<cv::Mat>& images, std::vector<cv::Mat>* vectors,
                        cv::Size outp_shape) {
    if (images.empty()) {
        return;
    }
    vectors->clear();
    auto results_fetcher = [vectors, outp_shape](const std::map<std::string, ov::Tensor>& outputs, size_t batch_size) {
        for (auto& output : outputs) {
            ov::Tensor tensor = output.second;
            ov::Shape shape = tensor.get_shape();
            // Convert ov::Shape to the int sizes cv::Mat expects.
            std::vector<int> tensor_sizes(shape.size(), 0);
            for (size_t i = 0; i < tensor_sizes.size(); ++i) {
                tensor_sizes[i] = shape[i];
            }
            cv::Mat out_tensor(tensor_sizes, CV_32F, tensor.data<float>());
            for (size_t b = 0; b < batch_size; b++) {
                // Zero-copy wrapper over row b; copyTo below materializes it.
                cv::Mat tensor_wrapper(out_tensor.size[1], 1, CV_32F,
                    reinterpret_cast<void*>((out_tensor.ptr<float>(0) + b * out_tensor.size[1])));
                vectors->emplace_back();
                if (!isSizeEmpty(outp_shape))
                    tensor_wrapper = tensor_wrapper.reshape(1, outp_shape.height);
                tensor_wrapper.copyTo(vectors->back());
            }
        }
    };
    InferBatch(images, results_fetcher);
}
|
||||
// Maximum number of images a single InferBatch() call can process.
int VectorCNN::maxBatchSize() const {
    return m_config.m_max_batch_size;
}
|
||||
176
engines/OpenVINOEngine/src/faceapp/detector.cpp
Normal file
176
engines/OpenVINOEngine/src/faceapp/detector.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
|
||||
#include "openvino/openvino.hpp"
|
||||
|
||||
#include "detector.hpp"
|
||||
|
||||
#define SSD_EMPTY_DETECTIONS_INDICATOR -1.0
|
||||
|
||||
using namespace detection;
|
||||
|
||||
namespace {
|
||||
// Clamps a rect to lie inside a frame of the given size, returning an empty
// (zero-area) rect when the input lies fully outside.
cv::Rect TruncateToValidRect(const cv::Rect& rect, const cv::Size& size) {
    cv::Point top_left = rect.tl();
    cv::Point bottom_right = rect.br();

    // Clamp each corner into the frame (top-left stays strictly inside).
    top_left.x = std::max(0, std::min(size.width - 1, top_left.x));
    top_left.y = std::max(0, std::min(size.height - 1, top_left.y));
    bottom_right.x = std::max(0, std::min(size.width, bottom_right.x));
    bottom_right.y = std::max(0, std::min(size.height, bottom_right.y));

    // Width/height never go negative.
    const int width = std::max(0, bottom_right.x - top_left.x);
    const int height = std::max(0, bottom_right.y - top_left.y);
    return cv::Rect(top_left.x, top_left.y, width, height);
}
|
||||
|
||||
// Scales a rect about its center by (coeff_x, coeff_y), rounding the result
// outward (floor for the top-left, ceil for the bottom-right).
cv::Rect IncreaseRect(const cv::Rect& r, float coeff_x, float coeff_y) {
    const cv::Point2f top_left = r.tl();
    const cv::Point2f bottom_right = r.br();
    const cv::Point2f center = (top_left * 0.5f) + (bottom_right * 0.5f);

    const cv::Point2f half_extent = center - top_left;
    const cv::Point2f scaled_half{half_extent.x * coeff_x, half_extent.y * coeff_y};

    const cv::Point2f grown_tl = center - scaled_half;
    const cv::Point2f grown_br = center + scaled_half;

    const cv::Point tl_int{static_cast<int>(std::floor(grown_tl.x)), static_cast<int>(std::floor(grown_tl.y))};
    const cv::Point br_int{static_cast<int>(std::ceil(grown_br.x)), static_cast<int>(std::ceil(grown_br.y))};

    return cv::Rect(tl_int, br_int);
}
|
||||
} // namespace
|
||||
|
||||
// Loads the face detection SSD model, validates its single-input /
// single-output topology and output rank, reshapes the input to the
// configured resolution, attaches u8/NHWC preprocessing and compiles the
// model for the target device.
// Throws std::runtime_error when the model does not match the expected
// SSD topology.
FaceDetection::FaceDetection(const DetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "Face Detection";

    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);

    // Input frames are fed as u8 HWC images; conversion happens in-graph.
    ov::Layout desiredLayout = {"NHWC"};

    ov::OutputVector inputs_info = model->inputs();
    if (inputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one input");
    }

    ov::OutputVector outputs_info = model->outputs();
    if (outputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one output");
    }

    ov::Output<ov::Node> input = model->input();
    m_input_name = input.get_any_name();

    // Fall back to NCHW when the IR carries no layout information.
    ov::Layout modelLayout = ov::layout::get_layout(input);
    if (modelLayout.empty())
        modelLayout = {"NCHW"};

    // Target input resolution comes from the config.
    ov::Shape shape = input.get_shape();
    shape[ov::layout::height_idx(modelLayout)] = m_config.input_h;
    shape[ov::layout::width_idx(modelLayout)] = m_config.input_w;

    ov::Output<ov::Node> output = model->output();
    m_output_name = output.get_any_name();

    ov::Shape outputDims = output.get_shape();
    // BUGFIX: validate the output rank BEFORE indexing dimensions 2 and 3.
    // The original code read outputDims[2]/[3] first, which is an
    // out-of-bounds access when the output has fewer than 4 dimensions.
    if (outputDims.size() != 4) {
        throw std::runtime_error("Face Detection network output should have 4 dimensions, but had " +
                                 std::to_string(outputDims.size()));
    }
    m_max_detections_count = outputDims[2];
    m_object_size = outputDims[3];
    if (m_object_size != 7) {
        throw std::runtime_error("Face Detection network output layer should have 7 as a last dimension");
    }

    std::map<std::string, ov::PartialShape> input_shapes;
    input_shapes[input.get_any_name()] = shape;
    model->reshape(input_shapes);

    ov::preprocess::PrePostProcessor ppp(model);

    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(modelLayout);

    model = ppp.build();

    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;

    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_detectorName);
}
|
||||
|
||||
// Submits the previously enqueued frame for inference; a no-op when no
// frame is pending.
void FaceDetection::submitRequest() {
    if (m_enqueued_frames == 0)
        return;
    m_enqueued_frames = 0;

    BaseCnnDetection::submitRequest();
}
|
||||
|
||||
// Stages one frame for inference: lazily creates the infer request, records
// the frame size for converting normalized detections back to pixels, and
// resizes the image directly into the network input tensor.
void FaceDetection::enqueue(const cv::Mat& frame) {
    if (!m_request)
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());

    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);

    auto inputTensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, inputTensor);

    m_enqueued_frames = 1;
}
|
||||
|
||||
// Parses the SSD output (records of 7 floats:
// [image_id, label, conf, x0, y0, x1, y1] with normalized coordinates) into
// pixel-space detections. Each box is enlarged by the configured scale
// factors, clipped to the frame, and kept only if it passes the confidence
// threshold and has positive area.
DetectedObjects FaceDetection::fetchResults() {
    DetectedObjects results;
    const float* data = m_request->get_tensor(m_output_name).data<float>();

    for (int det_id = 0; det_id < m_max_detections_count; ++det_id) {
        const int start_pos = det_id * m_object_size;

        // image_id == -1 marks the end of valid detections.
        const float batchID = data[start_pos];
        if (batchID == SSD_EMPTY_DETECTIONS_INDICATOR) {
            break;
        }

        // Clamp to [0,1] before scaling to frame pixels.
        const float score = std::min(std::max(0.0f, data[start_pos + 2]), 1.0f);
        const float x0 = std::min(std::max(0.0f, data[start_pos + 3]), 1.0f) * m_width;
        const float y0 = std::min(std::max(0.0f, data[start_pos + 4]), 1.0f) * m_height;
        const float x1 = std::min(std::max(0.0f, data[start_pos + 5]), 1.0f) * m_width;
        const float y1 = std::min(std::max(0.0f, data[start_pos + 6]), 1.0f) * m_height;

        DetectedObject object;
        object.confidence = score;
        object.rect = cv::Rect(cv::Point(static_cast<int>(round(static_cast<double>(x0))),
                                         static_cast<int>(round(static_cast<double>(y0)))),
                               cv::Point(static_cast<int>(round(static_cast<double>(x1))),
                                         static_cast<int>(round(static_cast<double>(y1)))));

        // Enlarge the detected box and clip it back into the frame.
        object.rect = TruncateToValidRect(IncreaseRect(object.rect,
                                                       m_config.increase_scale_x,
                                                       m_config.increase_scale_y),
                                          cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));

        if (object.confidence > m_config.confidence_threshold && object.rect.area() > 0) {
            results.emplace_back(object);
        }
    }

    return results;
}
|
||||
166
engines/OpenVINOEngine/src/faceapp/logger.cpp
Normal file
166
engines/OpenVINOEngine/src/faceapp/logger.cpp
Normal file
@@ -0,0 +1,166 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
#include "logger.hpp"
|
||||
|
||||
namespace {

// Placeholder used whenever an index has no known identity/action.
const char unknown_label[] = "Unknown";

// Returns labels[idx], or the "Unknown" placeholder for a negative index.
std::string GetUnknownOrLabel(const std::vector<std::string>& labels, int idx) {
    if (idx < 0) {
        return unknown_label;
    }
    return labels.at(idx);
}

// Builds "<file name>@<zero-padded 6-digit frame index>" from a video path.
std::string FrameIdxToString(const std::string& path, int frame_idx) {
    std::ostringstream oss;
    oss << std::setw(6) << std::setfill('0') << frame_idx;
    const std::string file_name = path.substr(path.rfind("/") + 1);
    return file_name + "@" + oss.str();
}

}  // anonymous namespace
|
||||
|
||||
// Constructs the logger.
// @param stream            destination for human-readable per-frame logs
// @param enabled           when false, per-frame text logging is suppressed
// @param act_stat_log_file CSV file receiving per-frame action statistics
// @param act_det_log_file  optional FileStorage file for raw detections
DetectionsLogger::DetectionsLogger(slog::LogStream& stream, bool enabled,
                                   const std::string& act_stat_log_file,
                                   const std::string& act_det_log_file) :
        m_log_stream(stream) {
    m_write_logs = enabled;

    // Only open streams for files that were actually requested; opening an
    // empty path would just leave the stream in a failed state.
    if (!act_stat_log_file.empty()) {
        m_act_stat_log_stream.open(act_stat_log_file, std::fstream::out);
    }

    if (!act_det_log_file.empty()) {
        m_act_det_log_stream.open(act_det_log_file, cv::FileStorage::WRITE);
        // Open the "data" sequence; it is closed in the destructor.
        m_act_det_log_stream << "data" << "[";
    }
}
|
||||
|
||||
void DetectionsLogger::CreateNextFrameRecord(
|
||||
const std::string& path, const int frame_idx, const size_t width, const size_t height) {
|
||||
if (m_write_logs)
|
||||
m_log_stream << "Frame_name: " << path << "@" << frame_idx << " width: "
|
||||
<< width << " height: " << height << slog::endl;
|
||||
}
|
||||
|
||||
// Logs one face detection line; the action is appended only when known.
void DetectionsLogger::AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: face. Box: " << rect << " id: " << id;
    if (!action.empty()) {
        m_log_stream << " action: " << action;
    }
    m_log_stream << slog::endl;
}
|
||||
|
||||
// Logs one person detection line; the identity is appended only when known.
void DetectionsLogger::AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: person. Box: " << rect << " action: " << action;
    if (!id.empty()) {
        m_log_stream << " id: " << id;
    }
    m_log_stream << slog::endl;
}
|
||||
|
||||
void DetectionsLogger::AddDetectionToFrame(const TrackedObject& object, const int frame_idx) {
|
||||
if (m_act_det_log_stream.isOpened()) {
|
||||
m_act_det_log_stream << "{" << "frame_id" << frame_idx
|
||||
<< "det_conf" << object.confidence
|
||||
<< "label" << object.label
|
||||
<< "rect" << object.rect << "}";
|
||||
}
|
||||
}
|
||||
|
||||
void DetectionsLogger::FinalizeFrameRecord() {
|
||||
if (m_write_logs) {
|
||||
m_log_stream << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void DetectionsLogger::DumpDetections(const std::string& video_path,
|
||||
const cv::Size frame_size,
|
||||
const size_t num_frames,
|
||||
const std::vector<Track>& face_tracks,
|
||||
const std::map<int, int>& track_id_to_label_faces,
|
||||
const std::vector<std::string>& action_idx_to_label,
|
||||
const std::vector<std::string>& person_id_to_label,
|
||||
const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps) {
|
||||
std::map<int, std::vector<const TrackedObject*>> frame_idx_to_face_track_objs;
|
||||
|
||||
for (const auto& tr : face_tracks) {
|
||||
for (const auto& obj : tr.objects) {
|
||||
frame_idx_to_face_track_objs[obj.frame_idx].emplace_back(&obj);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> face_label_to_action;
|
||||
for (const auto& label : person_id_to_label) {
|
||||
face_label_to_action[label] = unknown_label;
|
||||
}
|
||||
m_act_stat_log_stream << "frame_idx";
|
||||
for (const auto& label : person_id_to_label) {
|
||||
m_act_stat_log_stream << "," << label;
|
||||
}
|
||||
m_act_stat_log_stream << std::endl;
|
||||
|
||||
for (size_t i = 0; i < num_frames; i++) {
|
||||
CreateNextFrameRecord(video_path, i, frame_size.width, frame_size.height);
|
||||
const auto& frame_face_obj_id_to_action = frame_face_obj_id_to_action_maps.at(i);
|
||||
for (auto& kv : face_label_to_action) {
|
||||
kv.second = unknown_label;
|
||||
}
|
||||
|
||||
for (const auto& p_obj : frame_idx_to_face_track_objs[i]) {
|
||||
const auto& obj = *p_obj;
|
||||
std::string action_label = unknown_label;
|
||||
if (frame_face_obj_id_to_action.count(obj.object_id) > 0) {
|
||||
action_label = GetUnknownOrLabel(action_idx_to_label, frame_face_obj_id_to_action.at(obj.object_id));
|
||||
}
|
||||
std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj.object_id));
|
||||
face_label_to_action[face_label] = action_label;
|
||||
AddFaceToFrame(obj.rect, face_label, action_label);
|
||||
}
|
||||
|
||||
m_act_stat_log_stream << FrameIdxToString(video_path, i);
|
||||
for (const auto& label : person_id_to_label) {
|
||||
m_act_stat_log_stream << "," << face_label_to_action[label];
|
||||
}
|
||||
m_act_stat_log_stream << std::endl;
|
||||
|
||||
FinalizeFrameRecord();
|
||||
}
|
||||
}
|
||||
|
||||
void DetectionsLogger::DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
|
||||
const std::vector<std::string>& action_idx_to_label,
|
||||
const std::map<int, int>& track_id_to_label_faces,
|
||||
const std::vector<std::string>& person_id_to_label) {
|
||||
for (const auto& tup : obj_id_to_events) {
|
||||
const int obj_id = tup.first;
|
||||
if (track_id_to_label_faces.count(obj_id) > 0) {
|
||||
const auto& events = tup.second;
|
||||
|
||||
std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj_id));
|
||||
m_log_stream << "Person: " << face_label << slog::endl;
|
||||
|
||||
for (const auto& event : events) {
|
||||
std::string action_label = GetUnknownOrLabel(action_idx_to_label, event.action);
|
||||
m_log_stream << " - " << action_label
|
||||
<< ": from " << event.begin_frame_id
|
||||
<< " to " << event.end_frame_id
|
||||
<< " frames" << slog::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Closes the "data" sequence in the detections FileStorage, if it was opened.
DetectionsLogger::~DetectionsLogger() {
    if (m_act_det_log_stream.isOpened())
        m_act_det_log_stream << "]";
}
|
||||
186
engines/OpenVINOEngine/src/faceapp/reid_gallery.cpp
Normal file
186
engines/OpenVINOEngine/src/faceapp/reid_gallery.cpp
Normal file
@@ -0,0 +1,186 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "face_reid.hpp"
#include "tracker/tracker.hpp"
|
||||
|
||||
namespace {
|
||||
float ComputeReidDistance(const cv::Mat& descr1, const cv::Mat& descr2) {
|
||||
float xy = static_cast<float>(descr1.dot(descr2));
|
||||
float xx = static_cast<float>(descr1.dot(descr1));
|
||||
float yy = static_cast<float>(descr2.dot(descr2));
|
||||
float norm = sqrt(xx * yy) + 1e-6f;
|
||||
return 1.0f - xy / norm;
|
||||
}
|
||||
|
||||
bool file_exists(const std::string& name) {
|
||||
std::ifstream f(name.c_str());
|
||||
return f.good();
|
||||
}
|
||||
|
||||
inline char separator() {
|
||||
#ifdef _WIN32
|
||||
return '\\';
|
||||
#else
|
||||
return '/';
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string folder_name(const std::string& path) {
|
||||
size_t found_pos;
|
||||
found_pos = path.find_last_of(separator());
|
||||
if (found_pos != std::string::npos)
|
||||
return path.substr(0, found_pos);
|
||||
return std::string(".") + separator();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Label and id returned for faces that match no gallery identity.
const char EmbeddingsGallery::unknown_label[] = "Unknown";
const int EmbeddingsGallery::unknown_id = TrackedObject::UNKNOWN_LABEL_IDX;
|
||||
|
||||
// Builds the gallery of known identities from a FileStorage list whose root
// maps person labels to image paths. Each image is optionally cropped to the
// detected face, checked against the minimal size, and converted into a
// re-identification embedding; successful registrations become identities.
//
// @param ids_list         path to the gallery list file; empty disables the gallery
// @param threshold        re-id distance threshold used later for matching
// @param min_size_fr      minimal face size (pixels) accepted for registration
// @param crop_gallery     when true, run the face detector to crop each image
// @param detector_config  configuration for the gallery face detector
// @param landmarks_det    facial-landmarks regression network
// @param image_reid       face re-identification network
// @param use_greedy_matcher  use greedy matching instead of Kuhn-Munkres
EmbeddingsGallery::EmbeddingsGallery(const std::string& ids_list,
                                     double threshold, int min_size_fr,
                                     bool crop_gallery, const detection::DetectorConfig& detector_config,
                                     VectorCNN& landmarks_det,
                                     VectorCNN& image_reid,
                                     bool use_greedy_matcher) :
        reid_threshold(threshold), use_greedy_matcher(use_greedy_matcher) {
    if (ids_list.empty()) {
        return;
    }

    detection::FaceDetection detector(detector_config);

    cv::FileStorage fs(ids_list, cv::FileStorage::Mode::READ);
    cv::FileNode fn = fs.root();
    int id = 0;
    for (cv::FileNodeIterator fit = fn.begin(); fit != fn.end(); ++fit) {
        cv::FileNode item = *fit;
        std::string label = item.name();
        std::vector<cv::Mat> embeddings;

        // Please, note that the case when there are more than one image in gallery
        // for a person might not work properly with the current implementation
        // of the demo.
        // Remove this assert by your own risk.
        CV_Assert(item.size() == 1);

        for (size_t i = 0; i < item.size(); i++) {
            // Image paths may be absolute or relative to the list's folder.
            std::string path;
            if (file_exists(item[i].string())) {
                path = item[i].string();
            } else {
                path = folder_name(ids_list) + separator() + item[i].string();
            }

            cv::Mat image = cv::imread(path);
            CV_Assert(!image.empty());
            cv::Mat emb;
            RegistrationStatus status = RegisterIdentity(label, image, min_size_fr, crop_gallery, detector, landmarks_det, image_reid, emb);
            // Only successfully registered images become gallery identities;
            // failed registrations are silently skipped.
            if (status == RegistrationStatus::SUCCESS) {
                embeddings.push_back(emb);
                idx_to_id.push_back(id);
                identities.emplace_back(embeddings, label, id);
                ++id;
            }
        }
    }
}
|
||||
|
||||
// Matches the query embeddings against the gallery and returns one gallery
// id per query (unknown_id for unmatched or over-threshold queries), using
// Kuhn-Munkres (or greedy) assignment on the distance matrix.
std::vector<int> EmbeddingsGallery::GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const {
    if (embeddings.empty() || idx_to_id.empty())
        return std::vector<int>(embeddings.size(), unknown_id);

    // Distance matrix: rows are queries, columns are gallery embeddings.
    cv::Mat distances(static_cast<int>(embeddings.size()), static_cast<int>(idx_to_id.size()), CV_32F);
    for (int row = 0; row < distances.rows; row++) {
        int col = 0;
        for (size_t j = 0; j < identities.size(); j++) {
            for (const auto& reference_emb : identities[j].embeddings) {
                distances.at<float>(row, col) = ComputeReidDistance(embeddings[row], reference_emb);
                ++col;
            }
        }
    }

    KuhnMunkres matcher(use_greedy_matcher);
    auto matched_idx = matcher.Solve(distances);

    std::vector<int> output_ids;
    int row = 0;
    for (auto col_idx : matched_idx) {
        if (static_cast<int>(col_idx) == -1) {
            // No column assigned to this query at all.
            output_ids.push_back(unknown_id);
            ++row;
            continue;
        }
        // Reject assignments whose distance exceeds the threshold.
        if (distances.at<float>(row, col_idx) > reid_threshold)
            output_ids.push_back(unknown_id);
        else
            output_ids.push_back(idx_to_id[col_idx]);
        ++row;
    }
    return output_ids;
}
|
||||
|
||||
std::string EmbeddingsGallery::GetLabelByID(int id) const {
|
||||
if (id >= 0 && id < static_cast<int>(identities.size()))
|
||||
return identities[id].label;
|
||||
else
|
||||
return unknown_label;
|
||||
}
|
||||
|
||||
// Returns the number of registered gallery identities.
size_t EmbeddingsGallery::size() const {
    return identities.size();
}
|
||||
|
||||
// Returns the labels of all identities, indexed by gallery id.
std::vector<std::string> EmbeddingsGallery::GetIDToLabelMap() const {
    std::vector<std::string> labels;
    labels.reserve(identities.size());
    for (const auto& identity : identities) {
        labels.push_back(identity.label);
    }
    return labels;
}
|
||||
|
||||
bool EmbeddingsGallery::LabelExists(const std::string& label) const {
|
||||
return identities.end() != std::find_if(identities.begin(), identities.end(),
|
||||
[label](const GalleryObject& o){return o.label == label;});
|
||||
}
|
||||
|
||||
// Computes the re-id descriptor of a gallery image into `embedding`.
// When crop_gallery is set, the image is first cropped to the single face
// found by `detector`. Fails when no face is detected or when the target
// is smaller than min_size_fr in both dimensions.
// Note: identity_label is currently not referenced in the body; it is kept
// for interface stability.
RegistrationStatus EmbeddingsGallery::RegisterIdentity(const std::string& identity_label,
                                                       const cv::Mat& image,
                                                       int min_size_fr, bool crop_gallery,
                                                       detection::FaceDetection& detector,
                                                       VectorCNN& landmarks_det,
                                                       VectorCNN& image_reid,
                                                       cv::Mat& embedding) {
    cv::Mat target = image;
    if (crop_gallery) {
        // Run the face detector synchronously on the gallery image.
        detector.enqueue(image);
        detector.submitRequest();
        detector.wait();
        detection::DetectedObjects faces = detector.fetchResults();
        if (faces.empty()) {
            return RegistrationStatus::FAILURE_NOT_DETECTED;
        }
        // Exactly one face per gallery image is expected.
        CV_Assert(faces.size() == 1);
        target = image(faces[0].rect);
    }
    if ((target.rows < min_size_fr) && (target.cols < min_size_fr)) {
        return RegistrationStatus::FAILURE_LOW_QUALITY;
    }
    // Align the face by 5 landmark points before computing the descriptor.
    cv::Mat landmarks;
    landmarks_det.Compute(target, &landmarks, cv::Size(2, 5));
    std::vector<cv::Mat> images = { target };
    std::vector<cv::Mat> landmarks_vec = { landmarks };
    AlignFaces(&images, &landmarks_vec);
    image_reid.Compute(images[0], &embedding);
    return RegistrationStatus::SUCCESS;
}
|
||||
Reference in New Issue
Block a user