Initial setup for CLion
This commit is contained in:
418
engines/OpenVINOEngine/src/faceapp/action_detector.cpp
Normal file
418
engines/OpenVINOEngine/src/faceapp/action_detector.cpp
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
|
||||
#include "openvino/openvino.hpp"
|
||||
|
||||
#include "action_detector.hpp"
|
||||
|
||||
#define SSD_LOCATION_RECORD_SIZE 4
|
||||
#define SSD_PRIORBOX_RECORD_SIZE 4
|
||||
#define NUM_DETECTION_CLASSES 2
|
||||
#define POSITIVE_DETECTION_IDX 1
|
||||
|
||||
/// Strict-weak-ordering comparator that ranks (score, payload) pairs from
/// highest score to lowest; the payload is ignored.
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
    return pair2.first < pair1.first;
}
|
||||
|
||||
/// Loads the person/action detection model, configures u8-NHWC pre-processing,
/// compiles it for the target device and precomputes the per-head anchor
/// bookkeeping (m_head_ranges, m_glob_anchor_map, m_head_step_sizes,
/// m_head_blob_sizes) used later when decoding SSD outputs.
ActionDetection::ActionDetection(const ActionDetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "action detector";

    ov::Layout desiredLayout = {"NHWC"};

    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);

    ov::OutputVector inputInfo = model->inputs();
    if (inputInfo.size() != 1) {
        throw std::runtime_error("Action Detection network should have only one input");
    }

    m_input_name = model->input().get_any_name();

    // Fall back to NCHW when the model carries no explicit layout information.
    m_modelLayout = ov::layout::get_layout(model->input());
    if (m_modelLayout.empty())
        m_modelLayout = {"NCHW"};

    m_network_input_size.height = model->input().get_shape()[ov::layout::height_idx(m_modelLayout)];
    m_network_input_size.width = model->input().get_shape()[ov::layout::width_idx(m_modelLayout)];

    // The "new" model variant is recognized by the presence of its location blob
    // among the outputs.
    m_new_model = false;
    ov::OutputVector outputs = model->outputs();
    auto cmp = [&](const ov::Output<ov::Node>& output) { return output.get_any_name() == m_config.new_loc_blob_name; };
    auto it = std::find_if(outputs.begin(), outputs.end(), cmp);
    if (it != outputs.end())
        m_new_model = true;

    // Accept u8 NHWC tensors at the API boundary and convert to the model's
    // native layout/precision inside the graph.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(m_modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(m_modelLayout);

    model = ppp.build();

    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;

    ov::set_batch(model, m_config.m_max_batch_size);

    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_config.m_model_type);

    const auto& head_anchors = m_new_model ? m_config.new_anchors : m_config.old_anchors;
    const int num_heads = head_anchors.size();

    m_head_ranges.resize(num_heads + 1);
    m_glob_anchor_map.resize(num_heads);
    m_head_step_sizes.resize(num_heads);

    m_head_ranges[0] = 0;
    int head_shift = 0;

    for (int head_id = 0; head_id < num_heads; ++head_id) {
        m_glob_anchor_map[head_id].resize(head_anchors[head_id]);
        // FIX: initialize — previously these were read after the inner loop
        // (for m_head_blob_sizes) and would be indeterminate if a head had
        // zero anchors.
        int anchor_height = 0, anchor_width = 0;
        for (int anchor_id = 0; anchor_id < head_anchors[head_id]; ++anchor_id) {
            // Output blob naming differs between the old and new model variants.
            const auto glob_anchor_name = m_new_model ?
                m_config.new_action_conf_blob_name_prefix + std::to_string(head_id + 1) +
                    m_config.new_action_conf_blob_name_suffix + std::to_string(anchor_id + 1) :
                m_config.old_action_conf_blob_name_prefix + std::to_string(anchor_id + 1);
            m_glob_anchor_names.push_back(glob_anchor_name);

            const auto anchor_dims = m_model.output(glob_anchor_name).get_shape();

            anchor_height = m_new_model ? anchor_dims[ov::layout::height_idx(m_modelLayout)] : anchor_dims[1];
            anchor_width = m_new_model ? anchor_dims[ov::layout::width_idx(m_modelLayout)] : anchor_dims[2];
            std::size_t num_action_classes = m_new_model ? anchor_dims[ov::layout::channels_idx(m_modelLayout)] : anchor_dims[3];
            if (num_action_classes != m_config.num_action_classes) {
                throw std::logic_error("The number of specified actions and the number of actions predicted by "
                                       "the Person/Action Detection Retail model must match");
            }

            const int anchor_size = anchor_height * anchor_width;
            head_shift += anchor_size;

            m_head_step_sizes[head_id] = m_new_model ? anchor_size : 1;
            m_glob_anchor_map[head_id][anchor_id] = m_num_glob_anchors++;
        }

        // Candidate index range [m_head_ranges[h], m_head_ranges[h+1]) belongs
        // to head h.
        m_head_ranges[head_id + 1] = head_shift;
        m_head_blob_sizes.emplace_back(anchor_width, anchor_height);
    }

    m_num_candidates = head_shift;

    // Two action classes means a binary "action vs background" task.
    m_binary_task = m_config.num_action_classes == 2;
}
|
||||
|
||||
/// Starts inference for the previously enqueued frame; a no-op when nothing
/// has been enqueued since the last submit.
void ActionDetection::submitRequest() {
    if (m_enqueued_frames != 0) {
        m_enqueued_frames = 0;
        BaseCnnDetection::submitRequest();
    }
}
|
||||
|
||||
/// Prepares one frame for inference: lazily creates the infer request,
/// records the frame dimensions (used later to scale detections back) and
/// resizes the image into the network input tensor.
void ActionDetection::enqueue(const cv::Mat& frame) {
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }

    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);

    ov::Tensor tensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, tensor);

    m_enqueued_frames = 1;
}
|
||||
|
||||
/// Wraps the raw output tensors into cv::Mat headers (no data copy) and
/// decodes them into detected actions scaled to the original frame size.
/// The old model exposes prior boxes as an extra output blob; the new model
/// generates them analytically, so an empty Mat is passed in that case.
DetectedActions ActionDetection::fetchResults() {
    const auto loc_blob_name = m_new_model ? m_config.new_loc_blob_name : m_config.old_loc_blob_name;
    const auto det_conf_blob_name = m_new_model ? m_config.new_det_conf_blob_name : m_config.old_det_conf_blob_name;

    ov::Shape loc_out_shape = m_model.output(loc_blob_name).get_shape();
    const cv::Mat loc_out(loc_out_shape[0],
                          loc_out_shape[1],
                          CV_32F,
                          m_request->get_tensor(loc_blob_name).data<float>());

    ov::Shape conf_out_shape = m_model.output(det_conf_blob_name).get_shape();
    const cv::Mat main_conf_out(conf_out_shape[0],
                                conf_out_shape[1],
                                CV_32F,
                                m_request->get_tensor(det_conf_blob_name).data<float>());

    // One HxW confidence map per global anchor, in registration order.
    std::vector<cv::Mat> add_conf_out;
    for (int glob_anchor_id = 0; glob_anchor_id < m_num_glob_anchors; ++glob_anchor_id) {
        const auto& blob_name = m_glob_anchor_names[glob_anchor_id];
        ov::Shape shape = m_request->get_tensor(blob_name).get_shape();
        add_conf_out.emplace_back(shape[ov::layout::height_idx(m_modelLayout)],
                                  shape[ov::layout::width_idx(m_modelLayout)],
                                  CV_32F,
                                  m_request->get_tensor(blob_name).data());
    }

    // Empty for the new model; GetDetections only reads it for the old one.
    cv::Mat priorbox_out;
    if (!m_new_model) {
        ov::Shape old_priorbox_shape = m_model.output(m_config.old_priorbox_blob_name).get_shape();
        priorbox_out = cv::Mat((int)old_priorbox_shape[0], (int)old_priorbox_shape[1],
                               CV_32F,
                               m_request->get_tensor(m_config.old_priorbox_blob_name).data());
    }

    // FIX: the two branches previously duplicated an identical GetDetections
    // call; collapsed into a single call site.
    return GetDetections(loc_out, main_conf_out, priorbox_out, add_conf_out,
                         cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));
}
|
||||
|
||||
/// Reads four consecutive floats as a normalized bbox. When 'inverse' is set
/// the record is stored as (ymin, xmin, ymax, xmax) instead of
/// (xmin, ymin, xmax, ymax).
inline ActionDetection::NormalizedBBox
ActionDetection::ParseBBoxRecord(const float* data, bool inverse) const {
    NormalizedBBox bbox;
    if (inverse) {
        bbox.ymin = data[0];
        bbox.xmin = data[1];
        bbox.ymax = data[2];
        bbox.xmax = data[3];
    } else {
        bbox.xmin = data[0];
        bbox.ymin = data[1];
        bbox.xmax = data[2];
        bbox.ymax = data[3];
    }
    return bbox;
}
|
||||
|
||||
/// Reconstructs the SSD prior box for grid cell 'pos' of a head output of
/// size 'blob_size', using the head stride 'step' and the anchor dimensions,
/// normalized by the network input size.
inline ActionDetection::NormalizedBBox
ActionDetection::GeneratePriorBox(int pos, int step, const cv::Size2f& anchor,
                                  const cv::Size& blob_size) const {
    const int row = pos / blob_size.width;
    const int col = pos - row * blob_size.width;

    // Anchor is centered on the middle of the cell, in input-image pixels.
    const float center_x = static_cast<float>(step) * (col + 0.5f);
    const float center_y = static_cast<float>(step) * (row + 0.5f);

    const float half_w = 0.5f * anchor.width;
    const float half_h = 0.5f * anchor.height;

    NormalizedBBox bbox;
    bbox.xmin = (center_x - half_w) / static_cast<float>(m_network_input_size.width);
    bbox.ymin = (center_y - half_h) / static_cast<float>(m_network_input_size.height);
    bbox.xmax = (center_x + half_w) / static_cast<float>(m_network_input_size.width);
    bbox.ymax = (center_y + half_h) / static_cast<float>(m_network_input_size.height);

    return bbox;
}
|
||||
|
||||
/// Decodes an SSD-encoded bbox against its prior box and variances and maps
/// the result to pixel coordinates of the given frame.
cv::Rect ActionDetection::ConvertToRect(
        const NormalizedBBox& prior_bbox, const NormalizedBBox& variances,
        const NormalizedBBox& encoded_bbox, const cv::Size& frame_size) const {
    // Prior box in center-size form.
    const float pw = prior_bbox.xmax - prior_bbox.xmin;
    const float ph = prior_bbox.ymax - prior_bbox.ymin;
    const float pcx = 0.5f * (prior_bbox.xmin + prior_bbox.xmax);
    const float pcy = 0.5f * (prior_bbox.ymin + prior_bbox.ymax);

    // Undo the SSD encoding: center offsets are relative to the prior box,
    // sizes are log-encoded.
    const float cx = variances.xmin * encoded_bbox.xmin * pw + pcx;
    const float cy = variances.ymin * encoded_bbox.ymin * ph + pcy;
    const float dw = static_cast<float>(exp(static_cast<float>(variances.xmax * encoded_bbox.xmax))) * pw;
    const float dh = static_cast<float>(exp(static_cast<float>(variances.ymax * encoded_bbox.ymax))) * ph;

    // Back to corner form, still normalized to [0,1].
    const float xmin = cx - 0.5f * dw;
    const float ymin = cy - 0.5f * dh;
    const float xmax = cx + 0.5f * dw;
    const float ymax = cy + 0.5f * dh;

    // Scale to frame pixels.
    return cv::Rect(static_cast<int>(xmin * frame_size.width),
                    static_cast<int>(ymin * frame_size.height),
                    static_cast<int>((xmax - xmin) * frame_size.width),
                    static_cast<int>((ymax - ymin) * frame_size.height));
}
|
||||
|
||||
/// Decodes raw SSD outputs into final detections:
///  - filters candidates by detection confidence,
///  - picks the best action class per candidate via a softmax over the
///    per-anchor action-confidence maps,
///  - decodes bbox coordinates against prior boxes ('priorboxes' rows for the
///    old model, generated analytically for the new one),
///  - merges overlapping candidates with Soft-NMS.
/// NOTE(review): 'priorboxes' is only dereferenced when m_new_model is false,
/// so an empty Mat is acceptable for the new model.
DetectedActions ActionDetection::GetDetections(const cv::Mat& loc, const cv::Mat& main_conf,
                                               const cv::Mat& priorboxes, const std::vector<cv::Mat>& add_conf,
                                               const cv::Size& frame_size) const {
    // Prepare input data buffers
    const float* loc_data = reinterpret_cast<float*>(loc.data);
    const float* det_conf_data = reinterpret_cast<float*>(main_conf.data);
    const float* prior_data = m_new_model ? NULL : reinterpret_cast<float*>(priorboxes.data);

    const int total_num_anchors = add_conf.size();
    std::vector<float*> action_conf_data(total_num_anchors);
    for (int i = 0; i < total_num_anchors; ++i) {
        action_conf_data[i] = reinterpret_cast<float*>(add_conf[i].data);
    }

    // Variable to store all detection candidates
    DetectedActions valid_detections;

    // Iterate over all candidate bboxes
    for (int p = 0; p < m_num_candidates; ++p) {
        // Parse detection confidence from the SSD Detection output
        const float detection_conf =
            det_conf_data[p * NUM_DETECTION_CLASSES + POSITIVE_DETECTION_IDX];

        // Skip low-confidence detections
        if (detection_conf < m_config.detection_confidence_threshold) {
            continue;
        }

        // Estimate the action head ID: m_head_ranges partitions the candidate
        // index space into per-head segments.
        int head_id = 0;
        while (p >= m_head_ranges[head_id + 1]) {
            ++head_id;
        }
        const int head_p = p - m_head_ranges[head_id];

        // Estimate the action anchor ID
        const int head_num_anchors =
            m_new_model ? m_config.new_anchors[head_id] : m_config.old_anchors[head_id];
        const int anchor_id = head_p % head_num_anchors;

        // Estimate the action label. The confidence layout differs between
        // model variants, hence the shift/step arithmetic below.
        const int glob_anchor_id = m_glob_anchor_map[head_id][anchor_id];
        const float* anchor_conf_data = action_conf_data[glob_anchor_id];
        const int action_conf_idx_shift = m_new_model ?
            head_p / head_num_anchors :
            head_p / head_num_anchors * m_config.num_action_classes;
        const int action_conf_step = m_head_step_sizes[head_id];
        const float scale = m_new_model ? m_config.new_action_scale : m_config.old_action_scale;
        int action_label = -1;
        float action_max_exp_value = 0.f;
        float action_sum_exp_values = 0.f;
        // Softmax over action classes; for the binary task class 0 is never
        // selected as the argmax (presumably the background class — confirm).
        for (size_t c = 0; c < m_config.num_action_classes; ++c) {
            float action_exp_value =
                std::exp(scale * anchor_conf_data[action_conf_idx_shift + c * action_conf_step]);
            action_sum_exp_values += action_exp_value;
            if (action_exp_value > action_max_exp_value && ((c > 0 && m_binary_task) || !m_binary_task)) {
                action_max_exp_value = action_exp_value;
                action_label = c;
            }
        }

        if (std::fabs(action_sum_exp_values) < std::numeric_limits<float>::epsilon()) {
            throw std::logic_error("action_sum_exp_values can't be equal to 0");
        }
        // Estimate the action confidence
        float action_conf = action_max_exp_value / action_sum_exp_values;

        // Skip low-confidence actions: fall back to the configured default
        // action with zero confidence instead of dropping the detection.
        if (action_label < 0 || action_conf < m_config.action_confidence_threshold) {
            action_label = m_config.default_action_id;
            action_conf = 0.f;
        }

        // Parse bbox from the SSD Detection output. The old model stores
        // variances in the second half of the priorbox blob.
        const auto priorbox = m_new_model ?
            GeneratePriorBox(head_p / head_num_anchors,
                             m_config.new_det_heads[head_id].step,
                             m_config.new_det_heads[head_id].anchors[anchor_id],
                             m_head_blob_sizes[head_id]) :
            ParseBBoxRecord(prior_data + p * SSD_PRIORBOX_RECORD_SIZE, false);
        const auto variance =
            ParseBBoxRecord(m_new_model ?
                m_config.variances :
                prior_data + (m_num_candidates + p) * SSD_PRIORBOX_RECORD_SIZE, false);
        // The new model stores coordinates in (y, x) order, hence 'inverse'.
        const auto encoded_bbox =
            ParseBBoxRecord(loc_data + p * SSD_LOCATION_RECORD_SIZE, m_new_model);

        const auto det_rect = ConvertToRect(priorbox, variance, encoded_bbox, frame_size);

        // Store detected action
        valid_detections.emplace_back(det_rect, action_label, detection_conf, action_conf);
    }

    // Merge most overlapped detections
    std::vector<int> out_det_indices;
    SoftNonMaxSuppression(valid_detections, m_config.nms_sigma, m_config.keep_top_k,
                          m_config.detection_confidence_threshold, &out_det_indices);

    DetectedActions detections;
    for (size_t i = 0; i < out_det_indices.size(); ++i) {
        detections.emplace_back(valid_detections[out_det_indices[i]]);
    }
    return detections;
}
|
||||
|
||||
/// Soft-NMS over detection candidates. Keeps at most 'top_k' detections:
/// repeatedly takes the highest-scoring remaining box, emits its index into
/// 'out_indices', then decays the scores of all other boxes by
/// exp(-IoU^2 / sigma). Stops once the best remaining score falls below
/// 'min_det_conf'. Emitted indices refer to positions in 'detections'.
void ActionDetection::SoftNonMaxSuppression(const DetectedActions& detections,
                                            const float sigma, size_t top_k, const float min_det_conf,
                                            std::vector<int>* out_indices) const {
    // Store input bbox scores
    std::vector<float> scores(detections.size());
    for (size_t i = 0; i < detections.size(); ++i) {
        scores[i] = detections[i].detection_conf;
    }

    top_k = std::min(top_k, scores.size());

    // Select top-k score indices (partial selection; order within the top-k
    // prefix beyond the pivot is unspecified, which is fine here).
    std::vector<size_t> score_idx(scores.size());
    std::iota(score_idx.begin(), score_idx.end(), 0);
    std::nth_element(score_idx.begin(), score_idx.begin() + top_k, score_idx.end(),
                     [&scores](size_t i1, size_t i2) {return scores[i1] > scores[i2];});

    // Extract top-k score values
    std::vector<float> top_scores(top_k);
    for (size_t i = 0; i < top_scores.size(); ++i) {
        top_scores[i] = scores[score_idx[i]];
    }

    // Carry out Soft Non-Maximum Suppression algorithm; top_scores is mutated
    // in place (selected boxes zeroed, neighbors decayed).
    out_indices->clear();
    for (size_t step = 0; step < top_scores.size(); ++step) {
        auto best_score_itr = std::max_element(top_scores.begin(), top_scores.end());
        if (*best_score_itr < min_det_conf) {
            break;
        }

        // Add current bbox to output list
        const size_t local_anchor_idx = std::distance(top_scores.begin(), best_score_itr);
        const int anchor_idx = score_idx[local_anchor_idx];
        out_indices->emplace_back(anchor_idx);
        // Zero the selected box so it is never picked again.
        *best_score_itr = 0.f;

        // Update top_scores of the rest bboxes
        for (size_t local_reference_idx = 0; local_reference_idx < top_scores.size(); ++local_reference_idx) {
            // Skip updating step for the low-confidence bbox
            if (top_scores[local_reference_idx] < min_det_conf) {
                continue;
            }

            // Calculate the Intersection over Union metric between two bboxes
            const size_t reference_idx = score_idx[local_reference_idx];
            const auto& rect1 = detections[anchor_idx].rect;
            const auto& rect2 = detections[reference_idx].rect;
            const auto intersection = rect1 & rect2;
            float overlap = 0.f;
            if (intersection.width > 0 && intersection.height > 0) {
                const int intersection_area = intersection.area();
                overlap = static_cast<float>(intersection_area) / static_cast<float>(rect1.area() + rect2.area() - intersection_area);
            }

            // Scale bbox score using the exponential rule
            top_scores[local_reference_idx] *= std::exp(-overlap * overlap / sigma);
        }
    }
}
|
||||
72
engines/OpenVINOEngine/src/faceapp/align_transform.cpp
Normal file
72
engines/OpenVINOEngine/src/faceapp/align_transform.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "face_reid.hpp"
|
||||
|
||||
static const float h = 112.;
|
||||
static const float w = 96.;
|
||||
// reference landmarks points in the unit square [0,1]x[0,1]
|
||||
static const float ref_landmarks_normalized[] = {
|
||||
30.2946f / w, 51.6963f / h, 65.5318f / w, 51.5014f / h, 48.0252f / w,
|
||||
71.7366f / h, 33.5493f / w, 92.3655f / h, 62.7299f / w, 92.2041f / h};
|
||||
|
||||
/// Computes a 2x3 affine transform mapping the 'src' point set onto 'dst'
/// (similarity alignment: translation via mean-centering, scale via stddev
/// normalization, rotation via SVD of the cross-covariance matrix).
/// WARNING: mutates *src and *dst in place — both are mean-centered and
/// divided by their stddev as a side effect.
cv::Mat GetTransform(cv::Mat* src, cv::Mat* dst) {
    cv::Mat col_mean_src;
    reduce(*src, col_mean_src, 0, cv::REDUCE_AVG);
    for (int i = 0; i < src->rows; i++) {
        src->row(i) -= col_mean_src;
    }

    cv::Mat col_mean_dst;
    reduce(*dst, col_mean_dst, 0, cv::REDUCE_AVG);
    for (int i = 0; i < dst->rows; i++) {
        dst->row(i) -= col_mean_dst;
    }

    // Clamp the deviations away from zero to avoid division by ~0 for
    // degenerate (all-identical) point sets.
    cv::Scalar mean, dev_src, dev_dst;
    cv::meanStdDev(*src, mean, dev_src);
    dev_src(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_src(0));
    *src /= dev_src(0);
    cv::meanStdDev(*dst, mean, dev_dst);
    dev_dst(0) =
        std::max(static_cast<double>(std::numeric_limits<float>::epsilon()), dev_dst(0));
    *dst /= dev_dst(0);

    // Optimal rotation from the SVD of src^T * dst.
    // NOTE(review): local 'w' (singular values) shadows the file-scope
    // landmark width constant 'w'.
    cv::Mat w, u, vt;
    cv::SVD::compute((*src).t() * (*dst), w, u, vt);
    cv::Mat r = (u * vt).t();
    cv::Mat m(2, 3, CV_32F);
    // Rotation scaled by the ratio of deviations; translation restores the
    // destination mean.
    m.colRange(0, 2) = r * (dev_dst(0) / dev_src(0));
    m.col(2) = (col_mean_dst.t() - m.colRange(0, 2) * col_mean_src.t());
    return m;
}
|
||||
|
||||
/// Warps each face crop so that its detected landmarks align with the
/// canonical reference landmark positions (scaled to the crop size).
/// WARNING: mutates both arguments in place — face images are warped and the
/// landmark coordinates are scaled from normalized units to pixels.
void AlignFaces(std::vector<cv::Mat>* face_images, std::vector<cv::Mat>* landmarks_vec) {
    if (landmarks_vec->size() == 0) {
        return;
    }
    CV_Assert(face_images->size() == landmarks_vec->size());
    // 5 reference points (eyes, nose, mouth corners — per
    // ref_landmarks_normalized above).
    cv::Mat ref_landmarks = cv::Mat(5, 2, CV_32F);

    for (size_t j = 0; j < face_images->size(); j++) {
        for (int i = 0; i < ref_landmarks.rows; i++) {
            // Scale both reference and detected landmarks to this crop's
            // pixel dimensions.
            ref_landmarks.at<float>(i, 0) =
                ref_landmarks_normalized[2 * i] * face_images->at(j).cols;
            ref_landmarks.at<float>(i, 1) =
                ref_landmarks_normalized[2 * i + 1] * face_images->at(j).rows;
            landmarks_vec->at(j).at<float>(i, 0) *= face_images->at(j).cols;
            landmarks_vec->at(j).at<float>(i, 1) *= face_images->at(j).rows;
        }
        // Transform maps reference -> detected; applied inverted by warpAffine.
        cv::Mat m = GetTransform(&ref_landmarks, &landmarks_vec->at(j));
        cv::warpAffine(face_images->at(j), face_images->at(j), m,
                       face_images->at(j).size(), cv::WARP_INVERSE_MAP);
    }
}
|
||||
111
engines/OpenVINOEngine/src/faceapp/cnn.cpp
Normal file
111
engines/OpenVINOEngine/src/faceapp/cnn.cpp
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#include "openvino/openvino.hpp"
|
||||
#include "cnn.hpp"
|
||||
|
||||
// Stores the configuration only; the model is read and compiled in Load().
CnnDLSDKBase::CnnDLSDKBase(const Config& config) : m_config(config) {}
|
||||
|
||||
/// Reads the model, configures u8-NHWC pre-processing, resizes the batch
/// dimension to the configured maximum, compiles the model and creates the
/// single infer request plus its input tensor.
void CnnDLSDKBase::Load() {
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    ov::OutputVector inputs = model->inputs();
    // NOTE(review): a multi-input model is silently rejected here, leaving
    // the object unusable — consider throwing instead (the face/action
    // detectors in this module throw in the same situation).
    if (inputs.size() != 1) {
        return;
    }
    m_desired_layout = {"NHWC"};
    // Fall back to NCHW when the model carries no explicit layout info.
    ov::Layout model_layout = ov::layout::get_layout(model->input());
    if (model_layout.empty()) {
        model_layout = {"NCHW"};
    }
    // Remember all output names for later tensor retrieval in InferBatch().
    ov::OutputVector outputs = model->outputs();
    for (auto& item : outputs) {
        const std::string name = item.get_any_name();
        m_output_tensors_names.push_back(name);
    }
    // Accept u8 NHWC tensors and convert to the model's layout/precision.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(m_desired_layout);
    ppp.input().preprocess()
        .convert_layout(model_layout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(model_layout);

    model = ppp.build();
    m_modelShape = model->input().get_shape();
    m_modelShape[ov::layout::batch_idx(m_desired_layout)] = m_config.m_max_batch_size;
    // Dynamic batch in [1, max]; the actual batch is set per call in
    // InferBatch() via a tensor view.
    ov::set_batch(model, {1, m_config.m_max_batch_size});
    m_compiled_model = m_config.m_core.compile_model(model, m_config.m_deviceName, { ov::hint::num_requests(1) });
    m_infer_request = m_compiled_model.create_infer_request();
    m_in_tensor = m_infer_request.get_input_tensor();
    m_in_tensor.set_shape(m_modelShape);
}
|
||||
|
||||
/// Runs one synchronous inference over a batch of frames and hands the named
/// output tensors to 'fetch_results' together with the actual batch size.
/// Assumes frames.size() <= m_config.m_max_batch_size (the preallocated
/// input tensor capacity) — TODO confirm callers guarantee this.
void CnnDLSDKBase::InferBatch(const std::vector<cv::Mat>& frames,const std::function<void(const std::map<std::string, ov::Tensor>&, size_t)>& fetch_results)
{
    size_t num_imgs = frames.size();
    // Indexing below follows the NHWC desired layout set in Load().
    size_t h = m_modelShape[ov::layout::height_idx(m_desired_layout)];
    size_t w = m_modelShape[ov::layout::width_idx(m_desired_layout)];
    size_t c = m_modelShape[ov::layout::channels_idx(m_desired_layout)];
    // Resize each frame directly into its batch slice of the input tensor
    // (ROI tensor — no copy of the underlying buffer).
    for (size_t i = 0; i < num_imgs; i++) {
        auto slice = ov::Tensor{ m_in_tensor, {i, 0, 0, 0}, {i + 1, h, w, c} };
        resize2tensor(frames[i], slice);
    }
    // Submit only the first num_imgs entries of the preallocated tensor.
    m_infer_request.set_input_tensor(ov::Tensor{ m_in_tensor, { 0, 0, 0, 0 }, {num_imgs, h, w, c} });
    m_infer_request.infer();

    std::map<std::string, ov::Tensor> output_tensors;
    for (const auto& output_tensor_name : m_output_tensors_names) {
        output_tensors[output_tensor_name] = m_infer_request.get_tensor(output_tensor_name);
    }

    fetch_results(output_tensors, num_imgs);
}
|
||||
|
||||
// Loads and compiles the model eagerly at construction time.
VectorCNN::VectorCNN(const Config& config) : CnnDLSDKBase(config) {
    Load();
}
|
||||
|
||||
/// Convenience overload: runs the batch Compute() on a single frame and
/// stores its first (and only) result into *vector.
void VectorCNN::Compute(const cv::Mat& frame, cv::Mat* vector, cv::Size outp_shape) {
    std::vector<cv::Mat> results;
    Compute(std::vector<cv::Mat>{frame}, &results, outp_shape);
    *vector = results.front();
}
|
||||
|
||||
/// Computes one embedding per input image. Each output row of the network is
/// copied into its own cv::Mat in *vectors; when 'outp_shape' is non-empty
/// the per-image result is reshaped to that height before copying.
void VectorCNN::Compute(const std::vector<cv::Mat>& images, std::vector<cv::Mat>* vectors,
                        cv::Size outp_shape) {
    if (images.empty()) {
        return;
    }
    vectors->clear();
    auto results_fetcher = [vectors, outp_shape](const std::map<std::string, ov::Tensor>& outputs, size_t batch_size) {
        for (auto& output : outputs) {
            ov::Tensor tensor = output.second;
            ov::Shape shape = tensor.get_shape();
            // cv::Mat wants int dimensions; convert the ov::Shape entries.
            std::vector<int> tensor_sizes(shape.size(), 0);
            for (size_t i = 0; i < tensor_sizes.size(); ++i) {
                tensor_sizes[i] = shape[i];
            }
            // Header over the tensor buffer — no data copy here.
            cv::Mat out_tensor(tensor_sizes, CV_32F, tensor.data<float>());
            // Assumes the output is laid out as [batch, feature_len] so row b
            // starts at b * size[1] — TODO confirm for all supported models.
            for (size_t b = 0; b < batch_size; b++) {
                cv::Mat tensor_wrapper(out_tensor.size[1], 1, CV_32F,
                                       reinterpret_cast<void*>((out_tensor.ptr<float>(0) + b * out_tensor.size[1])));
                vectors->emplace_back();
                if (!isSizeEmpty(outp_shape))
                    tensor_wrapper = tensor_wrapper.reshape(1, outp_shape.height);
                // Deep copy — the tensor buffer is owned by the infer request.
                tensor_wrapper.copyTo(vectors->back());
            }
        }
    };
    InferBatch(images, results_fetcher);
}
|
||||
// Maximum number of images accepted per Compute()/InferBatch() call,
// as configured at construction time.
int VectorCNN::maxBatchSize() const {
    return m_config.m_max_batch_size;
}
|
||||
176
engines/OpenVINOEngine/src/faceapp/detector.cpp
Normal file
176
engines/OpenVINOEngine/src/faceapp/detector.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
|
||||
#include "openvino/openvino.hpp"
|
||||
|
||||
#include "detector.hpp"
|
||||
|
||||
#define SSD_EMPTY_DETECTIONS_INDICATOR -1.0
|
||||
|
||||
using namespace detection;
|
||||
|
||||
namespace {
|
||||
/// Clips a rectangle so it lies entirely within an image of the given size;
/// a rectangle fully outside the image collapses to zero width/height.
cv::Rect TruncateToValidRect(const cv::Rect& rect, const cv::Size& size) {
    const int x0 = std::max(0, std::min(size.width - 1, rect.tl().x));
    const int y0 = std::max(0, std::min(size.height - 1, rect.tl().y));
    const int x1 = std::max(0, std::min(size.width, rect.br().x));
    const int y1 = std::max(0, std::min(size.height, rect.br().y));
    return cv::Rect(x0, y0, std::max(0, x1 - x0), std::max(0, y1 - y0));
}
|
||||
|
||||
/// Scales a rectangle about its center by independent x/y factors, rounding
/// outward (floor for the top-left, ceil for the bottom-right).
cv::Rect IncreaseRect(const cv::Rect& r, float coeff_x, float coeff_y) {
    const cv::Point2f tl = r.tl();
    const cv::Point2f br = r.br();
    const cv::Point2f center = (tl * 0.5f) + (br * 0.5f);
    const cv::Point2f half_extent{(center.x - tl.x) * coeff_x, (center.y - tl.y) * coeff_y};

    const cv::Point top_left{static_cast<int>(std::floor(center.x - half_extent.x)),
                             static_cast<int>(std::floor(center.y - half_extent.y))};
    const cv::Point bottom_right{static_cast<int>(std::ceil(center.x + half_extent.x)),
                                 static_cast<int>(std::ceil(center.y + half_extent.y))};

    return cv::Rect(top_left, bottom_right);
}
|
||||
} // namespace
|
||||
|
||||
/// Loads the SSD face detection model, validates its single-input /
/// single-output 1x1xNx7 contract, reshapes the input to the configured
/// resolution, configures u8-NHWC pre-processing and compiles the model.
FaceDetection::FaceDetection(const DetectorConfig& config) :
    BaseCnnDetection(config.is_async), m_config(config) {
    m_detectorName = "Face Detection";

    slog::info << "Reading model: " << m_config.m_path_to_model << slog::endl;
    std::shared_ptr<ov::Model> model = m_config.m_core.read_model(m_config.m_path_to_model);
    logBasicModelInfo(model);

    ov::Layout desiredLayout = {"NHWC"};

    ov::OutputVector inputs_info = model->inputs();
    if (inputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one input");
    }

    ov::OutputVector outputs_info = model->outputs();
    if (outputs_info.size() != 1) {
        throw std::runtime_error("Face Detection network should have only one output");
    }

    ov::Output<ov::Node> input = model->input();
    m_input_name = input.get_any_name();

    // Fall back to NCHW when the model carries no explicit layout info.
    ov::Layout modelLayout = ov::layout::get_layout(input);
    if (modelLayout.empty())
        modelLayout = {"NCHW"};

    ov::Shape shape = input.get_shape();
    shape[ov::layout::height_idx(modelLayout)] = m_config.input_h;
    shape[ov::layout::width_idx(modelLayout)] = m_config.input_w;

    ov::Output<ov::Node> output = model->output();
    m_output_name = output.get_any_name();

    ov::Shape outputDims = output.get_shape();
    // FIX: validate the output rank BEFORE indexing outputDims[2]/[3];
    // previously those elements were read ahead of the rank check.
    if (outputDims.size() != 4) {
        throw std::runtime_error("Face Detection network output should have 4 dimensions, but had " +
                                 std::to_string(outputDims.size()));
    }
    m_max_detections_count = outputDims[2];
    m_object_size = outputDims[3];
    if (m_object_size != 7) {
        throw std::runtime_error("Face Detection network output layer should have 7 as a last dimension");
    }

    // Apply the configured input resolution.
    std::map<std::string, ov::PartialShape> input_shapes;
    input_shapes[input.get_any_name()] = shape;
    model->reshape(input_shapes);

    // Accept u8 NHWC tensors and convert to the model's layout/precision.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout(desiredLayout);
    ppp.input().preprocess()
        .convert_layout(modelLayout)
        .convert_element_type(ov::element::f32);
    ppp.input().model().set_layout(modelLayout);

    model = ppp.build();

    slog::info << "PrePostProcessor configuration:" << slog::endl;
    slog::info << ppp << slog::endl;

    m_model = m_config.m_core.compile_model(model, m_config.m_deviceName);
    logCompiledModelInfo(m_model, m_config.m_path_to_model, m_config.m_deviceName, m_detectorName);
}
|
||||
|
||||
/// Starts inference for the previously enqueued frame; a no-op when nothing
/// has been enqueued since the last submit.
void FaceDetection::submitRequest() {
    if (m_enqueued_frames != 0) {
        m_enqueued_frames = 0;
        BaseCnnDetection::submitRequest();
    }
}
|
||||
|
||||
/// Prepares one frame for inference: lazily creates the infer request,
/// records the frame dimensions (used later to scale detections back) and
/// resizes the image into the network input tensor.
void FaceDetection::enqueue(const cv::Mat& frame) {
    if (!m_request) {
        m_request = std::make_shared<ov::InferRequest>(m_model.create_infer_request());
    }

    m_width = static_cast<float>(frame.cols);
    m_height = static_cast<float>(frame.rows);

    ov::Tensor tensor = m_request->get_tensor(m_input_name);
    resize2tensor(frame, tensor);

    m_enqueued_frames = 1;
}
|
||||
|
||||
/// Parses the SSD output (7 floats per detection) into rectangles in original
/// frame coordinates, enlarging each box by the configured scale factors and
/// keeping only confident, non-empty detections.
/// Record layout used below: [0]=image id, [2]=confidence, [3..6]=normalized
/// x0,y0,x1,y1 (index 1 is unused here — presumably the class label; confirm).
DetectedObjects FaceDetection::fetchResults() {
    DetectedObjects results;
    const float* data = m_request->get_tensor(m_output_name).data<float>();

    for (int det_id = 0; det_id < m_max_detections_count; ++det_id) {
        const int start_pos = det_id * m_object_size;

        // An image id of -1 marks the end of the valid detections.
        const float batchID = data[start_pos];
        if (batchID == SSD_EMPTY_DETECTIONS_INDICATOR) {
            break;
        }

        // Clamp everything to [0,1] before scaling to frame pixels.
        const float score = std::min(std::max(0.0f, data[start_pos + 2]), 1.0f);
        const float x0 = std::min(std::max(0.0f, data[start_pos + 3]), 1.0f) * m_width;
        const float y0 = std::min(std::max(0.0f, data[start_pos + 4]), 1.0f) * m_height;
        const float x1 = std::min(std::max(0.0f, data[start_pos + 5]), 1.0f) * m_width;
        const float y1 = std::min(std::max(0.0f, data[start_pos + 6]), 1.0f) * m_height;

        DetectedObject object;
        object.confidence = score;
        object.rect = cv::Rect(cv::Point(static_cast<int>(round(static_cast<double>(x0))),
                                         static_cast<int>(round(static_cast<double>(y0)))),
                               cv::Point(static_cast<int>(round(static_cast<double>(x1))),
                                         static_cast<int>(round(static_cast<double>(y1)))));


        // Enlarge the face box by the configured factors, then clip it back
        // inside the frame.
        object.rect = TruncateToValidRect(IncreaseRect(object.rect,
                                                       m_config.increase_scale_x,
                                                       m_config.increase_scale_y),
                                          cv::Size(static_cast<int>(m_width), static_cast<int>(m_height)));

        if (object.confidence > m_config.confidence_threshold && object.rect.area() > 0) {
            results.emplace_back(object);
        }
    }

    return results;
}
|
||||
166
engines/OpenVINOEngine/src/faceapp/logger.cpp
Normal file
166
engines/OpenVINOEngine/src/faceapp/logger.cpp
Normal file
@@ -0,0 +1,166 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
#include "logger.hpp"
|
||||
|
||||
namespace {

// Placeholder label used when an index does not map to a known entry.
const char unknown_label[] = "Unknown";

// Returns labels[idx] for a non-negative index, otherwise the "Unknown"
// placeholder. Throws (via at()) if idx is non-negative but out of range.
std::string GetUnknownOrLabel(const std::vector<std::string>& labels, int idx) {
    if (idx < 0) {
        return unknown_label;
    }
    return labels.at(idx);
}

// Builds "<file name>@<zero-padded 6-digit frame index>" from a path.
std::string FrameIdxToString(const std::string& path, int frame_idx) {
    std::stringstream padded;
    padded << std::setw(6) << std::setfill('0') << frame_idx;
    const std::string file_name = path.substr(path.rfind("/") + 1);
    return file_name + "@" + padded.str();
}
}  // anonymous namespace
|
||||
|
||||
// Sets up the logger's three sinks:
//  - stream: human-readable main log; per-frame text methods write here only
//    when `enabled` is true,
//  - act_stat_log_file: per-frame action statistics CSV, opened
//    unconditionally (an empty path simply yields a failed stream),
//  - act_det_log_file: optional cv::FileStorage detection log; when non-empty
//    a top-level "data" sequence is started here and closed in the destructor.
DetectionsLogger::DetectionsLogger(slog::LogStream& stream, bool enabled,
                                   const std::string& act_stat_log_file,
                                   const std::string& act_det_log_file) :
        m_log_stream(stream) {
    m_write_logs = enabled;
    m_act_stat_log_stream.open(act_stat_log_file, std::fstream::out);

    if (!act_det_log_file.empty()) {
        m_act_det_log_stream.open(act_det_log_file, cv::FileStorage::WRITE);

        // Open the "data" sequence; AddDetectionToFrame appends entries to it.
        m_act_det_log_stream << "data" << "[";
    }
}
|
||||
|
||||
void DetectionsLogger::CreateNextFrameRecord(
|
||||
const std::string& path, const int frame_idx, const size_t width, const size_t height) {
|
||||
if (m_write_logs)
|
||||
m_log_stream << "Frame_name: " << path << "@" << frame_idx << " width: "
|
||||
<< width << " height: " << height << slog::endl;
|
||||
}
|
||||
|
||||
// Logs one face box with its identity and, when non-empty, its action label.
void DetectionsLogger::AddFaceToFrame(const cv::Rect& rect, const std::string& id, const std::string& action) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: face. Box: " << rect << " id: " << id;
    if (!action.empty()) {
        m_log_stream << " action: " << action;
    }
    m_log_stream << slog::endl;
}
|
||||
|
||||
// Logs one person box with its action and, when non-empty, its identity label.
void DetectionsLogger::AddPersonToFrame(const cv::Rect& rect, const std::string& action, const std::string& id) {
    if (!m_write_logs) {
        return;
    }
    m_log_stream << "Object type: person. Box: " << rect << " action: " << action;
    if (!id.empty()) {
        m_log_stream << " id: " << id;
    }
    m_log_stream << slog::endl;
}
|
||||
|
||||
void DetectionsLogger::AddDetectionToFrame(const TrackedObject& object, const int frame_idx) {
|
||||
if (m_act_det_log_stream.isOpened()) {
|
||||
m_act_det_log_stream << "{" << "frame_id" << frame_idx
|
||||
<< "det_conf" << object.confidence
|
||||
<< "label" << object.label
|
||||
<< "rect" << object.rect << "}";
|
||||
}
|
||||
}
|
||||
|
||||
void DetectionsLogger::FinalizeFrameRecord() {
|
||||
if (m_write_logs) {
|
||||
m_log_stream << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Replays all accumulated face tracks frame by frame, writing:
//  - a text record per frame to the main log (via CreateNextFrameRecord /
//    AddFaceToFrame / FinalizeFrameRecord), and
//  - one CSV row per frame to the action-statistics stream, with one column
//    per known person holding that person's action label in the frame
//    ("Unknown" when the person was not seen or the action is unrecognized).
//
// Parameters:
//  - video_path / frame_size / num_frames: source video metadata,
//  - face_tracks: tracked face objects grouped into tracks,
//  - track_id_to_label_faces: track object_id -> person label index,
//  - action_idx_to_label / person_id_to_label: index-to-name tables,
//  - frame_face_obj_id_to_action_maps: per-frame map object_id -> action idx.
void DetectionsLogger::DumpDetections(const std::string& video_path,
                                      const cv::Size frame_size,
                                      const size_t num_frames,
                                      const std::vector<Track>& face_tracks,
                                      const std::map<int, int>& track_id_to_label_faces,
                                      const std::vector<std::string>& action_idx_to_label,
                                      const std::vector<std::string>& person_id_to_label,
                                      const std::vector<std::map<int, int>>& frame_face_obj_id_to_action_maps) {
    // Re-index track objects by frame so each frame can be emitted in order.
    std::map<int, std::vector<const TrackedObject*>> frame_idx_to_face_track_objs;

    for (const auto& tr : face_tracks) {
        for (const auto& obj : tr.objects) {
            frame_idx_to_face_track_objs[obj.frame_idx].emplace_back(&obj);
        }
    }

    // One CSV column per person; initialized (and reset each frame) to "Unknown".
    std::map<std::string, std::string> face_label_to_action;
    for (const auto& label : person_id_to_label) {
        face_label_to_action[label] = unknown_label;
    }
    // CSV header: frame_idx,<person_1>,<person_2>,...
    m_act_stat_log_stream << "frame_idx";
    for (const auto& label : person_id_to_label) {
        m_act_stat_log_stream << "," << label;
    }
    m_act_stat_log_stream << std::endl;

    for (size_t i = 0; i < num_frames; i++) {
        CreateNextFrameRecord(video_path, i, frame_size.width, frame_size.height);
        const auto& frame_face_obj_id_to_action = frame_face_obj_id_to_action_maps.at(i);
        // Reset all columns before filling in this frame's observations.
        for (auto& kv : face_label_to_action) {
            kv.second = unknown_label;
        }

        for (const auto& p_obj : frame_idx_to_face_track_objs[i]) {
            const auto& obj = *p_obj;
            std::string action_label = unknown_label;
            if (frame_face_obj_id_to_action.count(obj.object_id) > 0) {
                action_label = GetUnknownOrLabel(action_idx_to_label, frame_face_obj_id_to_action.at(obj.object_id));
            }
            // NOTE(review): .at() assumes every tracked object_id is present in
            // track_id_to_label_faces — verify against the caller.
            std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj.object_id));
            face_label_to_action[face_label] = action_label;
            AddFaceToFrame(obj.rect, face_label, action_label);
        }

        // Emit the CSV row for this frame.
        m_act_stat_log_stream << FrameIdxToString(video_path, i);
        for (const auto& label : person_id_to_label) {
            m_act_stat_log_stream << "," << face_label_to_action[label];
        }
        m_act_stat_log_stream << std::endl;

        FinalizeFrameRecord();
    }
}
|
||||
|
||||
void DetectionsLogger::DumpTracks(const std::map<int, RangeEventsTrack>& obj_id_to_events,
|
||||
const std::vector<std::string>& action_idx_to_label,
|
||||
const std::map<int, int>& track_id_to_label_faces,
|
||||
const std::vector<std::string>& person_id_to_label) {
|
||||
for (const auto& tup : obj_id_to_events) {
|
||||
const int obj_id = tup.first;
|
||||
if (track_id_to_label_faces.count(obj_id) > 0) {
|
||||
const auto& events = tup.second;
|
||||
|
||||
std::string face_label = GetUnknownOrLabel(person_id_to_label, track_id_to_label_faces.at(obj_id));
|
||||
m_log_stream << "Person: " << face_label << slog::endl;
|
||||
|
||||
for (const auto& event : events) {
|
||||
std::string action_label = GetUnknownOrLabel(action_idx_to_label, event.action);
|
||||
m_log_stream << " - " << action_label
|
||||
<< ": from " << event.begin_frame_id
|
||||
<< " to " << event.end_frame_id
|
||||
<< " frames" << slog::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Closes the "data" sequence opened by the constructor in the detection log.
DetectionsLogger::~DetectionsLogger() {
    if (!m_act_det_log_stream.isOpened()) {
        return;
    }
    m_act_det_log_stream << "]";
}
|
||||
186
engines/OpenVINOEngine/src/faceapp/reid_gallery.cpp
Normal file
186
engines/OpenVINOEngine/src/faceapp/reid_gallery.cpp
Normal file
@@ -0,0 +1,186 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include "face_reid.hpp"
|
||||
#include "tracker/tracker.hpp"
|
||||
|
||||
namespace {
|
||||
float ComputeReidDistance(const cv::Mat& descr1, const cv::Mat& descr2) {
|
||||
float xy = static_cast<float>(descr1.dot(descr2));
|
||||
float xx = static_cast<float>(descr1.dot(descr1));
|
||||
float yy = static_cast<float>(descr2.dot(descr2));
|
||||
float norm = sqrt(xx * yy) + 1e-6f;
|
||||
return 1.0f - xy / norm;
|
||||
}
|
||||
|
||||
bool file_exists(const std::string& name) {
|
||||
std::ifstream f(name.c_str());
|
||||
return f.good();
|
||||
}
|
||||
|
||||
inline char separator() {
|
||||
#ifdef _WIN32
|
||||
return '\\';
|
||||
#else
|
||||
return '/';
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string folder_name(const std::string& path) {
|
||||
size_t found_pos;
|
||||
found_pos = path.find_last_of(separator());
|
||||
if (found_pos != std::string::npos)
|
||||
return path.substr(0, found_pos);
|
||||
return std::string(".") + separator();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Placeholder label and id reported for faces that match no gallery identity.
const char EmbeddingsGallery::unknown_label[] = "Unknown";
const int EmbeddingsGallery::unknown_id = TrackedObject::UNKNOWN_LABEL_IDX;
|
||||
|
||||
// Builds the identity gallery from a cv::FileStorage list file: each top-level
// node name is a person label and its entry an image path (absolute, or
// relative to the list file's directory). Each image is run through
// RegisterIdentity; successful registrations get a sequential id recorded in
// both idx_to_id and identities. An empty ids_list leaves the gallery empty.
EmbeddingsGallery::EmbeddingsGallery(const std::string& ids_list,
                                     double threshold, int min_size_fr,
                                     bool crop_gallery, const detection::DetectorConfig& detector_config,
                                     VectorCNN& landmarks_det,
                                     VectorCNN& image_reid,
                                     bool use_greedy_matcher) :
    reid_threshold(threshold), use_greedy_matcher(use_greedy_matcher) {
    if (ids_list.empty()) {
        return;
    }

    // Used only when crop_gallery is set (inside RegisterIdentity).
    detection::FaceDetection detector(detector_config);

    cv::FileStorage fs(ids_list, cv::FileStorage::Mode::READ);
    cv::FileNode fn = fs.root();
    int id = 0;
    for (cv::FileNodeIterator fit = fn.begin(); fit != fn.end(); ++fit) {
        cv::FileNode item = *fit;
        std::string label = item.name();
        std::vector<cv::Mat> embeddings;

        // Please, note that the case when there are more than one image in gallery
        // for a person might not work properly with the current implementation
        // of the demo.
        // Remove this assert by your own risk.
        CV_Assert(item.size() == 1);

        for (size_t i = 0; i < item.size(); i++) {
            // Try the path as-is first, then relative to the list file's folder.
            std::string path;
            if (file_exists(item[i].string())) {
                path = item[i].string();
            } else {
                path = folder_name(ids_list) + separator() + item[i].string();
            }

            cv::Mat image = cv::imread(path);
            CV_Assert(!image.empty());
            cv::Mat emb;
            RegistrationStatus status = RegisterIdentity(label, image, min_size_fr, crop_gallery, detector, landmarks_det, image_reid, emb);
            if (status == RegistrationStatus::SUCCESS) {
                embeddings.push_back(emb);
                idx_to_id.push_back(id);
                identities.emplace_back(embeddings, label, id);
                ++id;
            }
        }
    }
}
|
||||
|
||||
// Matches query embeddings against the gallery. Builds a distance matrix
// (rows = queries, columns = gallery embeddings), solves the assignment with
// KuhnMunkres, and returns one gallery id per query — unknown_id when no
// column was assigned or the assigned distance exceeds reid_threshold.
std::vector<int> EmbeddingsGallery::GetIDsByEmbeddings(const std::vector<cv::Mat>& embeddings) const {
    if (embeddings.empty() || idx_to_id.empty())
        return std::vector<int>(embeddings.size(), unknown_id);

    cv::Mat distances(static_cast<int>(embeddings.size()), static_cast<int>(idx_to_id.size()), CV_32F);

    for (int i = 0; i < distances.rows; i++) {
        int k = 0;
        // k walks the flattened list of all reference embeddings; its final
        // value matches idx_to_id.size() since every registration pushes one id.
        for (size_t j = 0; j < identities.size(); j++) {
            for (const auto& reference_emb : identities[j].embeddings) {
                distances.at<float>(i, k) = ComputeReidDistance(embeddings[i], reference_emb);
                k++;
            }
        }
    }
    KuhnMunkres matcher(use_greedy_matcher);
    auto matched_idx = matcher.Solve(distances);
    std::vector<int> output_ids;
    for (auto col_idx : matched_idx) {
        if (int(col_idx) == -1) {
            output_ids.push_back(unknown_id);
            continue;
        }
        // output_ids.size() doubles as the current row (query) index because
        // exactly one id is pushed per iteration.
        if (distances.at<float>(output_ids.size(), col_idx) > reid_threshold)
            output_ids.push_back(unknown_id);
        else
            output_ids.push_back(idx_to_id[col_idx]);
    }
    return output_ids;
}
|
||||
|
||||
std::string EmbeddingsGallery::GetLabelByID(int id) const {
|
||||
if (id >= 0 && id < static_cast<int>(identities.size()))
|
||||
return identities[id].label;
|
||||
else
|
||||
return unknown_label;
|
||||
}
|
||||
|
||||
// Number of identities registered in the gallery.
size_t EmbeddingsGallery::size() const {
    return identities.size();
}
|
||||
|
||||
// Returns the labels ordered by identity id (id == index in the result).
std::vector<std::string> EmbeddingsGallery::GetIDToLabelMap() const {
    std::vector<std::string> labels;
    labels.reserve(identities.size());
    for (size_t i = 0; i < identities.size(); ++i) {
        labels.emplace_back(identities[i].label);
    }
    return labels;
}
|
||||
|
||||
bool EmbeddingsGallery::LabelExists(const std::string& label) const {
|
||||
return identities.end() != std::find_if(identities.begin(), identities.end(),
|
||||
[label](const GalleryObject& o){return o.label == label;});
|
||||
}
|
||||
|
||||
// Computes the re-identification embedding for one gallery image.
// When crop_gallery is set, a face is first detected in the image (exactly one
// face is required; zero faces -> FAILURE_NOT_DETECTED) and only that ROI is
// used. Images smaller than min_size_fr in both dimensions are rejected as
// low quality. Otherwise landmarks are computed, the face is aligned, and the
// reid network output is stored in `embedding`.
// Note: identity_label is currently unused by this routine.
RegistrationStatus EmbeddingsGallery::RegisterIdentity(const std::string& identity_label,
                                                       const cv::Mat& image,
                                                       int min_size_fr, bool crop_gallery,
                                                       detection::FaceDetection& detector,
                                                       VectorCNN& landmarks_det,
                                                       VectorCNN& image_reid,
                                                       cv::Mat& embedding) {
    cv::Mat target = image;
    if (crop_gallery) {
        // Synchronous detect: enqueue one frame, run, and collect results.
        detector.enqueue(image);
        detector.submitRequest();
        detector.wait();
        detection::DetectedObjects faces = detector.fetchResults();
        if (faces.size() == 0) {
            return RegistrationStatus::FAILURE_NOT_DETECTED;
        }
        CV_Assert(faces.size() == 1);
        cv::Mat face_roi = image(faces[0].rect);
        target = face_roi;
    }
    // Reject only when BOTH dimensions are below the minimum size.
    if ((target.rows < min_size_fr) && (target.cols < min_size_fr)) {
        return RegistrationStatus::FAILURE_LOW_QUALITY;
    }
    cv::Mat landmarks;
    // 5 facial landmarks, 2 coordinates each.
    landmarks_det.Compute(target, &landmarks, cv::Size(2, 5));
    std::vector<cv::Mat> images = { target };
    std::vector<cv::Mat> landmarks_vec = { landmarks };
    AlignFaces(&images, &landmarks_vec);
    image_reid.Compute(images[0], &embedding);
    return RegistrationStatus::SUCCESS;
}
|
||||
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/associative_embedding_decoder.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include <utils/kuhn_munkres.hpp>
|
||||
|
||||
// Extracts up to maxNumPeople candidate keypoints (peaks) for one joint from
// its NMS-suppressed heatmap, keeping only peaks whose score exceeds
// detectionThreshold. Each peak also records the associative-embedding tag
// used later to group joints into poses.
//
// Fixes vs. the previous version:
//  - reserve() was called on every loop iteration; it is now done once,
//  - the partial_sort middle iterator is clamped to the index count so a
//    maxNumPeople larger than the heatmap area cannot run past the end.
void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
               const std::vector<cv::Mat>& aembdsMaps,
               std::vector<std::vector<Peak>>& allPeaks,
               size_t jointId,
               size_t maxNumPeople,
               float detectionThreshold) {
    const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId];
    const float* heatMapData = nmsHeatMap.ptr<float>();
    cv::Size outputSize = nmsHeatMap.size();

    // Partially sort flat heatmap indices so the top-scored ones come first.
    std::vector<int> indices(outputSize.area());
    std::iota(std::begin(indices), std::end(indices), 0);
    const size_t numPeaks = std::min(maxNumPeople, indices.size());
    std::partial_sort(std::begin(indices),
                      std::begin(indices) + numPeaks,
                      std::end(indices),
                      [heatMapData](int l, int r) {
                          return heatMapData[l] > heatMapData[r];
                      });

    allPeaks[jointId].reserve(numPeaks);
    for (size_t personId = 0; personId < numPeaks; personId++) {
        int index = indices[personId];
        // NOTE(review): x is derived from the row and y from the column of the
        // flat index; downstream code appears to rely on this convention —
        // confirm before changing.
        int x = index / outputSize.width;
        int y = index % outputSize.width;
        float tag = aembdsMaps[jointId].at<float>(x, y);
        float score = heatMapData[index];
        if (score > detectionThreshold) {
            allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast<float>(x), static_cast<float>(y)), score, tag});
        }
    }
}
|
||||
|
||||
// Groups per-joint peaks into poses by associative-embedding tags.
// Joints are processed in a fixed order (torso first); the first joint seeds
// one pose per peak, and each subsequent joint's peaks are assigned to
// existing poses by solving an assignment problem whose cost combines tag
// (embedding) distance and spatial distance to the pose center. Peaks that
// match no pose (tag difference >= tagThreshold) start new poses.
std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
                             size_t maxNumPeople,
                             size_t numJoints,
                             float tagThreshold) {
    // COCO-style processing order: head/torso joints before limbs.
    size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16};
    std::vector<Pose> allPoses;
    for (size_t jointId : jointOrder) {
        std::vector<Peak>& jointPeaks = allPeaks[jointId];
        std::vector<float> tags;
        for (auto& peak : jointPeaks) {
            tags.push_back(peak.tag);
        }
        // First joint with peaks: every peak seeds its own pose.
        if (allPoses.empty()) {
            for (size_t personId = 0; personId < jointPeaks.size(); personId++) {
                Peak peak = jointPeaks[personId];
                Pose pose = Pose(numJoints);
                pose.add(jointId, peak);
                allPoses.push_back(pose);
            }
            continue;
        }
        // Nothing to match, or the people budget is already exhausted.
        if (jointPeaks.empty() || (allPoses.size() == maxNumPeople)) {
            continue;
        }
        std::vector<float> posesTags;
        std::vector<cv::Point2f> posesCenters;
        for (auto& pose : allPoses) {
            posesTags.push_back(pose.getPoseTag());
            posesCenters.push_back(pose.getPoseCenter());
        }
        size_t numAdded = tags.size();      // candidate peaks (rows)
        size_t numGrouped = posesTags.size();  // existing poses (columns)
        cv::Mat tagsDiff(numAdded, numGrouped, CV_32F);
        cv::Mat matchingCost(numAdded, numGrouped, CV_32F);
        std::vector<float> dists(numAdded);
        for (size_t j = 0; j < numGrouped; j++) {
            float minDist = std::numeric_limits<float>::max();
            // Compute euclidean distance (in spatial space) between the pose center and all joints.
            const cv::Point2f center = posesCenters.at(j);
            for (size_t i = 0; i < numAdded; i++) {
                cv::Point2f v = jointPeaks.at(i).keypoint - center;
                float dist = std::sqrt(v.x * v.x + v.y * v.y);
                dists[i] = dist;
                minDist = std::min(dist, minDist);
            }
            // Compute semantic distance (in embedding space) between the pose tag and all joints
            // and corresponding matching costs.
            auto poseTag = posesTags[j];
            for (size_t i = 0; i < numAdded; i++) {
                float diff = static_cast<float>(cv::norm(tags[i] - poseTag));
                tagsDiff.at<float>(i, j) = diff;
                // Within the tag threshold, prefer spatially closer peaks.
                if (diff < tagThreshold) {
                    diff *= dists[i] / (minDist + 1e-10f);
                }
                // Higher peak score lowers the cost slightly.
                matchingCost.at<float>(i, j) = std::round(diff) * 100 - jointPeaks[i].score;
            }
        }

        // Pad with prohibitively expensive columns so the cost matrix is square
        // enough for the solver when there are more peaks than poses.
        if (numAdded > numGrouped) {
            cv::copyMakeBorder(matchingCost,
                               matchingCost,
                               0,
                               0,
                               0,
                               numAdded - numGrouped,
                               cv::BORDER_CONSTANT,
                               10000000);
        }
        // Get pairs
        auto res = KuhnMunkres().Solve(matchingCost);
        for (size_t row = 0; row < res.size(); row++) {
            size_t col = res[row];
            // Assigned to a real pose within the tag threshold -> extend it;
            // otherwise the peak starts a brand-new pose.
            if (row < numAdded && col < numGrouped && tagsDiff.at<float>(row, col) < tagThreshold) {
                allPoses[col].add(jointId, jointPeaks[row]);
            } else {
                Pose pose = Pose(numJoints);
                pose.add(jointId, jointPeaks[row]);
                allPoses.push_back(pose);
            }
        }
    }
    return allPoses;
}
|
||||
|
||||
namespace {
|
||||
cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) {
|
||||
cv::Point2f delta(0.f, 0.f);
|
||||
int width = heatMap.cols;
|
||||
int height = heatMap.rows;
|
||||
if ((1 < x) && (x < width - 1) && (1 < y) && (y < height - 1)) {
|
||||
auto diffX = heatMap.at<float>(y, x + 1) - heatMap.at<float>(y, x - 1);
|
||||
auto diffY = heatMap.at<float>(y + 1, x) - heatMap.at<float>(y - 1, x);
|
||||
delta.x = diffX > 0 ? 0.25f : -0.25f;
|
||||
delta.y = diffY > 0 ? 0.25f : -0.25f;
|
||||
}
|
||||
return delta;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Post-processes one grouped pose:
//  - "Adjust": every joint with a positive score gets sub-pixel refinement via
//    adjustLocation, plus an optional constant offset `delta`,
//  - "Refine": joints the grouping missed (score <= 0) are recovered by
//    searching for the heatmap position whose embedding tag is closest to the
//    pose tag; the joint is filled in only when that position has a positive
//    heatmap response.
void adjustAndRefine(std::vector<Pose>& allPoses,
                     const std::vector<cv::Mat>& heatMaps,
                     const std::vector<cv::Mat>& aembdsMaps,
                     int poseId,
                     const float delta) {
    Pose& pose = allPoses[poseId];
    float poseTag = pose.getPoseTag();
    for (size_t jointId = 0; jointId < pose.size(); jointId++) {
        Peak& peak = pose.getPeak(jointId);
        const cv::Mat& heatMap = heatMaps[jointId];
        const cv::Mat& aembds = aembdsMaps[jointId];

        if (peak.score > 0) {
            // Adjust
            int x = static_cast<int>(peak.keypoint.x);
            int y = static_cast<int>(peak.keypoint.y);
            peak.keypoint += adjustLocation(x, y, heatMap);
            // NOTE(review): float compared as a truth value — delta == 0.0f
            // disables the shift; confirm that is the intent.
            if (delta) {
                peak.keypoint.x += delta;
                peak.keypoint.y += delta;
            }
        } else {
            // Refine
            // Get position with the closest tag value to the pose tag
            cv::Mat diff = cv::abs(aembds - poseTag);
            // Round-trip through CV_32S quantizes the tag differences to
            // integers before subtracting the heatmap, so the heatmap response
            // breaks ties between equally-close tags.
            diff.convertTo(diff, CV_32S, 1.0, 0.0);
            diff.convertTo(diff, CV_32F);
            diff -= heatMap;
            double min;
            cv::Point2i minLoc;
            cv::minMaxLoc(diff, &min, 0, &minLoc);
            int x = minLoc.x;
            int y = minLoc.y;
            float val = heatMap.at<float>(y, x);
            if (val > 0) {
                peak.keypoint.x = static_cast<float>(x);
                peak.keypoint.y = static_cast<float>(y);
                peak.keypoint += adjustLocation(x, y, heatMap);
                // Peak score is assigned directly, so it does not affect the pose score.
                peak.score = val;
            }
        }
    }
}
|
||||
196
engines/OpenVINOEngine/src/models/classification_model.cpp
Normal file
196
engines/OpenVINOEngine/src/models/classification_model.cpp
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/classification_model.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/op/softmax.hpp>
|
||||
#include <openvino/op/topk.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/results.h"
|
||||
|
||||
// Wraps a single-input image classification network.
//  - nTop: number of top predictions to report (validated against the model's
//    class count in prepareInputsOutputs),
//  - labels: class-index-to-name table; prepareInputsOutputs may prepend an
//    "other" label when the model has exactly one extra class,
//  - useAutoResize / layout are forwarded to the ImageModel base.
ClassificationModel::ClassificationModel(const std::string& modelFileName,
                                         size_t nTop,
                                         bool useAutoResize,
                                         const std::vector<std::string>& labels,
                                         const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      nTop(nTop),
      labels(labels) {}
|
||||
|
||||
// Converts the "indices"/"scores" output tensors (produced by the TopK node
// appended in prepareInputsOutputs) into a ClassificationResult with
// (class id, label, score) triples. Throws when an index falls outside the
// label table.
std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
    const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
    const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
    const int* indices = indicesTensor.data<int>();
    const float* scores = scoresTensor.data<float>();

    ClassificationResult* classificationResult = new ClassificationResult(infResult.frameId, infResult.metaData);
    std::unique_ptr<ResultBase> retVal(classificationResult);

    const size_t numPredictions = scoresTensor.get_size();
    classificationResult->topLabels.reserve(numPredictions);
    for (size_t i = 0; i < numPredictions; ++i) {
        const int classId = indices[i];
        if (classId < 0 || classId >= static_cast<int>(labels.size())) {
            throw std::runtime_error("Invalid index for the class label is found during postprocessing");
        }
        classificationResult->topLabels.emplace_back(classId, labels[classId], scores[i]);
    }

    return retVal;
}
|
||||
|
||||
// Reads class labels from a file where each line is "<id> <label>[,...]":
// the token before the first space is dropped and everything up to the first
// comma (or the end of the line) is kept as the label. Throws when the file
// cannot be opened, a line has no space, or the resulting list is empty.
std::vector<std::string> ClassificationModel::loadLabels(const std::string& labelFilename) {
    std::ifstream inputFile(labelFilename);
    if (!inputFile.is_open()) {
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    }

    std::vector<std::string> labels;
    for (std::string line; std::getline(inputFile, line);) {
        const size_t labelBeginIdx = line.find(' ');
        if (labelBeginIdx == std::string::npos) {
            throw std::runtime_error("The labels file has incorrect format.");
        }
        const size_t labelEndIdx = line.find(',');  // npos => label spans to end of line
        labels.push_back(line.substr(labelBeginIdx + 1, labelEndIdx - (labelBeginIdx + 1)));
    }

    if (labels.empty()) {
        throw std::logic_error("File is empty: " + labelFilename);
    }
    return labels;
}
|
||||
|
||||
// Validates the model's single square NHWC-consumable input and single output,
// configures pre/post-processing, then rewrites the model graph to end in a
// Softmax (reused if present, inserted otherwise) followed by a TopK node
// exposing "indices" (i32) and "scores" (f32) outputs.
void ClassificationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());

    const ov::Shape& inputShape = input.get_shape();
    const ov::Layout& inputLayout = getInputLayout(input);

    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    // The wrapper only accepts square inputs.
    const auto width = inputShape[ov::layout::width_idx(inputLayout)];
    const auto height = inputShape[ov::layout::height_idx(inputLayout)];
    if (height != width) {
        throw std::logic_error("Model input has incorrect image shape. Must be NxN square."
                               " Got " +
                               std::to_string(height) + "x" + std::to_string(width) + ".");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({ "NHWC" });

    // With auto-resize the tensor keeps a dynamic spatial shape and the
    // preprocessor performs the resize on-device.
    if (useAutoResize) {
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }
    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("Classification model wrapper supports topologies with only 1 output");
    }

    const ov::Shape& outputShape = model->output().get_shape();
    if (outputShape.size() != 2 && outputShape.size() != 4) {
        throw std::logic_error("Classification model wrapper supports topologies only with"
                               " 2-dimensional or 4-dimensional output");
    }

    // A 4D output is accepted only when it is effectively 2D (H == W == 1).
    const ov::Layout outputLayout("NCHW");
    if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 ||
                                    outputShape[ov::layout::width_idx(outputLayout)] != 1)) {
        throw std::logic_error("Classification model wrapper supports topologies only"
                               " with 4-dimensional output which has last two dimensions of size 1");
    }

    size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)];
    if (nTop > classesNum) {
        throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " +
                               std::to_string(nTop) + " labels are requested to be predicted");
    }
    // A single surplus class is treated as an implicit background/"other" class.
    if (classesNum == labels.size() + 1) {
        labels.insert(labels.begin(), "other");
        slog::warn << "Inserted 'other' label as first." << slog::endl;
    } else if (classesNum != labels.size()) {
        throw std::logic_error("Model's number of classes and parsed labels must match (" +
                               std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')');
    }

    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    // --------------------------- Adding softmax and topK output ---------------------------
    // Reuse an existing Softmax node if the topology already ends with one.
    auto nodes = model->get_ops();
    auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr<ov::Node>& op) {
        return std::string(op->get_type_name()) == "Softmax";
    });

    std::shared_ptr<ov::Node> softmaxNode;
    if (softmaxNodeIt == nodes.end()) {
        auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node();
        softmaxNode = std::make_shared<ov::op::v1::Softmax>(logitsNode->output(0), 1);
    } else {
        softmaxNode = *softmaxNodeIt;
    }
    const auto k = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, std::vector<size_t>{nTop});
    std::shared_ptr<ov::Node> topkNode = std::make_shared<ov::op::v3::TopK>(softmaxNode,
                                                                            k,
                                                                            1,
                                                                            ov::op::v3::TopK::Mode::MAX,
                                                                            ov::op::v3::TopK::SortType::SORT_VALUES);

    // NOTE(review): in opset3 TopK output(0) holds values and output(1) holds
    // indices, so the local names below look swapped; the {scores, indices}
    // ResultVector order then makes outputs()[0] the indices tensor and
    // outputs()[1] the values tensor, matching the names assigned afterwards —
    // confirm against the TopK opset specification before touching this.
    auto indices = std::make_shared<ov::op::v0::Result>(topkNode->output(0));
    auto scores = std::make_shared<ov::op::v0::Result>(topkNode->output(1));
    ov::ResultVector res({scores, indices});
    model = std::make_shared<ov::Model>(res, model->get_parameters(), "classification");

    // manually set output tensors name for created topK node
    model->outputs()[0].set_names({"indices"});
    outputsNames.push_back("indices");
    model->outputs()[1].set_names({"scores"});
    outputsNames.push_back("scores");

    // set output precisions
    ppp = ov::preprocess::PrePostProcessor(model);
    ppp.output("indices").tensor().set_element_type(ov::element::i32);
    ppp.output("scores").tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
|
||||
54
engines/OpenVINOEngine/src/models/detection_model.cpp
Normal file
54
engines/OpenVINOEngine/src/models/detection_model.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "models/image_model.h"
|
||||
|
||||
// Default constructor: members keep their in-class defaults and the
// ImageModel base is default-initialized.
DetectionModel::DetectionModel() {
}
// Full constructor for a detection network wrapper.
//  - confidenceThreshold: minimum score for a detection to be reported,
//  - labels: class-index-to-name table (may be empty),
//  - modelFileName / useAutoResize / layout are forwarded to ImageModel.
DetectionModel::DetectionModel(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout),
      confidenceThreshold(confidenceThreshold),
      labels(labels) {}
|
||||
|
||||
// Reads class labels from a text file, one label per line.
// Returns an empty list when labelFilename is empty.
// Throws std::runtime_error when the file cannot be opened and
// std::logic_error when it opens but contains no lines.
std::vector<std::string> DetectionModel::loadLabels(const std::string& labelFilename) {
    std::vector<std::string> result;

    if (labelFilename.empty()) {
        return result;
    }

    std::ifstream labelsFile(labelFilename);
    if (!labelsFile.is_open()) {
        throw std::runtime_error("Can't open the labels file: " + labelFilename);
    }

    for (std::string line; std::getline(labelsFile, line);) {
        result.push_back(line);
    }

    if (result.empty()) {
        throw std::logic_error("File is empty: " + labelFilename);
    }

    return result;
}
|
||||
302
engines/OpenVINOEngine/src/models/detection_model_centernet.cpp
Normal file
302
engines/OpenVINOEngine/src/models/detection_model_centernet.cpp
Normal file
@@ -0,0 +1,302 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_centernet.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// CenterNet detection model wrapper. Auto-resize is hard-disabled (the `false`
// argument) because preprocess() performs its own letterbox resize before
// uploading the input tensor.
ModelCenterNet::ModelCenterNet(const std::string& modelFileName,
                               float confidenceThreshold,
                               const std::vector<std::string>& labels,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {}
|
||||
|
||||
// Validates the model topology and configures pre/post-processing:
//  - exactly one 3-channel image input, exposed to the application as NHWC;
//  - exactly three outputs, all forced to f32 NCHW.
// Records the network input size and the output tensor names; the names are
// sorted so postprocess() can rely on a deterministic outputsNames[0..2]
// ordering (presumably heatmap / regression / wh — depends on tensor naming).
void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("CenterNet model wrapper expects models that have only 1 input");
    }

    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs");
    }

    // All outputs are forced to f32 NCHW so postprocess() can index them
    // uniformly.
    const ov::Layout outLayout{"NCHW"};
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout);
    }
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();
}
|
||||
|
||||
// Rotates srcPoint by rotRadius (radians) around the origin using the standard
// 2-D rotation matrix.
cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) {
    const float s = sinf(rotRadius);
    const float c = cosf(rotRadius);

    return cv::Point2f(srcPoint.x * c - srcPoint.y * s,
                       srcPoint.x * s + srcPoint.y * c);
}
|
||||
|
||||
// Returns the third corner of a right triangle: b displaced by (a - b)
// rotated 90 degrees.
cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) {
    const cv::Point2f d = a - b;
    return cv::Point2f(b.x - d.y, b.y + d.x);
}
|
||||
|
||||
// Builds the 2x3 affine transform for CenterNet's coordinate mapping.
// Three correspondence points — the center, a rotated "direction" point, and
// the point completing the right angle — are constructed in both source and
// destination space and handed to cv::getAffineTransform. When inv is true
// the inverse (output -> source) transform is returned instead.
// NOTE(review): srcW (int) and outputWidth/outputHeight (size_t) mix with
// float arithmetic via implicit conversion — assumed small enough to be
// exactly representable as float; confirm for very large dimensions.
cv::Mat getAffineTransform(float centerX,
                           float centerY,
                           int srcW,
                           float rot,
                           size_t outputWidth,
                           size_t outputHeight,
                           bool inv = false) {
    float rotRad = static_cast<float>(CV_PI) * rot / 180.0f;  // degrees -> radians
    auto srcDir = getDir({0.0f, -0.5f * srcW}, rotRad);
    cv::Point2f dstDir(0.0f, -0.5f * outputWidth);
    std::vector<cv::Point2f> src(3, {0.0f, 0.0f});
    std::vector<cv::Point2f> dst(3, {0.0f, 0.0f});

    // Source triangle: image center, rotated direction point, right-angle point.
    src[0] = {centerX, centerY};
    src[1] = srcDir + src[0];
    src[2] = get3rdPoint(src[0], src[1]);

    // Destination triangle: output center and the matching two points.
    dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f};
    dst[1] = dst[0] + dstDir;
    dst[2] = get3rdPoint(dst[0], dst[1]);

    cv::Mat trans;
    if (inv) {
        trans = cv::getAffineTransform(dst, src);
    } else {
        trans = cv::getAffineTransform(src, dst);
    }

    return trans;
}
|
||||
|
||||
// Letterbox-resizes the input image to the network input size (aspect ratio
// preserved), applies inputTransform, and uploads the result as the request's
// input tensor. Returns the original image dimensions so postprocess() can
// map boxes back to image space.
std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) {
    auto& img = inputData.asRef<ImageInputData>().inputImage;
    const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX);

    request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg)));
    return std::make_shared<InternalImageModelData>(img.cols, img.rows);
}
|
||||
|
||||
// In-place sigmoid + local-maximum selection over the CenterNet heatmap.
// First converts every raw logit to a probability (MUTATES scoresPtr in
// place), then keeps only positions that (a) exceed threshold and (b) are the
// maximum within a kernel x kernel neighbourhood — a maxpool-style NMS.
// Returns (flat tensor index, score) pairs of the surviving peaks.
std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) {
    std::vector<std::pair<size_t, float>> scores;
    scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE);
    auto chSize = shape[2] * shape[3];  // elements per channel plane

    // Sigmoid over the whole tensor, applied in place.
    for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) {
        scoresPtr[i] = expf(scoresPtr[i]) / (1 + expf(scoresPtr[i]));
    }

    for (size_t ch = 0; ch < shape[1]; ++ch) {
        for (size_t w = 0; w < shape[2]; ++w) {
            // NOTE(review): the flat index uses shape[2] as the row stride while
            // h ranges over shape[3]; this is only consistent if the heatmap is
            // square (shape[2] == shape[3]) — confirm for non-square models.
            for (size_t h = 0; h < shape[3]; ++h) {
                float max = scoresPtr[chSize * ch + shape[2] * w + h];

                // --------------------- filter on threshold--------------------------------------
                if (max < threshold) {
                    continue;
                }

                // --------------------- store index and score------------------------------------
                scores.push_back({chSize * ch + shape[2] * w + h, max});

                bool next = true;
                // ---------------------- maxpool2d -----------------------------------------------
                // Pop the candidate back off if any neighbour in the window beats it.
                for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) {
                    for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) {
                        // NOTE(review): w and h are unsigned, so `w + i >= 0` is
                        // always true; negative offsets wrap around and are then
                        // rejected by the upper-bound check instead. Works, but
                        // relies on unsigned wraparound — confirm intended.
                        if (w + i >= 0 && w + i < shape[2] && h + j >= 0 && h + j < shape[3]) {
                            if (scoresPtr[chSize * ch + shape[2] * (w + i) + h + j] > max) {
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        } else {
                            // Out-of-bounds neighbour: drop only for negative
                            // scores (cannot occur after sigmoid, so effectively
                            // border cells always survive this branch).
                            if (max < 0) {
                                scores.pop_back();
                                next = false;
                                break;
                            }
                        }
                    }
                }
            }
        }
    }

    return scores;
}
|
||||
|
||||
// Runs maxpool-style NMS over the raw heatmap and returns the surviving
// (flat index, score) pairs whose score exceeds threshold. Note that nms()
// mutates the tensor data in place (sigmoid).
static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) {
    float* data = scoresTensor.data<float>();
    const ov::Shape& tensorShape = scoresTensor.get_shape();
    return nms(data, tensorShape, threshold);
}
|
||||
|
||||
// Gathers the 2-channel center-offset regression (dx, dy) for every surviving
// detection. The regression tensor stores channel 0 in the first chSize floats
// and channel 1 in the next chSize floats; s.first % chSize recovers the
// spatial (pixel) index within a channel.
// Improvements: reserve() the exact output size up front and iterate by
// const reference with emplace_back, avoiding reallocation and pair copies.
std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor,
                                               const std::vector<std::pair<size_t, float>>& scores,
                                               size_t chSize) {
    const float* regPtr = regressionTensor.data<float>();
    std::vector<std::pair<float, float>> reg;
    reg.reserve(scores.size());  // one entry per kept detection

    for (const auto& s : scores) {
        reg.emplace_back(regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]);
    }

    return reg;
}
|
||||
|
||||
// Gathers the 2-channel box size (width, height) for every surviving
// detection, using the same channel-plane layout as filterReg: channel 0 in
// the first chSize floats, channel 1 in the next chSize floats.
// Improvements: reserve() the exact output size up front and iterate by
// const reference with emplace_back, avoiding reallocation and pair copies.
std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor,
                                              const std::vector<std::pair<size_t, float>>& scores,
                                              size_t chSize) {
    const float* whPtr = whTensor.data<float>();
    std::vector<std::pair<float, float>> wh;
    wh.reserve(scores.size());  // one entry per kept detection

    for (const auto& s : scores) {
        wh.emplace_back(whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]);
    }

    return wh;
}
|
||||
|
||||
std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores,
|
||||
const std::vector<std::pair<float, float>>& reg,
|
||||
const std::vector<std::pair<float, float>>& wh,
|
||||
const ov::Shape& shape) {
|
||||
std::vector<ModelCenterNet::BBox> boxes(scores.size());
|
||||
|
||||
for (size_t i = 0; i < boxes.size(); ++i) {
|
||||
size_t chIdx = scores[i].first % (shape[2] * shape[3]);
|
||||
auto xCenter = chIdx % shape[3];
|
||||
auto yCenter = chIdx / shape[3];
|
||||
|
||||
boxes[i].left = xCenter + reg[i].first - wh[i].first / 2.0f;
|
||||
boxes[i].top = yCenter + reg[i].second - wh[i].second / 2.0f;
|
||||
boxes[i].right = xCenter + reg[i].first + wh[i].first / 2.0f;
|
||||
boxes[i].bottom = yCenter + reg[i].second + wh[i].second / 2.0f;
|
||||
}
|
||||
|
||||
return boxes;
|
||||
}
|
||||
|
||||
// Maps boxes from heatmap coordinates back into the original image using the
// inverse of the letterbox affine transform (inv = true). scale is the longer
// side of the source image; (centerX, centerY) is its center.
void transform(std::vector<ModelCenterNet::BBox>& boxes,
               const ov::Shape& shape,
               int scale,
               float centerX,
               float centerY) {
    cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true);

    for (auto& b : boxes) {
        ModelCenterNet::BBox newbb;

        // Apply the 2x3 affine matrix to the top-left and bottom-right corners.
        newbb.left = trans.at<float>(0, 0) * b.left + trans.at<float>(0, 1) * b.top + trans.at<float>(0, 2);
        newbb.top = trans.at<float>(1, 0) * b.left + trans.at<float>(1, 1) * b.top + trans.at<float>(1, 2);
        newbb.right = trans.at<float>(0, 0) * b.right + trans.at<float>(0, 1) * b.bottom + trans.at<float>(0, 2);
        newbb.bottom = trans.at<float>(1, 0) * b.right + trans.at<float>(1, 1) * b.bottom + trans.at<float>(1, 2);

        b = newbb;
    }
}
|
||||
|
||||
// Turns the three raw output tensors into a DetectionResult.
// outputsData is addressed through outputsNames[0..2] (sorted in
// prepareInputsOutputs); this code treats [0] as the heatmap, [1] as center
// regression and [2] as width/height. Boxes are built in heatmap space,
// mapped back through the inverse letterbox transform, then clamped to the
// original image bounds.
std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) {
    // --------------------------- Filter data and get valid indices ---------------------------------
    const auto& heatmapTensor = infResult.outputsData[outputsNames[0]];
    const auto& heatmapTensorShape = heatmapTensor.get_shape();
    const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3];
    const auto scores = filterScores(heatmapTensor, confidenceThreshold);

    const auto& regressionTensor = infResult.outputsData[outputsNames[1]];
    const auto reg = filterReg(regressionTensor, scores, chSize);

    const auto& whTensor = infResult.outputsData[outputsNames[2]];
    const auto wh = filterWH(whTensor, scores, chSize);

    // --------------------------- Calculate bounding boxes & apply inverse affine transform ----------
    auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape);

    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    const auto scale = std::max(imgWidth, imgHeight);  // letterbox scaled by the longer side
    const float centerX = imgWidth / 2.0f;
    const float centerY = imgHeight / 2.0f;

    transform(boxes, heatmapTensorShape, scale, centerX, centerY);

    // --------------------------- Create detection result objects ------------------------------------
    // Raw new is safe here: ownership transfers to the unique_ptr on return.
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);

    result->objects.reserve(scores.size());
    for (size_t i = 0; i < scores.size(); ++i) {
        DetectedObject desc;
        desc.confidence = scores[i].second;
        desc.labelID = scores[i].first / chSize;  // heatmap channel == class id
        desc.label = getLabelName(desc.labelID);
        desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));

        result->objects.push_back(desc);
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
261
engines/OpenVINOEngine/src/models/detection_model_faceboxes.cpp
Normal file
261
engines/OpenVINOEngine/src/models/detection_model_faceboxes.cpp
Normal file
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_faceboxes.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/nms.hpp>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// FaceBoxes single-class ("Face") detector.
// boxIOUThreshold drives the NMS in postprocess(); variance, steps and
// minSizes are the fixed prior-box decoding parameters used by priorBoxes()
// and filterBoxes(). maxProposalsCount is filled in prepareInputsOutputs().
ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName,
                               float confidenceThreshold,
                               bool useAutoResize,
                               float boxIOUThreshold,
                               const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),
      maxProposalsCount(0),
      boxIOUThreshold(boxIOUThreshold),
      variance({0.1f, 0.2f}),
      steps({32, 64, 128}),
      minSizes({{32, 64, 128}, {256}, {512}}) {}
|
||||
|
||||
// Configures the single NHWC image input (with optional plugin-side resize
// when useAutoResize is set) and the two f32 CHW outputs. Records the network
// input size and the maximum proposal count, then precomputes the prior boxes
// for each feature-map level.
void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input");
    }

    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});

    if (useAutoResize) {
        // Accept frames of arbitrary spatial size; the plugin resizes them.
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 2) {
        throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs");
    }

    const ov::Layout outputLayout{"CHW"};
    // The proposal count comes from the first output's H dimension.
    maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)];
    for (const auto& output : model->outputs()) {
        const auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
    }
    // Sorted for a deterministic order; postprocess() assumes [0] = boxes and
    // [1] = scores — this depends on the tensor names sorting that way.
    std::sort(outputsNames.begin(), outputsNames.end());
    model = ppp.build();

    // --------------------------- Calculating anchors ----------------------------------------------------
    // One (rows, cols) feature-map size per stride level.
    std::vector<std::pair<size_t, size_t>> featureMaps;
    for (auto s : steps) {
        featureMaps.push_back({netInputHeight / s, netInputWidth / s});
    }

    priorBoxes(featureMaps);
}
|
||||
|
||||
void calculateAnchors(std::vector<Anchor>& anchors,
|
||||
const std::vector<float>& vx,
|
||||
const std::vector<float>& vy,
|
||||
const int minSize,
|
||||
const int step) {
|
||||
float skx = static_cast<float>(minSize);
|
||||
float sky = static_cast<float>(minSize);
|
||||
|
||||
std::vector<float> dense_cx, dense_cy;
|
||||
|
||||
for (auto x : vx) {
|
||||
dense_cx.push_back(x * step);
|
||||
}
|
||||
|
||||
for (auto y : vy) {
|
||||
dense_cy.push_back(y * step);
|
||||
}
|
||||
|
||||
for (auto cy : dense_cy) {
|
||||
for (auto cx : dense_cx) {
|
||||
anchors.push_back(
|
||||
{cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
|
||||
const int fx,
|
||||
const int fy,
|
||||
const std::vector<int>& minSizes,
|
||||
const int step) {
|
||||
for (auto s : minSizes) {
|
||||
std::vector<float> vx, vy;
|
||||
if (s == 32) {
|
||||
vx.push_back(static_cast<float>(fx));
|
||||
vx.push_back(fx + 0.25f);
|
||||
vx.push_back(fx + 0.5f);
|
||||
vx.push_back(fx + 0.75f);
|
||||
|
||||
vy.push_back(static_cast<float>(fy));
|
||||
vy.push_back(fy + 0.25f);
|
||||
vy.push_back(fy + 0.5f);
|
||||
vy.push_back(fy + 0.75f);
|
||||
} else if (s == 64) {
|
||||
vx.push_back(static_cast<float>(fx));
|
||||
vx.push_back(fx + 0.5f);
|
||||
|
||||
vy.push_back(static_cast<float>(fy));
|
||||
vy.push_back(fy + 0.5f);
|
||||
} else {
|
||||
vx.push_back(fx + 0.5f);
|
||||
vy.push_back(fy + 0.5f);
|
||||
}
|
||||
calculateAnchors(anchors, vx, vy, s, step);
|
||||
}
|
||||
}
|
||||
|
||||
void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) {
|
||||
anchors.reserve(maxProposalsCount);
|
||||
|
||||
for (size_t k = 0; k < featureMaps.size(); ++k) {
|
||||
std::vector<float> a;
|
||||
for (size_t i = 0; i < featureMaps[k].first; ++i) {
|
||||
for (size_t j = 0; j < featureMaps[k].second; ++j) {
|
||||
if (k == 0) {
|
||||
calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]);
|
||||
} else {
|
||||
calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scans the interleaved (background, face) score pairs — every odd element is
// a face confidence — and returns the proposal indices and scores that pass
// the threshold.
std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor,
                                                                const float confidenceThreshold) {
    const auto shape = scoresTensor.get_shape();
    const float* scoresPtr = ov::Tensor{scoresTensor}.data<const float>();
    const size_t total = shape[1] * shape[2];

    std::vector<size_t> indices;
    std::vector<float> scores;
    indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);

    for (size_t i = 1; i < total; i += 2) {
        const float faceScore = scoresPtr[i];
        if (faceScore > confidenceThreshold) {
            indices.push_back(i / 2);  // proposal index = pair index
            scores.push_back(faceScore);
        }
    }

    return {indices, scores};
}
|
||||
|
||||
// Decodes SSD-style box deltas for the proposals that survived the score
// filter. (dx, dy) shift each anchor's center (scaled by variance[0] and the
// anchor size); (dw, dh) scale its size exponentially (scaled by variance[1]).
std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
                                const std::vector<Anchor>& anchors,
                                const std::vector<size_t>& validIndices,
                                const std::vector<float>& variance) {
    auto shape = boxesTensor.get_shape();
    const float* boxesPtr = ov::Tensor{boxesTensor}.data<const float>();

    std::vector<Anchor> boxes;
    boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
    for (auto i : validIndices) {
        // Proposals are stored contiguously, shape[2] floats each
        // (presumably 4: dx, dy, dw, dh — confirm against the model).
        auto objStart = shape[2] * i;

        auto dx = boxesPtr[objStart];
        auto dy = boxesPtr[objStart + 1];
        auto dw = boxesPtr[objStart + 2];
        auto dh = boxesPtr[objStart + 3];

        // Decode to center/size form, then convert back to corner coordinates.
        auto predCtrX = dx * variance[0] * anchors[i].getWidth() + anchors[i].getXCenter();
        auto predCtrY = dy * variance[0] * anchors[i].getHeight() + anchors[i].getYCenter();
        auto predW = exp(dw * variance[1]) * anchors[i].getWidth();
        auto predH = exp(dh * variance[1]) * anchors[i].getHeight();

        boxes.push_back({static_cast<float>(predCtrX - 0.5f * predW),
                         static_cast<float>(predCtrY - 0.5f * predH),
                         static_cast<float>(predCtrX + 0.5f * predW),
                         static_cast<float>(predCtrY + 0.5f * predH)});
    }

    return boxes;
}
|
||||
|
||||
// Decodes the raw outputs into a DetectionResult:
//  1. threshold the face scores, 2. decode box deltas against the priors,
//  3. apply NMS, 4. scale surviving boxes from network-input coordinates back
//     to the original image and clamp them to its bounds.
// outputsNames[0] is treated as boxes, [1] as scores (sorted order from
// prepareInputsOutputs).
std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) {
    // Filter scores and get valid indices for bounding boxes
    const auto scoresTensor = infResult.outputsData[outputsNames[1]];
    const auto scores = filterScores(scoresTensor, confidenceThreshold);

    // Filter bounding boxes on indices
    auto boxesTensor = infResult.outputsData[outputsNames[0]];
    std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);

    // Apply Non-maximum Suppression
    const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold);

    // Create detection result objects
    // Ownership of the raw pointer transfers to the unique_ptr on return.
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
    // Boxes are in network-input coordinates; divide by these to reach image space.
    const float scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const float scaleY = static_cast<float>(netInputHeight) / imgHeight;

    result->objects.reserve(keep.size());
    for (auto i : keep) {
        DetectedObject desc;
        desc.confidence = scores.second[i];
        desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth));
        desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight));
        desc.labelID = 0;        // single-class detector
        desc.label = labels[0];  // "Face"

        result->objects.push_back(desc);
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
394
engines/OpenVINOEngine/src/models/detection_model_retinaface.cpp
Normal file
394
engines/OpenVINOEngine/src/models/detection_model_retinaface.cpp
Normal file
@@ -0,0 +1,394 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_retinaface.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/nms.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// RetinaFace detector with "Face" as the default (single) label.
// Mask/landmark support, the label list and landmarkStd are reconfigured
// later in prepareInputsOutputs() based on which output tensors the loaded
// model actually exposes. anchorCfg holds the per-FPN-level anchor
// configuration (strides 32/16/8); the per-stride base anchors are generated
// once here.
ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 bool useAutoResize,
                                 float boxIOUThreshold,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face"
      shouldDetectMasks(false),
      shouldDetectLandmarks(false),
      boxIOUThreshold(boxIOUThreshold),
      maskThreshold(0.8f),
      landmarkStd(1.0f),
      anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {
    generateAnchorsFpn();
}
|
||||
|
||||
// Configures the u8 NHWC input (optional plugin-side resize), classifies each
// of the 6/9/12 outputs by name substring into boxes / scores / landmarks /
// mask-scores groups (names within a group kept sorted by their H dimension,
// ascending), and finally lays out dense anchors for every cell of every FPN
// level.
void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");
    }
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});

    if (useAutoResize) {
        // Accept frames of arbitrary spatial size; the plugin resizes them.
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output -----------------------------------------------------

    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");
    }

    const ov::Layout outputLayout{"NCHW"};
    // Per-group H sizes, maintained sorted in parallel with separateOutputsNames.
    std::vector<size_t> outputsSizes[OUT_MAX];
    for (const auto& output : model->outputs()) {
        auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);

        // Classify the output by its name; tensors with unrecognized names are
        // skipped entirely.
        OutputType type = OUT_MAX;
        if (outTensorName.find("box") != std::string::npos) {
            type = OUT_BOXES;
        } else if (outTensorName.find("cls") != std::string::npos) {
            type = OUT_SCORES;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            type = OUT_LANDMARKS;
            shouldDetectLandmarks = true;
        } else if (outTensorName.find("type") != std::string::npos) {
            // Mask classification head present: switch to the two mask labels
            // and the mask-model landmark std.
            type = OUT_MASKSCORES;
            labels.clear();
            labels.push_back("No Mask");
            labels.push_back("Mask");
            shouldDetectMasks = true;
            landmarkStd = 0.2f;
        } else {
            continue;
        }

        // Insertion sort of the name into its group by the output's H size.
        size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];
        size_t i = 0;
        for (; i < outputsSizes[type].size(); ++i) {
            if (num < outputsSizes[type][i]) {
                break;
            }
        }
        separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);
        outputsSizes[type].insert(outputsSizes[type].begin() + i, num);
    }
    model = ppp.build();

    // Lay out anchors for every cell of every FPN level.
    for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {
        // NOTE(review): width and height are both read from the same stored
        // size — this assumes square feature maps; confirm for non-square inputs.
        size_t width = outputsSizes[OUT_BOXES][idx];
        size_t height = outputsSizes[OUT_BOXES][idx];
        auto s = anchorCfg[idx].stride;
        auto anchorNum = anchorsFpn[s].size();

        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
        for (size_t iw = 0; iw < width; ++iw) {
            size_t sw = iw * s;  // cell origin in input pixels
            for (size_t ih = 0; ih < height; ++ih) {
                size_t sh = ih * s;
                for (size_t k = 0; k < anchorNum; ++k) {
                    // Shift each base anchor of this stride to the cell origin.
                    Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];
                    anc.left = anchorsFpn[s][k].left + sw;
                    anc.top = anchorsFpn[s][k].top + sh;
                    anc.right = anchorsFpn[s][k].right + sw;
                    anc.bottom = anchorsFpn[s][k].bottom + sh;
                }
            }
        }
    }
}
|
||||
|
||||
std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
|
||||
std::vector<Anchor> retVal;
|
||||
const auto w = anchor.getWidth();
|
||||
const auto h = anchor.getHeight();
|
||||
const auto xCtr = anchor.getXCenter();
|
||||
const auto yCtr = anchor.getYCenter();
|
||||
|
||||
for (const auto ratio : ratios) {
|
||||
const auto size = w * h;
|
||||
const auto sizeRatio = static_cast<float>(size) / ratio;
|
||||
const auto ws = sqrt(sizeRatio);
|
||||
const auto hs = ws * ratio;
|
||||
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
|
||||
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
|
||||
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
|
||||
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
|
||||
std::vector<Anchor> retVal;
|
||||
const auto w = anchor.getWidth();
|
||||
const auto h = anchor.getHeight();
|
||||
const auto xCtr = anchor.getXCenter();
|
||||
const auto yCtr = anchor.getYCenter();
|
||||
|
||||
for (auto scale : scales) {
|
||||
const auto ws = w * scale;
|
||||
const auto hs = h * scale;
|
||||
retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
|
||||
static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
|
||||
static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
|
||||
static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
std::vector<Anchor> generateAnchors(const int baseSize,
|
||||
const std::vector<int>& ratios,
|
||||
const std::vector<int>& scales) {
|
||||
Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
|
||||
auto ratioAnchors = ratioEnum(baseAnchor, ratios);
|
||||
std::vector<Anchor> retVal;
|
||||
|
||||
for (const auto& ra : ratioAnchors) {
|
||||
auto addon = scaleEnum(ra, scales);
|
||||
retVal.insert(retVal.end(), addon.begin(), addon.end());
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void ModelRetinaFace::generateAnchorsFpn() {
|
||||
auto cfg = anchorCfg;
|
||||
std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) {
|
||||
return x.stride > y.stride;
|
||||
});
|
||||
|
||||
for (const auto& cfgLine : cfg) {
|
||||
anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales));
|
||||
}
|
||||
}
|
||||
|
||||
// Collect flattened (cell, anchor) indices whose score passes the confidence threshold.
// Only channels >= anchorNum are scanned — the leading anchorNum channels are skipped,
// mirroring the offset applied in filterScores().
std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();
    const size_t restAnchors = shape[1] - anchorNum;
    const size_t planeH = shape[2];
    const size_t planeW = shape[3];

    std::vector<size_t> indices;
    indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);
    for (size_t c = anchorNum; c < shape[1]; ++c) {
        for (size_t row = 0; row < planeH; ++row) {
            for (size_t col = 0; col < planeW; ++col) {
                const size_t flat = (c * planeH + row) * planeW + col;
                if (scoresPtr[flat] >= confidenceThreshold) {
                    // Encode the hit as (spatial cell) * anchors-per-cell + anchor index.
                    indices.push_back((row * planeW + col) * restAnchors + (c - anchorNum));
                }
            }
        }
    }
    return indices;
}
|
||||
|
||||
void filterScores(std::vector<float>& scores,
|
||||
const std::vector<size_t>& indices,
|
||||
const ov::Tensor& scoresTensor,
|
||||
const int anchorNum) {
|
||||
const auto& shape = scoresTensor.get_shape();
|
||||
const float* scoresPtr = scoresTensor.data<float>();
|
||||
const auto start = shape[2] * shape[3] * anchorNum;
|
||||
|
||||
for (auto i : indices) {
|
||||
auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
|
||||
scores.push_back(scoresPtr[start + offset]);
|
||||
}
|
||||
}
|
||||
|
||||
void filterBoxes(std::vector<Anchor>& boxes,
|
||||
const std::vector<size_t>& indices,
|
||||
const ov::Tensor& boxesTensor,
|
||||
int anchorNum,
|
||||
const std::vector<Anchor>& anchors) {
|
||||
const auto& shape = boxesTensor.get_shape();
|
||||
const float* boxesPtr = boxesTensor.data<float>();
|
||||
const auto boxPredLen = shape[1] / anchorNum;
|
||||
const auto blockWidth = shape[2] * shape[3];
|
||||
|
||||
for (auto i : indices) {
|
||||
auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum);
|
||||
|
||||
const auto dx = boxesPtr[offset];
|
||||
const auto dy = boxesPtr[offset + blockWidth];
|
||||
const auto dw = boxesPtr[offset + blockWidth * 2];
|
||||
const auto dh = boxesPtr[offset + blockWidth * 3];
|
||||
|
||||
const auto predCtrX = dx * anchors[i].getWidth() + anchors[i].getXCenter();
|
||||
const auto predCtrY = dy * anchors[i].getHeight() + anchors[i].getYCenter();
|
||||
const auto predW = exp(dw) * anchors[i].getWidth();
|
||||
const auto predH = exp(dh) * anchors[i].getHeight();
|
||||
|
||||
boxes.push_back({static_cast<float>(predCtrX - 0.5f * (predW - 1.0f)),
|
||||
static_cast<float>(predCtrY - 0.5f * (predH - 1.0f)),
|
||||
static_cast<float>(predCtrX + 0.5f * (predW - 1.0f)),
|
||||
static_cast<float>(predCtrY + 0.5f * (predH - 1.0f))});
|
||||
}
|
||||
}
|
||||
|
||||
// Decode landmark offsets for the selected anchors and append LANDMARKS_NUM points
// per anchor to `landmarks`. Offsets are pre-scaled by `landmarkStd`.
void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                     const std::vector<size_t>& indices,
                     const ov::Tensor& landmarksTensor,
                     int anchorNum,
                     const std::vector<Anchor>& anchors,
                     const float landmarkStd) {
    const auto& shape = landmarksTensor.get_shape();
    const float* preds = landmarksTensor.data<float>();
    const auto landmarkPredLen = shape[1] / anchorNum;
    const auto planeSize = shape[2] * shape[3];

    for (const auto index : indices) {
        const auto& anchor = anchors[index];
        const auto base = (index % anchorNum) * landmarkPredLen * shape[2] * shape[3] + index / anchorNum;
        for (int lm = 0; lm < ModelRetinaFace::LANDMARKS_NUM; ++lm) {
            // Each landmark occupies two consecutive planes: x offset, then y offset.
            const auto dx = preds[base + lm * 2 * planeSize] * landmarkStd;
            const auto dy = preds[base + (lm * 2 + 1) * planeSize] * landmarkStd;
            landmarks.push_back({dx * anchor.getWidth() + anchor.getXCenter(),
                                 dy * anchor.getHeight() + anchor.getYCenter()});
        }
    }
}
|
||||
|
||||
void filterMasksScores(std::vector<float>& masks,
|
||||
const std::vector<size_t>& indices,
|
||||
const ov::Tensor& maskScoresTensor,
|
||||
const int anchorNum) {
|
||||
auto shape = maskScoresTensor.get_shape();
|
||||
const float* maskScoresPtr = maskScoresTensor.data<float>();
|
||||
auto start = shape[2] * shape[3] * anchorNum * 2;
|
||||
|
||||
for (auto i : indices) {
|
||||
auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
|
||||
masks.push_back(maskScoresPtr[start + offset]);
|
||||
}
|
||||
}
|
||||
|
||||
// Converts raw multi-level network outputs into a RetinaFaceDetectionResult:
// per-level thresholding and decoding, NMS, then rescaling to original image space.
std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {
    std::vector<float> scores;
    scores.reserve(INIT_VECTOR_SIZE);
    std::vector<Anchor> boxes;
    boxes.reserve(INIT_VECTOR_SIZE);
    std::vector<cv::Point2f> landmarks;
    if (shouldDetectLandmarks) {
        landmarks.reserve(INIT_VECTOR_SIZE);
    }
    std::vector<float> masks;
    if (shouldDetectMasks) {
        masks.reserve(INIT_VECTOR_SIZE);
    }

    // --- Gather and filter outputs from every FPN level.
    for (size_t level = 0; level < anchorCfg.size(); ++level) {
        const auto rawBoxes = infResult.outputsData[separateOutputsNames[OUT_BOXES][level]];
        const auto rawScores = infResult.outputsData[separateOutputsNames[OUT_SCORES][level]];
        const auto stride = anchorCfg[level].stride;
        const auto anchorNum = anchorsFpn[stride].size();

        const auto validIndices = thresholding(rawScores, anchorNum, confidenceThreshold);
        filterScores(scores, validIndices, rawScores, anchorNum);
        filterBoxes(boxes, validIndices, rawBoxes, anchorNum, anchors[level]);
        if (shouldDetectLandmarks) {
            const auto rawLandmarks = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][level]];
            filterLandmarks(landmarks, validIndices, rawLandmarks, anchorNum, anchors[level], landmarkStd);
        }
        if (shouldDetectMasks) {
            const auto rawMasks = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][level]];
            filterMasksScores(masks, validIndices, rawMasks, anchorNum);
        }
    }

    // --- Non-maximum suppression. When landmarks are disabled, box boundaries
    // are included in the area calculation.
    const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);

    // --- Build the result, mapping coordinates back to the original image.
    auto* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto imgWidth = internalData.inputImgWidth;
    const auto imgHeight = internalData.inputImgHeight;
    const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;
    const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;

    result->objects.reserve(keep.size());
    result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);
    for (const auto i : keep) {
        //--- Rescale the kept box in place, then clamp to image bounds.
        boxes[i].left /= scaleX;
        boxes[i].top /= scaleY;
        boxes[i].right /= scaleX;
        boxes[i].bottom /= scaleY;

        DetectedObject obj;
        obj.confidence = scores[i];
        obj.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
        obj.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
        obj.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
        obj.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
        //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask
        obj.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;
        obj.label = labels[obj.labelID];
        result->objects.push_back(obj);

        //--- Rescale and clamp landmark coordinates in place, then copy them out.
        if (shouldDetectLandmarks) {
            for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM; ++l) {
                auto& pt = landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l];
                pt.x = clamp(pt.x / scaleX, 0.f, static_cast<float>(imgWidth));
                pt.y = clamp(pt.y / scaleY, 0.f, static_cast<float>(imgHeight));
                result->landmarks.push_back(pt);
            }
        }
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
@@ -0,0 +1,277 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_retinaface_pt.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/nms.hpp>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// Constructs the PyTorch-flavored RetinaFace wrapper. The single default label is
// "Face"; landmarksNum stays 0 until the landmarks output (if any) is discovered
// in prepareInputsOutputs().
ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName,
                                     float confidenceThreshold,
                                     bool useAutoResize,
                                     float boxIOUThreshold,
                                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),
      landmarksNum(0),
      boxIOUThreshold(boxIOUThreshold) {}
|
||||
|
||||
// Configures model I/O: validates the single 3-channel image input, sets up optional
// plugin-side resize, discovers the bbox/cls/landmark outputs by name, and finally
// generates the prior boxes.
// Fixes vs. original: removed the unused local `std::vector<uint32_t> outputsSizes[OUT_MAX]`
// and the dead `else { continue; }` at the end of the output loop.
void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("RetinaFacePT model wrapper expects models that have only 1 input");
    }

    const ov::Shape& inputShape = model->input().get_partial_shape().get_max_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});

    if (useAutoResize) {
        // Let the plugin resize arbitrary input resolutions to the network size.
        ppp.input().tensor().set_spatial_dynamic_shape();
        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 3) {
        throw std::logic_error("RetinaFace model wrapper expects models that have 3 outputs");
    }

    landmarksNum = 0;
    outputsNames.resize(2);
    const ov::Layout chw("CHW");
    const ov::Layout nchw("NCHW");
    for (const auto& output : model->outputs()) {
        const auto outTensorName = output.get_any_name();
        outputsNames.push_back(outTensorName);
        ppp.output(outTensorName)
            .tensor()
            .set_element_type(ov::element::f32)
            .set_layout(output.get_partial_shape().size() == 4 ? nchw : chw);

        if (outTensorName.find("bbox") != std::string::npos) {
            outputsNames[OUT_BOXES] = outTensorName;
        } else if (outTensorName.find("cls") != std::string::npos) {
            outputsNames[OUT_SCORES] = outTensorName;
        } else if (outTensorName.find("landmark") != std::string::npos) {
            // Landmarks might be optional; if present, grow the names array so the
            // OUT_LANDMARKS slot exists (other slots are filled before or after).
            outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1));
            outputsNames[OUT_LANDMARKS] = outTensorName;
            // Each landmark consists of 2 variables (x and y).
            landmarksNum = output.get_partial_shape()[ov::layout::width_idx(chw)].get_length() / 2;
        }
    }

    if (outputsNames[OUT_BOXES] == "" || outputsNames[OUT_SCORES] == "") {
        throw std::logic_error("Bbox or cls layers are not found");
    }

    model = ppp.build();
    priors = generatePriorData();
}
|
||||
|
||||
// Returns indices of prior rows whose face-class score (column 1 of each row)
// meets the confidence threshold.
std::vector<size_t> ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) {
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();

    std::vector<size_t> indicies;
    for (size_t row = 0; row < shape[1]; ++row) {
        if (scoresPtr[row * shape[2] + 1] >= confidenceThreshold) {
            indicies.push_back(row);
        }
    }
    return indicies;
}
|
||||
|
||||
// Gathers the face-class scores for the previously selected prior indices.
std::vector<float> ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor,
                                                        const std::vector<size_t>& indicies) {
    const auto& shape = scoresTensor.get_shape();
    const float* scoresPtr = scoresTensor.data<float>();

    std::vector<float> filtered;
    filtered.reserve(indicies.size());
    for (const auto idx : indicies) {
        filtered.push_back(scoresPtr[idx * shape[2] + 1]);
    }
    return filtered;
}
|
||||
|
||||
// Decodes landmark predictions for the selected priors into absolute pixel coordinates,
// clamping the normalized position to [0,1] before scaling by the image size.
std::vector<cv::Point2f> ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor,
                                                                 const std::vector<size_t>& indicies,
                                                                 int imgWidth,
                                                                 int imgHeight) {
    const auto& shape = landmarksTensor.get_shape();
    const float* preds = landmarksTensor.data<float>();

    std::vector<cv::Point2f> landmarks(landmarksNum * indicies.size());
    for (size_t i = 0; i < indicies.size(); i++) {
        const size_t priorIdx = indicies[i];
        const auto& prior = priors[priorIdx];
        for (size_t j = 0; j < landmarksNum; j++) {
            auto& pt = landmarks[i * landmarksNum + j];
            // Offsets are relative to the prior box, scaled by the first variance term.
            pt.x = clamp(prior.cX + preds[priorIdx * shape[2] + j * 2] * variance[0] * prior.width, 0.f, 1.f) *
                   imgWidth;
            pt.y = clamp(prior.cY + preds[priorIdx * shape[2] + j * 2 + 1] * variance[0] * prior.height, 0.f, 1.f) *
                   imgHeight;
        }
    }
    return landmarks;
}
|
||||
|
||||
std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() {
|
||||
const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}};
|
||||
const float steps[] = {8., 16., 32.};
|
||||
std::vector<ModelRetinaFacePT::Box> anchors;
|
||||
for (size_t stepNum = 0; stepNum < arraySize(steps); stepNum++) {
|
||||
const int featureW = static_cast<int>(std::round(netInputWidth / steps[stepNum]));
|
||||
const int featureH = static_cast<int>(std::round(netInputHeight / steps[stepNum]));
|
||||
|
||||
const auto& minSizes = globalMinSizes[stepNum];
|
||||
for (int i = 0; i < featureH; i++) {
|
||||
for (int j = 0; j < featureW; j++) {
|
||||
for (auto minSize : minSizes) {
|
||||
const float sKX = minSize / netInputWidth;
|
||||
const float sKY = minSize / netInputHeight;
|
||||
const float denseCY = (i + 0.5f) * steps[stepNum] / netInputHeight;
|
||||
const float denseCX = (j + 0.5f) * steps[stepNum] / netInputWidth;
|
||||
anchors.push_back(ModelRetinaFacePT::Box{denseCX, denseCY, sKX, sKY});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return anchors;
|
||||
}
|
||||
|
||||
// Decodes raw box regressions for the selected priors into absolute-pixel corner
// boxes. Throws std::logic_error if the tensor row count does not match the priors.
std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
                                                            const std::vector<size_t>& indicies,
                                                            int imgWidth,
                                                            int imgHeight) {
    const auto& shape = boxesTensor.get_shape();
    const float* boxesPtr = boxesTensor.data<float>();
    if (shape[1] != priors.size()) {
        throw std::logic_error("rawBoxes size is not equal to priors size");
    }

    std::vector<Anchor> rects;
    rects.reserve(indicies.size());
    for (const auto idx : indicies) {
        const auto* delta = reinterpret_cast<const Box*>(boxesPtr + idx * shape[2]);
        const auto& prior = priors[idx];
        // Center-size decoding with separate variances for position and size.
        const float cX = prior.cX + delta->cX * variance[0] * prior.width;
        const float cY = prior.cY + delta->cY * variance[0] * prior.height;
        const float w = prior.width * exp(delta->width * variance[1]);
        const float h = prior.height * exp(delta->height * variance[1]);
        rects.push_back(Anchor{clamp(cX - w / 2, 0.f, 1.f) * imgWidth,
                               clamp(cY - h / 2, 0.f, 1.f) * imgHeight,
                               clamp(cX + w / 2, 0.f, 1.f) * imgWidth,
                               clamp(cY + h / 2, 0.f, 1.f) * imgHeight});
    }
    return rects;
}
|
||||
|
||||
// Turns the raw outputs into a RetinaFaceDetectionResult: score filtering,
// box/landmark decoding, then non-maximum suppression.
std::unique_ptr<ResultBase> ModelRetinaFacePT::postprocess(InferenceResult& infResult) {
    const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]];
    const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]];

    const auto validIndicies = filterByScore(scoresTensor, confidenceThreshold);
    const auto scores = getFilteredScores(scoresTensor, validIndicies);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    std::vector<cv::Point2f> landmarks;
    if (landmarksNum) {
        landmarks = getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]],
                                         validIndicies,
                                         internalData.inputImgWidth,
                                         internalData.inputImgHeight);
    }

    const auto proposals =
        getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight);

    // Without landmarks, box boundaries are included in the NMS area calculation.
    const auto keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum);

    // --------------------------- Create detection result objects ------------------------------------
    auto* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
    result->objects.reserve(keptIndicies.size());
    result->landmarks.reserve(keptIndicies.size() * landmarksNum);
    for (const auto i : keptIndicies) {
        DetectedObject obj;
        obj.confidence = scores[i];
        obj.x = proposals[i].left;
        obj.y = proposals[i].top;
        obj.width = proposals[i].getWidth();
        obj.height = proposals[i].getHeight();
        obj.labelID = 0;  // single-class model: the only label is index 0
        obj.label = labels[obj.labelID];
        result->objects.push_back(obj);

        // Copy out this detection's landmark coordinates.
        for (uint32_t l = 0; l < landmarksNum; ++l) {
            result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y);
        }
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
286
engines/OpenVINOEngine/src/models/detection_model_ssd.cpp
Normal file
286
engines/OpenVINOEngine/src/models/detection_model_ssd.cpp
Normal file
@@ -0,0 +1,286 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_ssd.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
struct InputData;
|
||||
|
||||
// Default constructor: all configuration happens through the parameterized
// constructor or later setup calls.
ModelSSD::ModelSSD() = default;
|
||||
// Constructs an SSD detection wrapper; all parameters are forwarded to DetectionModel.
ModelSSD::ModelSSD(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   const std::vector<std::string>& labels,
                   const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
|
||||
|
||||
std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
|
||||
if (inputsNames.size() > 1) {
|
||||
auto imageInfoTensor = request.get_tensor(inputsNames[1]);
|
||||
auto info = imageInfoTensor.data<float>();
|
||||
info[0] = static_cast<float>(netInputHeight);
|
||||
info[1] = static_cast<float>(netInputWidth);
|
||||
info[2] = 1;
|
||||
request.set_tensor(inputsNames[1], imageInfoTensor);
|
||||
}
|
||||
|
||||
return DetectionModel::preprocess(inputData, request);
|
||||
}
|
||||
|
||||
// Dispatches to the single- or multi-output postprocessing flavor depending on
// how many outputs were discovered during model preparation.
std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
    if (outputsNames.size() > 1) {
        return postprocessMultipleOutputs(infResult);
    }
    return postprocessSingleOutput(infResult);
}
|
||||
|
||||
// Parses the classic single SSD detection output. Each 7-value record is
// [image_id, label, confidence, xmin, ymin, xmax, ymax]; the coordinates are
// multiplied by the original image size, so they are treated as normalized.
std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    const size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
    const float* detections = ov::Tensor{detectionsTensor}.data<const float>();

    auto* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto maxW = static_cast<float>(internalData.inputImgWidth);
    const auto maxH = static_cast<float>(internalData.inputImgHeight);

    for (size_t i = 0; i < detectionsNum; i++) {
        const float* record = detections + i * objectSize;
        // A negative image_id marks the end of valid detections.
        if (record[0] < 0) {
            break;
        }

        /** Filtering out objects with confidence < confidence_threshold probability **/
        const float confidence = record[2];
        if (confidence <= confidenceThreshold) {
            continue;
        }

        DetectedObject obj;
        obj.confidence = confidence;
        obj.labelID = static_cast<int>(record[1]);
        obj.label = getLabelName(obj.labelID);

        obj.x = clamp(record[3] * internalData.inputImgWidth, 0.f, maxW);
        obj.y = clamp(record[4] * internalData.inputImgHeight, 0.f, maxH);
        obj.width = clamp(record[5] * internalData.inputImgWidth, 0.f, maxW) - obj.x;
        obj.height = clamp(record[6] * internalData.inputImgHeight, 0.f, maxH) - obj.y;

        result->objects.push_back(obj);
    }

    return retVal;
}
|
||||
|
||||
// Parses multi-output SSD models (boxes + labels, optionally + scores).
// Fix vs. original: the local holding the labels output was named `labels`,
// shadowing the inherited `labels` member — renamed to `labelsData`.
std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
    const float* boxes = infResult.outputsData[outputsNames[0]].data<const float>();
    const size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
    const float* labelsData = infResult.outputsData[outputsNames[1]].data<const float>();
    const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<const float>() : nullptr;

    auto* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();

    // In models where scores are stored in a separate output, coordinates are normalized to [0,1].
    // In other multiple-output models coordinates are normalized to [0,netInputWidth] and [0,netInputHeight].
    const float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
    const float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);

    for (size_t i = 0; i < detectionsNum; i++) {
        // Without a separate scores output, confidence is the 5th value of each box record.
        const float confidence = scores ? scores[i] : boxes[i * objectSize + 4];

        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence <= confidenceThreshold) {
            continue;
        }

        DetectedObject obj;
        obj.confidence = confidence;
        obj.labelID = static_cast<int>(labelsData[i]);
        obj.label = getLabelName(obj.labelID);

        const float* box = boxes + i * objectSize;
        obj.x = clamp(box[0] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
        obj.y = clamp(box[1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight));
        obj.width = clamp(box[2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) - obj.x;
        obj.height = clamp(box[3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) - obj.y;

        result->objects.push_back(obj);
    }

    return retVal;
}
|
||||
|
||||
// Configures model inputs (4D image input, optional 2D image-info input) and then
// dispatches output preparation based on the output count.
void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare input ------------------------------------------------------
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : model->inputs()) {
        const auto inputTensorName = input.get_any_name();
        const ov::Shape& shape = input.get_shape();
        const ov::Layout inputLayout = getInputLayout(input);

        if (shape.size() == 4) {
            // 4D input carries the image itself.
            if (inputsNames.empty()) {
                inputsNames.push_back(inputTensorName);
            } else {
                inputsNames[0] = inputTensorName;
            }

            inputTransform.setPrecision(ppp, inputTensorName);
            ppp.input(inputTensorName).tensor().set_layout({"NHWC"});

            if (useAutoResize) {
                // Accept arbitrary spatial size and resize inside the plugin.
                ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
                ppp.input(inputTensorName)
                    .preprocess()
                    .convert_element_type(ov::element::f32)
                    .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
            }

            ppp.input(inputTensorName).model().set_layout(inputLayout);

            netInputWidth = shape[ov::layout::width_idx(inputLayout)];
            netInputHeight = shape[ov::layout::height_idx(inputLayout)];
        } else if (shape.size() == 2) {
            // 2D input carries image info; it is filled in preprocess().
            inputsNames.resize(2);
            inputsNames[1] = inputTensorName;
            ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) +
                                   "D "
                                   "input layer '" +
                                   input.get_any_name() +
                                   "'. "
                                   "Only 2D and 4D input layers are supported");
        }
    }
    model = ppp.build();

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() == 1) {
        prepareSingleOutput(model);
    } else {
        prepareMultipleOutputs(model);
    }
}
|
||||
|
||||
// Validates the single [*,*,N,7] detection output and records where the detection
// count and record size live in its shape.
void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());

    const ov::Shape& shape = output.get_shape();
    const ov::Layout layout("NCHW");
    if (shape.size() != 4) {
        throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
    }

    detectionsNumId = ov::layout::height_idx(layout);
    objectSize = shape[ov::layout::width_idx(layout)];
    if (objectSize != 7) {
        throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
                               std::to_string(objectSize));
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
    model = ppp.build();
}
|
||||
|
||||
// Discovers the boxes/labels(/scores) outputs by tensor name, validates the boxes
// shape, and configures all outputs as f32.
void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
    for (const auto& output : model->outputs()) {
        for (const auto& name : output.get_names()) {
            // Take the first tensor name that matches one of the known roles.
            if (name.find("boxes") != std::string::npos || name.find("labels") != std::string::npos ||
                name.find("scores") != std::string::npos) {
                outputsNames.push_back(name);
                break;
            }
        }
    }
    if (outputsNames.size() != 2 && outputsNames.size() != 3) {
        throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
                               std::to_string(outputsNames.size()));
    }
    // Alphabetical order gives: boxes, labels, scores.
    std::sort(outputsNames.begin(), outputsNames.end());

    ov::preprocess::PrePostProcessor ppp(model);
    const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();

    ov::Layout boxesLayout;
    if (boxesShape.size() == 2) {
        // [n][5] records: coordinates plus an inline confidence value.
        boxesLayout = "NC";
        detectionsNumId = ov::layout::batch_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
        if (objectSize != 5) {
            throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
        }
    } else if (boxesShape.size() == 3) {
        // [b][n][4] records: coordinates only; confidences come from the scores output.
        boxesLayout = "CHW";
        detectionsNumId = ov::layout::height_idx(boxesLayout);
        objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
        if (objectSize != 4) {
            throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
        }
    } else {
        throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
                               std::to_string(boxesShape.size()));
    }

    ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
    for (const auto& outName : outputsNames) {
        ppp.output(outName).tensor().set_element_type(ov::element::f32);
    }
    model = ppp.build();
}
|
||||
482
engines/OpenVINOEngine/src/models/detection_model_yolo.cpp
Normal file
482
engines/OpenVINOEngine/src/models/detection_model_yolo.cpp
Normal file
@@ -0,0 +1,482 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_yolo.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// Default anchor boxes per YOLO architecture, as (width, height) pixel pairs
// flattened into a single vector. Indexed by the YoloVersion enum order:
// YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF.
std::vector<float> defaultAnchors[] = {
    // YOLOv1v2
    {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f},
    // YOLOv3 (9 anchors across 3 scales)
    {10.0f, 13.0f, 16.0f, 30.0f, 33.0f, 23.0f,
     30.0f, 61.0f, 62.0f, 45.0f, 59.0f, 119.0f,
     116.0f, 90.0f, 156.0f, 198.0f, 373.0f, 326.0f},
    // YOLOv4 (9 anchors across 3 scales)
    {12.0f, 16.0f, 19.0f, 36.0f, 40.0f, 28.0f,
     36.0f, 75.0f, 76.0f, 55.0f, 72.0f, 146.0f,
     142.0f, 110.0f, 192.0f, 243.0f, 459.0f, 401.0f},
    // YOLOv4_Tiny (6 anchors across 2 scales)
    {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f},
    // YOLOF (6 square anchors, single scale)
    {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}};

// Default anchor-mask indices per architecture (which anchors each output
// scale uses). Empty for v1/v2/v3, whose masks come from the RegionYolo ops.
const std::vector<int64_t> defaultMasks[] = {
    // YOLOv1v2
    {},
    // YOLOv3
    {},
    // YOLOv4
    {0, 1, 2, 3, 4, 5, 6, 7, 8},
    // YOLOv4_Tiny
    {1, 2, 3, 3, 4, 5},
    // YOLOF
    {0, 1, 2, 3, 4, 5}};

// Logistic activation used to decode raw v4/YOLOF outputs.
static inline float sigmoid(float x) {
    // std::exp (not unqualified exp): guaranteed by <cmath> and picks the
    // float overload instead of relying on C-library global names.
    return 1.f / (1.f + std::exp(-x));
}

// Identity activation: v1/v2/v3 outputs are already decoded by RegionYolo.
static inline float linear(float x) {
    return x;
}
|
||||
|
||||
/// Constructs a YOLO detection-model wrapper.
/// @param modelFileName            path to the model file
/// @param confidenceThreshold      minimum detection confidence to keep
/// @param useAutoResize            let OpenVINO resize inputs on-device
/// @param useAdvancedPostprocessing use pairwise IOU suppression instead of
///                                  the sort-based classic NMS
/// @param boxIOUThreshold          IOU above which overlapping boxes suppress
/// @param labels                   class label names
/// @param anchors                  user-supplied anchors (empty -> defaults)
/// @param masks                    user-supplied anchor masks (empty -> defaults)
/// @param layout                   input layout override string
/// The actual YOLO version is detected later in prepareInputsOutputs();
/// YOLO_V3 here is just the provisional default.
ModelYolo::ModelYolo(const std::string& modelFileName,
                     float confidenceThreshold,
                     bool useAutoResize,
                     bool useAdvancedPostprocessing,
                     float boxIOUThreshold,
                     const std::vector<std::string>& labels,
                     const std::vector<float>& anchors,
                     const std::vector<int64_t>& masks,
                     const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout),
      boxIOUThreshold(boxIOUThreshold),
      useAdvancedPostprocessing(useAdvancedPostprocessing),
      yoloVersion(YOLO_V3),
      presetAnchors(anchors),
      presetMasks(masks) {}
|
||||
|
||||
// Configures the YOLO model's single image input and its outputs, then
// detects which YOLO flavor the model is:
//  - if RegionYolo ops are present, regions are built from them (v1/v2/v3);
//  - otherwise the output count selects YOLOF / v4-tiny / v4 and regions are
//    assembled from preset or default anchors+masks.
// Throws std::logic_error / std::runtime_error on structural mismatches.
void ModelYolo::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("YOLO model wrapper accepts models that have only 1 input");
    }

    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);

    if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 3-channel input");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, input.get_any_name());
    ppp.input().tensor().set_layout({"NHWC"});

    if (useAutoResize) {
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    //--- Reading image input parameters
    inputsNames.push_back(model->input().get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    std::map<std::string, ov::Shape> outShapes;
    for (auto& out : outputs) {
        ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32);
        if (out.get_shape().size() == 4) {
            // Heuristic: square spatial dims under NHWC but not under NCHW
            // means the model really produces NHWC.
            if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] &&
                out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) {
                ppp.output(out.get_any_name()).model().set_layout("NHWC");
                // outShapes are saved before ppp.build() thus set yoloRegionLayout as it is in model before ppp.build()
                yoloRegionLayout = "NHWC";
            }
            // yolo-v1-tiny-tf out shape is [1, 21125] thus set layout only for 4 dim tensors
            ppp.output(out.get_any_name()).tensor().set_layout("NCHW");
        }
        outputsNames.push_back(out.get_any_name());
        outShapes[out.get_any_name()] = out.get_shape();
    }
    model = ppp.build();

    // --- Detect the YOLO version from RegionYolo ops, if any.
    yoloVersion = YOLO_V3;
    bool isRegionFound = false;
    for (const auto& op : model->get_ordered_ops()) {
        if (std::string("RegionYolo") == op->get_type_name()) {
            auto regionYolo = std::dynamic_pointer_cast<ov::op::v0::RegionYolo>(op);

            if (regionYolo) {
                if (!regionYolo->get_mask().size()) {
                    yoloVersion = YOLO_V1V2;
                }

                const auto& opName = op->get_friendly_name();
                for (const auto& out : outputs) {
                    if (out.get_node()->get_friendly_name() == opName ||
                        out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) {
                        isRegionFound = true;
                        regions.emplace(out.get_any_name(), Region(regionYolo));
                    }
                }
            }
        }
    }

    if (!isRegionFound) {
        // No RegionYolo ops: infer the flavor from the number of outputs.
        switch (outputsNames.size()) {
            case 1:
                yoloVersion = YOLOF;
                break;
            case 2:
                yoloVersion = YOLO_V4_TINY;
                break;
            case 3:
                yoloVersion = YOLO_V4;
                break;
            default:
                // FIX: previously fell through silently with yoloVersion left
                // at YOLO_V3, producing a confusing masks-size error later.
                throw std::logic_error("Unsupported number of outputs (" + std::to_string(outputsNames.size()) +
                                       ") for a YOLO model without RegionYolo operations");
        }

        int num = yoloVersion == YOLOF ? 6 : 3;  // anchors per output scale
        isObjConf = yoloVersion == YOLOF ? 0 : 1;
        int i = 0;

        auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion];
        if (chosenMasks.size() != static_cast<size_t>(num) * outputs.size()) {
            // FIX: report the size actually checked (chosenMasks), not
            // presetMasks, which is empty whenever defaults are in use.
            throw std::runtime_error(std::string("Invalid size of masks array, got ") +
                                     std::to_string(chosenMasks.size()) + ", should be " +
                                     std::to_string(num * outputs.size()));
        }

        // Largest feature map first, matching the mask ordering.
        std::sort(outputsNames.begin(),
                  outputsNames.end(),
                  [&outShapes, this](const std::string& x, const std::string& y) {
                      return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] >
                             outShapes[y][ov::layout::height_idx(yoloRegionLayout)];
                  });

        for (const auto& name : outputsNames) {
            const auto& shape = outShapes[name];
            if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) {
                throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension");
            }
            // channels = num * (4 coords + objectness? + classes)
            regions.emplace(
                name,
                Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0),
                       4,
                       presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion],
                       std::vector<int64_t>(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num),
                       shape[ov::layout::width_idx(yoloRegionLayout)],
                       shape[ov::layout::height_idx(yoloRegionLayout)]));
            i++;
        }
    } else {
        // Currently externally set anchors and masks are supported only for YoloV4
        if (presetAnchors.size() || presetMasks.size()) {
            slog::warn << "Preset anchors and mask can be set for YoloV4 model only. "
                          "This model is not YoloV4, so these options will be ignored."
                       << slog::endl;
        }
    }
}
|
||||
|
||||
// Decodes every output tensor into candidate detections, then applies
// non-maximum suppression — either the pairwise "advanced" scheme or the
// classic sort-and-zero variant — and returns the surviving boxes.
std::unique_ptr<ResultBase> ModelYolo::postprocess(InferenceResult& infResult) {
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    std::vector<DetectedObject> candidates;

    //--- Decode each raw output tensor into boxes scaled to the source image
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();

    for (auto& namedTensor : infResult.outputsData) {
        this->parseYOLOOutput(namedTensor.first,
                              namedTensor.second,
                              netInputHeight,
                              netInputWidth,
                              internalData.inputImgHeight,
                              internalData.inputImgWidth,
                              candidates);
    }

    if (useAdvancedPostprocessing) {
        // A candidate survives unless some same-class box with strictly
        // higher confidence overlaps it by at least boxIOUThreshold.
        // (A box never suppresses itself: its confidence is not < its own.)
        for (const auto& candidate : candidates) {
            const bool suppressed =
                std::any_of(candidates.begin(), candidates.end(), [this, &candidate](const DetectedObject& other) {
                    return candidate.labelID == other.labelID && candidate.confidence < other.confidence &&
                           intersectionOverUnion(candidate, other) >= boxIOUThreshold;
                });
            if (!suppressed) {
                result->objects.push_back(candidate);
            }
        }
    } else {
        // Classic NMS: sort by confidence descending, then zero out any box
        // that a stronger earlier box overlaps too much.
        std::sort(candidates.begin(), candidates.end(), [](const DetectedObject& a, const DetectedObject& b) {
            return a.confidence > b.confidence;
        });
        for (size_t i = 0; i < candidates.size(); ++i) {
            if (candidates[i].confidence == 0) {
                continue;
            }
            for (size_t j = i + 1; j < candidates.size(); ++j) {
                if (intersectionOverUnion(candidates[i], candidates[j]) >= boxIOUThreshold) {
                    candidates[j].confidence = 0;
                }
            }
            result->objects.push_back(candidates[i]);
        }
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
|
||||
/// Decodes one raw YOLO output tensor into detections scaled to the original
/// image, appending every (box, class) pair whose confidence passes the
/// threshold to `objects`.
/// @param output_name    name of the output tensor; must have a Region entry
/// @param tensor         raw f32 output data
/// @param resized_im_h/w network input size used during inference
/// @param original_im_h/w source image size detections are scaled to
/// @param objects        out-parameter accumulating detections
/// @throws std::runtime_error if no region is registered for output_name
void ModelYolo::parseYOLOOutput(const std::string& output_name,
                                const ov::Tensor& tensor,
                                const unsigned long resized_im_h,
                                const unsigned long resized_im_w,
                                const unsigned long original_im_h,
                                const unsigned long original_im_w,
                                std::vector<DetectedObject>& objects) {
    // --------------------------- Extracting layer parameters -------------------------------------
    auto it = regions.find(output_name);
    if (it == regions.end()) {
        throw std::runtime_error(std::string("Can't find output layer with name ") + output_name);
    }
    auto& region = it->second;

    int sideW = 0;
    int sideH = 0;
    unsigned long scaleH;
    unsigned long scaleW;
    switch (yoloVersion) {
        case YOLO_V1V2:
            // v1/v2: coordinates are expressed in grid cells, so the grid
            // size itself is the scale.
            sideH = region.outputHeight;
            sideW = region.outputWidth;
            scaleW = region.outputWidth;
            scaleH = region.outputHeight;
            break;
        case YOLO_V3:
        case YOLO_V4:
        case YOLO_V4_TINY:
        case YOLOF:
            // v3+: grid size comes from the tensor shape (NCHW after ppp),
            // and box sizes are relative to the network input resolution.
            sideH = static_cast<int>(tensor.get_shape()[ov::layout::height_idx("NCHW")]);
            sideW = static_cast<int>(tensor.get_shape()[ov::layout::width_idx("NCHW")]);
            scaleW = resized_im_w;
            scaleH = resized_im_h;
            break;
    }

    auto entriesNum = sideW * sideH;
    const float* outData = tensor.data<float>();

    // v4/YOLOF emit raw logits (need sigmoid); earlier versions are already
    // decoded by RegionYolo, so use identity.
    auto postprocessRawData =
        (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear;

    // --------------------------- Parsing YOLO Region output -------------------------------------
    for (int i = 0; i < entriesNum; ++i) {
        int row = i / sideW;
        int col = i % sideW;
        for (int n = 0; n < region.num; ++n) {
            //--- Getting region data: objectness lives right after the coords
            int obj_index = calculateEntryIndex(entriesNum,
                                                region.coords,
                                                region.classes + isObjConf,
                                                n * entriesNum + i,
                                                region.coords);
            int box_index =
                calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0);
            // YOLOF has no objectness channel (isObjConf == 0) -> scale 1.
            float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1;

            //--- Preliminary check for confidence threshold conformance
            if (scale >= confidenceThreshold) {
                //--- Calculating scaled region's coordinates (box center)
                float x, y;
                if (yoloVersion == YOLOF) {
                    // YOLOF offsets are anchor-relative, normalized by the
                    // network input size.
                    x = (static_cast<float>(col) / sideW +
                         outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) *
                        original_im_w;
                    y = (static_cast<float>(row) / sideH +
                         outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) *
                        original_im_h;
                } else {
                    // Other versions: offsets are relative to the grid cell.
                    x = static_cast<float>((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW *
                                           original_im_w);
                    y = static_cast<float>((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH *
                                           original_im_h);
                }
                // Box size: exp(raw) * anchor, rescaled to the source image.
                float height = static_cast<float>(std::exp(outData[box_index + 3 * entriesNum]) *
                                                  region.anchors[2 * n + 1] * original_im_h / scaleH);
                float width = static_cast<float>(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] *
                                                 original_im_w / scaleW);

                DetectedObject obj;
                // Convert center+size to a top-left-anchored box clamped to
                // the image bounds.
                obj.x = clamp(x - width / 2, 0.f, static_cast<float>(original_im_w));
                obj.y = clamp(y - height / 2, 0.f, static_cast<float>(original_im_h));
                obj.width = clamp(width, 0.f, static_cast<float>(original_im_w - obj.x));
                obj.height = clamp(height, 0.f, static_cast<float>(original_im_h - obj.y));

                // One DetectedObject is emitted per class that passes the
                // combined (objectness * class prob) threshold.
                for (size_t j = 0; j < region.classes; ++j) {
                    int class_index = calculateEntryIndex(entriesNum,
                                                          region.coords,
                                                          region.classes + isObjConf,
                                                          n * entriesNum + i,
                                                          region.coords + isObjConf + j);
                    float prob = scale * postprocessRawData(outData[class_index]);

                    //--- Checking confidence threshold conformance and adding region to the list
                    if (prob >= confidenceThreshold) {
                        obj.confidence = prob;
                        obj.labelID = j;
                        obj.label = getLabelName(obj.labelID);
                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}
|
||||
|
||||
// Maps a (cell, anchor, channel) triple to a flat index in a YOLO output
// laid out as [anchor][channel][cell]. `location` encodes anchor*cells+cell;
// `entry` selects the channel within one anchor's record of
// (lcoords + lclasses) channels.
int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) {
    const int anchorIdx = location / totalCells;
    const int cellIdx = location % totalCells;
    return (anchorIdx * (lcoords + lclasses) + entry) * totalCells + cellIdx;
}
|
||||
|
||||
double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) {
|
||||
double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x);
|
||||
double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y);
|
||||
double intersectionArea =
|
||||
(overlappingWidth < 0 || overlappingHeight < 0) ? 0 : overlappingHeight * overlappingWidth;
|
||||
double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea;
|
||||
return intersectionArea / unionArea;
|
||||
}
|
||||
|
||||
/// Builds a Region description from a RegionYolo op found in the graph
/// (YOLO v1/v2/v3 path). Copies coords/classes, resolves the anchor subset
/// selected by the op's mask, and records the output grid size.
ModelYolo::Region::Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo) {
    coords = regionYolo->get_num_coords();
    classes = regionYolo->get_num_classes();
    auto mask = regionYolo->get_mask();
    num = mask.size();

    // NOTE(review): assumes the op's input is a 4D NCHW tensor so that
    // indices 2/3 are H/W — TODO confirm for exotic layouts.
    auto shape = regionYolo->get_input_shape(0);
    outputWidth = shape[3];
    outputHeight = shape[2];

    if (num) {
        // Parsing YoloV3 parameters: keep only the anchors the mask selects,
        // as (w, h) pairs.
        anchors.resize(num * 2);

        for (int i = 0; i < num; ++i) {
            anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2];
            anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1];
        }
    } else {
        // Parsing YoloV2 parameters: no mask, all anchors are used.
        num = regionYolo->get_num_regions();
        anchors = regionYolo->get_anchors();
        if (anchors.empty()) {
            // Op carries no anchors at all — fall back to the canonical
            // v1/v2 defaults (5 anchors).
            anchors = defaultAnchors[YOLO_V1V2];
            num = 5;
        }
    }
}
|
||||
|
||||
/// Builds a Region from explicitly supplied parameters (the no-RegionYolo
/// path: v4 / v4-tiny / YOLOF).
/// @param classes       number of class channels per anchor
/// @param coords        number of coordinate channels (always 4 here)
/// @param anchors       full anchor list as flattened (w, h) pairs
/// @param masks         indices selecting this region's anchors; empty means
///                      "use all anchors"
/// @param outputWidth/outputHeight  grid size of this output
/// @throws std::runtime_error on an empty/odd anchor list or a mask index
///         pointing outside the anchor list
ModelYolo::Region::Region(size_t classes,
                          int coords,
                          const std::vector<float>& anchors,
                          const std::vector<int64_t>& masks,
                          size_t outputWidth,
                          size_t outputHeight)
    : classes(classes),
      coords(coords),
      outputWidth(outputWidth),
      outputHeight(outputHeight) {
    num = masks.size();

    if (anchors.size() == 0 || anchors.size() % 2 != 0) {
        throw std::runtime_error("Explicitly initialized region should have non-empty even-sized regions vector");
    }

    if (num) {
        this->anchors.resize(num * 2);

        for (int i = 0; i < num; ++i) {
            // FIX: validate the mask index before using it — a bad
            // user-supplied mask previously read out of bounds.
            if (masks[i] < 0 || static_cast<size_t>(masks[i]) * 2 + 1 >= anchors.size()) {
                throw std::runtime_error("Mask index " + std::to_string(masks[i]) +
                                         " is out of range for the anchors vector");
            }
            this->anchors[i * 2] = anchors[masks[i] * 2];
            this->anchors[i * 2 + 1] = anchors[masks[i] * 2 + 1];
        }
    } else {
        // No mask: this region owns every anchor pair.
        this->anchors = anchors;
        num = anchors.size() / 2;
    }
}
|
||||
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
// Copyright (C) 2022-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_yolov3_onnx.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
#include "utils/image_utils.h"
|
||||
|
||||
/// Constructs the YOLOv3-ONNX wrapper. Auto-resize is disabled because this
/// model family requires host-side letterbox preprocessing.
ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 const std::vector<std::string>& labels,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
    // Letterbox resize keeps the aspect ratio, padding the remainder;
    // cubic interpolation matches the reference ONNX pipeline.
    resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    interpolationMode = cv::INTER_CUBIC;
}
|
||||
|
||||
|
||||
// Configures the two YOLOv3-ONNX inputs (4D image + 2D image-info) and its
// three outputs (boxes / scores / indices), identified by shape.
// Throws std::logic_error on any structural mismatch.
void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare inputs ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 2) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    // FIX: was reserve(), which does not change the vector's size, so the
    // inputsNames[0] / inputsNames[1] assignments below were undefined
    // behavior on an empty vector.
    inputsNames.resize(inputs.size());
    for (auto& input : inputs) {
        const ov::Shape& currentShape = input.get_shape();
        std::string currentName = input.get_any_name();
        const ov::Layout& currentLayout = getInputLayout(input);

        if (currentShape.size() == 4) {
            // Image input: fixed slot 0 regardless of declaration order.
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
                throw std::logic_error("Expected 4D image input with 3 channels");
            }
            inputsNames[0] = currentName;
            netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
            netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
            ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
        } else if (currentShape.size() == 2) {
            // Image-info input ([1, 2] = original H, W): fixed slot 1.
            if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
                throw std::logic_error("Expected 2D image info input with 2 channels");
            }
            inputsNames[1] = currentName;
            ppp.input(currentName).tensor().set_element_type(ov::element::i32);
        }
        ppp.input(currentName).model().set_layout(currentLayout);
    }

    // --------------------------- Prepare outputs -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 3) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
    }

    for (auto& output : outputs) {
        const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
        std::string currentName = output.get_any_name();
        // Identify each output by its shape signature.
        if (currentShape.back() == 3) {
            indicesOutputName = currentName;  // [:, 3] (batch, class, box)
            ppp.output(currentName).tensor().set_element_type(ov::element::i32);
        } else if (currentShape[2] == 4) {
            boxesOutputName = currentName;  // [:, :, 4] box coordinates
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else if (currentShape[1] == numberOfClasses) {
            scoresOutputName = currentName;  // [:, classes, :] per-class scores
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Expected shapes [:,:,4], [:,"
                + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs");
        }
        outputsNames.push_back(currentName);
    }
    model = ppp.build();
}
|
||||
|
||||
std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
|
||||
ov::InferRequest& request) {
|
||||
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
|
||||
ov::Tensor info{ov::element::i32, ov::Shape({1, 2})};
|
||||
int32_t* data = info.data<int32_t>();
|
||||
data[0] = origImg.rows;
|
||||
data[1] = origImg.cols;
|
||||
request.set_tensor(inputsNames[1], info);
|
||||
return ImageModel::preprocess(inputData, request);
|
||||
}
|
||||
|
||||
namespace {
|
||||
float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
|
||||
return scoresTensor.data<float>()[classInd * scoresTensor.get_shape()[2] + boxInd];
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the model's (boxes, scores, indices) outputs into a
/// DetectionResult. The indices tensor already encodes the NMS-selected
/// (batch, class, box) triples; this only gathers, thresholds, and clamps.
std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
    // Get info about input image
    const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
    const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;

    // Get outputs tensors
    const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
    const float* boxesPtr = boxes.data<float>();

    const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
    const ov::Tensor& indices = infResult.outputsData[indicesOutputName];

    const int* indicesData = indices.data<int>();
    const auto indicesShape = indices.get_shape();
    const auto boxShape = boxes.get_shape();

    // Generate detection results. The indices tensor is either
    // [1, n, stride] or [n, stride]; handle both.
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0];
    size_t indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1];

    for (size_t i = 0; i < numberOfBoxes; ++i) {
        int batchInd = indicesData[i * indicesStride];
        int classInd = indicesData[i * indicesStride + 1];
        int boxInd = indicesData[i * indicesStride + 2];

        // A batch index of -1 marks the end of the valid entries (padding).
        if (batchInd == -1) {
            break;
        }

        float score = getScore(scores, classInd, boxInd);

        if (score > confidenceThreshold) {
            DetectedObject obj;
            size_t startPos = boxShape[2] * boxInd;

            // NOTE(review): coordinates are read as [y1, x1, y2, x2] —
            // looks like the ONNX YOLOv3 box order; confirm against the
            // exported model if boxes appear transposed.
            auto x = boxesPtr[startPos + 1];
            auto y = boxesPtr[startPos];
            auto width = boxesPtr[startPos + 3] - x;
            auto height = boxesPtr[startPos + 2] - y;

            // Create new detected box, clamped to the original image bounds.
            obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
            obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
            obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
            obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
            obj.confidence = score;
            obj.labelID = classInd;
            obj.label = getLabelName(classInd);

            result->objects.push_back(obj);
        }
    }

    return std::unique_ptr<ResultBase>(result);
}
|
||||
194
engines/OpenVINOEngine/src/models/detection_model_yolox.cpp
Normal file
194
engines/OpenVINOEngine/src/models/detection_model_yolox.cpp
Normal file
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
// Copyright (C) 2022-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/detection_model_yolox.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
#include "utils/image_utils.h"
|
||||
#include "utils/nms.hpp"
|
||||
|
||||
/// Constructs the YOLOX wrapper. Auto-resize is disabled: YOLOX needs a
/// host-side aspect-preserving resize whose scale factor feeds postprocess.
ModelYoloX::ModelYoloX(const std::string& modelFileName,
                       float confidenceThreshold,
                       float boxIOUThreshold,
                       const std::vector<std::string>& labels,
                       const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout),
      boxIOUThreshold(boxIOUThreshold) {
    // Keep aspect ratio; padding value is supplied at preprocess time.
    resizeMode = RESIZE_KEEP_ASPECT;
}
|
||||
|
||||
// Configures YOLOX's single u8/NHWC image input and single f32 3D output,
// and precomputes the per-cell stride/grid tables used by postprocess.
// Throws std::logic_error on structural mismatches.
void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare input ------------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input");
    }

    //--- Check image input
    const auto& input = model->input();
    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(input);

    // FIX: was `&&`, which only threw when BOTH checks failed — a non-4D
    // input (or a 4D input without 3 channels) slipped through, and
    // channels_idx could index a shape of the wrong rank. `||` also
    // short-circuits before indexing a non-4D shape.
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("Expected 4D image input with 3 channels");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
    ppp.input().model().set_layout(inputLayout);

    //--- Reading image input parameters
    inputsNames.push_back(input.get_any_name());
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
    setStridesGrids();

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("YoloX model wrapper expects models that have only 1 output");
    }
    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());
    const ov::Shape& shape = output.get_shape();

    if (shape.size() != 3) {
        throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size()));
    }
    ppp.output().tensor().set_element_type(ov::element::f32);

    model = ppp.build();
}
|
||||
|
||||
void ModelYoloX::setStridesGrids() {
|
||||
std::vector<size_t> strides = {8, 16, 32};
|
||||
std::vector<size_t> hsizes(3);
|
||||
std::vector<size_t> wsizes(3);
|
||||
|
||||
for (size_t i = 0; i < strides.size(); ++i) {
|
||||
hsizes[i] = netInputHeight / strides[i];
|
||||
wsizes[i] = netInputWidth / strides[i];
|
||||
}
|
||||
|
||||
for (size_t size_index = 0; size_index < hsizes.size(); ++size_index) {
|
||||
for (size_t h_index = 0; h_index < hsizes[size_index]; ++h_index) {
|
||||
for (size_t w_index = 0; w_index < wsizes[size_index]; ++w_index) {
|
||||
grids.emplace_back(w_index, h_index);
|
||||
expandedStrides.push_back(strides[size_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<InternalModelData> ModelYoloX::preprocess(const InputData& inputData,
|
||||
ov::InferRequest& request) {
|
||||
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
|
||||
float scale = std::min(static_cast<float>(netInputWidth) / origImg.cols,
|
||||
static_cast<float>(netInputHeight) / origImg.rows);
|
||||
|
||||
cv::Mat resizedImage = resizeImageExt(origImg, netInputWidth, netInputHeight, resizeMode,
|
||||
interpolationMode, nullptr, cv::Scalar(114, 114, 114));
|
||||
|
||||
request.set_input_tensor(wrapMat2Tensor(resizedImage));
|
||||
return std::make_shared<InternalScaleData>(origImg.cols, origImg.rows, scale, scale);
|
||||
}
|
||||
|
||||
// Decodes the raw YOLOX output into DetectionResult objects: converts the
// grid-relative, log-encoded box predictions to absolute coordinates,
// filters by objectness * best-class score, applies NMS, and rescales the
// survivors to the original image.
std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult) {
    // Scale metadata recorded by preprocess() (uniform letterbox scale).
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();

    // Single output tensor; outputShape[2] is the per-box record length
    // (4 box values + objectness + per-class scores).
    const ov::Tensor& output = infResult.outputsData[outputsNames[0]];
    const auto& outputShape = output.get_shape();
    float* outputPtr = output.data<float>();

    // FIX: own the result from the start — the original held a raw `new`
    // pointer that leaked if any call below threw before the final return.
    auto result = std::make_unique<DetectionResult>(infResult.frameId, infResult.metaData);

    // Decode in place: centers are offsets within their grid cell, sizes are
    // log-encoded; both are expressed in stride units.
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        size_t startPos = outputShape[2] * box_index;
        outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index];
        outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index];
        outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index];
        outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index];
    }

    // Filter predictions by objectness and then by objectness * class score.
    std::vector<Anchor> validBoxes;
    std::vector<float> scores;
    std::vector<size_t> classes;
    for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
        size_t startPos = outputShape[2] * box_index;
        float score = outputPtr[startPos + 4];  // objectness
        if (score < confidenceThreshold)
            continue;

        // Arg-max over class scores.
        float maxClassScore = -1;
        size_t mainClass = 0;
        for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) {
            if (outputPtr[startPos + 5 + class_index] > maxClassScore) {
                maxClassScore = outputPtr[startPos + 5 + class_index];
                mainClass = class_index;
            }
        }

        // Final confidence is objectness * best class score.
        score *= maxClassScore;
        if (score < confidenceThreshold)
            continue;

        scores.push_back(score);
        classes.push_back(mainClass);
        // Center/size -> corner form, then undo the preprocessing scale.
        Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2,
                          outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2,
                          outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2,
                          outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2};
        validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, trueBox.top / scale.scaleY,
                                     trueBox.right / scale.scaleX, trueBox.bottom / scale.scaleY}));
    }

    // Class-agnostic NMS (includeBoundaries = true), then clamp survivors to
    // the original image bounds.
    std::vector<int> keep = nms(validBoxes, scores, boxIOUThreshold, true);
    for (auto& index : keep) {
        DetectedObject obj;
        obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.y = clamp(validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.height = clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
        obj.width = clamp(validBoxes[index].right - validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
        obj.confidence = scores[index];
        obj.labelID = classes[index];
        obj.label = getLabelName(classes[index]);
        result->objects.push_back(obj);
    }

    return result;  // unique_ptr<DetectionResult> converts to unique_ptr<ResultBase>
}
|
||||
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/hpe_model_associative_embedding.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/associative_embedding_decoder.h"
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128);
|
||||
const float HpeAssociativeEmbedding::detectionThreshold = 0.1f;
|
||||
const float HpeAssociativeEmbedding::tagThreshold = 1.0f;
|
||||
|
||||
// Constructs the Associative-Embedding human-pose-estimation wrapper.
// @param modelFileName       path to the model file (forwarded to ImageModel)
// @param aspectRatio         desired input width/height ratio; drives the
//                            reshape performed in changeInputSize()
// @param targetSize          reference input side length; 0 means "derive it
//                            from the model's own input shape"
// @param confidenceThreshold minimum mean pose score kept by extractPoses()
// @param layout              optional input layout override string
// @param delta               refinement offset passed to adjustAndRefine()
// @param resizeMode          host-side resize strategy used by preprocess()
HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName,
                                                 double aspectRatio,
                                                 int targetSize,
                                                 float confidenceThreshold,
                                                 const std::string& layout,
                                                 float delta,
                                                 RESIZE_MODE resizeMode)
    // useAutoResize = false: preprocess() resizes on the host itself.
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold),
      delta(delta) {
    this->resizeMode = resizeMode;
    // Cubic interpolation for resizing feature-sized inputs.
    interpolationMode = cv::INTER_CUBIC;
}
|
||||
|
||||
// Validates the model's I/O topology, wires up pre/post-processing (u8 NHWC
// input, f32 outputs), resolves the named output tensors this wrapper needs,
// and reshapes the input to the requested target size.
// @throws std::logic_error   on an unexpected input/output topology
// @throws std::runtime_error from findTensorByName() if a required output is
//                            missing or ambiguous
void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input Tensors ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE AE model wrapper supports topologies with only 1 input.");
    }
    inputsNames.push_back(model->input().get_any_name());

    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    // The wrapper feeds u8 NHWC images; ppp converts to the model's own layout.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output Tensors -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2 && outputs.size() != 3) {
        // FIX: removed the duplicated word ("model model") from the message.
        throw std::logic_error("HPE AE model wrapper supports topologies only with 2 or 3 outputs");
    }

    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32);

        // Collect every alias so findTensorByName() below can match by prefix.
        for (const auto& name : output.get_names()) {
            outputsNames.push_back(name);
        }

        const ov::Shape& outputShape = output.get_shape();
        if (outputShape.size() != 4 && outputShape.size() != 5) {
            throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional");
        }
        // 17 channels = COCO keypoint count.
        if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) {
            throw std::logic_error("output tensors are expected to have 1 batch size and 17 channels");
        }
    }
    model = ppp.build();

    embeddingsTensorName = findTensorByName("embeddings", outputsNames);
    heatmapsTensorName = findTensorByName("heatmaps", outputsNames);
    // "nms_heatmaps" is optional: fall back to the plain heatmaps when absent.
    try {
        nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames);
    } catch (const std::runtime_error&) {
        nmsHeatmapsTensorName = heatmapsTensorName;
    }

    changeInputSize(model);
}
|
||||
|
||||
// Reshapes the model input so that its height/width honor the requested
// aspect ratio and target size, rounded up to a multiple of `stride`.
// Also caches the resulting size in `inputLayerSize` for preprocess().
void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->input());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);

    // targetSize == 0 means "use the model's own (smaller) input side".
    if (!targetSize) {
        targetSize = static_cast<int>(std::min(inputShape[heightId], inputShape[widthId]));
    }

    // The shorter side gets targetSize; the longer side is stretched by the
    // aspect ratio.
    int requestedHeight;
    int requestedWidth;
    if (aspectRatio >= 1.0) {
        requestedHeight = targetSize;
        requestedWidth = static_cast<int>(std::round(targetSize * aspectRatio));
    } else {
        requestedHeight = static_cast<int>(std::round(targetSize / aspectRatio));
        requestedWidth = targetSize;
    }

    // Round both dimensions up to the nearest multiple of the network stride.
    const auto alignUp = [this](int value) {
        return static_cast<int>((value + stride - 1) / stride) * stride;
    };
    const int height = alignUp(requestedHeight);
    const int width = alignUp(requestedWidth);

    inputShape[batchId] = 1;
    inputShape[heightId] = height;
    inputShape[widthId] = width;
    inputLayerSize = cv::Size(width, height);

    model->reshape(inputShape);
}
|
||||
|
||||
std::shared_ptr<InternalModelData> HpeAssociativeEmbedding::preprocess(const InputData& inputData,
|
||||
ov::InferRequest& request) {
|
||||
auto& image = inputData.asRef<ImageInputData>().inputImage;
|
||||
cv::Rect roi;
|
||||
auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
|
||||
if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) {
|
||||
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
|
||||
}
|
||||
request.set_input_tensor(wrapMat2Tensor(paddedImage));
|
||||
|
||||
return std::make_shared<InternalScaleData>(paddedImage.cols,
|
||||
paddedImage.rows,
|
||||
image.size().width / static_cast<float>(roi.width),
|
||||
image.size().height / static_cast<float>(roi.height));
|
||||
}
|
||||
|
||||
// Splits the embedding/heatmap outputs into per-channel maps, decodes poses,
// and rescales keypoints back to the original image (accounting for the
// letterbox shift when that resize mode is active).
std::unique_ptr<ResultBase> HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) {
    // FIX: own the result from the start — the original raw `new` leaked if
    // any tensor access or decode step below threw before the final return.
    auto result = std::make_unique<HumanPoseResult>(infResult.frameId, infResult.metaData);

    // Per-channel views over the embeddings tensor.
    const auto& aembds = infResult.outputsData[embeddingsTensorName];
    const ov::Shape& aembdsShape = aembds.get_shape();
    float* const aembdsMapped = aembds.data<float>();
    std::vector<cv::Mat> aembdsMaps = split(aembdsMapped, aembdsShape);

    // Per-channel views over the heatmaps tensor.
    const auto& heats = infResult.outputsData[heatmapsTensorName];
    const ov::Shape& heatMapsShape = heats.get_shape();
    float* const heatMapsMapped = heats.data<float>();
    std::vector<cv::Mat> heatMaps = split(heatMapsMapped, heatMapsShape);

    // Use the dedicated NMS'ed heatmaps when the model provides them;
    // otherwise peak-finding runs on the plain heatmaps.
    std::vector<cv::Mat> nmsHeatMaps = heatMaps;
    if (nmsHeatmapsTensorName != heatmapsTensorName) {
        const auto& nmsHeats = infResult.outputsData[nmsHeatmapsTensorName];
        const ov::Shape& nmsHeatMapsShape = nmsHeats.get_shape();
        float* const nmsHeatMapsMapped = nmsHeats.data<float>();
        nmsHeatMaps = split(nmsHeatMapsMapped, nmsHeatMapsShape);
    }
    std::vector<HumanPose> poses = extractPoses(heatMaps, aembdsMaps, nmsHeatMaps);

    // Rescale poses to the original image
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    // Network input -> feature map downscale factor.
    const float outputScale = inputLayerSize.width / static_cast<float>(heatMapsShape[3]);
    float shiftX = 0.0, shiftY = 0.0;
    float scaleX = 1.0, scaleY = 1.0;

    if (resizeMode == RESIZE_KEEP_ASPECT_LETTERBOX) {
        // Letterbox pads symmetrically, so half the padded extent becomes a
        // coordinate shift on the padded axis.
        scaleX = scaleY = std::min(scale.scaleX, scale.scaleY);
        if (aspectRatio >= 1.0)
            shiftX = static_cast<float>((targetSize * scaleX * aspectRatio - scale.inputImgWidth * scaleX) / 2);
        else
            shiftY = static_cast<float>((targetSize * scaleY / aspectRatio - scale.inputImgHeight * scaleY) / 2);
        scaleX = scaleY *= outputScale;
    } else {
        scaleX = scale.scaleX * outputScale;
        scaleY = scale.scaleY * outputScale;
    }

    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            // (-1, -1) marks an undetected keypoint; leave it untouched.
            if (keypoint != cv::Point2f(-1, -1)) {
                keypoint.x = keypoint.x * scaleX + shiftX;
                keypoint.y = keypoint.y * scaleY + shiftY;
            }
        }
        result->poses.push_back(pose);
    }

    return result;  // unique_ptr<HumanPoseResult> converts to unique_ptr<ResultBase>
}
|
||||
|
||||
// Resolves an output tensor whose name starts with `tensorName`.
// Exactly one candidate must match.
// @throws std::runtime_error when no candidate or more than one matches
std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName,
                                                      const std::vector<std::string>& outputsNames) {
    std::string match;
    size_t hits = 0;
    for (const auto& candidate : outputsNames) {
        // Prefix test: candidate must begin with tensorName.
        if (candidate.rfind(tensorName, 0) == 0) {
            if (hits++ == 0) {
                match = candidate;
            }
        }
    }
    if (hits == 0) {
        throw std::runtime_error("Suitable tensor for " + tensorName + " output is not found");
    }
    if (hits > 1) {
        throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output");
    }
    return match;
}
|
||||
|
||||
// Wraps each channel of an NCHW float tensor buffer as a non-owning cv::Mat
// view. The returned Mats alias `data`; the tensor must outlive them.
std::vector<cv::Mat> HpeAssociativeEmbedding::split(float* data, const ov::Shape& shape) {
    const size_t channels = shape[1];
    const size_t rows = shape[2];
    const size_t cols = shape[3];

    std::vector<cv::Mat> channelViews;
    channelViews.reserve(channels);
    for (size_t c = 0; c < channels; ++c) {
        // No copy: the Mat points straight into the tensor's memory.
        channelViews.emplace_back(rows, cols, CV_32FC1, data + c * rows * cols);
    }
    return channelViews;
}
|
||||
|
||||
// Decodes poses from the associative-embedding outputs: finds per-joint
// peaks, groups them into person instances by tag similarity, then refines
// and filters the surviving poses.
// Note: `heatMaps` is intentionally non-const — refinement works on their
// absolute values and the maps are mutated in place.
std::vector<HumanPose> HpeAssociativeEmbedding::extractPoses(std::vector<cv::Mat>& heatMaps,
                                                             const std::vector<cv::Mat>& aembdsMaps,
                                                             const std::vector<cv::Mat>& nmsHeatMaps) const {
    // Per-joint candidate peaks from the (NMS'ed) heatmaps.
    std::vector<std::vector<Peak>> allPeaks(numJoints);
    for (int i = 0; i < numJoints; i++) {
        findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, i, maxNumPeople, detectionThreshold);
    }
    // Group peaks into person instances by embedding-tag similarity.
    std::vector<Pose> allPoses = matchByTag(allPeaks, maxNumPeople, numJoints, tagThreshold);

    // Swap keypoint coordinates for all poses; presumably the decoder emits
    // (row, col) while consumers expect (x, y) — TODO confirm.
    for (auto& pose : allPoses) {
        for (size_t j = 0; j < numJoints; j++) {
            Peak& peak = pose.getPeak(j);
            std::swap(peak.keypoint.x, peak.keypoint.y);
        }
    }

    std::vector<HumanPose> poses;
    // FIX: the original re-applied cv::abs() to every heatmap once per
    // surviving pose (O(poses * maps) redundant work). abs is idempotent, so
    // applying it once before the first refinement is behaviorally identical;
    // the flag keeps the mutation conditional on at least one pose passing
    // the threshold, exactly as before.
    bool absApplied = false;
    for (size_t i = 0; i < allPoses.size(); i++) {
        Pose& pose = allPoses[i];
        // Filter out poses with low mean scores.
        if (pose.getMeanScore() <= confidenceThreshold) {
            continue;
        }
        if (!absApplied) {
            for (size_t j = 0; j < heatMaps.size(); j++) {
                heatMaps[j] = cv::abs(heatMaps[j]);
            }
            absApplied = true;
        }
        adjustAndRefine(allPoses, heatMaps, aembdsMaps, i, delta);
        std::vector<cv::Point2f> keypoints;
        for (size_t j = 0; j < numJoints; j++) {
            Peak& peak = pose.getPeak(j);
            keypoints.push_back(peak.keypoint);
        }
        poses.push_back({keypoints, pose.getMeanScore()});
    }
    return poses;
}
|
||||
256
engines/OpenVINOEngine/src/models/hpe_model_openpose.cpp
Normal file
256
engines/OpenVINOEngine/src/models/hpe_model_openpose.cpp
Normal file
@@ -0,0 +1,256 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/hpe_model_openpose.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/openpose_decoder.h"
|
||||
#include "models/results.h"
|
||||
|
||||
const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128);
|
||||
const float HPEOpenPose::minPeaksDistance = 3.0f;
|
||||
const float HPEOpenPose::midPointsScoreThreshold = 0.05f;
|
||||
const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f;
|
||||
const float HPEOpenPose::minSubsetScore = 0.2f;
|
||||
|
||||
// Constructs the OpenPose human-pose-estimation wrapper.
// @param modelFileName       path to the model file (forwarded to ImageModel)
// @param aspectRatio         desired input width/height ratio; drives the
//                            reshape performed in changeInputSize()
// @param targetSize          reference input height; 0 means "use the
//                            model's own input height"
// @param confidenceThreshold peak confidence threshold used when extracting
//                            keypoint candidates
// @param layout              optional input layout override string
HPEOpenPose::HPEOpenPose(const std::string& modelFileName,
                         double aspectRatio,
                         int targetSize,
                         float confidenceThreshold,
                         const std::string& layout)
    // useAutoResize = false: preprocess() resizes on the host itself.
    : ImageModel(modelFileName, false, layout),
      aspectRatio(aspectRatio),
      targetSize(targetSize),
      confidenceThreshold(confidenceThreshold) {
    // Aspect-preserving resize with cubic interpolation for the fixed input.
    resizeMode = RESIZE_KEEP_ASPECT;
    interpolationMode = cv::INTER_CUBIC;
}
|
||||
|
||||
// Validates the model's I/O topology, wires up pre/post-processing (u8 NHWC
// input, f32 NCHW outputs), orders the two outputs as (heatmaps, PAFs), and
// reshapes the input to the requested target size.
// @throws std::logic_error / std::runtime_error on unexpected topology
void HPEOpenPose::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("HPE OpenPose model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());
    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3)
        throw std::logic_error("3-channel 4-dimensional model's input is expected");

    // The wrapper feeds u8 NHWC images; ppp converts to the model's layout.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 2) {
        throw std::runtime_error("HPE OpenPose supports topologies with only 2 outputs");
    }

    const ov::Layout outputLayout("NCHW");
    for (const auto& output : model->outputs()) {
        const auto& outTensorName = output.get_any_name();
        ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
        outputsNames.push_back(outTensorName);
    }
    model = ppp.build();

    const size_t batchId = ov::layout::batch_idx(outputLayout);
    const size_t channelsId = ov::layout::channels_idx(outputLayout);
    const size_t widthId = ov::layout::width_idx(outputLayout);
    const size_t heightId = ov::layout::height_idx(outputLayout);

    // Heatmaps have fewer channels than PAFs; swap so that
    // outputsNames[0] is always the heatmaps output.
    ov::Shape heatmapsOutputShape = model->outputs().front().get_shape();
    ov::Shape pafsOutputShape = model->outputs().back().get_shape();
    if (heatmapsOutputShape[channelsId] > pafsOutputShape[channelsId]) {
        std::swap(heatmapsOutputShape, pafsOutputShape);
        std::swap(outputsNames[0], outputsNames[1]);
    }

    // Consistency fix: use the cached `channelsId` instead of recomputing
    // ov::layout::channels_idx(outputLayout) as the original did here.
    if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 ||
        heatmapsOutputShape[channelsId] != keypointsNumber + 1) {
        throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) +
                               "xHFMxWFM dimension of model's heatmap is expected");
    }
    if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 ||
        pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) {
        throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) +
                               "xHFMxWFM dimension of model's output is expected");
    }
    // postprocess() wraps PAF channels with the heatmap's spatial dims, so
    // the two outputs must agree on H and W.
    if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] ||
        pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) {
        throw std::logic_error("output and heatmap are expected to have matching last two dimensions");
    }

    changeInputSize(model);
}
|
||||
|
||||
// Reshapes the model input: height comes from targetSize, width from the
// requested aspect ratio, both rounded up to a multiple of `stride`.
// Also caches the resulting size in `inputLayerSize` for preprocess().
void HPEOpenPose::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape inputShape = model->input().get_shape();
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);

    // targetSize == 0 means "use the model's own input height".
    if (!targetSize) {
        targetSize = inputShape[heightId];
    }

    // Round both dimensions up to the nearest multiple of the network stride.
    const auto alignUp = [this](int value) {
        return static_cast<int>((value + stride - 1) / stride) * stride;
    };
    const int height = alignUp(targetSize);
    const int requestedWidth = static_cast<int>(std::round(targetSize * aspectRatio));
    const int width = alignUp(requestedWidth);

    inputShape[batchId] = 1;
    inputShape[heightId] = height;
    inputShape[widthId] = width;
    inputLayerSize = cv::Size(width, height);

    model->reshape(inputShape);
}
|
||||
|
||||
std::shared_ptr<InternalModelData> HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) {
|
||||
auto& image = inputData.asRef<ImageInputData>().inputImage;
|
||||
cv::Rect roi;
|
||||
auto paddedImage =
|
||||
resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
|
||||
if (inputLayerSize.width < roi.width)
|
||||
throw std::runtime_error("The image aspect ratio doesn't fit current model shape");
|
||||
|
||||
if (inputLayerSize.width - stride >= roi.width) {
|
||||
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
|
||||
}
|
||||
|
||||
request.set_input_tensor(wrapMat2Tensor(paddedImage));
|
||||
return std::make_shared<InternalScaleData>(paddedImage.cols,
|
||||
paddedImage.rows,
|
||||
image.cols / static_cast<float>(roi.width),
|
||||
image.rows / static_cast<float>(roi.height));
|
||||
}
|
||||
|
||||
// Decodes poses from the heatmap and PAF outputs: wraps the tensors as
// per-channel cv::Mat views, upsamples them, extracts/groups keypoints, and
// rescales the resulting poses back to the original image.
std::unique_ptr<ResultBase> HPEOpenPose::postprocess(InferenceResult& infResult) {
    // FIX: own the result from the start — the original raw `new` leaked if
    // any tensor access or decode step below threw before the final return.
    auto result = std::make_unique<HumanPoseResult>(infResult.frameId, infResult.metaData);

    // outputsNames[0] is the heatmaps output, [1] the PAFs output
    // (ordered by prepareInputsOutputs()).
    auto& heatMapsMapped = infResult.outputsData[outputsNames[0]];
    auto& outputMapped = infResult.outputsData[outputsNames[1]];

    const ov::Shape& outputShape = outputMapped.get_shape();
    const ov::Shape& heatMapShape = heatMapsMapped.get_shape();

    float* const predictions = outputMapped.data<float>();
    float* const heats = heatMapsMapped.data<float>();

    // Non-owning per-channel views over the heatmap tensor.
    std::vector<cv::Mat> heatMaps(keypointsNumber);
    for (size_t i = 0; i < heatMaps.size(); i++) {
        heatMaps[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(heatMaps);

    // PAF maps reuse the heatmap spatial dims — prepareInputsOutputs()
    // enforces that the two outputs match on H and W.
    std::vector<cv::Mat> pafs(outputShape[1]);
    for (size_t i = 0; i < pafs.size(); i++) {
        pafs[i] =
            cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]);
    }
    resizeFeatureMaps(pafs);

    std::vector<HumanPose> poses = extractPoses(heatMaps, pafs);

    // Map keypoints from upsampled feature-map coordinates back to the
    // original image.
    const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
    float scaleX = stride / upsampleRatio * scale.scaleX;
    float scaleY = stride / upsampleRatio * scale.scaleY;
    for (auto& pose : poses) {
        for (auto& keypoint : pose.keypoints) {
            // (-1, -1) marks an undetected keypoint; leave it untouched.
            if (keypoint != cv::Point2f(-1, -1)) {
                keypoint.x *= scaleX;
                keypoint.y *= scaleY;
            }
        }
    }
    // Move the decoded poses into the result instead of copying one by one.
    result->poses.insert(result->poses.end(),
                         std::make_move_iterator(poses.begin()),
                         std::make_move_iterator(poses.end()));

    return result;  // unique_ptr<HumanPoseResult> converts to unique_ptr<ResultBase>
}
|
||||
|
||||
// Upsamples every feature map in place by `upsampleRatio` on both axes
// using cubic interpolation.
void HPEOpenPose::resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const {
    for (cv::Mat& map : featureMaps) {
        // Empty dsize: cv::resize derives the target size from the scale factors.
        cv::resize(map, map, cv::Size(), upsampleRatio, upsampleRatio, cv::INTER_CUBIC);
    }
}
|
||||
|
||||
// Functor for cv::parallel_for_: runs findPeaks() over a range of heatmap
// indices so per-joint peak extraction happens in parallel.
// Holds references only — the referenced containers must outlive the loop.
class FindPeaksBody : public cv::ParallelLoopBody {
public:
    FindPeaksBody(const std::vector<cv::Mat>& heatMaps,
                  float minPeaksDistance,
                  std::vector<std::vector<Peak>>& peaksFromHeatMap,
                  float confidenceThreshold)
        : heatMaps(heatMaps),
          minPeaksDistance(minPeaksDistance),
          peaksFromHeatMap(peaksFromHeatMap),
          confidenceThreshold(confidenceThreshold) {}

    // Each worker handles heatmaps [range.start, range.end). Writes go to
    // disjoint peaksFromHeatMap[i] slots, so no synchronization is needed.
    void operator()(const cv::Range& range) const override {
        for (int i = range.start; i < range.end; i++) {
            findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i, confidenceThreshold);
        }
    }

private:
    const std::vector<cv::Mat>& heatMaps;             // input heatmaps (not owned)
    float minPeaksDistance;                           // min spacing between accepted peaks
    std::vector<std::vector<Peak>>& peaksFromHeatMap; // per-heatmap output slots (not owned)
    float confidenceThreshold;                        // min peak confidence
};
|
||||
|
||||
// Extracts keypoint peaks from each heatmap in parallel, renumbers them with
// globally unique ids, and groups them into poses via the PAF maps.
std::vector<HumanPose> HPEOpenPose::extractPoses(const std::vector<cv::Mat>& heatMaps,
                                                 const std::vector<cv::Mat>& pafs) const {
    const int mapCount = static_cast<int>(heatMaps.size());

    // One output slot per heatmap; the parallel body writes them disjointly.
    std::vector<std::vector<Peak>> peaksPerMap(heatMaps.size());
    FindPeaksBody peakFinder(heatMaps, minPeaksDistance, peaksPerMap, confidenceThreshold);
    cv::parallel_for_(cv::Range(0, mapCount), peakFinder);

    // Peak ids are local to their heatmap; offset each map's ids by the total
    // number of peaks in all preceding maps to make them globally unique.
    int idOffset = 0;
    for (size_t mapIdx = 1; mapIdx < peaksPerMap.size(); mapIdx++) {
        idOffset += static_cast<int>(peaksPerMap[mapIdx - 1].size());
        for (auto& peak : peaksPerMap[mapIdx]) {
            peak.id += idOffset;
        }
    }

    return groupPeaksToPoses(peaksPerMap,
                             pafs,
                             keypointsNumber,
                             midPointsScoreThreshold,
                             foundMidPointsRatioThreshold,
                             minJointsNumber,
                             minSubsetScore);
}
|
||||
62
engines/OpenVINOEngine/src/models/image_model.cpp
Normal file
62
engines/OpenVINOEngine/src/models/image_model.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/image_model.h"
|
||||
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
|
||||
// Default constructor: leaves all members at their in-class defaults.
ImageModel::ImageModel() {
}

// @param modelFileName path to the model file (forwarded to ModelBase)
// @param useAutoResize when false, preprocess() resizes the image on the
//                      host to the tensor's HxW; when true the image is
//                      wrapped unchanged (resize presumably delegated
//                      downstream — confirm against the plugin config)
// @param layout        optional input layout override string
ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ModelBase(modelFileName, layout),
      useAutoResize(useAutoResize) {}
|
||||
|
||||
std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
|
||||
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
|
||||
auto img = inputTransform(origImg);
|
||||
|
||||
if (!useAutoResize) {
|
||||
// /* Resize and copy data from the image to the input tensor */
|
||||
const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]); // first input should be image
|
||||
const ov::Shape& tensorShape = frameTensor.get_shape();
|
||||
const ov::Layout layout("NHWC");
|
||||
const size_t width = tensorShape[ov::layout::width_idx(layout)];
|
||||
const size_t height = tensorShape[ov::layout::height_idx(layout)];
|
||||
const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
|
||||
if (static_cast<size_t>(img.channels()) != channels) {
|
||||
throw std::runtime_error(std::string("The number of channels for model input: ") +
|
||||
std::to_string(channels) + " and image: " +
|
||||
std::to_string(img.channels()) + " - must match");
|
||||
}
|
||||
if (channels != 1 && channels != 3) {
|
||||
throw std::runtime_error("Unsupported number of channels");
|
||||
}
|
||||
img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
|
||||
}
|
||||
request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
|
||||
return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
|
||||
}
|
||||
167
engines/OpenVINOEngine/src/models/jpeg_restoration_model.cpp
Normal file
167
engines/OpenVINOEngine/src/models/jpeg_restoration_model.cpp
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/ocv_common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/image_model.h"
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/jpeg_restoration_model.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// Constructs the JPEG restoration model wrapper.
// @param modelFileName    path to the model file (forwarded to ImageModel)
// @param inputImgSize     expected input image size; cached as the network
//                         input dimensions
// @param _jpegCompression flag stored in `jpegCompression`; its consumer is
//                         outside this translation unit — confirm semantics
//                         against the declaring header
// @param layout           optional input layout override string
JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           bool _jpegCompression,
                                           const std::string& layout)
    // useAutoResize = false: the base class resizes on the host.
    : ImageModel(modelFileName, false, layout) {
    netInputHeight = inputImgSize.height;
    netInputWidth = inputImgSize.width;
    jpegCompression = _jpegCompression;
}
|
||||
|
||||
// Validates the model's I/O topology (single 3-channel 4-D input, single
// 3-channel 4-D output), wires up pre/post-processing (u8 NHWC input,
// f32 output), and reshapes the input via changeInputSize().
// @throws std::logic_error on an unexpected input/output topology
void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 input");
    }
    inputsNames.push_back(model->input().get_any_name());

    const ov::Shape& inputShape = model->input().get_shape();
    const ov::Layout& inputLayout = getInputLayout(model->input());

    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    // The wrapper feeds u8 NHWC images; ppp converts to the model's layout.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output");
    }
    const ov::Shape& outputShape = model->output().get_shape();
    // Output layout is assumed NCHW for the dimension checks below.
    const ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }

    outputsNames.push_back(model->output().get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    changeInputSize(model);
}
|
||||
|
||||
// Rounds the requested network input size up to the model's stride and reshapes
// the model to a fixed batch-1 input of that size.
void JPEGRestorationModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
    ov::Shape shape = model->input().get_shape();
    const ov::Layout& inputLayout = ov::layout::get_layout(model->input());

    const auto bIdx = ov::layout::batch_idx(inputLayout);
    const auto hIdx = ov::layout::height_idx(inputLayout);
    const auto wIdx = ov::layout::width_idx(inputLayout);

    // The model's native spatial dimensions must already be stride-aligned.
    if (shape[hIdx] % stride || shape[wIdx] % stride) {
        throw std::logic_error("The shape of the model input must be divisible by stride");
    }

    // Round the requested dimensions up to the nearest multiple of stride.
    netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
    netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;

    shape[bIdx] = 1;
    shape[hIdx] = netInputHeight;
    shape[wIdx] = netInputWidth;

    model->reshape(shape);
}
|
||||
|
||||
// Prepares one frame for inference: optionally recompresses it as a low-quality
// JPEG (to create the artifacts the network removes), then pads or resizes it to
// the network input size and binds it to the request's input tensor.
// Returns the original image size so postprocess() can restore it.
std::shared_ptr<InternalModelData> JPEGRestorationModel::preprocess(const InputData& inputData,
                                                                    ov::InferRequest& request) {
    cv::Mat image = inputData.asRef<ImageInputData>().inputImage;
    const size_t h = image.rows;
    const size_t w = image.cols;
    cv::Mat resizedImage;
    if (jpegCompression) {
        // Deliberately round-trip through JPEG at quality 40 so the network sees
        // the compression artifacts it was trained to restore.
        std::vector<uchar> encimg;
        std::vector<int> params{cv::IMWRITE_JPEG_QUALITY, 40};
        cv::imencode(".jpg", image, encimg, params);
        // NOTE(review): flag value 3 = IMREAD_COLOR | IMREAD_ANYDEPTH; presumably
        // plain IMREAD_COLOR (1) was intended — confirm against the original sample.
        image = cv::imdecode(cv::Mat(encimg), 3);
    }

    if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) {
        // Image is within one stride of the network size: zero-pad bottom/right so
        // postprocess() can crop the result back losslessly.
        int bottom = netInputHeight - h;
        int right = netInputWidth - w;
        cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0);
    } else {
        // Aspect ratios diverge too much: fall back to a lossy resize.
        slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
        cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight));
    }
    request.set_input_tensor(wrapMat2Tensor(resizedImage));

    return std::make_shared<InternalImageModelData>(image.cols, image.rows);
}
|
||||
|
||||
// Converts the network's planar f32 NCHW output back to an 8-bit BGR image at the
// original input resolution, undoing the padding/resizing done by preprocess().
std::unique_ptr<ResultBase> JPEGRestorationModel::postprocess(InferenceResult& infResult) {
    // Own the result from the start so an exception below cannot leak it
    // (the original raw `new` was only wrapped in unique_ptr at the very end).
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);

    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();

    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
    // Output is NCHW (see prepareInputsOutputs): shape[2] = H, shape[3] = W.
    // The previous static_cast<int> into a size_t was a pointless narrowing round-trip.
    const size_t outHeight = outputShape[2];
    const size_t outWidth = outputShape[3];
    const size_t numOfPixels = outWidth * outHeight;

    // Wrap the three planar channels in-place (no copy) and interleave them.
    std::vector<cv::Mat> imgPlanes{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);

    // Mirror preprocess()'s decision: crop away the zero padding when the image was
    // padded, otherwise resize back to the original dimensions.
    if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) &&
        static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight &&
        netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) &&
        static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) {
        result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    } else {
        cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    }

    // Network outputs are in [0, 1]; scale to 8-bit.
    result->resultImage.convertTo(result->resultImage, CV_8UC3, 255);

    return result;
}
|
||||
65
engines/OpenVINOEngine/src/models/model_base.cpp
Normal file
65
engines/OpenVINOEngine/src/models/model_base.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/model_base.h"
|
||||
#include <utility>
|
||||
#include <openvino/openvino.hpp>
|
||||
#include <utils/common.hpp>
|
||||
#include <utils/config_factory.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
// Reads the model file and runs the wrapper-specific I/O configuration plus the
// batch-size fixup, returning the configured (not yet compiled) model.
std::shared_ptr<ov::Model> ModelBase::prepareModel(ov::Core& core) {
    slog::info << "Reading model " << modelFileName << slog::endl;
    auto loadedModel = core.read_model(modelFileName);
    logBasicModelInfo(loadedModel);
    // Subclasses attach their preprocessing/postprocessing here; then pin batch = 1.
    prepareInputsOutputs(loadedModel);
    setBatch(loadedModel);
    return loadedModel;
}
|
||||
|
||||
// Stores the configuration, then prepares and compiles the model for the chosen
// device. The compiled model is cached in the member and also returned.
ov::CompiledModel ModelBase::compileModel(const ModelConfig& config, ov::Core& core) {
    this->config = config;
    compiledModel = core.compile_model(prepareModel(core), config.deviceName, config.compiledModelConfig);
    logCompiledModelInfo(compiledModel, modelFileName, config.deviceName);
    return compiledModel;
}
|
||||
|
||||
// Resolves the layout for a model input, in priority order:
//   1. a layout already attached to the input inside the model,
//   2. (no user overrides given) a guess derived from the tensor shape,
//   3. a single user-supplied layout applied to every input,
//   4. the user-supplied per-input layout map.
ov::Layout ModelBase::getInputLayout(const ov::Output<ov::Node>& input) {
    ov::Layout layout = ov::layout::get_layout(input);
    if (layout.empty()) {
        if (inputsLayouts.empty()) {
            layout = getLayoutFromShape(input.get_partial_shape());
            slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '"
                       << input.get_any_name() << "' will be used." << slog::endl;
        } else if (inputsLayouts.size() == 1) {
            layout = inputsLayouts.begin()->second;
        } else {
            // NOTE(review): operator[] default-inserts an empty layout when the input
            // name is missing from the map — confirm callers always supply all names.
            layout = inputsLayouts[input.get_any_name()];
        }
    }

    return layout;
}
|
||||
|
||||
// Pins the model's batch dimension to 1: these wrappers infer one frame at a time.
void ModelBase::setBatch(std::shared_ptr<ov::Model>& model) {
    ov::set_batch(model, 1);
}
|
||||
345
engines/OpenVINOEngine/src/models/openpose_decoder.cpp
Normal file
345
engines/OpenVINOEngine/src/models/openpose_decoder.cpp
Normal file
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/openpose_decoder.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <utils/common.hpp>
|
||||
|
||||
#include "models/results.h"
|
||||
|
||||
// A single keypoint candidate: its id within its heatmap, position, and confidence.
Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {}

// A pose being assembled: per-keypoint peak ids (-1 = not yet assigned),
// the number of joints found so far, and the accumulated score.
HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber)
    : peaksIndices(std::vector<int>(keypointsNumber, -1)),
      nJoints(0),
      score(0.0f) {}

// A scored candidate limb connecting two joint candidates.
TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score)
    : firstJointIdx(firstJointIdx),
      secondJointIdx(secondJointIdx),
      score(score) {}
|
||||
|
||||
// Finds local maxima of one heatmap (4-neighborhood, values below
// confidenceThreshold zeroed), then applies non-maximum suppression so no two
// surviving peaks are closer than minPeaksDistance. Surviving peaks are appended
// to allPeaks[heatMapId] with sequential ids.
void findPeaks(const std::vector<cv::Mat>& heatMaps,
               const float minPeaksDistance,
               std::vector<std::vector<Peak>>& allPeaks,
               int heatMapId,
               float confidenceThreshold) {
    std::vector<cv::Point> peaks;
    const cv::Mat& heatMap = heatMaps[heatMapId];
    const float* heatMapData = heatMap.ptr<float>();
    size_t heatMapStep = heatMap.step1();
    // Scan one pixel beyond each border; out-of-range samples stay 0 so border
    // pixels are compared against an implicit zero frame.
    for (int y = -1; y < heatMap.rows + 1; y++) {
        for (int x = -1; x < heatMap.cols + 1; x++) {
            float val = 0;
            if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) {
                val = heatMapData[y * heatMapStep + x];
                val = val >= confidenceThreshold ? val : 0;
            }

            // NOTE(review): despite the names, left_val samples x+1 and right_val
            // samples x-1; the comparison is symmetric so behavior is unaffected.
            float left_val = 0;
            if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) {
                left_val = heatMapData[y * heatMapStep + x + 1];
                left_val = left_val >= confidenceThreshold ? left_val : 0;
            }

            float right_val = 0;
            if (x > 0 && y >= 0 && y < heatMap.rows) {
                right_val = heatMapData[y * heatMapStep + x - 1];
                right_val = right_val >= confidenceThreshold ? right_val : 0;
            }

            float top_val = 0;
            if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) {
                top_val = heatMapData[(y + 1) * heatMapStep + x];
                top_val = top_val >= confidenceThreshold ? top_val : 0;
            }

            float bottom_val = 0;
            if (x >= 0 && y > 0 && x < heatMap.cols) {
                bottom_val = heatMapData[(y - 1) * heatMapStep + x];
                bottom_val = bottom_val >= confidenceThreshold ? bottom_val : 0;
            }

            // Strict local maximum over the 4-neighborhood.
            if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) {
                peaks.push_back(cv::Point(x, y));
            }
        }
    }
    // Sort by x so suppression is deterministic, then drop any peak within
    // minPeaksDistance of an earlier surviving one.
    std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) {
        return a.x < b.x;
    });
    std::vector<bool> isActualPeak(peaks.size(), true);
    int peakCounter = 0;
    std::vector<Peak>& peaksWithScoreAndID = allPeaks[heatMapId];
    for (size_t i = 0; i < peaks.size(); i++) {
        if (isActualPeak[i]) {
            for (size_t j = i + 1; j < peaks.size(); j++) {
                if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) +
                         (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) {
                    isActualPeak[j] = false;
                }
            }
            peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at<float>(peaks[i])));
        }
    }
}
|
||||
|
||||
// Assembles detected keypoint peaks into human poses using Part Affinity Fields
// (OpenPose-style decoding): for each skeleton limb, candidate joint pairs are
// scored by integrating the PAF along the limb, greedily matched one-to-one, and
// merged into growing per-person keypoint sets; weak/short sets are filtered out.
std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
                                         const std::vector<cv::Mat>& pafs,
                                         const size_t keypointsNumber,
                                         const float midPointsScoreThreshold,
                                         const float foundMidPointsRatioThreshold,
                                         const int minJointsNumber,
                                         const float minSubsetScore) {
    // 1-based keypoint-id pairs forming each limb of the skeleton.
    static const std::pair<int, int> limbIdsHeatmap[] = {
        {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, {9, 10}, {10, 11}, {2, 12},
        {12, 13}, {13, 14}, {2, 1}, {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18}};
    // PAF channel index pairs (x- and y-component maps) for each limb above.
    static const std::pair<int, int> limbIdsPaf[] = {
        {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, {21, 22}, {23, 24}, {25, 26},
        {27, 28}, {29, 30}, {47, 48}, {49, 50}, {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46}};

    // Flatten all peaks; Peak::id indexes into this flat list below.
    std::vector<Peak> candidates;
    for (const auto& peaks : allPeaks) {
        candidates.insert(candidates.end(), peaks.begin(), peaks.end());
    }
    std::vector<HumanPoseByPeaksIndices> subset(0, HumanPoseByPeaksIndices(keypointsNumber));
    for (size_t k = 0; k < arraySize(limbIdsPaf); k++) {
        std::vector<TwoJointsConnection> connections;
        const int mapIdxOffset = keypointsNumber + 1;
        std::pair<cv::Mat, cv::Mat> scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset],
                                                pafs[limbIdsPaf[k].second - mapIdxOffset]};
        const int idxJointA = limbIdsHeatmap[k].first - 1;
        const int idxJointB = limbIdsHeatmap[k].second - 1;
        const std::vector<Peak>& candA = allPeaks[idxJointA];
        const std::vector<Peak>& candB = allPeaks[idxJointB];
        const size_t nJointsA = candA.size();
        const size_t nJointsB = candB.size();
        if (nJointsA == 0 && nJointsB == 0) {
            continue;
        } else if (nJointsA == 0) {
            // Only joint-B candidates exist: seed single-joint poses for unseen B peaks.
            for (size_t i = 0; i < nJointsB; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointB] == candB[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointB] = candB[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candB[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        } else if (nJointsB == 0) {
            // Mirror case: only joint-A candidates exist.
            for (size_t i = 0; i < nJointsA; i++) {
                int num = 0;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == candA[i].id) {
                        num++;
                        continue;
                    }
                }
                if (num == 0) {
                    HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
                    personKeypoints.peaksIndices[idxJointA] = candA[i].id;
                    personKeypoints.nJoints = 1;
                    personKeypoints.score = candA[i].score;
                    subset.push_back(personKeypoints);
                }
            }
            continue;
        }

        // Score every A-B pair by sampling the PAF along the line between them.
        std::vector<TwoJointsConnection> tempJointConnections;
        for (size_t i = 0; i < nJointsA; i++) {
            for (size_t j = 0; j < nJointsB; j++) {
                cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5;
                cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y));
                cv::Point2f vec = candB[j].pos - candA[i].pos;
                double norm_vec = cv::norm(vec);
                if (norm_vec == 0) {
                    continue;
                }
                vec /= norm_vec;
                float score = vec.x * scoreMid.first.at<float>(mid) + vec.y * scoreMid.second.at<float>(mid);
                int height_n = pafs[0].rows / 2;
                float suc_ratio = 0.0f;
                float mid_score = 0.0f;
                const int mid_num = 10;
                const float scoreThreshold = -100.0f;
                if (score > scoreThreshold) {
                    float p_sum = 0;
                    int p_count = 0;
                    cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1),
                                    (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1));
                    for (int n = 0; n < mid_num; n++) {
                        cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width),
                                           cvRound(candA[i].pos.y + n * step.height));
                        cv::Point2f pred(scoreMid.first.at<float>(midPoint), scoreMid.second.at<float>(midPoint));
                        score = vec.x * pred.x + vec.y * pred.y;
                        if (score > midPointsScoreThreshold) {
                            p_sum += score;
                            p_count++;
                        }
                    }
                    // NOTE(review): p_count / mid_num is integer division, so suc_ratio
                    // is 0 unless every sample passes the threshold — verify this is
                    // intended before "fixing" it; the decoder tuning depends on it.
                    suc_ratio = static_cast<float>(p_count / mid_num);
                    float ratio = p_count > 0 ? p_sum / p_count : 0.0f;
                    // Penalize limbs much longer than half the map height.
                    mid_score = ratio + static_cast<float>(std::min(height_n / norm_vec - 1, 0.0));
                }
                if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) {
                    tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score));
                }
            }
        }
        if (!tempJointConnections.empty()) {
            std::sort(tempJointConnections.begin(),
                      tempJointConnections.end(),
                      [](const TwoJointsConnection& a, const TwoJointsConnection& b) {
                          return (a.score > b.score);
                      });
        }
        // Greedy one-to-one matching: best scores first, each peak used at most once.
        size_t num_limbs = std::min(nJointsA, nJointsB);
        size_t cnt = 0;
        std::vector<int> occurA(nJointsA, 0);
        std::vector<int> occurB(nJointsB, 0);
        for (size_t row = 0; row < tempJointConnections.size(); row++) {
            if (cnt == num_limbs) {
                break;
            }
            const int& indexA = tempJointConnections[row].firstJointIdx;
            const int& indexB = tempJointConnections[row].secondJointIdx;
            const float& score = tempJointConnections[row].score;
            if (occurA[indexA] == 0 && occurB[indexB] == 0) {
                connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score));
                cnt++;
                occurA[indexA] = 1;
                occurB[indexB] = 1;
            }
        }
        if (connections.empty()) {
            continue;
        }

        bool extraJointConnections = (k == 17 || k == 18);
        if (k == 0) {
            // First limb: every connection seeds a new two-joint pose.
            subset = std::vector<HumanPoseByPeaksIndices>(connections.size(), HumanPoseByPeaksIndices(keypointsNumber));
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                subset[i].peaksIndices[idxJointA] = indexA;
                subset[i].peaksIndices[idxJointB] = indexB;
                subset[i].nJoints = 2;
                subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
            }
        } else if (extraJointConnections) {
            // The last two auxiliary limbs only fill gaps in existing poses;
            // they never change joint counts or scores.
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                    } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) {
                        subset[j].peaksIndices[idxJointA] = indexA;
                    }
                }
            }
            continue;
        } else {
            // Attach each connection to an existing pose sharing joint A,
            // or start a new two-joint pose if none matches.
            for (size_t i = 0; i < connections.size(); i++) {
                const int& indexA = connections[i].firstJointIdx;
                const int& indexB = connections[i].secondJointIdx;
                bool num = false;
                for (size_t j = 0; j < subset.size(); j++) {
                    if (subset[j].peaksIndices[idxJointA] == indexA) {
                        subset[j].peaksIndices[idxJointB] = indexB;
                        subset[j].nJoints++;
                        subset[j].score += candidates[indexB].score + connections[i].score;
                        num = true;
                    }
                }
                if (!num) {
                    HumanPoseByPeaksIndices hpWithScore(keypointsNumber);
                    hpWithScore.peaksIndices[idxJointA] = indexA;
                    hpWithScore.peaksIndices[idxJointB] = indexB;
                    hpWithScore.nJoints = 2;
                    hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
                    subset.push_back(hpWithScore);
                }
            }
        }
    }
    // Keep only assemblies with enough joints and a sufficient mean score,
    // converting them to HumanPose (unassigned keypoints stay at (-1, -1)).
    std::vector<HumanPose> poses;
    for (const auto& subsetI : subset) {
        if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) {
            continue;
        }
        int position = -1;
        HumanPose pose{std::vector<cv::Point2f>(keypointsNumber, cv::Point2f(-1.0f, -1.0f)),
                       subsetI.score * std::max(0, subsetI.nJoints - 1)};
        for (const auto& peakIdx : subsetI.peaksIndices) {
            position++;
            if (peakIdx >= 0) {
                pose.keypoints[position] = candidates[peakIdx].pos;
                // Shift to pixel centers.
                pose.keypoints[position].x += 0.5;
                pose.keypoints[position].y += 0.5;
            }
        }
        poses.push_back(pose);
    }
    return poses;
}
|
||||
160
engines/OpenVINOEngine/src/models/segmentation_model.cpp
Normal file
160
engines/OpenVINOEngine/src/models/segmentation_model.cpp
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/segmentation_model.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// Thin constructor: all shared configuration is handled by the ImageModel base.
SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
    : ImageModel(modelFileName, useAutoResize, layout) {}
|
||||
|
||||
// Reads class labels, one per line, from labelFilename.
// Returns an empty list when no filename is given; throws std::runtime_error when
// the file cannot be opened and std::logic_error when it contains no lines.
std::vector<std::string> SegmentationModel::loadLabels(const std::string& labelFilename) {
    std::vector<std::string> labelsList;
    if (labelFilename.empty()) {
        return labelsList;
    }

    std::ifstream inputFile(labelFilename);
    if (!inputFile.is_open())
        throw std::runtime_error("Can't open the labels file: " + labelFilename);

    for (std::string label; std::getline(inputFile, label);) {
        labelsList.push_back(std::move(label));
    }
    if (labelsList.empty())
        throw std::logic_error("File is empty: " + labelFilename);

    return labelsList;
}
|
||||
|
||||
// Validates the model's single input/output and configures the preprocessing
// graph (NHWC tensor in, f32 class/probability planes out). Caches the output
// geometry (outHeight/outWidth/outChannels) used by postprocess().
void SegmentationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Prepare input -----------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input");
    }
    const auto& input = model->input();
    inputsNames.push_back(input.get_any_name());

    const ov::Layout& inputLayout = getInputLayout(input);
    const ov::Shape& inputShape = input.get_shape();
    if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }
    // (Fix: the output count used to be validated twice with identical code;
    //  the single check now lives in the "Prepare output" section below.)

    ov::preprocess::PrePostProcessor ppp(model);
    inputTransform.setPrecision(ppp, model->input().get_any_name());
    ppp.input().tensor().set_layout("NHWC");

    if (useAutoResize) {
        // Let the device resize arbitrary input sizes to the model's spatial dims.
        ppp.input().tensor().set_spatial_dynamic_shape();

        ppp.input()
            .preprocess()
            .convert_element_type(ov::element::f32)
            .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
    }

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() != 1) {
        throw std::logic_error("Segmentation model wrapper supports topologies with only 1 output");
    }

    const auto& output = model->output();
    outputsNames.push_back(output.get_any_name());

    // Copy by value: get_max_shape() returns a temporary, so avoid binding a
    // long-lived reference to it.
    const ov::Shape outputShape = output.get_partial_shape().get_max_shape();

    ov::Layout outputLayout = getLayoutFromShape(outputShape);
    outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]);
    outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]);

    ppp.output().model().set_layout(outputLayout);
    ppp.output().tensor().set_element_type(ov::element::f32);
    if (ov::layout::has_channels(outputLayout)) {
        // Per-class probability planes.
        ppp.output().tensor().set_layout("NCHW");
        outChannels = static_cast<int>(outputShape[ov::layout::channels_idx(outputLayout)]);
    } else {
        // deeplabv3-style output: a single plane of class ids.
        ppp.output().tensor().set_layout("NHW");
        outChannels = 1;
    }
    model = ppp.build();
}
|
||||
|
||||
// Converts the network output into an 8-bit class-id image at the original input
// resolution. Handles three output forms: i32 class ids, i64 class ids, and f32
// per-class probability planes (argmax over channels).
std::unique_ptr<ResultBase> SegmentationModel::postprocess(InferenceResult& infResult) {
    // Own the result from the start so an exception below cannot leak it
    // (the original raw `new` was only wrapped in unique_ptr at the very end).
    auto result = std::make_unique<ImageResult>(infResult.frameId, infResult.metaData);
    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto& outTensor = infResult.getFirstOutputTensor();

    // Class-id map at network resolution; resized back to the input size below.
    result->resultImage = cv::Mat(outHeight, outWidth, CV_8UC1);

    if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) {
        // Model already emits per-pixel class ids (int32); wrap without copying.
        cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data<int32_t>());
        predictions.convertTo(result->resultImage, CV_8UC1);
    } else if (outTensor.get_element_type() == ov::element::i64 && outChannels == 1) {
        // int64 ids: narrow element-wise to int32 since OpenCV has no 64-bit int Mat.
        cv::Mat predictions(outHeight, outWidth, CV_32SC1);
        const auto data = outTensor.data<int64_t>();
        for (size_t i = 0; i < predictions.total(); ++i) {
            reinterpret_cast<int32_t*>(predictions.data)[i] = int32_t(data[i]);
        }
        predictions.convertTo(result->resultImage, CV_8UC1);
    } else if (outTensor.get_element_type() == ov::element::f32) {
        // Probability planes in CHW order: per-pixel argmax across channels.
        const float* data = outTensor.data<float>();
        for (int rowId = 0; rowId < outHeight; ++rowId) {
            for (int colId = 0; colId < outWidth; ++colId) {
                int classId = 0;
                float maxProb = -1.0f;
                for (int chId = 0; chId < outChannels; ++chId) {
                    float prob = data[chId * outHeight * outWidth + rowId * outWidth + colId];
                    if (prob > maxProb) {
                        classId = chId;
                        maxProb = prob;
                    }
                }
                result->resultImage.at<uint8_t>(rowId, colId) = classId;
            }
        }
    }

    // INTER_NEAREST keeps class ids intact (no interpolation between labels).
    cv::resize(result->resultImage,
               result->resultImage,
               cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight),
               0,
               0,
               cv::INTER_NEAREST);

    return result;
}
|
||||
107
engines/OpenVINOEngine/src/models/style_transfer_model.cpp
Normal file
107
engines/OpenVINOEngine/src/models/style_transfer_model.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/style_transfer_model.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
// Thin constructor: auto-resize is disabled because pre/postprocess handle
// resizing explicitly for this model.
StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout)
    : ImageModel(modelFileName, false, layout) {}
|
||||
|
||||
// Validates the model's single input/output, records the network input size, and
// configures the preprocessing graph (u8 NHWC frames in, f32 out).
void StyleTransferModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    if (model->inputs().size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 input");
    }

    inputsNames.push_back(model->input().get_any_name());

    const ov::Shape& inputShape = model->input().get_shape();
    ov::Layout inputLayout = getInputLayout(model->input());

    // Expect a single-image batch of a 3-channel tensor.
    if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
        inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's input is expected");
    }

    // Cache the fixed spatial size preprocess() must produce.
    netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
    netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];

    // Accept u8 NHWC frames; convert to f32 inside the preprocessing graph.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().preprocess().convert_element_type(ov::element::f32);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");

    ppp.input().model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Style transfer model wrapper supports topologies with only 1 output");
    }
    outputsNames.push_back(model->output().get_any_name());

    const ov::Shape& outputShape = model->output().get_shape();
    // NOTE(review): the output layout is assumed NCHW rather than queried from the
    // model — confirm this holds for every supported topology.
    ov::Layout outputLayout{"NCHW"};
    if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
        outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
        throw std::logic_error("3-channel 4-dimensional model's output is expected");
    }

    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();
}
|
||||
|
||||
// Converts the network's planar f32 NCHW output into an 8-bit image resized back
// to the original input resolution.
std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) {
    // Own the result from the start so an exception below cannot leak it
    // (the original raw `new` was only wrapped in unique_ptr at the very end).
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);

    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto outputData = infResult.getFirstOutputTensor().data<float>();

    const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
    // Output is NCHW: shape[2] = H, shape[3] = W.
    // The previous static_cast<int> into a size_t was a pointless narrowing round-trip.
    const size_t outHeight = outputShape[2];
    const size_t outWidth = outputShape[3];
    const size_t numOfPixels = outWidth * outHeight;

    // Planes are taken in reverse order (2, 1, 0) to swap the channel order of the
    // network output into OpenCV's convention; Mats wrap the tensor without copying.
    std::vector<cv::Mat> imgPlanes{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));

    // Values are already in the 0..255 range; just truncate to 8-bit.
    result->resultImage.convertTo(result->resultImage, CV_8UC3);

    return result;
}
|
||||
310
engines/OpenVINOEngine/src/models/super_resolution_model.cpp
Normal file
310
engines/OpenVINOEngine/src/models/super_resolution_model.cpp
Normal file
@@ -0,0 +1,310 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "models/super_resolution_model.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <utils/image_utils.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
#include "models/input_data.h"
|
||||
#include "models/internal_model_data.h"
|
||||
#include "models/results.h"
|
||||
|
||||
/// Constructs the super-resolution wrapper for the given model file.
/// The requested network input size is remembered and applied later via
/// changeInputSize(); the second base-class argument is fixed to false here.
SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName,
                                           const cv::Size& inputImgSize,
                                           const std::string& layout)
    : ImageModel(modelFileName, false, layout) {
    netInputWidth = inputImgSize.width;
    netInputHeight = inputImgSize.height;
}
|
||||
|
||||
// Validates the model's inputs/outputs, identifies which input is the
// low-resolution image vs. the optional bicubic-upsampled image, configures
// pre/post-processing, and finally reshapes the network to the requested
// input size. NOTE(review): statement order matters here — ppp.build()
// rebuilds `model`, and lrShape captured earlier is reused afterwards.
void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1 && inputs.size() != 2) {
        throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only");
    }
    // Provisionally treat the first input as the low-resolution one; may be
    // swapped below once both shapes are known.
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }
    // in case of 2 inputs they have the same layouts
    ov::Layout inputLayout = getInputLayout(model->inputs().front());

    auto channelsId = ov::layout::channels_idx(inputLayout);
    auto heightId = ov::layout::height_idx(inputLayout);
    auto widthId = ov::layout::width_idx(inputLayout);

    if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) {
        throw std::logic_error("Input layer is expected to have 1 or 3 channels");
    }

    // A model like single-image-super-resolution-???? may take bicubic interpolation of the input image as the
    // second input
    std::string bicInputTensorName;
    if (inputs.size() == 2) {
        bicInputTensorName = (++inputs.begin())->get_any_name();
        inputsNames.push_back(bicInputTensorName);
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        if (bicShape.size() != 4) {
            throw std::logic_error("Number of dimensions for both inputs must be 4");
        }
        // The smaller input is the LR image; if the first one is actually the
        // bigger (bicubic) one, swap both the shapes and the recorded names.
        if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) {
            std::swap(bicShape, lrShape);
            inputsNames[0].swap(inputsNames[1]);
        } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) {
            throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial"
                                   "dimension of another input");
        }
    }

    // Both inputs get the same user-side NHWC layout and precision.
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : inputs) {
        inputTransform.setPrecision(ppp, input.get_any_name());
        ppp.input(input.get_any_name()).tensor().set_layout("NHWC");
        ppp.input(input.get_any_name()).model().set_layout(inputLayout);
    }

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }

    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    const ov::Shape& outShape = model->output().get_shape();

    // Upscale coefficient = output width / LR input width, then reshape the
    // network to the image size requested in the constructor.
    // NOTE(review): outputLayout is also used to index lrShape here —
    // presumably both are NCHW-compatible; confirm for models with other layouts.
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];
    changeInputSize(model, static_cast<int>(outWidth / inWidth));
}
|
||||
|
||||
// Reshapes the network so the low-resolution input matches
// netInputWidth x netInputHeight (batch 1); when a second (bicubic) input
// exists, it is reshaped to coeff times that size. `coeff` is the model's
// upscale factor computed by the caller.
void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) {
    std::map<std::string, ov::PartialShape> shapes;
    const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
    const auto batchId = ov::layout::batch_idx(layout);
    const auto heightId = ov::layout::height_idx(layout);
    const auto widthId = ov::layout::width_idx(layout);

    const ov::OutputVector& inputs = model->inputs();
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    ov::Shape lrShape = inputs.begin()->get_shape();

    if (inputs.size() == 2) {
        std::string bicInputTensorName = (++inputs.begin())->get_any_name();
        ov::Shape bicShape = (++inputs.begin())->get_shape();
        // The smaller input is the LR one; swap locally if the first input
        // turned out to be the bicubic (larger) one.
        if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) {
            std::swap(bicShape, lrShape);
            std::swap(bicInputTensorName, lrInputTensorName);
        }
        // Bicubic input is the target (upscaled) resolution.
        bicShape[batchId] = 1;
        bicShape[heightId] = coeff * netInputHeight;
        bicShape[widthId] = coeff * netInputWidth;
        shapes[bicInputTensorName] = ov::PartialShape(bicShape);
    }

    lrShape[batchId] = 1;
    lrShape[heightId] = netInputHeight;
    lrShape[widthId] = netInputWidth;
    shapes[lrInputTensorName] = ov::PartialShape(lrShape);

    model->reshape(shapes);
}
|
||||
|
||||
std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData,
|
||||
ov::InferRequest& request) {
|
||||
auto imgData = inputData.asRef<ImageInputData>();
|
||||
auto img = inputTransform(imgData.inputImage);
|
||||
const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
|
||||
const ov::Layout layout("NHWC");
|
||||
|
||||
if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) {
|
||||
cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
|
||||
}
|
||||
|
||||
if (static_cast<size_t>(img.cols) != netInputWidth ||
|
||||
static_cast<size_t>(img.rows) != netInputHeight) {
|
||||
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
|
||||
}
|
||||
const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
|
||||
const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
|
||||
img = resizeImageExt(img, width, height);
|
||||
request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
|
||||
|
||||
if (inputsNames.size() == 2) {
|
||||
const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]);
|
||||
const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]);
|
||||
const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]);
|
||||
cv::Mat resized;
|
||||
cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC);
|
||||
request.set_tensor(inputsNames[1], wrapMat2Tensor(resized));
|
||||
}
|
||||
|
||||
return std::make_shared<InternalImageModelData>(img.cols, img.rows);
|
||||
}
|
||||
|
||||
// Converts the network's float output ([1, C, H, W], C == 3 or 1) into an
// 8-bit image. Single-channel outputs are binarized (text-image models).
// Fixes: own the result immediately (bare `new` leaked on a throw) and drop
// the size_t <- static_cast<int> narrowing casts.
std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) {
    std::unique_ptr<ImageResult> result(new ImageResult);
    // Copy the generic result fields (frame id, metadata).
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto outputData = infResult.getFirstOutputTensor().data<float>();

    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
    const size_t outChannels = outShape[1];
    const size_t outHeight = outShape[2];
    const size_t outWidth = outShape[3];
    const size_t numOfPixels = outWidth * outHeight;
    const int rows = static_cast<int>(outHeight);
    const int cols = static_cast<int>(outWidth);

    // Wrap each output plane without copying the tensor data.
    std::vector<cv::Mat> imgPlanes;
    if (outChannels == 3) {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(rows, cols, CV_32FC1, &(outputData[0])),
                                         cv::Mat(rows, cols, CV_32FC1, &(outputData[numOfPixels])),
                                         cv::Mat(rows, cols, CV_32FC1, &(outputData[numOfPixels * 2]))};
    } else {
        imgPlanes = std::vector<cv::Mat>{cv::Mat(rows, cols, CV_32FC1, &(outputData[0]))};
        // Post-processing for text-image-super-resolution models
        cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY);
    }

    // Scale each [0,1] float plane into an 8-bit plane, then merge.
    for (auto& img : imgPlanes) {
        img.convertTo(img, CV_8UC1, 255);
    }
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);
    result->resultImage = resultImg;

    return result;
}
|
||||
|
||||
// Reassembles the batched per-channel output (batch of 3 planes) into a
// single 3-channel 8-bit image.
// Fixes: own the result immediately (bare `new` leaked on a throw) and drop
// the size_t <- static_cast<int> narrowing casts.
std::unique_ptr<ResultBase> SuperResolutionChannelJoint::postprocess(InferenceResult& infResult) {
    std::unique_ptr<ImageResult> result(new ImageResult);
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);
    const auto outputData = infResult.getFirstOutputTensor().data<float>();

    const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();

    const size_t outHeight = outShape[2];
    const size_t outWidth = outShape[3];
    const int rows = static_cast<int>(outHeight);
    const int cols = static_cast<int>(outWidth);

    // Each of the 3 batch entries holds one color plane, laid out contiguously.
    std::vector<cv::Mat> channels;
    for (int i = 0; i < 3; ++i) {
        channels.emplace_back(rows, cols, CV_32FC1, &(outputData[0]) + i * outHeight * outWidth);
    }
    cv::Mat resultImg(rows, cols, CV_32FC3);
    cv::merge(channels, resultImg);

    resultImg.convertTo(resultImg, CV_8UC3);
    result->resultImage = resultImg;

    return result;
}
|
||||
|
||||
std::shared_ptr<InternalModelData> SuperResolutionChannelJoint::preprocess(const InputData& inputData,
|
||||
ov::InferRequest& request) {
|
||||
auto imgData = inputData.asRef<ImageInputData>();
|
||||
auto& img = imgData.inputImage;
|
||||
|
||||
const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
|
||||
const ov::Layout layout("NCHW");
|
||||
|
||||
if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) {
|
||||
slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
|
||||
}
|
||||
|
||||
const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
|
||||
const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
|
||||
|
||||
std::vector<cv::Mat> channels;
|
||||
cv::split(img, channels);
|
||||
|
||||
ov::Tensor tensor(ov::element::u8, {3, 1, height, width});
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
matToTensor(channels[i], tensor, i);
|
||||
}
|
||||
|
||||
request.set_tensor(inputsNames[0], tensor);
|
||||
|
||||
return std::make_shared<InternalImageModelData>(img.cols, img.rows);
|
||||
}
|
||||
|
||||
// Validates the single 1-channel input, configures u8/NCHW preprocessing,
// reshapes to the requested size, and sets batch 3 so the three color planes
// run as one batched request (see preprocess()).
void SuperResolutionChannelJoint::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output ---------------------------------------------
    // --------------------------- Prepare input --------------------------------------------------
    const ov::OutputVector& inputs = model->inputs();
    if (inputs.size() != 1) {
        throw std::logic_error("Super resolution channel joint model wrapper supports topologies with 1 inputs only");
    }
    std::string lrInputTensorName = inputs.begin()->get_any_name();
    inputsNames.push_back(lrInputTensorName);
    ov::Shape lrShape = inputs.begin()->get_shape();
    if (lrShape.size() != 4) {
        throw std::logic_error("Number of dimensions for an input must be 4");
    }
    ov::Layout inputLayout = getInputLayout(model->inputs().front());

    auto channelsId = ov::layout::channels_idx(inputLayout);

    // Channel-joint mode requires a single-channel network input.
    if (lrShape[channelsId] != 1) {
        throw std::logic_error("Input layer is expected to have 1 channel");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    const auto& input = inputs.front();
    ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NCHW");
    ppp.input(input.get_any_name()).model().set_layout(inputLayout);

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector& outputs = model->outputs();
    if (outputs.size() != 1) {
        throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
    }

    outputsNames.push_back(outputs.begin()->get_any_name());
    ppp.output().tensor().set_element_type(ov::element::f32);
    model = ppp.build();

    const ov::Shape& outShape = model->output().get_shape();

    // Upscale coefficient = output width / input width.
    // NOTE(review): outputLayout is also used to index the input shape —
    // presumably both are NCHW; confirm for other layouts.
    const ov::Layout outputLayout("NCHW");
    const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
    const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];

    changeInputSize(model, static_cast<int>(outWidth / inWidth));

    // Three batch entries: one per color channel.
    ov::set_batch(model, 3);
}
|
||||
|
||||
// Forces batch 3 so the three color channels are inferred as one batched
// request (matches the {3, 1, H, W} tensor built in preprocess()).
void SuperResolutionChannelJoint::setBatch(std::shared_ptr<ov::Model>& model) {
    ov::set_batch(model, 3);
}
|
||||
206
engines/OpenVINOEngine/src/monitors/cpu_monitor.cpp
Normal file
206
engines/OpenVINOEngine/src/monitors/cpu_monitor.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright (C) 2019-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "monitors/cpu_monitor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#ifdef _WIN32
|
||||
#include "query_wrapper.h"
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
#include <PdhMsg.h>
|
||||
#include <Windows.h>
|
||||
|
||||
namespace {
|
||||
const std::size_t nCores = []() {
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo(&sysinfo);
|
||||
return sysinfo.dwNumberOfProcessors;
|
||||
}();
|
||||
}
|
||||
|
||||
// Windows implementation: samples per-core CPU load via the PDH
// "% Processor Time" counters.
class CpuMonitor::PerformanceCounter {
public:
    // Registers one counter per core and primes the query with an initial
    // collection (PDH rate counters need two samples to produce a value).
    PerformanceCounter() : coreTimeCounters(nCores) {
        PDH_STATUS status;
        for (std::size_t i = 0; i < nCores; ++i) {
            std::wstring fullCounterPath{L"\\Processor(" + std::to_wstring(i) + L")\\% Processor Time"};
            status = PdhAddCounterW(query, fullCounterPath.c_str(), 0, &coreTimeCounters[i]);
            if (ERROR_SUCCESS != status) {
                throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
            }
            status = PdhSetCounterScaleFactor(coreTimeCounters[i], -2); // scale counter to [0, 1]
            if (ERROR_SUCCESS != status) {
                throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
            }
        }
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }
    }

    // Returns one load value in [0, 1] per core, or an empty vector when PDH
    // transiently reports PDH_CALC_NEGATIVE_DENOMINATOR.
    std::vector<double> getCpuLoad() {
        PDH_STATUS status;
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }

        PDH_FMT_COUNTERVALUE displayValue;
        std::vector<double> cpuLoad(coreTimeCounters.size());
        for (std::size_t i = 0; i < coreTimeCounters.size(); ++i) {
            status = PdhGetFormattedCounterValue(coreTimeCounters[i], PDH_FMT_DOUBLE, NULL,
                                                 &displayValue);
            switch (status) {
                case ERROR_SUCCESS: break;
                // PdhGetFormattedCounterValue() can sometimes return PDH_CALC_NEGATIVE_DENOMINATOR for some reason
                case PDH_CALC_NEGATIVE_DENOMINATOR: return {};
                default:
                    throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
            }
            if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
                throw std::runtime_error("Error in counter data");
            }

            cpuLoad[i] = displayValue.doubleValue;
        }
        return cpuLoad;
    }

private:
    QueryWrapper query;                       // RAII wrapper around the PDH query handle
    std::vector<PDH_HCOUNTER> coreTimeCounters; // one counter handle per core
};
|
||||
|
||||
#elif __linux__
|
||||
#include <chrono>
|
||||
#include <regex>
|
||||
#include <utility>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace {
// Jiffies per second, needed to convert /proc/stat tick counts to seconds.
const long clockTicks = sysconf(_SC_CLK_TCK);

const std::size_t nCores = sysconf(_SC_NPROCESSORS_CONF);

// Reads /proc/stat and returns, per core, the sum of the idle and iowait
// jiffy counters (regex groups 5 and 6 of each "cpuN ..." line).
std::vector<unsigned long> getIdleCpuStat() {
    std::vector<unsigned long> idleCpuStat(nCores);
    std::ifstream procStat("/proc/stat");
    std::string line;
    std::smatch match;
    std::regex coreJiffies("^cpu(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+"
                           "(\\d+)\\s+" // idle
                           "(\\d+)"); // iowait

    while (std::getline(procStat, line)) {
        if (std::regex_search(line, match, coreJiffies)) {
            // it doesn't handle overflow of sum and overflows of /proc/stat values
            unsigned long idleInfo = stoul(match[5]) + stoul(match[6]),
                coreId = stoul(match[1]);
            if (nCores <= coreId) {
                throw std::runtime_error("The number of cores has changed");
            }
            idleCpuStat[coreId] = idleInfo;
        }
    }
    return idleCpuStat;
}
}
|
||||
|
||||
// Linux implementation: derives per-core load from the change in idle+iowait
// jiffies between two /proc/stat reads.
class CpuMonitor::PerformanceCounter {
public:
    PerformanceCounter() : prevIdleCpuStat{getIdleCpuStat()}, prevTimePoint{std::chrono::steady_clock::now()} {}

    // Returns one load value per core, or an empty vector when less than
    // 100 ms elapsed since the previous successful sample.
    std::vector<double> getCpuLoad() {
        std::vector<unsigned long> idleCpuStat = getIdleCpuStat();
        auto timePoint = std::chrono::steady_clock::now();
        // don't update data too frequently which may result in negative values for cpuLoad.
        // It may happen when collectData() is called just after setHistorySize().
        if (timePoint - prevTimePoint > std::chrono::milliseconds{100}) {
            std::vector<double> cpuLoad(nCores);
            for (std::size_t i = 0; i < idleCpuStat.size(); ++i) {
                // load = 1 - (idle time) / (wall time) over the interval
                double idleDiff = idleCpuStat[i] - prevIdleCpuStat[i];
                typedef std::chrono::duration<double, std::chrono::seconds::period> Sec;
                cpuLoad[i] = 1.0
                    - idleDiff / clockTicks / std::chrono::duration_cast<Sec>(timePoint - prevTimePoint).count();
            }
            prevIdleCpuStat = std::move(idleCpuStat);
            prevTimePoint = timePoint;
            return cpuLoad;
        }
        return {};
    }
private:
    std::vector<unsigned long> prevIdleCpuStat;          // idle+iowait jiffies at last sample
    std::chrono::steady_clock::time_point prevTimePoint; // time of last sample
};
|
||||
|
||||
#else
|
||||
// not implemented
|
||||
// Fallback for platforms that are neither Windows nor Linux:
// CPU monitoring is not implemented — zero cores, no samples.
namespace {
const std::size_t nCores{0};
}

class CpuMonitor::PerformanceCounter {
public:
    std::vector<double> getCpuLoad() {return {};};
};
|
||||
#endif
|
||||
|
||||
// Monitoring starts disabled (historySize == 0); cpuLoadSum keeps one running
// total per core for the mean-load statistic.
CpuMonitor::CpuMonitor() :
    samplesNumber{0},
    historySize{0},
    cpuLoadSum(nCores, 0) {}
|
||||
|
||||
// PerformanceCounter is an incomplete type in the header, so the destructor
// must be defined here, where the type is complete.
CpuMonitor::~CpuMonitor() = default;
|
||||
|
||||
void CpuMonitor::setHistorySize(std::size_t size) {
|
||||
if (0 == historySize && 0 != size) {
|
||||
performanceCounter.reset(new PerformanceCounter);
|
||||
} else if (0 != historySize && 0 == size) {
|
||||
performanceCounter.reset();
|
||||
}
|
||||
historySize = size;
|
||||
std::ptrdiff_t newSize = static_cast<std::ptrdiff_t>(min(size, cpuLoadHistory.size()));
|
||||
cpuLoadHistory.erase(cpuLoadHistory.begin(), cpuLoadHistory.end() - newSize);
|
||||
}
|
||||
|
||||
void CpuMonitor::collectData() {
|
||||
std::vector<double> cpuLoad = performanceCounter->getCpuLoad();
|
||||
|
||||
if (!cpuLoad.empty()) {
|
||||
for (std::size_t i = 0; i < cpuLoad.size(); ++i) {
|
||||
cpuLoadSum[i] += cpuLoad[i];
|
||||
}
|
||||
++samplesNumber;
|
||||
|
||||
cpuLoadHistory.push_back(std::move(cpuLoad));
|
||||
if (cpuLoadHistory.size() > historySize) {
|
||||
cpuLoadHistory.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the configured history length (number of samples retained).
std::size_t CpuMonitor::getHistorySize() const {
    return historySize;
}
|
||||
|
||||
// Returns a copy of the retained per-sample CPU load vectors.
std::deque<std::vector<double>> CpuMonitor::getLastHistory() const {
    return cpuLoadHistory;
}
|
||||
|
||||
std::vector<double> CpuMonitor::getMeanCpuLoad() const {
|
||||
std::vector<double> meanCpuLoad;
|
||||
meanCpuLoad.reserve(cpuLoadSum.size());
|
||||
for (double coreLoad : cpuLoadSum) {
|
||||
meanCpuLoad.push_back(samplesNumber ? coreLoad / samplesNumber : 0);
|
||||
}
|
||||
return meanCpuLoad;
|
||||
}
|
||||
213
engines/OpenVINOEngine/src/monitors/memory_monitor.cpp
Normal file
213
engines/OpenVINOEngine/src/monitors/memory_monitor.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
// Copyright (C) 2019-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "monitors/memory_monitor.h"

#include <algorithm>
|
||||
|
||||
// One memory-usage snapshot; all values are in GiB (see the 1024^3 divisors
// in the platform counters below).
struct MemState {
    double memTotal, usedMem, usedSwap;
};
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "query_wrapper.h"
|
||||
#include <algorithm>
|
||||
#define PSAPI_VERSION 2
|
||||
#include <system_error>
|
||||
#include <Windows.h>
|
||||
#include <PdhMsg.h>
|
||||
#include <Psapi.h>
|
||||
|
||||
namespace {
// Total physical RAM in GiB via GetPerformanceInfo (page count * page size).
double getMemTotal() {
    PERFORMANCE_INFORMATION performanceInformation;
    if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
        throw std::runtime_error("GetPerformanceInfo() failed");
    }
    return static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
        / (1024 * 1024 * 1024);
}
}
|
||||
|
||||
// Windows implementation: RAM figures come from GetPerformanceInfo; swap
// usage is the paging-file size scaled by the PDH "% Usage" counter.
class MemoryMonitor::PerformanceCounter {
public:
    PerformanceCounter() {
        PDH_STATUS status = PdhAddCounterW(query, L"\\Paging File(_Total)\\% Usage", 0, &pagingFileUsageCounter);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
        }
        status = PdhSetCounterScaleFactor(pagingFileUsageCounter, -2); // scale counter to [0, 1]
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
        }
    }

    // Returns {total RAM, used RAM, used swap}, all in GiB.
    MemState getMemState() {
        PERFORMANCE_INFORMATION performanceInformation;
        if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
            throw std::runtime_error("GetPerformanceInfo() failed");
        }

        PDH_STATUS status;
        status = PdhCollectQueryData(query);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
        }
        PDH_FMT_COUNTERVALUE displayValue;
        status = PdhGetFormattedCounterValue(pagingFileUsageCounter, PDH_FMT_DOUBLE, NULL, &displayValue);
        if (ERROR_SUCCESS != status) {
            throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
        }
        if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
            throw std::runtime_error("Error in counter data");
        }

        // Paging-file capacity = commit limit minus physical pages, in GiB.
        double pagingFilesSize = static_cast<double>(
            (performanceInformation.CommitLimit - performanceInformation.PhysicalTotal)
            * performanceInformation.PageSize) / (1024 * 1024 * 1024);
        return {static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
                / (1024 * 1024 * 1024),
            static_cast<double>(
                (performanceInformation.PhysicalTotal - performanceInformation.PhysicalAvailable)
                * performanceInformation.PageSize) / (1024 * 1024 * 1024),
            pagingFilesSize * displayValue.doubleValue};
    }
private:
    QueryWrapper query;                   // RAII wrapper around the PDH query handle
    PDH_HCOUNTER pagingFileUsageCounter;  // "\Paging File(_Total)\% Usage"
};
|
||||
|
||||
#elif __linux__
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
|
||||
namespace {
// Parses /proc/meminfo and returns {{MemAvailable, SwapFree}, {MemTotal,
// SwapTotal}} in GiB (the kB values are divided by 1024^2).
std::pair<std::pair<double, double>, std::pair<double, double>> getAvailableMemSwapTotalMemSwap() {
    double memAvailable = 0, swapFree = 0, memTotal = 0, swapTotal = 0;
    std::regex memRegex("^(.+):\\s+(\\d+) kB$");
    std::string line;
    std::smatch match;
    std::ifstream meminfo("/proc/meminfo");
    while (std::getline(meminfo, line)) {
        if (std::regex_match(line, match, memRegex)) {
            if ("MemAvailable" == match[1]) {
                memAvailable = stod(match[2]) / (1024 * 1024);
            } else if ("SwapFree" == match[1]) {
                swapFree = stod(match[2]) / (1024 * 1024);
            } else if ("MemTotal" == match[1]) {
                memTotal = stod(match[2]) / (1024 * 1024);
            } else if ("SwapTotal" == match[1]) {
                swapTotal = stod(match[2]) / (1024 * 1024);
            }
        }
    }
    // MemTotal is always present in /proc/meminfo; its absence means the read failed.
    if (0 == memTotal) {
        throw std::runtime_error("Can't get MemTotal");
    }
    return {{memAvailable, swapFree}, {memTotal, swapTotal}};
}

// Total physical RAM in GiB.
double getMemTotal() {
    return getAvailableMemSwapTotalMemSwap().second.first;
}
}
|
||||
|
||||
// Linux implementation: usage is derived from /proc/meminfo as
// total - available (RAM) and total - free (swap).
class MemoryMonitor::PerformanceCounter {
public:
    MemState getMemState() {
        std::pair<std::pair<double, double>, std::pair<double, double>> availableMemSwapTotalMemSwap
            = getAvailableMemSwapTotalMemSwap();
        double memTotal = availableMemSwapTotalMemSwap.second.first;
        double swapTotal = availableMemSwapTotalMemSwap.second.second;
        return {memTotal, memTotal - availableMemSwapTotalMemSwap.first.first, swapTotal - availableMemSwapTotalMemSwap.first.second};
    }
};
|
||||
|
||||
#else
|
||||
// not implemented
|
||||
// Fallback for unsupported platforms: memory monitoring reports zeros.
namespace {
double getMemTotal() {return 0.0;}
}

class MemoryMonitor::PerformanceCounter {
public:
    MemState getMemState() {return {0.0, 0.0, 0.0};}
};
|
||||
#endif
|
||||
|
||||
// All statistics start at zero; monitoring is off until setHistorySize()
// is called with a non-zero size.
MemoryMonitor::MemoryMonitor() :
    samplesNumber{0},
    historySize{0},
    memSum{0.0},
    swapSum{0.0},
    maxMem{0.0},
    maxSwap{0.0},
    memTotal{0.0},
    maxMemTotal{0.0} {}
|
||||
|
||||
// PerformanceCounter is an incomplete type in the header, so the destructor
// must be defined here, where the type is complete.
MemoryMonitor::~MemoryMonitor() = default;
|
||||
|
||||
void MemoryMonitor::setHistorySize(std::size_t size) {
|
||||
if (0 == historySize && 0 != size) {
|
||||
performanceCounter.reset(new MemoryMonitor::PerformanceCounter);
|
||||
// memTotal is not initialized in constructor because for linux its initialization involves constructing
|
||||
// std::regex which is unimplemented and throws an exception for gcc 4.8.5 (default for CentOS 7.4).
|
||||
// Delaying initialization triggers the error only when the monitor is used
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53631
|
||||
memTotal = ::getMemTotal();
|
||||
} else if (0 != historySize && 0 == size) {
|
||||
performanceCounter.reset();
|
||||
}
|
||||
historySize = size;
|
||||
std::size_t newSize = min(size, memSwapUsageHistory.size());
|
||||
memSwapUsageHistory.erase(memSwapUsageHistory.begin(), memSwapUsageHistory.end() - newSize);
|
||||
}
|
||||
|
||||
void MemoryMonitor::collectData() {
|
||||
MemState memState = performanceCounter->getMemState();
|
||||
maxMemTotal = max(maxMemTotal, memState.memTotal);
|
||||
memSum += memState.usedMem;
|
||||
swapSum += memState.usedSwap;
|
||||
++samplesNumber;
|
||||
maxMem = max(maxMem, memState.usedMem);
|
||||
maxSwap = max(maxSwap, memState.usedSwap);
|
||||
|
||||
memSwapUsageHistory.emplace_back(memState.usedMem, memState.usedSwap);
|
||||
if (memSwapUsageHistory.size() > historySize) {
|
||||
memSwapUsageHistory.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the configured history length.
std::size_t MemoryMonitor::getHistorySize() const {
    return historySize;
}

// Returns the retained (usedMem, usedSwap) samples, in GiB.
std::deque<std::pair<double, double>> MemoryMonitor::getLastHistory() const {
    return memSwapUsageHistory;
}

// Mean used memory over all samples (GiB); 0 when nothing was collected.
double MemoryMonitor::getMeanMem() const {
    return samplesNumber ? memSum / samplesNumber : 0;
}

// Mean used swap over all samples (GiB); 0 when nothing was collected.
double MemoryMonitor::getMeanSwap() const {
    return samplesNumber ? swapSum / samplesNumber : 0;
}

// Peak observed used memory (GiB).
double MemoryMonitor::getMaxMem() const {
    return maxMem;
}

// Peak observed used swap (GiB).
double MemoryMonitor::getMaxSwap() const {
    return maxSwap;
}

// Total physical memory captured when monitoring was enabled (GiB).
double MemoryMonitor::getMemTotal() const {
    return memTotal;
}

// Largest memTotal ever reported by the counter (GiB).
double MemoryMonitor::getMaxMemTotal() const {
    return maxMemTotal;
}
|
||||
330
engines/OpenVINOEngine/src/monitors/presenter.cpp
Normal file
330
engines/OpenVINOEngine/src/monitors/presenter.cpp
Normal file
@@ -0,0 +1,330 @@
|
||||
// Copyright (C) 2019-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <cctype>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "monitors/presenter.h"
|
||||
|
||||
namespace {
// Maps an (upper-case) hotkey to the monitor it toggles.
const std::map<int, MonitorType> keyToMonitorType{
    {'C', MonitorType::CpuAverage},
    {'D', MonitorType::DistributionCpu},
    {'M', MonitorType::Memory}};

// Parses a key string into a monitor set. The single string "h" means
// "show nothing" and yields an empty set; an 'h' mixed with other keys is
// rejected. Keys are matched case-insensitively; unknown keys throw.
std::set<MonitorType> strKeysToMonitorSet(const std::string& keys) {
    std::set<MonitorType> enabledMonitors;
    if (keys == "h") {
        return enabledMonitors;
    }
    for (unsigned char key: keys) {
        if (key == 'h') {
            throw std::runtime_error("Unacceptable combination of monitor types-can't show and hide info at the same time");
        }
        auto iter = keyToMonitorType.find(std::toupper(key));
        if (keyToMonitorType.end() == iter) {
            throw std::runtime_error("Unknown monitor type");
        } else {
            enabledMonitors.insert(iter->second);
        }
    }
    return enabledMonitors;
}
}
|
||||
|
||||
// Builds a presenter drawing the given monitors as graphs of size graphSize,
// starting at vertical offset yPos, keeping historySize samples per graph.
Presenter::Presenter(std::set<MonitorType> enabledMonitors,
                     int yPos,
                     cv::Size graphSize,
                     std::size_t historySize) :
    yPos{yPos},
    graphSize{graphSize},
    // Horizontal gap between adjacent graphs: 5% of graph width, at least 1 px.
    graphPadding{std::max(1, static_cast<int>(graphSize.width * 0.05))},
    historySize{historySize},
    distributionCpuEnabled{false},
    strStream{std::ios_base::app} {
    // addRemoveMonitor() toggles, so each requested monitor ends up enabled.
    for (MonitorType monitor : enabledMonitors) {
        addRemoveMonitor(monitor);
    }
}

// Convenience constructor: parses a key string ("CDM", "h", ...) into a monitor set.
Presenter::Presenter(const std::string& keys, int yPos, cv::Size graphSize, std::size_t historySize) :
    Presenter{strKeysToMonitorSet(keys), yPos, graphSize, historySize} {}
|
||||
|
||||
// Toggles one monitor on/off. History-size encoding used throughout:
//   0  -> monitor disabled,
//   1  -> cpuMonitor kept alive only for the DistributionCpu view,
//   >1 -> full time-series graph enabled.
void Presenter::addRemoveMonitor(MonitorType monitor) {
    unsigned updatedHistorySize = 1;
    if (historySize > 1) {
        int sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
        // +1 to plot graphSize.width/sampleStep segments
        // add round up to and an extra element if don't reach graph edge
        updatedHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
    }
    switch(monitor) {
        case MonitorType::CpuAverage: {
            if (cpuMonitor.getHistorySize() > 1 && distributionCpuEnabled) {
                // Graph was on; keep one sample so DistributionCpu still works.
                cpuMonitor.setHistorySize(1);
            } else if (cpuMonitor.getHistorySize() > 1 && !distributionCpuEnabled) {
                // Graph was on and nothing else needs the monitor: turn it off.
                cpuMonitor.setHistorySize(0);
            } else { // cpuMonitor.getHistorySize() <= 1
                cpuMonitor.setHistorySize(updatedHistorySize);
            }
            break;
        }
        case MonitorType::DistributionCpu: {
            if (distributionCpuEnabled) {
                distributionCpuEnabled = false;
                if (1 == cpuMonitor.getHistorySize()) { // cpuMonitor was used only for DistributionCpu => disable it
                    cpuMonitor.setHistorySize(0);
                }
            } else {
                distributionCpuEnabled = true;
                // Need at least one sample; don't shrink an already-enabled graph.
                cpuMonitor.setHistorySize(std::max(std::size_t{1}, cpuMonitor.getHistorySize()));
            }
            break;
        }
        case MonitorType::Memory: {
            if (memoryMonitor.getHistorySize() > 1) {
                memoryMonitor.setHistorySize(0);
            } else {
                memoryMonitor.setHistorySize(updatedHistorySize);
            }
            break;
        }
    }
}
|
||||
|
||||
// Reacts to a keyboard key (case-insensitive): 'H' toggles all monitors at
// once (enable all when everything is off, otherwise disable all); any key
// found in keyToMonitorType toggles just that monitor; other keys are ignored.
void Presenter::handleKey(int key) {
    key = std::toupper(key);
    if ('H' == key) {
        if (0 == cpuMonitor.getHistorySize() && memoryMonitor.getHistorySize() <= 1) {
            // Everything is currently off — switch every monitor on.
            addRemoveMonitor(MonitorType::CpuAverage);
            addRemoveMonitor(MonitorType::DistributionCpu);
            addRemoveMonitor(MonitorType::Memory);
        } else {
            // Something is on — hide everything.
            cpuMonitor.setHistorySize(0);
            distributionCpuEnabled = false;
            memoryMonitor.setHistorySize(0);
        }
    } else {
        auto iter = keyToMonitorType.find(key);
        if (keyToMonitorType.end() != iter) {
            addRemoveMonitor(iter->second);
        }
    }
}
|
||||
|
||||
// Renders the enabled monitor graphs onto `frame`, centered horizontally.
// Collects new samples at most once per second; drops right-most graphs when
// the frame is too narrow to fit all of them.
void Presenter::drawGraphs(cv::Mat& frame) {
    const std::chrono::steady_clock::time_point curTimeStamp = std::chrono::steady_clock::now();
    // Sample the monitors at ~1 Hz regardless of the frame rate.
    if (curTimeStamp - prevTimeStamp >= std::chrono::milliseconds{1000}) {
        prevTimeStamp = curTimeStamp;
        if (0 != cpuMonitor.getHistorySize()) {
            cpuMonitor.collectData();
        }
        if (memoryMonitor.getHistorySize() > 1) {
            memoryMonitor.collectData();
        }
    }

    // Count enabled monitors (bool arithmetic) and shrink the panel until it fits.
    int numberOfEnabledMonitors = (cpuMonitor.getHistorySize() > 1) + distributionCpuEnabled
        + (memoryMonitor.getHistorySize() > 1);
    int panelWidth = graphSize.width * numberOfEnabledMonitors
        + std::max(0, numberOfEnabledMonitors - 1) * graphPadding;
    while (panelWidth > frame.cols) {
        panelWidth = std::max(0, panelWidth - graphSize.width - graphPadding);
        --numberOfEnabledMonitors; // can't draw all monitors
    }
    int graphPos = std::max(0, (frame.cols - 1 - panelWidth) / 2);
    // Top 1/5 of each graph is reserved for its text label.
    int textGraphSplittingLine = graphSize.height / 5;
    int graphRectHeight = graphSize.height - textGraphSplittingLine;
    int sampleStep = 1;
    unsigned possibleHistorySize = 1;
    if (historySize > 1) {
        sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
        possibleHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
    }

    // --- CPU average-load polyline graph ---
    if (cpuMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
        std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        // Dim the background under the graph.
        graph = graph / 2 + cv::Scalar{127, 127, 127};

        int lineXPos = graph.cols - 1;
        std::vector<cv::Point> averageLoad(lastHistory.size());

        // Newest sample is drawn at the right edge, older samples to the left.
        for (int i = lastHistory.size() - 1; i >= 0; --i) {
            double mean = std::accumulate(lastHistory[i].begin(), lastHistory[i].end(), 0.0) / lastHistory[i].size();
            averageLoad[i] = {lineXPos, graphSize.height - static_cast<int>(mean * graphRectHeight)};
            lineXPos -= sampleStep;
        }

        cv::polylines(graph, averageLoad, false, {255, 0, 0}, 2);
        cv::rectangle(frame, cv::Rect{
                cv::Point{graphPos, yPos + textGraphSplittingLine},
                cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}
            }, {0, 0, 0});
        strStream.str("CPU");
        if (!lastHistory.empty()) {
            strStream << ": " << std::fixed << std::setprecision(1)
                << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
                    / lastHistory.back().size() * 100 << '%';
        }
        int baseline;
        // Measure the label so it can be centered above the graph.
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {70, 0, 0},
            1);
        graphPos += graphSize.width + graphPadding;
    }

    // --- Per-core CPU load bar chart (latest sample only) ---
    if (distributionCpuEnabled && --numberOfEnabledMonitors >= 0) {
        std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        graph = graph / 2 + cv::Scalar{127, 127, 127};

        if (!lastHistory.empty()) {
            int rectXPos = 0;
            int step = (graph.cols + lastHistory.back().size() - 1) / lastHistory.back().size(); // round up
            double sum = 0;
            // One filled pillar per core, outlined in black.
            for (double coreLoad : lastHistory.back()) {
                sum += coreLoad;
                int height = static_cast<int>(graphRectHeight * coreLoad);
                cv::Rect pillar{cv::Point{rectXPos, graph.rows - height}, cv::Size{step, height}};
                cv::rectangle(graph, pillar, {255, 0, 0}, cv::FILLED);
                cv::rectangle(graph, pillar, {0, 0, 0});
                rectXPos += step;
            }
            // Green horizontal line marks the mean load across cores.
            sum /= lastHistory.back().size();
            int yLine = graph.rows - static_cast<int>(graphRectHeight * sum);
            cv::line(graph, cv::Point{0, yLine}, cv::Point{graph.cols, yLine}, {0, 255, 0}, 2);
        }
        cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
            cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
        cv::rectangle(frame, border, {0, 0, 0});
        strStream.str("Core load");
        if (!lastHistory.empty()) {
            strStream << ": " << std::fixed << std::setprecision(1)
                << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
                    / lastHistory.back().size() * 100 << '%';
        }
        int baseline;
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {0, 70, 0});
        graphPos += graphSize.width + graphPadding;
    }

    // --- Memory + swap usage line graph ---
    if (memoryMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
        std::deque<std::pair<double, double>> lastHistory = memoryMonitor.getLastHistory();
        cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
        if (!intersection.area()) {
            return;
        }
        cv::Mat graph = frame(intersection);
        graph = graph / 2 + cv::Scalar{127, 127, 127};
        int histxPos = graph.cols - 1;
        // Vertical scale: capped so moderate usage doesn't flatten the graph.
        double range = std::min(memoryMonitor.getMaxMemTotal() + memoryMonitor.getMaxSwap(),
            (memoryMonitor.getMaxMem() + memoryMonitor.getMaxSwap()) * 1.2);
        if (lastHistory.size() > 1) {
            // Draw segments newest-to-oldest; red when memory is near-full AND swap is in use.
            for (auto memUsageIt = lastHistory.rbegin(); memUsageIt != lastHistory.rend() - 1; ++memUsageIt) {
                constexpr double SWAP_THRESHOLD = 10.0 / 1024; // 10 MiB
                cv::Vec3b color =
                    (memoryMonitor.getMemTotal() * 0.95 > memUsageIt->first) || (memUsageIt->second < SWAP_THRESHOLD) ?
                        cv::Vec3b{0, 255, 255} :
                        cv::Vec3b{0, 0, 255};
                cv::Point right{histxPos,
                    graph.rows - static_cast<int>(graphRectHeight * (memUsageIt->first + memUsageIt->second) / range)};
                cv::Point left{histxPos - sampleStep,
                    graph.rows - static_cast<int>(
                        graphRectHeight * ((memUsageIt + 1)->first + (memUsageIt + 1)->second) / range)};
                cv::line(graph, right, left, color, 2);
                histxPos -= sampleStep;
            }
        }

        cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
            cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
        cv::rectangle(frame, {border}, {0, 0, 0});
        if (lastHistory.empty()) {
            strStream.str("Memory");
        } else {
            strStream.str("");
            strStream << std::fixed << std::setprecision(1) << lastHistory.back().first << " + "
                << lastHistory.back().second << " GiB";
        }
        int baseline;
        int textWidth = cv::getTextSize(strStream.str(),
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            1,
            &baseline).width;
        cv::putText(graph,
            strStream.str(),
            cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
            cv::FONT_HERSHEY_SIMPLEX,
            textGraphSplittingLine * 0.04,
            {0, 35, 35});
    }
}
|
||||
|
||||
std::vector<std::string> Presenter::reportMeans() const {
|
||||
std::vector<std::string> collectedData;
|
||||
if (cpuMonitor.getHistorySize() > 1 || distributionCpuEnabled || memoryMonitor.getHistorySize() > 1) {
|
||||
collectedData.push_back("Resources usage:");
|
||||
}
|
||||
if (cpuMonitor.getHistorySize() > 1) {
|
||||
std::ostringstream collectedDataStream;
|
||||
collectedDataStream << std::fixed << std::setprecision(1);
|
||||
collectedDataStream << "\tMean core utilization: ";
|
||||
for (double mean : cpuMonitor.getMeanCpuLoad()) {
|
||||
collectedDataStream << mean * 100 << "% ";
|
||||
}
|
||||
collectedData.push_back(collectedDataStream.str());
|
||||
}
|
||||
if (distributionCpuEnabled) {
|
||||
std::ostringstream collectedDataStream;
|
||||
collectedDataStream << std::fixed << std::setprecision(1);
|
||||
std::vector<double> meanCpuLoad = cpuMonitor.getMeanCpuLoad();
|
||||
double mean = std::accumulate(meanCpuLoad.begin(), meanCpuLoad.end(), 0.0) / meanCpuLoad.size();
|
||||
collectedDataStream << "\tMean CPU utilization: " << mean * 100 << "%";
|
||||
collectedData.push_back(collectedDataStream.str());
|
||||
}
|
||||
if (memoryMonitor.getHistorySize() > 1) {
|
||||
std::ostringstream collectedDataStream;
|
||||
collectedDataStream << std::fixed << std::setprecision(1);
|
||||
collectedDataStream << "\tMemory mean usage: " << memoryMonitor.getMeanMem() << " GiB";
|
||||
collectedData.push_back(collectedDataStream.str());
|
||||
collectedDataStream.str("");
|
||||
collectedDataStream << "\tMean swap usage: " << memoryMonitor.getMeanSwap() << " GiB";
|
||||
collectedData.push_back(collectedDataStream.str());
|
||||
}
|
||||
|
||||
return collectedData;
|
||||
}
|
||||
22
engines/OpenVINOEngine/src/monitors/query_wrapper.cpp
Normal file
22
engines/OpenVINOEngine/src/monitors/query_wrapper.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2019-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "query_wrapper.h"
|
||||
|
||||
#include <Windows.h>
|
||||
#include <system_error>
|
||||
|
||||
// Opens a Windows PDH performance-counter query; the handle lives for the
// lifetime of this object (RAII). Throws std::system_error on failure.
QueryWrapper::QueryWrapper() {
    PDH_STATUS status = PdhOpenQuery(NULL, NULL, &query);
    if (ERROR_SUCCESS != status) {
        throw std::system_error(status, std::system_category(), "PdhOpenQuery() failed");
    }
}

// Releases the query handle; PdhCloseQuery's status is intentionally ignored
// (destructors must not throw).
QueryWrapper::~QueryWrapper() {
    PdhCloseQuery(query);
}

// Implicit conversion so the wrapper can be passed straight to PDH APIs.
QueryWrapper::operator PDH_HQUERY() const {
    return query;
}
|
||||
17
engines/OpenVINOEngine/src/monitors/query_wrapper.h
Normal file
17
engines/OpenVINOEngine/src/monitors/query_wrapper.h
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2019-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Pdh.h>
|
||||
// RAII owner of a PDH performance-counter query handle (PDH_HQUERY).
// Non-copyable: the handle has unique ownership and is closed on destruction.
class QueryWrapper {
public:
    QueryWrapper();
    ~QueryWrapper();
    QueryWrapper(const QueryWrapper&) = delete;
    QueryWrapper& operator=(const QueryWrapper&) = delete;
    // Implicit conversion for direct use with Pdh* API calls.
    operator PDH_HQUERY() const;
private:
    PDH_HQUERY query;  // owned handle, opened in the constructor
};
|
||||
163
engines/OpenVINOEngine/src/pipelines/async_pipeline.cpp
Normal file
163
engines/OpenVINOEngine/src/pipelines/async_pipeline.cpp
Normal file
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "pipelines/async_pipeline.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
#include <models/model_base.h>
|
||||
#include <models/results.h>
|
||||
#include <utils/config_factory.h>
|
||||
#include <utils/performance_metrics.hpp>
|
||||
#include <utils/slog.hpp>
|
||||
|
||||
struct InputData;
|
||||
struct MetaData;
|
||||
|
||||
// Takes ownership of the model, compiles it for the configured device(s), and
// allocates the pool of inference requests used for asynchronous execution.
AsyncPipeline::AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core)
    : model(std::move(modelInstance)) {
    compiledModel = model->compileModel(config, core);
    // --------------------------- Create infer requests ------------------------------------------------
    unsigned int nireq = config.maxAsyncRequests;
    if (nireq == 0) {
        // 0 means "auto": ask the plugin for its optimal request count.
        try {
            // +1 to use it as a buffer of the pipeline
            nireq = compiledModel.get_property(ov::optimal_number_of_infer_requests) + 1;
        } catch (const ov::Exception& ex) {
            // Best-effort: fall back to whatever nireq currently holds.
            std::cerr << "Can't query optimal number of infer requests: " << ex.what() << std::endl;
        }
    }
    slog::info << "\tNumber of inference requests: " << nireq << slog::endl;
    requestsPool.reset(new RequestsPool(compiledModel, nireq));
    // --------------------------- Call onLoadCompleted to complete initialization of model -------------
    model->onLoadCompleted(requestsPool->getInferRequestsList());
}

// Blocks until every in-flight request finishes so callbacks never touch a
// destroyed pipeline.
AsyncPipeline::~AsyncPipeline() {
    waitForTotalCompletion();
}
|
||||
|
||||
// Blocks until there is something for the caller to act on: a stored callback
// exception, a free request to submit into, or a completed result (the next
// in-order frame when shouldKeepOrder, any frame otherwise).
void AsyncPipeline::waitForData(bool shouldKeepOrder) {
    std::unique_lock<std::mutex> lock(mtx);

    condVar.wait(lock, [&]() {
        return callbackException != nullptr || requestsPool->isIdleRequestAvailable() ||
               (shouldKeepOrder ? completedInferenceResults.find(outputFrameId) != completedInferenceResults.end()
                                : !completedInferenceResults.empty());
    });

    // NOTE(review): this branch returns without rethrowing or clearing
    // callbackException, so it is currently a no-op — confirm whether a
    // std::rethrow_exception(callbackException) was intended here.
    if (callbackException) {
        return;
    }
}
|
||||
|
||||
// Submits one frame for asynchronous inference.
// Returns the frame id assigned to this submission, or -1 when no idle
// request is available (caller should wait and retry).
int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) {
    auto frameID = inputFrameId;

    auto request = requestsPool->getIdleRequest();
    if (!request) {
        return -1;
    }

    auto startTime = std::chrono::steady_clock::now();
    auto internalModelData = model->preprocess(inputData, request);
    preprocessMetrics.update(startTime);

    // The lambda captures by value (mutable) so the request handle, metadata
    // and preprocessed data stay alive until the callback fires.
    request.set_callback(
        [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable {
            {
                // Protects completedInferenceResults / callbackException.
                const std::lock_guard<std::mutex> lock(mtx);
                inferenceMetrics.update(startTime);
                try {
                    if (ex) {
                        std::rethrow_exception(ex);
                    }
                    InferenceResult result;

                    result.frameId = frameID;
                    result.metaData = std::move(metaData);
                    result.internalModelData = std::move(internalModelData);

                    // Copy out every output tensor before the request is reused.
                    for (const auto& outName : model->getOutputsNames()) {
                        auto tensor = request.get_tensor(outName);
                        result.outputsData.emplace(outName, tensor);
                    }

                    completedInferenceResults.emplace(frameID, result);
                    requestsPool->setRequestIdle(request);
                } catch (...) {
                    // Keep only the first exception; it is surfaced via waitForData().
                    if (!callbackException) {
                        callbackException = std::current_exception();
                    }
                }
            }
            condVar.notify_one();
        });

    // Monotonically increasing id; wraps back to 0 on signed overflow.
    inputFrameId++;
    if (inputFrameId < 0)
        inputFrameId = 0;

    request.start_async();

    return frameID;
}
|
||||
|
||||
// Pops the next completed inference result (in submission order when
// shouldKeepOrder) and runs the model's postprocessing on it.
// Returns an empty unique_ptr when no result is ready yet.
std::unique_ptr<ResultBase> AsyncPipeline::getResult(bool shouldKeepOrder) {
    auto infResult = AsyncPipeline::getInferenceResult(shouldKeepOrder);
    if (infResult.IsEmpty()) {
        return std::unique_ptr<ResultBase>();
    }
    auto startTime = std::chrono::steady_clock::now();
    auto result = model->postprocess(infResult);
    postprocessMetrics.update(startTime);

    // Copy the base-class fields (frame id, metadata, ...) into the
    // postprocessed result object.
    *result = static_cast<ResultBase&>(infResult);
    return result;
}
|
||||
|
||||
// Extracts one completed raw inference result under the lock. With
// shouldKeepOrder, only the frame with id == outputFrameId is taken; otherwise
// the first available one. Returns an empty InferenceResult when none is ready.
InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) {
    InferenceResult retVal;
    {
        const std::lock_guard<std::mutex> lock(mtx);

        const auto& it =
            shouldKeepOrder ? completedInferenceResults.find(outputFrameId) : completedInferenceResults.begin();

        if (it != completedInferenceResults.end()) {
            retVal = std::move(it->second);
            completedInferenceResults.erase(it);
        }
    }

    // Advance the expected output id past the frame we just returned,
    // mirroring the wrap-to-zero behavior of inputFrameId.
    if (!retVal.IsEmpty()) {
        outputFrameId = retVal.frameId;
        outputFrameId++;
        if (outputFrameId < 0) {
            outputFrameId = 0;
        }
    }

    return retVal;
}
|
||||
94
engines/OpenVINOEngine/src/pipelines/requests_pool.cpp
Normal file
94
engines/OpenVINOEngine/src/pipelines/requests_pool.cpp
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "pipelines/requests_pool.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <exception>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
// Creates `size` infer requests up front; each entry pairs a request with a
// busy flag (false = idle).
RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : numRequestsInUse(0) {
    for (unsigned int infReqId = 0; infReqId < size; ++infReqId) {
        requests.emplace_back(compiledModel.create_infer_request(), false);
    }
}

RequestsPool::~RequestsPool() {
    // Setting empty callback to free resources allocated for previously assigned lambdas
    for (auto& pair : requests) {
        pair.first.set_callback([](std::exception_ptr) {});
    }
}
|
||||
|
||||
// Atomically claims an idle request from the pool and marks it busy.
// Returns a default-constructed (falsy) ov::InferRequest when all are in use.
ov::InferRequest RequestsPool::getIdleRequest() {
    std::lock_guard<std::mutex> lock(mtx);

    // First entry whose busy flag is false.
    const auto& it = std::find_if(requests.begin(), requests.end(), [](const std::pair<ov::InferRequest, bool>& x) {
        return !x.second;
    });
    if (it == requests.end()) {
        return ov::InferRequest();
    } else {
        it->second = true;
        numRequestsInUse++;
        return it->first;
    }
}
|
||||
|
||||
// Returns a previously claimed request to the pool, clearing its busy flag
// and decrementing the in-use counter. Called from inference callbacks.
void RequestsPool::setRequestIdle(const ov::InferRequest& request) {
    std::lock_guard<std::mutex> lock(mtx);
    const auto it = std::find_if(this->requests.begin(),
                                 this->requests.end(),
                                 [&request](const std::pair<ov::InferRequest, bool>& x) {
                                     return x.first == request;
                                 });
    // Guard: the previous code dereferenced end() (undefined behavior) if the
    // request did not belong to this pool; ignore such a request instead.
    if (it == this->requests.end()) {
        return;
    }
    it->second = false;
    numRequestsInUse--;
}
|
||||
|
||||
// Number of requests currently claimed (busy).
size_t RequestsPool::getInUseRequestsCount() {
    std::lock_guard<std::mutex> lock(mtx);
    return numRequestsInUse;
}

// True when at least one request in the pool is idle.
bool RequestsPool::isIdleRequestAvailable() {
    std::lock_guard<std::mutex> lock(mtx);
    return numRequestsInUse < requests.size();
}
}
|
||||
|
||||
void RequestsPool::waitForTotalCompletion() {
|
||||
// Do not synchronize here to avoid deadlock (despite synchronization in other functions)
|
||||
// Request status will be changed to idle in callback,
|
||||
// upon completion of request we're waiting for. Synchronization is applied there
|
||||
for (auto pair : requests) {
|
||||
if (pair.second) {
|
||||
pair.first.wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns copies of all request handles (busy or idle), e.g. for model
// initialization hooks that need to touch every request.
std::vector<ov::InferRequest> RequestsPool::getInferRequestsList() {
    std::lock_guard<std::mutex> lock(mtx);
    std::vector<ov::InferRequest> retVal;
    retVal.reserve(requests.size());
    for (auto& pair : requests) {
        retVal.push_back(pair.first);
    }

    return retVal;
}
|
||||
398
engines/OpenVINOEngine/src/tracker/tracker.cpp
Normal file
398
engines/OpenVINOEngine/src/tracker/tracker.cpp
Normal file
@@ -0,0 +1,398 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "logger.hpp"
|
||||
#include "tracker/tracker.hpp"
|
||||
|
||||
// Sentinel label index for detections whose class is not known.
const int TrackedObject::UNKNOWN_LABEL_IDX = -1;
|
||||
|
||||
// Geometric center of a rectangle, truncated to integer pixel coordinates.
cv::Point Center(const cv::Rect& rect) {
    const int cx = static_cast<int>(rect.x + rect.width * 0.5);
    const int cy = static_cast<int>(rect.y + rect.height * 0.5);
    return cv::Point(cx, cy);
}
|
||||
|
||||
// Default tracker tuning. Values are frame counts, affinity weights/thresholds,
// and detection filters as noted per field.
TrackerParams::TrackerParams() :
    min_track_duration(25),              // min frames before a track counts as valid
    forget_delay(150),                   // frames a lost track is kept before dropping
    affinity_thr(0.85f),                 // min match affinity to extend a track
    shape_affinity_w(0.5f),              // weight of the shape term in affinity
    motion_affinity_w(0.2f),             // weight of the motion term in affinity
    min_det_conf(0.0f),                  // detections at or below this confidence are ignored
    bbox_aspect_ratios_range(0.666f, 5.0f),  // accepted height/width ratios
    bbox_heights_range(1, 1280),         // accepted bbox heights, pixels
    drop_forgotten_tracks(true),         // prune forgotten tracks each Process()
    max_num_objects_in_track(300),       // cap on stored detections per track
    averaging_window_size_for_rects(1),
    averaging_window_size_for_labels(1) {}
|
||||
|
||||
// True when x lies within the inclusive range [v[0], v[1]].
bool IsInRange(float x, const cv::Vec2f& v) {
    return x >= v[0] && x <= v[1];
}

// True when x lies within the inclusive range [a, b].
bool IsInRange(float x, float a, float b) {
    return x >= a && x <= b;
}
|
||||
|
||||
// Replaces m_detections with the subset of `detections` that pass the
// configured filters: confidence above min_det_conf, aspect ratio inside
// bbox_aspect_ratios_range, and height inside bbox_heights_range.
void Tracker::FilterDetectionsAndStore(const TrackedObjects& detections) {
    m_detections.clear();
    for (const auto& det : detections) {
        float aspect_ratio = static_cast<float>(det.rect.height) / det.rect.width;
        if (det.confidence > m_params.min_det_conf &&
            IsInRange(aspect_ratio, m_params.bbox_aspect_ratios_range) &&
            IsInRange(static_cast<float>(det.rect.height), m_params.bbox_heights_range)) {
            m_detections.emplace_back(det);
        }
    }
}
|
||||
|
||||
// Solves the track/detection assignment with the Kuhn-Munkres (Hungarian)
// algorithm over the dissimilarity matrix.
// Outputs: `matches` holds (track_id, detection_idx, affinity) triples;
// `unmatched_tracks` gets tracks with no assigned detection;
// `unmatched_detections` is initialized to ALL detection indices here —
// matched ones are erased later by the caller (see Tracker::Process).
// Both input sets must be non-empty; all output pointers must be non-null.
void Tracker::SolveAssignmentProblem(
    const std::set<size_t>& track_ids, const TrackedObjects& detections,
    std::set<size_t>* unmatched_tracks, std::set<size_t>* unmatched_detections,
    std::set<std::tuple<size_t, size_t, float>>* matches) {
    CV_Assert(unmatched_tracks);
    CV_Assert(unmatched_detections);
    unmatched_tracks->clear();
    unmatched_detections->clear();

    CV_Assert(!track_ids.empty());
    CV_Assert(!detections.empty());
    CV_Assert(matches);
    matches->clear();

    cv::Mat dissimilarity;
    ComputeDissimilarityMatrix(track_ids, detections, &dissimilarity);

    // res[i] is the detection column assigned to matrix row i.
    auto res = KuhnMunkres().Solve(dissimilarity);

    for (size_t i = 0; i < detections.size(); i++) {
        unmatched_detections->insert(i);
    }

    // Row order follows std::set iteration order of track_ids (ascending),
    // matching the row order used in ComputeDissimilarityMatrix.
    size_t i = 0;
    for (size_t id : track_ids) {
        if (res[i] < detections.size()) {
            // Affinity = 1 - dissimilarity.
            matches->emplace(id, res[i], 1 - dissimilarity.at<float>(i, res[i]));
        } else {
            unmatched_tracks->insert(id);
        }
        i++;
    }
}
|
||||
|
||||
// Deactivates a track whose latest bbox center has left the frame.
// Returns true when the track no longer exists or was deactivated here
// (it is marked past forget_delay so DropForgottenTracks will prune it).
bool Tracker::EraseTrackIfBBoxIsOutOfFrame(size_t track_id) {
    if (m_tracks.find(track_id) == m_tracks.end())
        return true;
    auto c = Center(m_tracks.at(track_id).back().rect);
    // m_frame_size == cv::Size() means the frame size isn't known yet.
    if (m_frame_size != cv::Size() &&
        (c.x < 0 || c.y < 0 || c.x > m_frame_size.width ||
         c.y > m_frame_size.height)) {
        m_tracks.at(track_id).lost = m_params.forget_delay + 1;
        m_active_track_ids.erase(track_id);
        return true;
    }
    return false;
}
|
||||
|
||||
// Deactivates a track whose lost-frame counter exceeded forget_delay.
// Returns true when the track no longer exists or was deactivated here.
bool Tracker::EraseTrackIfItWasLostTooManyFramesAgo(size_t track_id) {
    if (m_tracks.find(track_id) == m_tracks.end())
        return true;
    if (m_tracks.at(track_id).lost > m_params.forget_delay) {
        m_active_track_ids.erase(track_id);
        return true;
    }
    return false;
}
|
||||
|
||||
// Bumps a track's lost-frame counter and deactivates it when either its bbox
// left the frame or it has been lost longer than forget_delay.
// Returns true when the track was deactivated.
// (The "Uptate" typo is part of the public interface and must stay.)
bool Tracker::UptateLostTrackAndEraseIfItsNeeded(size_t track_id) {
    m_tracks.at(track_id).lost++;
    // Short-circuit keeps the original order: the frame-bounds check runs
    // first, the lost-delay check only if the track survived it.
    return EraseTrackIfBBoxIsOutOfFrame(track_id) ||
           EraseTrackIfItWasLostTooManyFramesAgo(track_id);
}
|
||||
|
||||
// Applies the lost-track update (counter bump + possible deactivation) to
// every track in the given set.
void Tracker::UpdateLostTracks(const std::set<size_t>& track_ids) {
    for (auto track_id : track_ids) {
        UptateLostTrackAndEraseIfItsNeeded(track_id);
    }
}
|
||||
|
||||
// Runs one tracking step on the detections of frame `frame_idx`:
// filter detections, match them to active tracks via the assignment problem,
// extend matched tracks, start new tracks from unmatched detections, age
// unmatched tracks, and optionally prune forgotten tracks.
void Tracker::Process(const cv::Mat& frame, const TrackedObjects& detections, int frame_idx) {
    // Frame size is latched on the first call and must not change afterwards.
    if (m_frame_size == cv::Size()) {
        m_frame_size = frame.size();
    } else {
        CV_Assert(m_frame_size == frame.size());
    }

    FilterDetectionsAndStore(detections);
    for (auto& obj : m_detections) {
        obj.frame_idx = frame_idx;
    }

    // Copy: the erase helpers below mutate m_active_track_ids while iterating.
    auto active_tracks = m_active_track_ids;

    if (!active_tracks.empty() && !m_detections.empty()) {
        std::set<size_t> unmatched_tracks, unmatched_detections;
        std::set<std::tuple<size_t, size_t, float>> matches;

        SolveAssignmentProblem(active_tracks, m_detections, &unmatched_tracks,
                               &unmatched_detections, &matches);

        for (const auto& match : matches) {
            size_t track_id = std::get<0>(match);
            size_t det_id = std::get<1>(match);
            float conf = std::get<2>(match);
            // Only accept matches whose affinity clears the threshold;
            // weak matches leave the track unmatched instead.
            if (conf > m_params.affinity_thr) {
                AppendToTrack(track_id, m_detections[det_id]);
                unmatched_detections.erase(det_id);
            } else {
                unmatched_tracks.insert(track_id);
            }
        }

        AddNewTracks(m_detections, unmatched_detections);
        UpdateLostTracks(unmatched_tracks);

        for (size_t id : active_tracks) {
            EraseTrackIfBBoxIsOutOfFrame(id);
        }
    } else {
        // No matching possible: every detection starts a track, every
        // previously active track ages by one lost frame.
        AddNewTracks(m_detections);
        UpdateLostTracks(active_tracks);
    }

    if (m_params.drop_forgotten_tracks)
        DropForgottenTracks();
}
|
||||
|
||||
// Removes forgotten tracks and, when ids have grown past kMaxTrackID,
// compacts the surviving tracks onto fresh consecutive ids starting from 0.
void Tracker::DropForgottenTracks() {
    std::unordered_map<size_t, Track> new_tracks;
    std::set<size_t> new_active_tracks;

    size_t max_id = 0;
    if (!m_active_track_ids.empty())
        max_id = *std::max_element(m_active_track_ids.begin(), m_active_track_ids.end());

    const size_t kMaxTrackID = 10000;
    bool reassign_id = max_id > kMaxTrackID;

    size_t counter = 0;
    for (const auto& pair : m_tracks) {
        if (!IsTrackForgotten(pair.first)) {
            new_tracks.emplace(reassign_id ? counter : pair.first, pair.second);
            // NOTE(review): every surviving track is put into the active set
            // here, even if it was not in m_active_track_ids before — confirm
            // this reactivation is intended.
            new_active_tracks.emplace(reassign_id ? counter : pair.first);
            counter++;
        }
    }
    m_tracks.swap(new_tracks);
    m_active_track_ids.swap(new_active_tracks);

    // After compaction the next fresh id continues after the kept tracks.
    m_tracks_counter = reassign_id ? counter : m_tracks_counter;
}
|
||||
|
||||
// Shape similarity of two bboxes in (0, 1]: 1 for identical width/height,
// decaying exponentially with the normalized width and height differences,
// scaled by shape_affinity_w.
float Tracker::ShapeAffinity(const cv::Rect& trk, const cv::Rect& det) {
    float w_dist = static_cast<float>(std::fabs(trk.width - det.width)) / static_cast<float>(trk.width + det.width);
    float h_dist = static_cast<float>(std::fabs(trk.height - det.height)) / static_cast<float>(trk.height + det.height);
    return exp(-m_params.shape_affinity_w * (w_dist + h_dist));
}

// Motion similarity of two bboxes in (0, 1]: 1 for identical positions,
// decaying exponentially with the squared displacement normalized by the
// detection's size, scaled by motion_affinity_w.
float Tracker::MotionAffinity(const cv::Rect& trk, const cv::Rect& det) {
    float x_dist = static_cast<float>(trk.x - det.x) * (trk.x - det.x) / (det.width * det.width);
    float y_dist = static_cast<float>(trk.y - det.y) * (trk.y - det.y) / (det.height * det.height);
    return exp(-m_params.motion_affinity_w * (x_dist + y_dist));
}
|
||||
|
||||
void Tracker::ComputeDissimilarityMatrix(
|
||||
const std::set<size_t>& active_tracks, const TrackedObjects& detections, cv::Mat* dissimilarity_matrix) {
|
||||
dissimilarity_matrix->create(active_tracks.size(), detections.size(), CV_32F);
|
||||
size_t i = 0;
|
||||
for (auto id : active_tracks) {
|
||||
auto ptr = dissimilarity_matrix->ptr<float>(i);
|
||||
for (size_t j = 0; j < detections.size(); j++) {
|
||||
auto last_det = m_tracks.at(id).objects.back();
|
||||
ptr[j] = Distance(last_det, detections[j]);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// Starts a new track for every given detection.
void Tracker::AddNewTracks(const TrackedObjects& detections) {
    for (size_t i = 0; i < detections.size(); i++) {
        AddNewTrack(detections[i]);
    }
}

// Starts a new track for each detection whose index appears in `ids`.
// Every index must be valid for `detections`.
void Tracker::AddNewTracks(const TrackedObjects& detections, const std::set<size_t>& ids) {
    for (size_t i : ids) {
        CV_Assert(i < detections.size());
        AddNewTrack(detections[i]);
    }
}
|
||||
|
||||
void Tracker::AddNewTrack(const TrackedObject& detection) {
|
||||
auto detection_with_id = detection;
|
||||
detection_with_id.object_id = m_tracks_counter;
|
||||
m_tracks.emplace(std::pair<size_t, Track>(m_tracks_counter, Track({detection_with_id})));
|
||||
|
||||
m_active_track_ids.insert(m_tracks_counter);
|
||||
m_tracks_counter++;
|
||||
}
|
||||
|
||||
void Tracker::AppendToTrack(size_t track_id, const TrackedObject& detection) {
|
||||
CV_Assert(!IsTrackForgotten(track_id));
|
||||
|
||||
auto detection_with_id = detection;
|
||||
detection_with_id.object_id = track_id;
|
||||
|
||||
auto& track = m_tracks.at(track_id);
|
||||
|
||||
track.objects.emplace_back(detection_with_id);
|
||||
track.lost = 0;
|
||||
track.length++;
|
||||
|
||||
if (m_params.max_num_objects_in_track > 0) {
|
||||
while (track.size() > static_cast<size_t>(m_params.max_num_objects_in_track)) {
|
||||
track.objects.erase(track.objects.begin());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float Tracker::Distance(const TrackedObject& obj1, const TrackedObject& obj2) {
|
||||
const float eps = 1e-6f;
|
||||
float shp_aff = ShapeAffinity(obj1.rect, obj2.rect);
|
||||
if (shp_aff < eps) return 1.0;
|
||||
|
||||
float mot_aff = MotionAffinity(obj1.rect, obj2.rect);
|
||||
if (mot_aff < eps) return 1.0;
|
||||
|
||||
return 1.0f - shp_aff * mot_aff;
|
||||
}
|
||||
|
||||
bool Tracker::IsTrackValid(size_t id) const {
|
||||
const auto& track = m_tracks.at(id);
|
||||
const auto& objects = track.objects;
|
||||
if (objects.empty()) {
|
||||
return false;
|
||||
}
|
||||
size_t duration_frames = objects.back().frame_idx - track.first_object.frame_idx;
|
||||
if (duration_frames < m_params.min_track_duration)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Tracker::IsTrackForgotten(size_t id) const {
|
||||
return m_tracks.at(id).lost > m_params.forget_delay;
|
||||
}
|
||||
|
||||
void Tracker::Reset() {
    // Drop all tracking state and return the tracker to its initial condition.
    m_tracks.clear();
    m_active_track_ids.clear();
    m_detections.clear();
    m_tracks_counter = 0;
    m_frame_size = cv::Size();
}
|
||||
|
||||
TrackedObjects Tracker::TrackedDetections() const {
    // Returns the latest observation of every active, valid, currently-matched
    // (not lost) track.
    TrackedObjects detections;
    for (size_t idx : active_track_ids()) {
        // Take the track by const reference: the original copied the whole
        // Track (including its full object history) on every iteration.
        const auto& track = tracks().at(idx);
        if (IsTrackValid(idx) && !track.lost) {
            detections.emplace_back(track.objects.back());
        }
    }
    return detections;
}
|
||||
|
||||
TrackedObjects Tracker::TrackedDetectionsWithLabels() const {
    // Like TrackedDetections(), but each returned object gets:
    //  - a rect averaged over the last `averaging_window_size_for_rects`
    //    observations of the track (smooths out jitter), and
    //  - the most frequent label over the last
    //    `averaging_window_size_for_labels` observations.
    TrackedObjects detections;
    for (size_t idx : active_track_ids()) {
        const auto& track = tracks().at(idx);
        if (IsTrackValid(idx) && !track.lost) {
            // `object` starts as the newest observation, so its rect already
            // contributes to the sum — hence counter starts at 1.
            TrackedObject object = track.objects.back();
            int counter = 1;
            // First index of the averaging window (0 when the track is shorter
            // than the window).
            size_t start = static_cast<int>(track.objects.size()) >= m_params.averaging_window_size_for_rects ?
                track.objects.size() - m_params.averaging_window_size_for_rects : 0;

            // Accumulate the windowed rects; the newest (size() - 1) is
            // excluded because it is already held in `object`.
            for (size_t i = start; i < track.objects.size() - 1; i++) {
                object.rect.width += track.objects[i].rect.width;
                object.rect.height += track.objects[i].rect.height;
                object.rect.x += track.objects[i].rect.x;
                object.rect.y += track.objects[i].rect.y;
                counter++;
            }
            // Integer average of the accumulated window.
            object.rect.width /= counter;
            object.rect.height /= counter;
            object.rect.x /= counter;
            object.rect.y /= counter;

            object.label = LabelWithMaxFrequencyInTrack(track, m_params.averaging_window_size_for_labels);
            object.object_id = idx;

            detections.push_back(object);
        }
    }
    return detections;
}
|
||||
|
||||
int LabelWithMaxFrequencyInTrack(const Track& track, int window_size) {
|
||||
std::unordered_map<int, int> frequencies;
|
||||
int max_frequent_count = 0;
|
||||
int max_frequent_id = TrackedObject::UNKNOWN_LABEL_IDX;
|
||||
|
||||
int start = static_cast<int>(track.objects.size()) >= window_size ?
|
||||
static_cast<int>(track.objects.size()) - window_size : 0;
|
||||
|
||||
for (size_t i = start; i < track.objects.size(); i++) {
|
||||
const auto& detection = track.objects[i];
|
||||
if (detection.label == TrackedObject::UNKNOWN_LABEL_IDX)
|
||||
continue;
|
||||
int count = ++frequencies[detection.label];
|
||||
if (count > max_frequent_count) {
|
||||
max_frequent_count = count;
|
||||
max_frequent_id = detection.label;
|
||||
}
|
||||
}
|
||||
return max_frequent_id;
|
||||
}
|
||||
|
||||
std::vector<Track> UpdateTrackLabelsToBestAndFilterOutUnknowns(const std::vector<Track>& tracks) {
|
||||
std::vector<Track> new_tracks;
|
||||
for (auto& track : tracks) {
|
||||
int best_label = LabelWithMaxFrequencyInTrack(track, std::numeric_limits<int>::max());
|
||||
if (best_label == TrackedObject::UNKNOWN_LABEL_IDX)
|
||||
continue;
|
||||
|
||||
Track new_track = track;
|
||||
|
||||
for (auto& obj : new_track.objects) {
|
||||
obj.label = best_label;
|
||||
}
|
||||
new_track.first_object.label = best_label;
|
||||
|
||||
new_tracks.emplace_back(std::move(new_track));
|
||||
}
|
||||
return new_tracks;
|
||||
}
|
||||
|
||||
// Read-only access to all tracks (including forgotten ones), keyed by track id.
const std::unordered_map<size_t, Track>& Tracker::tracks() const {
    return m_tracks;
}
|
||||
|
||||
std::vector<Track> Tracker::vector_tracks() const {
|
||||
std::set<size_t> keys;
|
||||
for (auto& cur_pair : tracks()) {
|
||||
keys.insert(cur_pair.first);
|
||||
}
|
||||
std::vector<Track> vec_tracks;
|
||||
for (size_t k : keys) {
|
||||
vec_tracks.push_back(tracks().at(k));
|
||||
}
|
||||
return vec_tracks;
|
||||
}
|
||||
187
engines/OpenVINOEngine/src/utils/args_helper.cpp
Normal file
187
engines/OpenVINOEngine/src/utils/args_helper.cpp
Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "utils/args_helper.hpp"
|
||||
#include "utils/slog.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "w_dirent.hpp"
|
||||
#else
|
||||
#include <dirent.h>
|
||||
#endif
|
||||
|
||||
//#include <gflags/gflags.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
|
||||
void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) {
|
||||
struct stat sb;
|
||||
if (stat(arg.c_str(), &sb) != 0) {
|
||||
if (arg.compare(0, 5, "rtsp:") != 0) {
|
||||
slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (S_ISDIR(sb.st_mode)) {
|
||||
DIR *dp;
|
||||
dp = opendir(arg.c_str());
|
||||
if (dp == nullptr) {
|
||||
slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
struct dirent *ep;
|
||||
while (nullptr != (ep = readdir(dp))) {
|
||||
std::string fileName = ep->d_name;
|
||||
if (fileName == "." || fileName == "..") continue;
|
||||
files.push_back(arg + "/" + ep->d_name);
|
||||
}
|
||||
closedir(dp);
|
||||
} else {
|
||||
files.push_back(arg);
|
||||
}
|
||||
}
|
||||
|
||||
//void parseInputFilesArguments(std::vector<std::string>& files) {
|
||||
// std::vector<std::string> args = gflags::GetArgvs();
|
||||
// bool readArguments = false;
|
||||
// for (size_t i = 0; i < args.size(); i++) {
|
||||
// if (args.at(i) == "-i" || args.at(i) == "--i") {
|
||||
// readArguments = true;
|
||||
// continue;
|
||||
// }
|
||||
// if (!readArguments) {
|
||||
// continue;
|
||||
// }
|
||||
// if (args.at(i).c_str()[0] == '-') {
|
||||
// break;
|
||||
// }
|
||||
// readInputFilesArguments(files, args.at(i));
|
||||
// }
|
||||
//}
|
||||
|
||||
std::vector<std::string> split(const std::string& s, char delim) {
    // Splits `s` on `delim`. Mirrors std::getline semantics: an empty input
    // yields an empty vector and a trailing delimiter produces no trailing
    // empty token.
    std::vector<std::string> tokens;
    std::stringstream stream(s);
    for (std::string token; std::getline(stream, token, delim);) {
        tokens.push_back(token);
    }
    return tokens;
}
|
||||
|
||||
void split(const std::string& s, char delim, std::vector<float> &out) {
    // Splits `s` on `delim` and appends each token parsed as float to `out`.
    // Tokens that fail to parse are skipped, with a message printed to stderr.
    std::stringstream stream(s);
    for (std::string token; std::getline(stream, token, delim);) {
        try {
            out.push_back(std::stof(token));
        } catch (...) {
            std::cerr<<"cannot split the string: \"" + s + "\" onto floats";
        }
    }
}
|
||||
|
||||
// Joins the range [begin, end) into one string with `delim` between
// consecutive elements and no trailing delimiter; an empty range yields "".
template <class It>
static std::string merge_impl(It begin, It end, const char* delim) {
    std::string result;
    for (It it = begin; it != end; ++it) {
        if (it != begin) {
            result += delim;
        }
        result += *it;
    }
    return result;
}

std::string merge(std::initializer_list<std::string> list, const char* delim) {
    return merge_impl(list.begin(), list.end(), delim);
}

std::string merge(const std::vector<std::string> &list, const char *delim) {
    return merge_impl(list.begin(), list.end(), delim);
}
|
||||
|
||||
std::vector<std::string> parseDevices(const std::string& device_string) {
|
||||
const std::string::size_type colon_position = device_string.find(":");
|
||||
if (colon_position != std::string::npos) {
|
||||
std::string device_type = device_string.substr(0, colon_position);
|
||||
if (device_type == "HETERO" || device_type == "MULTI") {
|
||||
std::string comma_separated_devices = device_string.substr(colon_position + 1);
|
||||
std::vector<std::string> devices = split(comma_separated_devices, ',');
|
||||
for (auto& device : devices)
|
||||
device = device.substr(0, device.find("("));
|
||||
return devices;
|
||||
}
|
||||
}
|
||||
return {device_string};
|
||||
}
|
||||
|
||||
// Format: <device1>:<value1>,<device2>:<value2> or just <value>
|
||||
std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices,
|
||||
const std::string& values_string) {
|
||||
auto values_string_upper = values_string;
|
||||
std::transform(values_string_upper.begin(),
|
||||
values_string_upper.end(),
|
||||
values_string_upper.begin(),
|
||||
[](unsigned char c){ return std::toupper(c); });
|
||||
std::map<std::string, int32_t> result;
|
||||
auto device_value_strings = split(values_string_upper, ',');
|
||||
for (auto& device_value_string : device_value_strings) {
|
||||
auto device_value_vec = split(device_value_string, ':');
|
||||
if (device_value_vec.size() == 2) {
|
||||
auto it = std::find(devices.begin(), devices.end(), device_value_vec.at(0));
|
||||
if (it != devices.end()) {
|
||||
result[device_value_vec.at(0)] = std::stoi(device_value_vec.at(1));
|
||||
}
|
||||
} else if (device_value_vec.size() == 1) {
|
||||
uint32_t value = std::stoi(device_value_vec.at(0));
|
||||
for (const auto& device : devices) {
|
||||
result[device] = value;
|
||||
}
|
||||
} else if (device_value_vec.size() != 0) {
|
||||
std::cerr<<"Unknown string format: " << values_string;
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
cv::Size stringToSize(const std::string& str) {
    // Parses "<width>x<height>" (e.g. "1280x720"). Returns cv::Size(0, 0) for
    // any malformed input.
    std::vector<std::string> strings = split(str, 'x');
    if (strings.size() != 2) {
        return cv::Size(0, 0);
    }
    try {
        return {std::stoi(strings[0]), std::stoi(strings[1])};
    } catch (const std::exception&) {
        // Non-numeric component: the original let std::stoi throw here, making
        // garbage like "AxB" fatal while "A" returned (0, 0). Treat both the
        // same way.
        return cv::Size(0, 0);
    }
}
|
||||
|
||||
std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string) {
|
||||
// Parse parameter string like "input0:NCHW,input1:NC" or "NCHW" (applied to all
|
||||
// inputs)
|
||||
std::map<std::string, ov::Layout> layouts;
|
||||
std::string searchStr = (layout_string.find_last_of(':') == std::string::npos && !layout_string.empty() ?
|
||||
":" : "") + layout_string;
|
||||
auto colonPos = searchStr.find_last_of(':');
|
||||
while (colonPos != std::string::npos) {
|
||||
auto startPos = searchStr.find_last_of(',');
|
||||
auto inputName = searchStr.substr(startPos + 1, colonPos - startPos - 1);
|
||||
auto inputLayout = searchStr.substr(colonPos + 1);
|
||||
layouts[inputName] = ov::Layout(inputLayout);
|
||||
searchStr = searchStr.substr(0, startPos + 1);
|
||||
if (searchStr.empty() || searchStr.back() != ',') {
|
||||
break;
|
||||
}
|
||||
searchStr.pop_back();
|
||||
colonPos = searchStr.find_last_of(':');
|
||||
}
|
||||
if (!searchStr.empty()) {
|
||||
return layouts;
|
||||
}
|
||||
return layouts;
|
||||
}
|
||||
65
engines/OpenVINOEngine/src/utils/common.cpp
Normal file
65
engines/OpenVINOEngine/src/utils/common.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
// Copyright (C) 2023-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include "utils/common.hpp"
|
||||
|
||||
// Maps every entry of `imageNames` to its ground-truth class index.
// When `groundtruth_filepath` is empty, every image gets class 0. Otherwise
// the file must contain lines of the form "<image path> <class index>"; an
// entry must exist for every requested image, or a runtime_error is thrown.
std::vector<unsigned> loadClassIndices(const std::string &groundtruth_filepath,
                                       const std::vector<std::string> &imageNames)
{
    std::vector<unsigned> classIndices;
    if (groundtruth_filepath.empty()) {
        // No ground truth available: default every image to class 0.
        classIndices.resize(imageNames.size(), 0);
    } else {
        std::map<std::string, unsigned> classIndicesMap;
        std::ifstream inputGtFile(groundtruth_filepath);
        if (!inputGtFile.is_open()) {
            throw std::runtime_error("Can't open the ground truth file: " + groundtruth_filepath);
        }

        std::string line;
        while (std::getline(inputGtFile, line)) {
            // Each line: "<image path>" and "<class index>" separated by a space.
            size_t separatorIdx = line.find(' ');
            if (separatorIdx == std::string::npos) {
                throw std::runtime_error("The ground truth file has incorrect format.");
            }
            std::string imagePath = line.substr(0, separatorIdx);
            size_t imagePathEndIdx = imagePath.rfind('/');
            unsigned classIndex = static_cast<unsigned>(std::stoul(line.substr(separatorIdx + 1)));
            // Only bare file names or "./<name>" paths are accepted; any other
            // directory component is rejected as a format error.
            if ((imagePathEndIdx != 1 || imagePath[0] != '.') && imagePathEndIdx != std::string::npos) {
                throw std::runtime_error("The ground truth file has incorrect format.");
            }
            // std::map type for classIndicesMap guarantees to sort out images by name.
            // The same logic is applied in openImagesCapture() for DirReader source type,
            // which produces data for sorted pictures.
            // To be coherent in detection of ground truth for pictures we have to
            // use the same sorting approach for a source and ground truth data
            // If you're going to copy paste this code, remember that pictures need to be sorted
            classIndicesMap.insert({imagePath.substr(imagePathEndIdx + 1), classIndex});
        }

        // Every requested image (keyed by bare file name) must have an entry.
        for (size_t i = 0; i < imageNames.size(); i++) {
            auto imageSearchResult = classIndicesMap.find(imageNames[i]);
            if (imageSearchResult != classIndicesMap.end()) {
                classIndices.push_back(imageSearchResult->second);
            } else {
                throw std::runtime_error("No class specified for image " + imageNames[i]);
            }
        }
    }
    return classIndices;
}
|
||||
100
engines/OpenVINOEngine/src/utils/config_factory.cpp
Normal file
100
engines/OpenVINOEngine/src/utils/config_factory.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utils/config_factory.h"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/runtime/intel_gpu/properties.hpp>
|
||||
|
||||
#include "utils/args_helper.hpp"
|
||||
|
||||
std::set<std::string> ModelConfig::getDevices() {
|
||||
if (devices.empty()) {
|
||||
for (const std::string& device : parseDevices(deviceName)) {
|
||||
devices.insert(device);
|
||||
}
|
||||
}
|
||||
|
||||
return devices;
|
||||
}
|
||||
|
||||
ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d,
                                         uint32_t flags_nireq,
                                         const std::string& flags_nstreams,
                                         uint32_t flags_nthreads) {
    // Builds a throughput-oriented config from command-line flags: per-device
    // stream counts from `flags_nstreams`, CPU thread cap from `flags_nthreads`.
    auto config = getCommonConfig(flags_d, flags_nireq);
    std::map<std::string, int> deviceNstreams = parseValuePerDevice(config.getDevices(), flags_nstreams);
    for (const auto& device : config.getDevices()) {
        if (device != "CPU" && device != "GPU")
            continue;
        if (device == "CPU") { // CPU supports a few special performance-oriented keys
            // limit threading for CPU portion of inference
            if (flags_nthreads != 0)
                config.compiledModelConfig.emplace(ov::inference_num_threads.name(), flags_nthreads);
        }
        // Identical for CPU and GPU (previously duplicated byte-for-byte in
        // both branches): explicit stream count when given, otherwise AUTO.
        ov::streams::Num nstreams =
            deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
        config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams);
        if (device == "GPU" && flags_d.find("MULTI") != std::string::npos &&
            config.getDevices().find("CPU") != config.getDevices().end()) {
            // multi-device execution with the CPU + GPU performs best with GPU throttling hint,
            // which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
            config.compiledModelConfig.emplace(ov::intel_gpu::hint::queue_throttle.name(),
                                               ov::intel_gpu::hint::ThrottleLevel(1));
        }
    }
    return config;
}
|
||||
|
||||
ModelConfig ConfigFactory::getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq) {
    // Builds a latency-oriented config: a single inference stream per device.
    auto config = getCommonConfig(flags_d, flags_nireq);
    for (const auto& device : config.getDevices()) {
        // The original had separate, byte-identical CPU and GPU branches; both
        // simply pin the stream count to 1.
        if (device == "CPU" || device == "GPU") {
            config.compiledModelConfig.emplace(ov::streams::num.name(), 1);
        }
    }
    return config;
}
|
||||
|
||||
ModelConfig ConfigFactory::getCommonConfig(const std::string& flags_d, uint32_t flags_nireq) {
    // Shared base for all factory methods: target device (when given on the
    // command line) and the number of parallel inference requests.
    ModelConfig config;
    if (!flags_d.empty()) {
        config.deviceName = flags_d;
    }
    config.maxAsyncRequests = flags_nireq;
    return config;
}
|
||||
|
||||
std::map<std::string, std::string> ModelConfig::getLegacyConfig() const {
|
||||
std::map<std::string, std::string> config;
|
||||
for (const auto& item : compiledModelConfig) {
|
||||
config[item.first] = item.second.as<std::string>();
|
||||
}
|
||||
return config;
|
||||
}
|
||||
55
engines/OpenVINOEngine/src/utils/image_utils.cpp
Normal file
55
engines/OpenVINOEngine/src/utils/image_utils.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utils/image_utils.h"
|
||||
|
||||
cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode,
                       cv::InterpolationFlags interpolationMode, cv::Rect* roi, cv::Scalar BorderConstant) {
    // Resizes `mat` to width x height. RESIZE_FILL stretches; the KEEP_ASPECT
    // modes scale uniformly and pad with BorderConstant (letterbox centers the
    // image, plain KEEP_ASPECT anchors it at the top-left). When `roi` is
    // non-null it receives the rectangle the source occupies in the output.
    if (width == mat.cols && height == mat.rows) {
        return mat;
    }

    cv::Mat dst;

    switch (resizeMode) {
    case RESIZE_FILL:
    {
        // BUG FIX: the interpolation flag was being passed as cv::resize's 4th
        // parameter (`fx`, a double scale factor), not as `interpolation`.
        // Pass fx = fy = 0 so dsize drives the scaling and the flag lands in
        // the correct slot.
        cv::resize(mat, dst, cv::Size(width, height), 0, 0, interpolationMode);
        if (roi) {
            *roi = cv::Rect(0, 0, width, height);
        }
        break;
    }
    case RESIZE_KEEP_ASPECT:
    case RESIZE_KEEP_ASPECT_LETTERBOX:
    {
        double scale = std::min(static_cast<double>(width) / mat.cols, static_cast<double>(height) / mat.rows);
        cv::Mat resizedImage;
        cv::resize(mat, resizedImage, cv::Size(0, 0), scale, scale, interpolationMode);

        // Letterbox centers the scaled image; plain KEEP_ASPECT keeps it at
        // the top-left corner.
        int dx = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (width - resizedImage.cols) / 2;
        int dy = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (height - resizedImage.rows) / 2;

        cv::copyMakeBorder(resizedImage, dst, dy, height - resizedImage.rows - dy,
                           dx, width - resizedImage.cols - dx, cv::BORDER_CONSTANT, BorderConstant);
        if (roi) {
            *roi = cv::Rect(dx, dy, resizedImage.cols, resizedImage.rows);
        }
        break;
    }
    }
    return dst;
}
|
||||
326
engines/OpenVINOEngine/src/utils/images_capture.cpp
Normal file
326
engines/OpenVINOEngine/src/utils/images_capture.cpp
Normal file
@@ -0,0 +1,326 @@
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "utils/images_capture.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# include "w_dirent.hpp"
|
||||
#else
|
||||
# include <dirent.h> // for closedir, dirent, opendir, readdir, DIR
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/videoio.hpp>
|
||||
|
||||
// Thrown by a capture backend's constructor when the input string does not
// designate a source of its kind (e.g. a path that is not an image file);
// openImagesCapture() uses it to fall through to the next backend.
class InvalidInput : public std::runtime_error {
public:
    explicit InvalidInput(const std::string& message) noexcept : std::runtime_error(message) {}
};
|
||||
|
||||
// Thrown by a capture backend's constructor when the input does look like its
// kind of source but cannot actually be opened or decoded.
class OpenError : public std::runtime_error {
public:
    explicit OpenError(const std::string& message) noexcept : std::runtime_error(message) {}
};
|
||||
|
||||
class ImreadWrapper : public ImagesCapture {
|
||||
cv::Mat img;
|
||||
bool canRead;
|
||||
|
||||
public:
|
||||
ImreadWrapper(const std::string& input, bool loop) : ImagesCapture{loop}, canRead{true} {
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
|
||||
std::ifstream file(input.c_str());
|
||||
if (!file.good())
|
||||
throw InvalidInput("Can't find the image by " + input);
|
||||
|
||||
img = cv::imread(input);
|
||||
if (!img.data)
|
||||
throw OpenError("Can't open the image from " + input);
|
||||
else
|
||||
readerMetrics.update(startTime);
|
||||
}
|
||||
|
||||
double fps() const override {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
std::string getType() const override {
|
||||
return "IMAGE";
|
||||
}
|
||||
|
||||
cv::Mat read() override {
|
||||
if (loop)
|
||||
return img.clone();
|
||||
if (canRead) {
|
||||
canRead = false;
|
||||
return img.clone();
|
||||
}
|
||||
return cv::Mat{};
|
||||
}
|
||||
};
|
||||
|
||||
// Capture backend that serves the images of a directory in sorted file-name
// order. Unreadable entries are skipped. The first `initialImageId` readable
// images are skipped up front, and at most `readLengthLimit` images are
// produced per pass over the directory.
class DirReader : public ImagesCapture {
    std::vector<std::string> names;   // sorted directory entries
    size_t fileId;                    // index in `names` of the next entry to try
    size_t nextImgId;                 // images successfully served this pass
    const size_t initialImageId;
    const size_t readLengthLimit;
    const std::string input;

public:
    DirReader(const std::string& input, bool loop, size_t initialImageId, size_t readLengthLimit)
        : ImagesCapture{loop},
          fileId{0},
          nextImgId{0},
          initialImageId{initialImageId},
          readLengthLimit{readLengthLimit},
          input{input} {
        DIR* dir = opendir(input.c_str());
        if (!dir)
            throw InvalidInput("Can't find the dir by " + input);
        while (struct dirent* ent = readdir(dir))
            if (strcmp(ent->d_name, ".") && strcmp(ent->d_name, ".."))
                names.emplace_back(ent->d_name);
        closedir(dir);
        if (names.empty())
            throw OpenError("The dir " + input + " is empty");
        sort(names.begin(), names.end());
        // Advance `fileId` to the entry holding the (initialImageId+1)-th
        // readable image so the first read() starts there.
        size_t readImgs = 0;
        while (fileId < names.size()) {
            cv::Mat img = cv::imread(input + '/' + names[fileId]);
            if (img.data) {
                ++readImgs;
                if (readImgs - 1 >= initialImageId)
                    return;
            }
            ++fileId;
        }
        throw OpenError("Can't read the first image from " + input);
    }

    // A directory of stills has no intrinsic frame rate; 1 fps is a placeholder.
    double fps() const override {
        return 1.0;
    }

    std::string getType() const override {
        return "DIR";
    }

    // Returns the next readable image, or an empty Mat when the pass is
    // exhausted (non-loop mode). In loop mode the scan restarts from the
    // image at `initialImageId`.
    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();

        while (fileId < names.size() && nextImgId < readLengthLimit) {
            cv::Mat img = cv::imread(input + '/' + names[fileId]);
            ++fileId;
            if (img.data) {
                ++nextImgId;
                readerMetrics.update(startTime);
                return img;
            }
        }

        if (loop) {
            // Restart the pass: skip the first `initialImageId` readable
            // images again and return the one after them.
            fileId = 0;
            size_t readImgs = 0;
            while (fileId < names.size()) {
                cv::Mat img = cv::imread(input + '/' + names[fileId]);
                ++fileId;
                if (img.data) {
                    ++readImgs;
                    if (readImgs - 1 >= initialImageId) {
                        nextImgId = 1;
                        readerMetrics.update(startTime);
                        return img;
                    }
                }
            }
        }
        return cv::Mat{};
    }
};
|
||||
|
||||
// Capture backend for video files/streams via cv::VideoCapture. Playback
// starts at frame `initialImageId` and yields at most `readLengthLimit`
// frames; in loop mode the position is rewound to `initialImageId` when
// either the limit or the end of the stream is reached.
class VideoCapWrapper : public ImagesCapture {
    cv::VideoCapture cap;
    bool first_read;               // true until the first read() attempt finishes
    const read_type type;          // read_type::safe clones each returned frame
    size_t nextImgId;              // frames returned since the last (re)start
    const double initialImageId;   // double: the type CAP_PROP_POS_FRAMES expects
    size_t readLengthLimit;

public:
    VideoCapWrapper(const std::string& input, bool loop, read_type type, size_t initialImageId, size_t readLengthLimit)
        : ImagesCapture{loop},
          first_read{true},
          type{type},
          nextImgId{0},
          initialImageId{static_cast<double>(initialImageId)} {
        if (0 == readLengthLimit) {
            throw std::runtime_error("readLengthLimit must be positive");
        }
        if (cap.open(input)) {
            this->readLengthLimit = readLengthLimit;
            if (!cap.set(cv::CAP_PROP_POS_FRAMES, this->initialImageId))
                throw OpenError("Can't set the frame to begin with");
            return;
        }
        throw InvalidInput("Can't open the video from " + input);
    }

    double fps() const override {
        return cap.get(cv::CAP_PROP_FPS);
    }

    std::string getType() const override {
        return "VIDEO";
    }

    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();

        // Frame budget exhausted: rewind (loop mode) or signal end-of-stream
        // with an empty Mat.
        if (nextImgId >= readLengthLimit) {
            if (loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
                nextImgId = 1;
                cv::Mat img;
                cap.read(img);
                if (type == read_type::safe) {
                    // Clone so the caller owns the pixels — presumably guards
                    // against VideoCapture reusing its buffer (TODO confirm).
                    img = img.clone();
                }
                readerMetrics.update(startTime);
                return img;
            }
            return cv::Mat{};
        }
        cv::Mat img;
        bool success = cap.read(img);
        if (!success && first_read) {
            throw std::runtime_error("The first image can't be read");
        }
        first_read = false;
        // End of stream mid-pass: rewind and continue when looping.
        if (!success && loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
            nextImgId = 1;
            cap.read(img);
        } else {
            ++nextImgId;
        }
        if (type == read_type::safe) {
            img = img.clone();
        }
        readerMetrics.update(startTime);
        return img;
    }
};
|
||||
|
||||
// Capture backend for a live camera; `input` must be a numeric device index.
// Loop mode effectively disables the frame limit (a camera has no "end").
class CameraCapWrapper : public ImagesCapture {
    cv::VideoCapture cap;
    const read_type type;   // read_type::safe clones each returned frame
    size_t nextImgId;       // frames returned so far
    size_t readLengthLimit;

public:
    CameraCapWrapper(const std::string& input,
                     bool loop,
                     read_type type,
                     size_t readLengthLimit,
                     cv::Size cameraResolution)
        : ImagesCapture{loop},
          type{type},
          nextImgId{0} {
        if (0 == readLengthLimit) {
            throw std::runtime_error("readLengthLimit must be positive");
        }
        try {
            // std::stoi throws for a non-numeric `input`, which is how
            // openImagesCapture() learns this is not a camera source.
            if (cap.open(std::stoi(input))) {
                this->readLengthLimit = loop ? std::numeric_limits<size_t>::max() : readLengthLimit;
                cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
                cap.set(cv::CAP_PROP_FRAME_WIDTH, cameraResolution.width);
                cap.set(cv::CAP_PROP_FRAME_HEIGHT, cameraResolution.height);
                cap.set(cv::CAP_PROP_AUTOFOCUS, true);
                cap.set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'));
                return;
            }
            throw OpenError("Can't open the camera from " + input);
        } catch (const std::invalid_argument&) {
            throw InvalidInput("Can't find the camera " + input);
        } catch (const std::out_of_range&) { throw InvalidInput("Can't find the camera " + input); }
    }

    // Fall back to 30 fps when the driver does not report a rate.
    double fps() const override {
        return cap.get(cv::CAP_PROP_FPS) > 0 ? cap.get(cv::CAP_PROP_FPS) : 30;
    }

    std::string getType() const override {
        return "CAMERA";
    }

    cv::Mat read() override {
        auto startTime = std::chrono::steady_clock::now();

        if (nextImgId >= readLengthLimit) {
            return cv::Mat{};
        }
        cv::Mat img;
        if (!cap.read(img)) {
            throw std::runtime_error("The image can't be captured from the camera");
        }
        if (type == read_type::safe) {
            img = img.clone();
        }
        ++nextImgId;

        readerMetrics.update(startTime);
        return img;
    }
};
|
||||
|
||||
// Factory: tries each capture backend in turn (still image, image directory,
// video file/stream, camera index). An InvalidInput from a backend means
// "not my kind of source" and the next backend is tried; an OpenError means
// the kind matched but opening failed. If nothing succeeds, the OpenError
// messages (preferred, as they are more specific) or else the InvalidInput
// messages are combined into the thrown runtime_error.
std::unique_ptr<ImagesCapture> openImagesCapture(const std::string& input,
                                                 bool loop,
                                                 read_type type,
                                                 size_t initialImageId,
                                                 size_t readLengthLimit,
                                                 cv::Size cameraResolution) {
    if (readLengthLimit == 0)
        throw std::runtime_error{"Read length limit must be positive"};
    std::vector<std::string> invalidInputs, openErrors;
    try {
        return std::unique_ptr<ImagesCapture>(new ImreadWrapper{input, loop});
    } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }

    try {
        return std::unique_ptr<ImagesCapture>(new DirReader{input, loop, initialImageId, readLengthLimit});
    } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }

    try {
        return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, type, initialImageId, readLengthLimit});
    } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }

    try {
        return std::unique_ptr<ImagesCapture>(
            new CameraCapWrapper{input, loop, type, readLengthLimit, cameraResolution});
    } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
        openErrors.push_back(e.what());
    }

    std::vector<std::string> errorMessages = openErrors.empty() ? invalidInputs : openErrors;
    std::string errorsInfo;
    for (const auto& message : errorMessages) {
        errorsInfo.append(message + "\n");
    }
    throw std::runtime_error(errorsInfo);
}
|
||||
169
engines/OpenVINOEngine/src/utils/kuhn_munkres.cpp
Normal file
169
engines/OpenVINOEngine/src/utils/kuhn_munkres.cpp
Normal file
@@ -0,0 +1,169 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include <utils/kuhn_munkres.hpp>
|
||||
|
||||
// greedy == true makes Solve() stop after the fast greedy initialization
// (TrySimpleCase) instead of running the full Hungarian refinement (see Run()).
KuhnMunkres::KuhnMunkres(bool greedy) : n_(), greedy_(greedy) {}
|
||||
|
||||
// Solves the assignment problem for the given pairwise dissimilarity matrix
// with the Kuhn-Munkres (Hungarian) algorithm.
// @param dissimilarity_matrix CV_32F cost matrix (rows vs. columns).
// @return for each row, the assigned column index, or size_t(-1) when the row
//         received no assignment.
// Note: the original computed the matrix minimum via cv::minMaxLoc into a
// local that was never read; that dead code has been removed.
std::vector<size_t> KuhnMunkres::Solve(const cv::Mat& dissimilarity_matrix) {
    CV_Assert(dissimilarity_matrix.type() == CV_32F);

    // Pad the problem to a square n x n matrix filled with zeros.
    n_ = std::max(dissimilarity_matrix.rows, dissimilarity_matrix.cols);
    dm_ = cv::Mat(n_, n_, CV_32F, cv::Scalar(0));
    marked_ = cv::Mat(n_, n_, CV_8S, cv::Scalar(0));
    points_ = std::vector<cv::Point>(n_ * 2);

    dissimilarity_matrix.copyTo(dm_(
        cv::Rect(0, 0, dissimilarity_matrix.cols, dissimilarity_matrix.rows)));

    is_row_visited_ = std::vector<int>(n_, 0);
    is_col_visited_ = std::vector<int>(n_, 0);

    Run();

    // size_t(-1) is the "unassigned" sentinel expected by callers.
    std::vector<size_t> results(dissimilarity_matrix.rows, static_cast<size_t>(-1));
    for (int i = 0; i < dissimilarity_matrix.rows; i++) {
        const auto ptr = marked_.ptr<char>(i);
        for (int j = 0; j < dissimilarity_matrix.cols; j++) {
            if (ptr[j] == kStar) {
                results[i] = static_cast<size_t>(j);
            }
        }
    }
    return results;
}
|
||||
|
||||
void KuhnMunkres::TrySimpleCase() {
|
||||
auto is_row_visited = std::vector<int>(n_, 0);
|
||||
auto is_col_visited = std::vector<int>(n_, 0);
|
||||
|
||||
for (int row = 0; row < n_; row++) {
|
||||
auto ptr = dm_.ptr<float>(row);
|
||||
auto marked_ptr = marked_.ptr<char>(row);
|
||||
auto min_val = *std::min_element(ptr, ptr + n_);
|
||||
for (int col = 0; col < n_; col++) {
|
||||
ptr[col] -= min_val;
|
||||
if (ptr[col] == 0 && !is_col_visited[col] && !is_row_visited[row]) {
|
||||
marked_ptr[col] = kStar;
|
||||
is_col_visited[col] = 1;
|
||||
is_row_visited[row] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool KuhnMunkres::CheckIfOptimumIsFound() {
|
||||
int count = 0;
|
||||
for (int i = 0; i < n_; i++) {
|
||||
const auto marked_ptr = marked_.ptr<char>(i);
|
||||
for (int j = 0; j < n_; j++) {
|
||||
if (marked_ptr[j] == kStar) {
|
||||
is_col_visited_[j] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count >= n_;
|
||||
}
|
||||
|
||||
// Returns the position (x = column, y = row) of the smallest value lying in an
// uncovered row and uncovered column, or (-1, -1) if everything is covered.
cv::Point KuhnMunkres::FindUncoveredMinValPos() {
    cv::Point best_pos(-1, -1);
    float best_val = std::numeric_limits<float>::max();
    for (int r = 0; r < n_; r++) {
        if (is_row_visited_[r])
            continue;
        const float* costs = dm_.ptr<float>(r);
        for (int c = 0; c < n_; c++) {
            if (!is_col_visited_[c] && costs[c] < best_val) {
                best_val = costs[c];
                best_pos = cv::Point(c, r);
            }
        }
    }
    return best_pos;
}
|
||||
|
||||
void KuhnMunkres::UpdateDissimilarityMatrix(float val) {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
auto dm_ptr = dm_.ptr<float>(i);
|
||||
for (int j = 0; j < n_; j++) {
|
||||
if (is_row_visited_[i]) dm_ptr[j] += val;
|
||||
if (!is_col_visited_[j]) dm_ptr[j] -= val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int KuhnMunkres::FindInRow(int row, int what) {
|
||||
for (int j = 0; j < n_; j++) {
|
||||
if (marked_.at<char>(row, j) == what) {
|
||||
return j;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int KuhnMunkres::FindInCol(int col, int what) {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
if (marked_.at<char>(i, col) == what) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Main loop of the Hungarian algorithm over the prepared square matrix dm_:
// alternately shifts costs to expose uncovered zeros and grows an alternating
// path of primed/starred zeros, until every column contains a starred zero.
// With greedy_ set, only the fast greedy initialization is performed.
void KuhnMunkres::Run() {
    TrySimpleCase();
    if (greedy_)
        return;
    while (!CheckIfOptimumIsFound()) {
        while (true) {
            auto point = FindUncoveredMinValPos();
            auto min_val = dm_.at<float>(point.y, point.x);
            if (min_val > 0) {
                // No uncovered zero yet: adjust the matrix to create one.
                UpdateDissimilarityMatrix(min_val);
            } else {
                // Prime the uncovered zero and look for a star in its row.
                marked_.at<char>(point.y, point.x) = kPrime;
                int col = FindInRow(point.y, kStar);
                if (col >= 0) {
                    // Star found: cover this row, uncover the star's column.
                    is_row_visited_[point.y] = 1;
                    is_col_visited_[col] = 0;
                } else {
                    // No star in the row: build an alternating path of stars
                    // and primes starting at the freshly primed zero.
                    int count = 0;
                    points_[count] = point;

                    while (true) {
                        int row = FindInCol(points_[count].x, kStar);
                        if (row >= 0) {
                            count++;
                            points_[count] = cv::Point(points_[count - 1].x, row);
                            int col = FindInRow(points_[count].y, kPrime);
                            count++;
                            points_[count] = cv::Point(col, points_[count - 1].y);
                        } else {
                            break;
                        }
                    }

                    // Flip the path: stars become unmarked, primes become stars.
                    for (int i = 0; i < count + 1; i++) {
                        auto& mark = marked_.at<char>(points_[i].y, points_[i].x);
                        mark = mark == kStar ? 0 : kStar;
                    }

                    // Reset all covers and erase leftover primes before the
                    // next optimality check.
                    is_row_visited_ = std::vector<int>(n_, 0);
                    is_col_visited_ = std::vector<int>(n_, 0);

                    marked_.setTo(0, marked_ == kPrime);
                    break;
                }
            }
        }
    }
}
|
||||
114
engines/OpenVINOEngine/src/utils/performance_metrics.cpp
Normal file
114
engines/OpenVINOEngine/src/utils/performance_metrics.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
// Copyright (C) 2020-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <limits>
|
||||
#include "utils/performance_metrics.hpp"
|
||||
#include "utils/slog.hpp"
|
||||
|
||||
// timeWindow defines the length of the timespan over which the 'current fps' value is calculated
// lastUpdateTime is initialized lazily on the first update() call (see
// firstFrameProcessed).
PerformanceMetrics::PerformanceMetrics(Duration timeWindow)
    : timeWindowSize(timeWindow)
    , firstFrameProcessed(false)
{}
|
||||
|
||||
// Records the statistics for one finished request and immediately draws the
// selected metrics (latency and/or FPS) onto the frame.
void PerformanceMetrics::update(TimePoint lastRequestStartTime, const cv::Mat& frame,
                                cv::Point position, int fontFace, double fontScale,
                                cv::Scalar color, int thickness, MetricTypes metricType) {
    update(lastRequestStartTime);
    paintMetrics(frame, position, fontFace, fontScale, color, thickness, metricType);
}
|
||||
|
||||
// Accumulates latency and period statistics for one finished request, and
// rolls the moving window over once timeWindowSize has elapsed.
void PerformanceMetrics::update(TimePoint lastRequestStartTime) {
    const TimePoint now = Clock::now();

    if (!firstFrameProcessed) {
        // Anchor the first window at the very first request's start time.
        lastUpdateTime = lastRequestStartTime;
        firstFrameProcessed = true;
    }

    currentMovingStatistic.latency += now - lastRequestStartTime;
    currentMovingStatistic.period = now - lastUpdateTime;
    currentMovingStatistic.frameCount++;

    if (now - lastUpdateTime > timeWindowSize) {
        // Window elapsed: publish it and start accumulating a fresh one.
        lastMovingStatistic = currentMovingStatistic;
        totalStatistic.combine(lastMovingStatistic);
        currentMovingStatistic = Statistic();
        lastUpdateTime = now;
    }
}
|
||||
|
||||
// Draws the latest latency and/or FPS values onto the frame; NaN metrics
// (no completed window yet) are skipped.
void PerformanceMetrics::paintMetrics(const cv::Mat& frame, cv::Point position, int fontFace,
        double fontScale, cv::Scalar color, int thickness, MetricTypes metricType) const {
    const Metrics metrics = getLast();
    const bool wantLatency = metricType == MetricTypes::LATENCY || metricType == MetricTypes::ALL;
    const bool wantFps = metricType == MetricTypes::FPS || metricType == MetricTypes::ALL;

    std::ostringstream out;
    if (wantLatency && !std::isnan(metrics.latency)) {
        out << "Latency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms";
        putHighlightedText(frame, out.str(), position, fontFace, fontScale, color, thickness);
    }
    if (wantFps && !std::isnan(metrics.fps)) {
        out.str("");
        out << "FPS: " << std::fixed << std::setprecision(1) << metrics.fps;
        // Shift the FPS line below the latency line when both are shown.
        const int offset = metricType == MetricTypes::ALL ? 30 : 0;
        putHighlightedText(frame, out.str(), {position.x, position.y + offset}, fontFace, fontScale, color, thickness);
    }
}
|
||||
|
||||
// Returns latency (ms) and FPS computed over the last completed time window;
// both are NaN until the first window completes.
PerformanceMetrics::Metrics PerformanceMetrics::getLast() const {
    const double nan = std::numeric_limits<double>::signaling_NaN();
    Metrics metrics;

    if (lastMovingStatistic.frameCount != 0) {
        metrics.latency = std::chrono::duration_cast<Ms>(lastMovingStatistic.latency).count()
                          / lastMovingStatistic.frameCount;
    } else {
        metrics.latency = nan;
    }

    if (lastMovingStatistic.period != Duration::zero()) {
        metrics.fps = lastMovingStatistic.frameCount
                      / std::chrono::duration_cast<Sec>(lastMovingStatistic.period).count();
    } else {
        metrics.fps = nan;
    }

    return metrics;
}
|
||||
|
||||
// Returns latency (ms) and FPS aggregated over the whole run, including the
// still-open current window; both are NaN when no frame was processed.
PerformanceMetrics::Metrics PerformanceMetrics::getTotal() const {
    Metrics metrics;

    const int frameCount = totalStatistic.frameCount + currentMovingStatistic.frameCount;
    if (frameCount == 0) {
        metrics.latency = std::numeric_limits<double>::signaling_NaN();
        metrics.fps = std::numeric_limits<double>::signaling_NaN();
        return metrics;
    }

    metrics.latency = std::chrono::duration_cast<Ms>(
        totalStatistic.latency + currentMovingStatistic.latency).count() / frameCount;
    metrics.fps = frameCount / std::chrono::duration_cast<Sec>(
        totalStatistic.period + currentMovingStatistic.period).count();
    return metrics;
}
|
||||
|
||||
void PerformanceMetrics::logTotal() const {
|
||||
Metrics metrics = getTotal();
|
||||
|
||||
slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms" << slog::endl;
|
||||
slog::info << "\tFPS: " << metrics.fps << slog::endl;
|
||||
}
|
||||
|
||||
// Logs the per-stage latencies (ms) of the processing pipeline.
void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat) {
    slog::info << "\tDecoding:\t" << std::fixed << std::setprecision(1) << readLat << " ms" << slog::endl;
    slog::info << "\tPreprocessing:\t" << preprocLat << " ms" << slog::endl;
    slog::info << "\tInference:\t" << inferLat << " ms" << slog::endl;
    slog::info << "\tPostprocessing:\t" << postprocLat << " ms" << slog::endl;
    slog::info << "\tRendering:\t" << renderLat << " ms" << slog::endl;
}
|
||||
180
engines/OpenVINOEngine/src/utils/visualizer.cpp
Normal file
180
engines/OpenVINOEngine/src/utils/visualizer.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
// Copyright (C) 2021-2024 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utils/visualizer.hpp"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include <models/results.h>
|
||||
#include <pipelines/metadata.h>
|
||||
#include <utils/ocv_common.hpp>
|
||||
|
||||
// Picks the window title for the demo type: "sr" = super resolution,
// "jr" = JPEG restoration. Any other type leaves winName unchanged.
Visualizer::Visualizer(const std::string& type) {
    if (type == "sr") {
        winName = "Image Processing Demo - Super Resolution (press A for help)";
    } else if (type == "jr") {
        winName = "Image Processing Demo - JPEG Restoration (press A for help)";
    }
}
|
||||
|
||||
// Returns the resolution the visualizer renders at.
cv::Size Visualizer::getSize() {
    return resolution;
}
|
||||
|
||||
// Processes a pressed key (case-insensitive):
//   A - toggle the help overlay
//   O - show the original image left of the sweep slider
//   V - show the difference image left of the sweep slider
//   R - show only the result (recreates the window, hides the trackbar)
// Also falls back to "result" mode when the window was closed by the user.
void Visualizer::handleKey(int key) {
    key = std::tolower(key);
    if (key == 'a') {
        isHelpShown = !isHelpShown;
    }
    // Negative window property means the window no longer exists.
    if (cv::getWindowProperty(winName, 0) < 0) {
        mode = "result";
        disableTrackbar();
    }
    if (key == 'o') {
        mode = "orig";
        addTrackbar();
    }
    if (key == 'v') {
        mode = "diff";
        addTrackbar();
    }
    if (key == 'r') {
        mode = "result";
        // Destroying the window removes its trackbar as well.
        cv::destroyWindow(winName);
        disableTrackbar();
    }
}
|
||||
|
||||
// Prepares the frame to display: validates metadata, resizes both the result
// and the original input to the output resolution, and converts a grayscale
// result to BGR when the input has a different channel count.
// @throws std::invalid_argument on missing metadata or an empty input image.
cv::Mat Visualizer::renderResultData(ImageResult result, cv::Size& newResolution) {
    if (!result.metaData) {
        throw std::invalid_argument("Renderer: metadata is null");
    }
    // The input image was stored in the metadata at submission time.
    inputImg = result.metaData->asRef<ImageMetaData>().img;
    if (inputImg.empty()) {
        throw std::invalid_argument("Renderer: image provided in metadata is empty");
    }

    // The first frame fixes the output resolution for the session.
    if (!isResolutionSet) {
        setResolution(newResolution);
    }

    cv::resize(result.resultImage, result.resultImage, resolution);
    cv::resize(inputImg, inputImg, resolution);

    if (inputImg.channels() == result.resultImage.channels()) {
        resultImg = result.resultImage;
    } else {
        cv::cvtColor(result.resultImage, resultImg, cv::COLOR_GRAY2BGR);
    }
    return resultImg;
}
|
||||
|
||||
// Shows the given image in the demo window; when it is empty, composes and
// shows the image for the current mode instead. Overlays the first four help
// lines when the help toggle is on.
void Visualizer::show(cv::Mat img) {
    if (img.empty()) {
        changeDisplayImg();
        img = displayImg;
    }

    if (isHelpShown) {
        const int pad = 10;
        const int margin = 60;
        int baseline = 0;
        const int lineH =
            cv::getTextSize(helpMessage[0], cv::FONT_HERSHEY_COMPLEX_SMALL, 0.75, 1, &baseline).height + pad;
        for (size_t i = 0; i < 4; ++i) {
            putHighlightedText(img,
                               helpMessage[i],
                               cv::Point(pad, margin + baseline + (i + 1) * lineH),
                               cv::FONT_HERSHEY_COMPLEX,
                               0.65,
                               cv::Scalar(255, 0, 0),
                               2);
        }
    }

    cv::imshow(winName, img);
}
|
||||
|
||||
// Rebuilds displayImg for the current mode:
//   "result" - the processed image only;
//   "orig"   - original image left of the sweep line, result to the right;
//   "diff"   - contrast-stretched |input - result| left of the sweep line.
void Visualizer::changeDisplayImg() {
    displayImg = resultImg.clone();
    if (mode == "orig") {
        inputImg(cv::Rect(0, 0, slider, inputImg.rows)).copyTo(displayImg(cv::Rect(0, 0, slider, resultImg.rows)));
        markImage(displayImg, {"O", "R"}, static_cast<float>(slider) / resolution.width);
        drawSweepLine(displayImg);
    } else if (mode == "result") {
        markImage(displayImg, {"R", ""}, 1);
    } else if (mode == "diff") {
        cv::Mat diffImg;
        cv::absdiff(inputImg, resultImg, diffImg);
        double min, max;
        cv::minMaxLoc(diffImg, &min, &max);
        // Stretch the difference to the full 8-bit range. Guard against a zero
        // range (identical images), which previously caused a division by zero.
        const double scale = (max > min) ? 255.0 / (max - min) : 0.0;
        diffImg = (diffImg - min) * scale;
        diffImg(cv::Rect(0, 0, slider, resultImg.rows)).copyTo(displayImg(cv::Rect(0, 0, slider, displayImg.rows)));
        markImage(displayImg, {"D", "R"}, static_cast<float>(slider) / resolution.width);
        drawSweepLine(displayImg);
    }
}
|
||||
|
||||
// Puts the two letter labels at the horizontal centers of the two image parts
// split at fraction `alpha` of the width (alpha == 1 labels the whole image).
void Visualizer::markImage(cv::Mat& image, const std::pair<std::string, std::string>& marks, float alpha) {
    const int pad = 25;
    const float width = static_cast<float>(image.cols);
    const float leftCenter = width * alpha / 2.0f;
    const float rightCenter = width * (1 + alpha) / 2.0f;
    putHighlightedText(image,
                       marks.first,
                       cv::Point(static_cast<int>(leftCenter) - pad, 25),
                       cv::FONT_HERSHEY_COMPLEX,
                       1,
                       cv::Scalar(0, 0, 255),
                       2);
    putHighlightedText(image,
                       marks.second,
                       cv::Point(static_cast<int>(rightCenter), 25),
                       cv::FONT_HERSHEY_COMPLEX,
                       1,
                       cv::Scalar(0, 0, 255),
                       2);
}
|
||||
|
||||
// Draws the vertical green sweep line at the current slider position.
void Visualizer::drawSweepLine(cv::Mat& image) {
    const cv::Point top(slider, 0);
    const cv::Point bottom(slider, image.rows);
    cv::line(image, top, bottom, cv::Scalar(0, 255, 0), 2);
}
|
||||
|
||||
// Stores the output resolution; subsequent frames reuse it (see
// renderResultData, which calls this only for the first frame).
void Visualizer::setResolution(cv::Size& newResolution) {
    resolution = newResolution;
    isResolutionSet = true;
}
|
||||
|
||||
void Visualizer::addTrackbar() {
|
||||
if (!isTrackbarShown) {
|
||||
cv::createTrackbar(trackbarName, winName, &slider, resolution.width);
|
||||
cv::setTrackbarMin(trackbarName, winName, 1);
|
||||
isTrackbarShown = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the trackbar as hidden; it vanishes when the window is recreated.
void Visualizer::disableTrackbar() {
    isTrackbarShown = false;
}
|
||||
114
engines/OpenVINOEngine/src/utils/w_dirent.hpp
Normal file
114
engines/OpenVINOEngine/src/utils/w_dirent.hpp
Normal file
@@ -0,0 +1,114 @@
|
||||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <WinSock2.h>
|
||||
#include <Windows.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#else
|
||||
|
||||
#include <unistd.h>
|
||||
#include <cstdlib>
|
||||
#include <string.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if defined(WIN32)
|
||||
// Copied from linux libc sys/stat.h:
|
||||
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
|
||||
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
|
||||
#endif
|
||||
|
||||
struct dirent {
|
||||
char *d_name;
|
||||
|
||||
explicit dirent(const wchar_t *wsFilePath) {
|
||||
size_t i;
|
||||
auto slen = wcslen(wsFilePath);
|
||||
d_name = static_cast<char*>(malloc(slen + 1));
|
||||
wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
|
||||
}
|
||||
|
||||
~dirent() {
|
||||
free(d_name);
|
||||
}
|
||||
};
|
||||
|
||||
class DIR {
|
||||
WIN32_FIND_DATAA FindFileData;
|
||||
HANDLE hFind;
|
||||
dirent *next;
|
||||
|
||||
static inline bool endsWith(const std::string &src, const char *with) {
|
||||
int wl = static_cast<int>(strlen(with));
|
||||
int so = static_cast<int>(src.length()) - wl;
|
||||
if (so < 0) return false;
|
||||
return 0 == strncmp(with, &src[so], wl);
|
||||
}
|
||||
|
||||
public:
|
||||
explicit DIR(const char *dirPath) : next(nullptr) {
|
||||
std::string ws = dirPath;
|
||||
if (endsWith(ws, "\\"))
|
||||
ws += "*";
|
||||
else
|
||||
ws += "\\*";
|
||||
hFind = FindFirstFileA(ws.c_str(), &FindFileData);
|
||||
FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
|
||||
}
|
||||
|
||||
~DIR() {
|
||||
if (!next) delete next;
|
||||
FindClose(hFind);
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
|
||||
}
|
||||
|
||||
dirent* nextEnt() {
|
||||
if (next != nullptr) delete next;
|
||||
next = nullptr;
|
||||
|
||||
if (!FindFileData.dwReserved0) return nullptr;
|
||||
|
||||
wchar_t wbuf[4096];
|
||||
|
||||
size_t outSize;
|
||||
mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
|
||||
next = new dirent(wbuf);
|
||||
FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
|
||||
return next;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// POSIX-style opendir: returns a heap-allocated DIR for the path, or nullptr
// when the directory cannot be enumerated. Release with closedir().
static DIR *opendir(const char* dirPath) {
    auto dp = new DIR(dirPath);
    if (dp->isValid())
        return dp;
    delete dp;
    return nullptr;
}
|
||||
|
||||
// POSIX-style readdir: yields the next entry (owned by dp), nullptr at the end.
static struct dirent *readdir(DIR *dp) {
    return dp->nextEnt();
}
|
||||
|
||||
// POSIX-style closedir: releases the DIR together with its pending entry.
static void closedir(DIR *dp) {
    delete dp;
}
|
||||
Reference in New Issue
Block a user