// YOLOv10 inference helper (header-only): OpenVINO model loading, per-frame
// detection, and drawing utilities.
#pragma once
|
|
#include <openvino/op/transpose.hpp>
|
|
#include <openvino/core/node.hpp>
|
|
#include <opencv2/dnn.hpp>
|
|
#include <opencv2/imgproc.hpp>
|
|
#include <models/detection_model_ssd.h>
|
|
#include <random>
|
|
namespace Yolov10 {
|
|
// One detected object produced by Inference::RunInference.
struct YoloDetection {
// Class index; PostProcessing clamps this to at most 9.
short class_id;
// Model confidence score (compared against the configured threshold).
float confidence;
// Bounding box mapped back to original-frame pixel coordinates.
cv::Rect box;
};
|
|
// Wraps an OpenVINO-compiled YOLO model: loads and compiles the network,
// runs synchronous per-frame inference, and decodes raw output rows into
// YoloDetection results.
class Inference {
public:
// Default-constructed instances are unusable until re-assigned; prefer the
// parameterized constructors.
Inference() {}
// Loads `model_path` with a default 640x640 input size.
Inference(const std::string& model_path, const float& model_confidence_threshold);
// Loads `model_path` and pins dynamic-shape models to `model_input_shape`.
Inference(const std::string& model_path, const cv::Size model_input_shape, const float& model_confidence_threshold);


// Runs preprocess -> infer -> postprocess on one BGR frame and returns the
// decoded detections.
std::vector<YoloDetection> RunInference(const cv::Mat& frame);


private:
// Reads, reshapes (if dynamic), wraps with pre/post-processing, and compiles the model.
void InitialModel(const std::string& model_path);
// Resizes the frame and binds it as the input tensor.
void Preprocessing(const cv::Mat& frame);
// Thresholds raw output rows and fills detections_.
void PostProcessing();
// Maps a model-space rect back to original-frame coordinates (defined elsewhere).
cv::Rect GetBoundingBox(const cv::Rect& src) const;

// Results of the most recent RunInference call.
std::vector<YoloDetection> detections_;

float model_confidence_threshold_;

// Frame resized to the model input size; input_tensor_ aliases its pixels.
cv::Mat resized_frame_;

// Per-axis scale factors from model space back to the original frame.
cv::Point2f factor_;

cv::Size2f model_input_shape_;

// {width = floats per row, height = number of rows} of the output tensor.
cv::Size model_output_shape_;

std::string _modelFilePath;

ov::Tensor input_tensor_;

ov::InferRequest inference_request_;

ov::CompiledModel compiled_model_;

};
|
|
|
|
// Draws each detection's box and "label confidence" caption onto `frame` in place.
void DrawDetectedObject(cv::Mat& frame, const std::vector<YoloDetection>& detections, const std::vector<std::string>& class_names);

// Returns the label list used to map YoloDetection::class_id to a name.
std::vector<std::string> GetClassNameFromMetadata(const std::string& metadata_path);
|
|
|
|
|
|
// Constructs the engine with the default 640x640 input size.
// @param model_path                 path passed to ov::Core::read_model.
// @param model_confidence_threshold minimum confidence for a detection to be kept.
// `inline` added: this definition lives in a header (#pragma once), so without
// it every translation unit including this file would violate the ODR.
inline Inference::Inference(const std::string& model_path, const float& model_confidence_threshold) {
    // Default size for models with dynamic shapes to prevent reshape errors.
    model_input_shape_ = cv::Size(640, 640);
    model_confidence_threshold_ = model_confidence_threshold;
    _modelFilePath = model_path;
    InitialModel(model_path);
}
|
|
|
|
// If the model has dynamic shapes, we need to set the input shape.
|
|
// Constructs the engine with an explicit input size — needed for models with
// dynamic shapes, which InitialModel reshapes to `model_input_shape` before
// compilation.
// @param model_path                 path passed to ov::Core::read_model.
// @param model_input_shape          static input size to pin the network to.
// @param model_confidence_threshold minimum confidence for a detection to be kept.
// `inline` added: header-defined function (#pragma once) — required to avoid
// ODR violations when included from multiple translation units.
inline Inference::Inference(const std::string& model_path, const cv::Size model_input_shape, const float& model_confidence_threshold) {
    model_input_shape_ = model_input_shape;
    model_confidence_threshold_ = model_confidence_threshold;
    _modelFilePath = model_path;

    InitialModel(model_path);
}
|
|
|
|
|
|
void Inference::InitialModel(const std::string& model_path) {
|
|
ov::Core core;
|
|
std::shared_ptr<ov::Model> model = core.read_model(model_path);
|
|
if (model->is_dynamic()) {
|
|
model->reshape({ 1, 3, static_cast<long int>(model_input_shape_.height), static_cast<long int>(model_input_shape_.width) });
|
|
}
|
|
ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
|
|
|
|
ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
|
|
ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255, 255, 255 });
|
|
ppp.input().model().set_layout("NCHW");
|
|
ppp.output().tensor().set_element_type(ov::element::f32);
|
|
|
|
model = ppp.build();
|
|
//compiled_model_ = core.compile_model(model, "AUTO:GPU,CPU");
|
|
//core.set_property("AUTO", ov::device::priorities("GPU,CPU"));
|
|
compiled_model_ = core.compile_model(model, "C");
|
|
std::vector<std::string> available_devices = core.get_available_devices();
|
|
auto num_requests = compiled_model_.get_property(ov::optimal_number_of_infer_requests);
|
|
|
|
inference_request_ = compiled_model_.create_infer_request();
|
|
const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
|
|
const ov::Shape input_shape = inputs[0].get_shape();
|
|
|
|
short height = input_shape[1];
|
|
short width = input_shape[2];
|
|
model_input_shape_ = cv::Size2f(width, height);
|
|
|
|
const std::vector<ov::Output<ov::Node>> outputs = model->outputs();
|
|
const ov::Shape output_shape = outputs[0].get_shape();
|
|
|
|
height = output_shape[1];
|
|
width = output_shape[2];
|
|
model_output_shape_ = cv::Size(width, height);
|
|
}
|
|
|
|
// Runs the full detection pipeline on one BGR frame.
// @param frame input image in OpenCV BGR layout.
// @return detections above the confidence threshold, with boxes mapped back to
//         `frame` coordinates (see PostProcessing / GetBoundingBox).
// `inline` added (header-defined function, ODR).
inline std::vector<YoloDetection> Inference::RunInference(const cv::Mat& frame) {
    Preprocessing(frame);        // resize + bind input tensor
    inference_request_.infer();  // synchronous inference
    PostProcessing();            // threshold + decode into detections_

    return detections_;
}
|
|
|
|
// Resizes the frame to the model input size, binds it (zero-copy) as the input
// tensor, and records the scale factors used to map boxes back to the original
// frame. `inline` added (header-defined function, ODR).
inline void Inference::Preprocessing(const cv::Mat& frame) {
    cv::resize(frame, resized_frame_, model_input_shape_, 0, 0, cv::INTER_AREA);

    // Float division: model_input_shape_ is cv::Size2f, so no truncation occurs
    // and the old static_cast was redundant.
    factor_.x = frame.cols / model_input_shape_.width;
    factor_.y = frame.rows / model_input_shape_.height;

    // FIX: the input tensor element type is u8 (set in InitialModel), so the
    // pixel buffer must not be reinterpreted through a (float*) C-style cast.
    // Pass the u8 data pointer directly; the tensor aliases resized_frame_ and
    // must not outlive it.
    input_tensor_ = ov::Tensor(compiled_model_.input().get_element_type(),
                               compiled_model_.input().get_shape(),
                               resized_frame_.data);
    inference_request_.set_input_tensor(input_tensor_);
}
|
|
|
|
void Inference::PostProcessing() {
|
|
const float* detections = inference_request_.get_output_tensor().data<const float>();
|
|
detections_.clear();
|
|
/*
|
|
* 0 1 2 3 4 5
|
|
* x, y, w. h, confidence, class_id
|
|
*/
|
|
|
|
for (unsigned int i = 0; i < model_output_shape_.height; ++i) {
|
|
const unsigned int index = i * model_output_shape_.width;
|
|
const float& confidence = detections[index + 4];
|
|
if (confidence > model_confidence_threshold_) {
|
|
const float& x = detections[index + 0];
|
|
const float& y = detections[index + 1];
|
|
const float& w = detections[index + 2];
|
|
const float& h = detections[index + 3];
|
|
YoloDetection result;
|
|
result.class_id = static_cast<const short>(detections[index + 5]);
|
|
if (result.class_id > 9) result.class_id = 9;
|
|
result.confidence = confidence;
|
|
result.box = GetBoundingBox(cv::Rect(x, y, w, h));
|
|
detections_.push_back(result);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Draws each detection's bounding box plus a "label confidence" caption onto
// `frame` in place. Colors are random per call (boxes will flicker across
// frames). `inline` added (header-defined function, ODR).
// @param frame       image to draw on (modified in place).
// @param detections  results from Inference::RunInference.
// @param class_names labels indexed by class_id; may be empty.
inline void DrawDetectedObject(cv::Mat& frame, const std::vector<YoloDetection>& detections, const std::vector<std::string>& class_names) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(120, 255);  // bright-ish colors only

    for (const auto& detection : detections) {
        const cv::Rect& box = detection.box;
        const float confidence = detection.confidence;
        // By value: the old `const int&` bound to a temporary created from the
        // short member.
        const int class_id = detection.class_id;

        const cv::Scalar color = cv::Scalar(dis(gen), dis(gen), dis(gen));
        cv::rectangle(frame, box, color, 3);

        // substr(0, 4) keeps e.g. "0.87" from std::to_string's "0.870000".
        // ROBUSTNESS FIX: fall back to the numeric id for any id outside
        // class_names, preventing out-of-bounds vector access when the caller
        // supplies a short label list.
        std::string class_string;
        if (class_id >= 0 && static_cast<std::size_t>(class_id) < class_names.size())
            class_string = class_names[class_id] + " " + std::to_string(confidence).substr(0, 4);
        else
            class_string = "id[" + std::to_string(class_id) + "] " + std::to_string(confidence).substr(0, 4);

        // Filled label background sized to the rendered text.
        const cv::Size text_size = cv::getTextSize(class_string, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, 0);
        const cv::Rect text_box(box.x - 2, box.y - 27, text_size.width + 10, text_size.height + 15);

        cv::rectangle(frame, text_box, color, cv::FILLED);
        cv::putText(frame, class_string, cv::Point(box.x + 5, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 0, 0), 2, 0);
    }
}
|
|
|
|
// Returns the class-name list used to label detections.
// NOTE(review): despite the name, `metadata_path` is currently ignored and a
// hard-coded list (first 11 COCO classes) is returned — TODO: actually parse
// the metadata file. `inline` added (header-defined function, ODR).
// @param metadata_path unused for now; kept for interface stability.
// @return labels indexed by YoloDetection::class_id.
inline std::vector<std::string> GetClassNameFromMetadata(const std::string& metadata_path) {
    (void)metadata_path;  // intentionally unused until metadata parsing exists

    return {
        "person",
        "bicycle",
        "car",
        "motorcycle",
        "airplane",
        "bus",
        "train",
        "truck",
        "boat",
        "traffic light",
        "fire hydrant",
    };
}
|
|
|
|
}
|