190 lines
7.6 KiB
C
190 lines
7.6 KiB
C
|
|
#pragma once
|
||
|
|
|
||
|
|
#pragma once
|
||
|
|
|
||
|
|
/// <summary>
|
||
|
|
/// https://www.cnblogs.com/guojin-blogs/p/18284497
|
||
|
|
/// wget https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
|
||
|
|
// yolo export model = yolov10s.pt format = onnx opset = 13 simplify
|
||
|
|
/// </summary>
|
||
|
|
|
||
|
|
|
||
|
|
#include "opencv2/opencv.hpp"
|
||
|
|
#include <openvino/op/transpose.hpp>
|
||
|
|
#include <openvino/core/node.hpp>
|
||
|
|
#include <opencv2/dnn.hpp>
|
||
|
|
#include <opencv2/imgproc.hpp>
|
||
|
|
#include <models/detection_model_ssd.h>
|
||
|
|
|
||
|
|
namespace Yolov10OV {
|
||
|
|
struct DetResult {
|
||
|
|
cv::Rect bbox;
|
||
|
|
float conf;
|
||
|
|
int lable;
|
||
|
|
DetResult(cv::Rect bbox, float conf, int lable) :bbox(bbox), conf(conf), lable(lable) {}
|
||
|
|
};
|
||
|
|
|
||
|
|
void pre_process(cv::Mat* img, int length, float* factor, std::vector<float>& data) {
|
||
|
|
cv::Mat mat;
|
||
|
|
int rh = img->rows;
|
||
|
|
int rw = img->cols;
|
||
|
|
int rc = img->channels();
|
||
|
|
cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB);
|
||
|
|
int max_image_length = rw > rh ? rw : rh;
|
||
|
|
cv::Mat max_image = cv::Mat::zeros(max_image_length, max_image_length, CV_8UC3);
|
||
|
|
max_image = max_image * 255;
|
||
|
|
cv::Rect roi(0, 0, rw, rh);
|
||
|
|
mat.copyTo(cv::Mat(max_image, roi));
|
||
|
|
cv::Mat resize_img;
|
||
|
|
cv::resize(max_image, resize_img, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR);
|
||
|
|
|
||
|
|
*factor = (float)((float)max_image_length / (float)length);
|
||
|
|
resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.0);
|
||
|
|
rh = resize_img.rows;
|
||
|
|
rw = resize_img.cols;
|
||
|
|
rc = resize_img.channels();
|
||
|
|
for (int i = 0; i < rc; ++i) {
|
||
|
|
cv::extractChannel(resize_img, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
std::vector<DetResult> post_process(float* result, float factor, int outputLength) {
|
||
|
|
std::vector<cv::Rect> position_boxes;
|
||
|
|
std::vector <int> class_ids;
|
||
|
|
std::vector <float> confidences;
|
||
|
|
// Preprocessing output results
|
||
|
|
for (int i = 0; i < outputLength; i++)
|
||
|
|
{
|
||
|
|
int s = 6 * i;
|
||
|
|
if ((float)result[s + 4] > 0.2)
|
||
|
|
{
|
||
|
|
float cx = result[s + 0];
|
||
|
|
float cy = result[s + 1];
|
||
|
|
float dx = result[s + 2];
|
||
|
|
float dy = result[s + 3];
|
||
|
|
int x = (int)((cx)*factor);
|
||
|
|
int y = (int)((cy)*factor);
|
||
|
|
int width = (int)((dx - cx) * factor);
|
||
|
|
int height = (int)((dy - cy) * factor);
|
||
|
|
cv::Rect box(x, y, width, height);
|
||
|
|
|
||
|
|
position_boxes.push_back(box);
|
||
|
|
class_ids.push_back((int)result[s + 5]);
|
||
|
|
confidences.push_back((float)result[s + 4]);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
std::vector<DetResult> re;
|
||
|
|
for (int i = 0; i < position_boxes.size(); i++)
|
||
|
|
{
|
||
|
|
DetResult det(position_boxes[i], confidences[i], class_ids[i]);
|
||
|
|
re.push_back(det);
|
||
|
|
}
|
||
|
|
return re;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
void draw_bbox(cv::Mat& img, std::vector<DetResult>& res) {
|
||
|
|
for (size_t j = 0; j < res.size(); j++) {
|
||
|
|
cv::rectangle(img, res[j].bbox, cv::Scalar(255, 0, 255), 2);
|
||
|
|
cv::putText(img, std::to_string(res[j].lable) + "-" + std::to_string(res[j].conf),
|
||
|
|
cv::Point(res[j].bbox.x, res[j].bbox.y - 1), cv::FONT_HERSHEY_PLAIN,
|
||
|
|
1.2, cv::Scalar(0, 0, 255), 2);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
void yolov10_infer_without_process() {
|
||
|
|
std::string videoPath = "E:\\Text_dataset\\car_test.mov";
|
||
|
|
std::string model_path = "E:\\Text_Model\\yolov10s_openvino_model\\yolov10s.xml";
|
||
|
|
ov::Core core;
|
||
|
|
auto model = core.read_model(model_path);
|
||
|
|
auto compiled_model = core.compile_model(model, "CPU");
|
||
|
|
ov::InferRequest request = compiled_model.create_infer_request();
|
||
|
|
cv::VideoCapture capture(videoPath);
|
||
|
|
if (!capture.isOpened()) {
|
||
|
|
std::cerr << "ERROR: " << std::endl;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
float factor = 0;
|
||
|
|
request.get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
|
||
|
|
std::vector<float> inputData(640 * 640 * 3);
|
||
|
|
std::chrono::time_point<std::chrono::steady_clock> t_beg;
|
||
|
|
std::chrono::time_point<std::chrono::steady_clock> t_end;
|
||
|
|
while (true)
|
||
|
|
{
|
||
|
|
cv::Mat frame;
|
||
|
|
if (!capture.read(frame)) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
t_beg = std::chrono::high_resolution_clock::now();
|
||
|
|
pre_process(&frame, 640, &factor, inputData);
|
||
|
|
memcpy(request.get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
|
||
|
|
request.infer();
|
||
|
|
float* output_data = request.get_output_tensor().data<float>();
|
||
|
|
std::vector<DetResult> result = post_process(output_data, factor, 300);
|
||
|
|
t_end = std::chrono::high_resolution_clock::now();
|
||
|
|
|
||
|
|
cv::putText(frame, "FPS: " + std::to_string(1000.0 / std::chrono::duration<float, std::milli>(t_end - t_beg).count())
|
||
|
|
+ ", Time: " + std::to_string(std::chrono::duration<float, std::milli>(t_end - t_beg).count()) + "ms",
|
||
|
|
cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
|
||
|
|
draw_bbox(frame, result);
|
||
|
|
imshow("Frame", frame);
|
||
|
|
cv::waitKey(1); //30
|
||
|
|
}
|
||
|
|
cv::destroyAllWindows();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
void yolov10_infer_ansy_without_process() {
|
||
|
|
std::string videoPath = "E:\\Text_dataset\\car_test.mov";
|
||
|
|
std::string model_path = "E:\\Text_Model\\yolov10s_openvino_model\\yolov10s.xml";
|
||
|
|
ov::Core core;
|
||
|
|
auto model = core.read_model(model_path);
|
||
|
|
auto compiled_model = core.compile_model(model, "CPU");
|
||
|
|
std::vector<ov::InferRequest>requests = { compiled_model.create_infer_request(), compiled_model.create_infer_request() };
|
||
|
|
cv::VideoCapture capture(videoPath);
|
||
|
|
if (!capture.isOpened()) {
|
||
|
|
std::cerr << "ERROR:" << std::endl;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
float factor = 0;
|
||
|
|
requests[0].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
|
||
|
|
requests[1].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
|
||
|
|
cv::Mat frame;
|
||
|
|
capture.read(frame);
|
||
|
|
std::vector<float> inputData(640 * 640 * 3);
|
||
|
|
pre_process(&frame, 640, &factor, inputData);
|
||
|
|
memcpy(requests[0].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
|
||
|
|
requests[0].start_async();
|
||
|
|
std::chrono::time_point<std::chrono::steady_clock> t_beg;
|
||
|
|
std::chrono::time_point<std::chrono::steady_clock> t_end;
|
||
|
|
while (true)
|
||
|
|
{
|
||
|
|
cv::Mat next_frame;
|
||
|
|
if (!capture.read(next_frame)) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
t_beg = std::chrono::high_resolution_clock::now();
|
||
|
|
pre_process(&next_frame, 640, &factor, inputData);
|
||
|
|
memcpy(requests[1].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
|
||
|
|
requests[1].start_async();
|
||
|
|
requests[0].wait();
|
||
|
|
float* output_data = requests[0].get_output_tensor().data<float>();
|
||
|
|
std::vector<DetResult> result = post_process(output_data, factor, 300);
|
||
|
|
t_end = std::chrono::high_resolution_clock::now();
|
||
|
|
draw_bbox(frame, result);
|
||
|
|
cv::putText(frame, "FPS: " + std::to_string(1000.0 / std::chrono::duration<float, std::milli>(t_end - t_beg).count())
|
||
|
|
+ ", Time: " + std::to_string(std::chrono::duration<float, std::milli>(t_end - t_beg).count()) + "ms",
|
||
|
|
cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
|
||
|
|
imshow("Frame", frame);
|
||
|
|
cv::waitKey(1); //30
|
||
|
|
frame = next_frame;
|
||
|
|
std::swap(requests[0], requests[1]);
|
||
|
|
}
|
||
|
|
cv::destroyAllWindows();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
}
|