#pragma once #pragma once /// /// https://www.cnblogs.com/guojin-blogs/p/18284497 /// wget https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt // yolo export model = yolov10s.pt format = onnx opset = 13 simplify /// #include "opencv2/opencv.hpp" #include #include #include #include #include namespace Yolov10OV { struct DetResult { cv::Rect bbox; float conf; int lable; DetResult(cv::Rect bbox, float conf, int lable) :bbox(bbox), conf(conf), lable(lable) {} }; void pre_process(cv::Mat* img, int length, float* factor, std::vector& data) { cv::Mat mat; int rh = img->rows; int rw = img->cols; int rc = img->channels(); cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB); int max_image_length = rw > rh ? rw : rh; cv::Mat max_image = cv::Mat::zeros(max_image_length, max_image_length, CV_8UC3); max_image = max_image * 255; cv::Rect roi(0, 0, rw, rh); mat.copyTo(cv::Mat(max_image, roi)); cv::Mat resize_img; cv::resize(max_image, resize_img, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR); *factor = (float)((float)max_image_length / (float)length); resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.0); rh = resize_img.rows; rw = resize_img.cols; rc = resize_img.channels(); for (int i = 0; i < rc; ++i) { cv::extractChannel(resize_img, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i); } } std::vector post_process(float* result, float factor, int outputLength) { std::vector position_boxes; std::vector class_ids; std::vector confidences; // Preprocessing output results for (int i = 0; i < outputLength; i++) { int s = 6 * i; if ((float)result[s + 4] > 0.2) { float cx = result[s + 0]; float cy = result[s + 1]; float dx = result[s + 2]; float dy = result[s + 3]; int x = (int)((cx)*factor); int y = (int)((cy)*factor); int width = (int)((dx - cx) * factor); int height = (int)((dy - cy) * factor); cv::Rect box(x, y, width, height); position_boxes.push_back(box); class_ids.push_back((int)result[s + 5]); confidences.push_back((float)result[s + 4]); } } std::vector re; for (int i = 0; i < position_boxes.size(); i++) { DetResult det(position_boxes[i], confidences[i], class_ids[i]); re.push_back(det); } return re; } void draw_bbox(cv::Mat& img, std::vector& res) { for (size_t j = 0; j < res.size(); j++) { cv::rectangle(img, res[j].bbox, cv::Scalar(255, 0, 255), 2); cv::putText(img, std::to_string(res[j].lable) + "-" + std::to_string(res[j].conf), cv::Point(res[j].bbox.x, res[j].bbox.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 0, 255), 2); } } void yolov10_infer_without_process() { std::string videoPath = "E:\\Text_dataset\\car_test.mov"; std::string model_path = "E:\\Text_Model\\yolov10s_openvino_model\\yolov10s.xml"; ov::Core core; auto model = core.read_model(model_path); auto compiled_model = core.compile_model(model, "CPU"); ov::InferRequest request = compiled_model.create_infer_request(); cv::VideoCapture capture(videoPath); if (!capture.isOpened()) { std::cerr << "ERROR: " << std::endl; return; } float factor = 0; request.get_input_tensor().set_shape(std::vector{1, 3, 640, 640}); std::vector inputData(640 * 640 * 3); std::chrono::time_point t_beg; std::chrono::time_point t_end; while (true) { cv::Mat frame; if (!capture.read(frame)) { break; } t_beg = std::chrono::high_resolution_clock::now(); pre_process(&frame, 640, &factor, inputData); memcpy(request.get_input_tensor().data(), inputData.data(), 640 * 640 * 3); request.infer(); float* output_data = request.get_output_tensor().data(); std::vector result = post_process(output_data, factor, 300); t_end = std::chrono::high_resolution_clock::now(); cv::putText(frame, "FPS: " + std::to_string(1000.0 / std::chrono::duration(t_end - t_beg).count()) + ", Time: " + std::to_string(std::chrono::duration(t_end - t_beg).count()) + "ms", cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2); draw_bbox(frame, result); imshow("Frame", frame); cv::waitKey(1); //30 } cv::destroyAllWindows(); return; } void yolov10_infer_ansy_without_process() { std::string videoPath = "E:\\Text_dataset\\car_test.mov"; std::string model_path = "E:\\Text_Model\\yolov10s_openvino_model\\yolov10s.xml"; ov::Core core; auto model = core.read_model(model_path); auto compiled_model = core.compile_model(model, "CPU"); std::vectorrequests = { compiled_model.create_infer_request(), compiled_model.create_infer_request() }; cv::VideoCapture capture(videoPath); if (!capture.isOpened()) { std::cerr << "ERROR:" << std::endl; return; } float factor = 0; requests[0].get_input_tensor().set_shape(std::vector{1, 3, 640, 640}); requests[1].get_input_tensor().set_shape(std::vector{1, 3, 640, 640}); cv::Mat frame; capture.read(frame); std::vector inputData(640 * 640 * 3); pre_process(&frame, 640, &factor, inputData); memcpy(requests[0].get_input_tensor().data(), inputData.data(), 640 * 640 * 3); requests[0].start_async(); std::chrono::time_point t_beg; std::chrono::time_point t_end; while (true) { cv::Mat next_frame; if (!capture.read(next_frame)) { break; } t_beg = std::chrono::high_resolution_clock::now(); pre_process(&next_frame, 640, &factor, inputData); memcpy(requests[1].get_input_tensor().data(), inputData.data(), 640 * 640 * 3); requests[1].start_async(); requests[0].wait(); float* output_data = requests[0].get_output_tensor().data(); std::vector result = post_process(output_data, factor, 300); t_end = std::chrono::high_resolution_clock::now(); draw_bbox(frame, result); cv::putText(frame, "FPS: " + std::to_string(1000.0 / std::chrono::duration(t_end - t_beg).count()) + ", Time: " + std::to_string(std::chrono::duration(t_end - t_beg).count()) + "ms", cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2); imshow("Frame", frame); cv::waitKey(1); //30 frame = next_frame; std::swap(requests[0], requests[1]); } cv::destroyAllWindows(); return; } }