#pragma once /// /// //https://www.cnblogs.com/guojin-blogs/p/18258877 /// wget https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt // yolo export model = yolov10s.pt format = onnx opset = 13 simplify /// #include "opencv2/opencv.hpp" #include #include #include "cuda.h" #include "NvInfer.h" #include "NvOnnxParser.h" namespace Yolov10RT { class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { if (severity <= Severity::kWARNING) std::cout << msg << std::endl; } } logger; struct DetResult { cv::Rect bbox; float conf; int lable; DetResult(cv::Rect bbox, float conf, int lable) :bbox(bbox), conf(conf), lable(lable) {} }; void onnxToEngine(const char* onnxFile, int memorySize) { std::string path(onnxFile); std::string::size_type iPos = (path.find_last_of('\\') + 1) == 0 ? path.find_last_of('/') + 1 : path.find_last_of('\\') + 1; std::string modelPath = path.substr(0, iPos);// std::string modelName = path.substr(iPos, path.length() - iPos);// std::string modelName_ = modelName.substr(0, modelName.rfind("."));// std::string engineFile = modelPath + modelName_ + ".engine"; nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger); #if NV_TENSORRT_MAJOR >= 10 nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0); #else const auto explicitBatch = 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); #endif nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger); parser->parseFromFile(onnxFile, 2); for (int i = 0; i < parser->getNbErrors(); ++i) { std::cout << "load error: " << parser->getError(i)->desc() << std::endl; } printf("tensorRT load mask onnx model successfully!!!...\n"); nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); #if NV_TENSORRT_MAJOR < 10 config->setMaxWorkspaceSize(1024 * 1024 * memorySize); #else config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1024ULL * 1024 * memorySize); #endif config->setFlag(nvinfer1::BuilderFlag::kFP16); #if NV_TENSORRT_MAJOR >= 10 nvinfer1::IHostMemory* plan = builder->buildSerializedNetwork(*network, *config); std::cout << "try to save engine file now~~~" << std::endl; std::ofstream filePtr(engineFile, std::ios::binary); if (!filePtr) { std::cerr << "could not open plan output file" << std::endl; return; } filePtr.write(reinterpret_cast(plan->data()), plan->size()); delete plan; delete network; delete parser; #else nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); std::cout << "try to save engine file now~~~" << std::endl; std::ofstream filePtr(engineFile, std::ios::binary); if (!filePtr) { std::cerr << "could not open plan output file" << std::endl; return; } nvinfer1::IHostMemory* modelStream = engine->serialize(); filePtr.write(reinterpret_cast(modelStream->data()), modelStream->size()); modelStream->destroy(); engine->destroy(); network->destroy(); parser->destroy(); #endif std::cout << "convert onnx model to TensorRT engine model successfully!" << std::endl; } void preProcess(cv::Mat* img, int length, float* factor, std::vector& data) { cv::Mat mat; int rh = img->rows; int rw = img->cols; int rc = img->channels(); cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB); int maxImageLength = rw > rh ? rw : rh; cv::Mat maxImage = cv::Mat::zeros(maxImageLength, maxImageLength, CV_8UC3); maxImage = maxImage * 255; cv::Rect roi(0, 0, rw, rh); mat.copyTo(cv::Mat(maxImage, roi)); cv::Mat resizeImg; cv::resize(maxImage, resizeImg, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR); *factor = (float)((float)maxImageLength / (float)length); resizeImg.convertTo(resizeImg, CV_32FC3, 1 / 255.0); rh = resizeImg.rows; rw = resizeImg.cols; rc = resizeImg.channels(); for (int i = 0; i < rc; ++i) { cv::extractChannel(resizeImg, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i); } } std::vector postProcess(float* result, float factor, int outputLength) { std::vector positionBoxes; std::vector classIds; std::vector confidences; // Preprocessing output results for (int i = 0; i < outputLength; i++) { int s = 6 * i; if ((float)result[s + 4] > 0.2) { float cx = result[s + 0]; float cy = result[s + 1]; float dx = result[s + 2]; float dy = result[s + 3]; int x = (int)((cx)*factor); int y = (int)((cy)*factor); int width = (int)((dx - cx) * factor); int height = (int)((dy - cy) * factor); cv::Rect box(x, y, width, height); positionBoxes.push_back(box); classIds.push_back((int)result[s + 5]); confidences.push_back((float)result[s + 4]); } } std::vector re; for (int i = 0; i < positionBoxes.size(); i++) { DetResult det(positionBoxes[i], confidences[i], classIds[i]); re.push_back(det); } return re; } void drawBbox(cv::Mat& img, std::vector& res) { for (size_t j = 0; j < res.size(); j++) { cv::rectangle(img, res[j].bbox, cv::Scalar(255, 0, 255), 2); cv::putText(img, std::to_string(res[j].lable) + "-" + std::to_string(res[j].conf), cv::Point(res[j].bbox.x, res[j].bbox.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 0, 255), 2); } } std::shared_ptr creatContext(std::string modelPath) { std::ifstream filePtr(modelPath, std::ios::binary); if (!filePtr.good()) { std::cerr << "Errror" << std::endl; return std::shared_ptr(); } size_t size = 0; filePtr.seekg(0, filePtr.end); // size = filePtr.tellg(); // filePtr.seekg(0, filePtr.beg); // char* modelStream = new char[size]; filePtr.read(modelStream, size); filePtr.close(); nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger); nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(modelStream, size); return std::shared_ptr(engine->createExecutionContext()); } void yolov10Infer() { const char* videoPath = "E:\\Text_dataset\\car_test.mov"; const char* enginePath = "E:\\Text_Model\\yolov10s.engine"; std::shared_ptr context = creatContext(enginePath); cv::VideoCapture capture(videoPath); if (!capture.isOpened()) { std::cerr << "ERROR:" << std::endl; return; } cudaStream_t stream; cudaStreamCreate(&stream); void* inputSrcDevice; void* outputSrcDevice; cudaMalloc(&inputSrcDevice, 3 * 640 * 640 * sizeof(float)); cudaMalloc(&outputSrcDevice, 1 * 300 * 6 * sizeof(float)); std::vector output_data(300 * 6); std::vector inputData(640 * 640 * 3); while (true) { cv::Mat frame; if (!capture.read(frame)) { break; } float factor = 0; preProcess(&frame, 640, &factor, inputData); cudaMemcpyAsync(inputSrcDevice, inputData.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice, stream); #if NV_TENSORRT_MAJOR >= 10 context->setTensorAddress("images", inputSrcDevice); context->setTensorAddress("output0", outputSrcDevice); context->enqueueV3(stream); #else void* bindings[] = { inputSrcDevice, outputSrcDevice }; context->enqueueV2((void**)bindings, stream, nullptr); #endif cudaMemcpyAsync(output_data.data(), outputSrcDevice, 300 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); std::vector result = postProcess(output_data.data(), factor, 300); drawBbox(frame, result); imshow("Frame", frame); cv::waitKey(10); } cv::destroyAllWindows(); } }