Resize and color-convert on the CPU before uploading to the GPU, so only the small preprocessed image crosses PCIe (removes the PCIe transfer bottleneck)

This commit is contained in:
2026-04-04 22:29:08 +11:00
parent e134ebdf15
commit 98681f4da6
15 changed files with 572 additions and 493 deletions

View File

@@ -303,31 +303,27 @@ namespace ANSCENTER {
return embedding;
}
// GPU preprocessing pipeline
// CPU preprocessing: resize + color convert, then upload small image
cv::cuda::Stream stream;
cv::cuda::GpuMat d_img;
// Upload to GPU
d_img.upload(inputImage, stream);
// Handle grayscale conversion on GPU
if (inputImage.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
// Resize on GPU if needed
if (inputImage.cols != FACE_WIDTH || inputImage.rows != FACE_HEIGHT) {
cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, cv::Size(FACE_WIDTH, FACE_HEIGHT),
0, 0, cv::INTER_LINEAR, stream);
d_img = d_resized;
cv::Mat cpuResized;
if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
cv::resize(srcImg, cpuResized, cv::Size(FACE_WIDTH, FACE_HEIGHT), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
// BGR to RGB conversion on GPU
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
d_rgb.upload(cpuRGB, stream);
stream.waitForCompletion();
// Prepare inference inputs
std::vector<cv::cuda::GpuMat> inputVec;
@@ -404,27 +400,24 @@ namespace ANSCENTER {
continue;
}
// Upload to GPU
cv::cuda::GpuMat d_img;
d_img.upload(roi, stream);
// Handle grayscale conversion on GPU
if (roi.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
// CPU preprocessing: resize + color convert, then upload small image
cv::Mat srcImg = roi;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
// Resize on GPU if needed
if (roi.cols != FACE_WIDTH || roi.rows != FACE_HEIGHT) {
cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, stream);
d_img = d_resized;
cv::Mat cpuResized;
if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
cv::resize(srcImg, cpuResized, targetSize, 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
// BGR to RGB conversion on GPU
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
d_rgb.upload(cpuRGB, stream);
batchGpu.emplace_back(std::move(d_rgb));
}