Use CPU resize before upload to GPU to remove PCIe bottleneck
This commit is contained in:
@@ -303,31 +303,27 @@ namespace ANSCENTER {
|
||||
return embedding;
|
||||
}
|
||||
|
||||
// GPU preprocessing pipeline
|
||||
// CPU preprocessing: resize + color convert, then upload small image
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat d_img;
|
||||
|
||||
// Upload to GPU
|
||||
d_img.upload(inputImage, stream);
|
||||
|
||||
// Handle grayscale conversion on GPU
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::cuda::GpuMat d_bgr;
|
||||
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
|
||||
d_img = d_bgr;
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Resize on GPU if needed
|
||||
if (inputImage.cols != FACE_WIDTH || inputImage.rows != FACE_HEIGHT) {
|
||||
cv::cuda::GpuMat d_resized;
|
||||
cv::cuda::resize(d_img, d_resized, cv::Size(FACE_WIDTH, FACE_HEIGHT),
|
||||
0, 0, cv::INTER_LINEAR, stream);
|
||||
d_img = d_resized;
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(FACE_WIDTH, FACE_HEIGHT), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// BGR to RGB conversion on GPU
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat d_rgb;
|
||||
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
|
||||
d_rgb.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Prepare inference inputs
|
||||
std::vector<cv::cuda::GpuMat> inputVec;
|
||||
@@ -404,27 +400,24 @@ namespace ANSCENTER {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Upload to GPU
|
||||
cv::cuda::GpuMat d_img;
|
||||
d_img.upload(roi, stream);
|
||||
|
||||
// Handle grayscale conversion on GPU
|
||||
if (roi.channels() == 1) {
|
||||
cv::cuda::GpuMat d_bgr;
|
||||
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
|
||||
d_img = d_bgr;
|
||||
// CPU preprocessing: resize + color convert, then upload small image
|
||||
cv::Mat srcImg = roi;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Resize on GPU if needed
|
||||
if (roi.cols != FACE_WIDTH || roi.rows != FACE_HEIGHT) {
|
||||
cv::cuda::GpuMat d_resized;
|
||||
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, stream);
|
||||
d_img = d_resized;
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
|
||||
cv::resize(srcImg, cpuResized, targetSize, 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// BGR to RGB conversion on GPU
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat d_rgb;
|
||||
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
|
||||
d_rgb.upload(cpuRGB, stream);
|
||||
|
||||
batchGpu.emplace_back(std::move(d_rgb));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user