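Resize, center-pad, and convert BGR->RGB on the CPU before uploading to the GPU, so only the small network-sized image crosses PCIe (reduces the PCIe transfer bottleneck)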
This commit is contained in:
@@ -665,38 +665,37 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
if (!usedNV12) {
|
||||
// Standard BGR upload + resize + center-pad path
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat d_bgr;
|
||||
|
||||
// CPU center-padded letterbox + BGR->RGB, then upload small image
|
||||
cv::Mat srcImg;
|
||||
if (input.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(input, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
d_bgr.upload(img3Channel, stream);
|
||||
}
|
||||
else if (input.channels() == 3) {
|
||||
d_bgr.upload(input, stream);
|
||||
}
|
||||
else {
|
||||
cv::cvtColor(input, srcImg, cv::COLOR_GRAY2BGR);
|
||||
} else if (input.channels() == 3) {
|
||||
srcImg = input;
|
||||
} else {
|
||||
this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
|
||||
cv::cuda::GpuMat d_rgb;
|
||||
cv::cuda::GpuMat d_resized;
|
||||
// CPU resize to unpadded size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != new_unpad_h || srcImg.cols != new_unpad_w) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU center-pad to net_w x net_h
|
||||
cv::Mat cpuPadded(net_h, net_w, CV_8UC3, cv::Scalar(0, 0, 0));
|
||||
cpuResized.copyTo(cpuPadded(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuPadded, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small padded image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat d_padded;
|
||||
|
||||
cv::cuda::cvtColor(d_bgr, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
|
||||
cv::cuda::resize(d_rgb, d_resized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR, stream);
|
||||
|
||||
d_padded.create(net_h, net_w, d_resized.type());
|
||||
d_padded.setTo(cv::Scalar(0, 0, 0), stream);
|
||||
|
||||
cv::Rect roi(dw, dh, new_unpad_w, new_unpad_h > 0 ? new_unpad_h : 0);
|
||||
roi.width = new_unpad_w;
|
||||
roi.height = new_unpad_h;
|
||||
d_resized.copyTo(d_padded(roi), stream);
|
||||
|
||||
d_padded.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
std::vector<cv::cuda::GpuMat> inputVec;
|
||||
|
||||
Reference in New Issue
Block a user