Use CPU resize before upload to GPU to remove PCIe bottleneck

This commit is contained in:
2026-04-04 22:29:08 +11:00
parent e134ebdf15
commit 98681f4da6
15 changed files with 572 additions and 493 deletions

View File

@@ -23,6 +23,29 @@ void Engine<T>::transformOutput(std::vector<std::vector<std::vector<T>>> &input,
}
output = std::move(input[0][0]);
}
// CPU letterbox resize — same logic as the GPU version but runs on CPU.
// Used in Preprocess to resize BEFORE GPU upload, reducing PCIe transfer
// from 25 MB (4K) to 1.2 MB (640×640) — 20x less bandwidth.
//
// The input is scaled by a single factor (the smaller of width/cols and
// height/rows) so its aspect ratio is preserved, then placed in the top-left
// corner of a height x width canvas; the remaining right/bottom area is
// filled with bgcolor.
//
// @param input    Source image. An empty image yields an empty result.
// @param height   Target canvas height in pixels.
// @param width    Target canvas width in pixels.
// @param bgcolor  Fill value for the padded area.
// @return A height x width image of the same type as input, or an empty
//         cv::Mat when input is empty or either target dimension is zero.
template <typename T>
cv::Mat Engine<T>::cpuResizeKeepAspectRatioPadRightBottom(const cv::Mat& input,
                                                          size_t height, size_t width,
                                                          const cv::Scalar& bgcolor) {
    // A zero-sized target cannot hold any pixels; treat it like empty input
    // instead of letting cv::resize fail on an empty destination size.
    if (input.empty() || height == 0 || width == 0) return cv::Mat();

    const int canvasW = static_cast<int>(width);
    const int canvasH = static_cast<int>(height);

    const float r = std::min(static_cast<float>(width) / input.cols,
                             static_cast<float>(height) / input.rows);

    // Truncation (not rounding) is preserved from the original implementation.
    // Clamp to [1, canvas]: a very elongated input would otherwise truncate to
    // a 0-pixel side (cv::resize rejects an empty size), and float error must
    // never push the ROI outside the canvas.
    const int unpad_w = std::max(1, std::min(canvasW, static_cast<int>(r * input.cols)));
    const int unpad_h = std::max(1, std::min(canvasH, static_cast<int>(r * input.rows)));

    cv::Mat out(canvasH, canvasW, input.type(), bgcolor);

    // Resize straight into the top-left ROI of the canvas. The ROI already has
    // the requested size and type, so cv::resize writes in place and no
    // intermediate image or extra copy is needed.
    cv::Mat roi = out(cv::Rect(0, 0, unpad_w, unpad_h));
    cv::resize(input, roi, roi.size(), 0, 0, cv::INTER_LINEAR);
    return out;
}
template <typename T>
cv::cuda::GpuMat Engine<T>::resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat& input,
size_t height, size_t width,