Use CPU resize before upload to GPU to remove PCIe bottleneck

This commit is contained in:
2026-04-04 22:29:08 +11:00
parent e134ebdf15
commit 98681f4da6
15 changed files with 572 additions and 493 deletions

View File

@@ -69,7 +69,13 @@
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION34.log''\\).Count\")", "Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION34.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION35.log''\\).Count\")", "Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION35.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION36.log''\\).Count\")", "Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION36.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION37.log''\\).Count\")" "Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION37.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION38.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION39.log''\\).Count\")",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION40.log''\\).Count\")",
"Bash(python -c \":*)",
"Bash(find /c/Projects/CLionProjects/ANSCORE -type d -name *ANSODEngine*)",
"Bash(powershell -Command \"\\(Get-Content ''C:\\\\Users\\\\nghia\\\\Downloads\\\\ANSLEGION41.log''\\).Count\")"
] ]
} }
} }

View File

@@ -284,7 +284,13 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
// fatal "illegal memory access" that permanently corrupts the CUDA context. // fatal "illegal memory access" that permanently corrupts the CUDA context.
// //
// Pool-mode slots have their own busy-flag dispatch so they do NOT need this. // Pool-mode slots have their own busy-flag dispatch so they do NOT need this.
auto _mutexWaitStart = std::chrono::steady_clock::now();
std::lock_guard<std::mutex> inferenceLock(m_inferenceMutex); std::lock_guard<std::mutex> inferenceLock(m_inferenceMutex);
auto _mutexAcquired = std::chrono::steady_clock::now();
double _mutexWaitMs = std::chrono::duration<double, std::milli>(_mutexAcquired - _mutexWaitStart).count();
if (_mutexWaitMs > 50.0) {
ANS_DBG("TRT_Engine", "MUTEX WAIT: %.1fms (queued behind another inference)", _mutexWaitMs);
}
// ============================================================================ // ============================================================================
// THREAD-SAFE GPU CONTEXT // THREAD-SAFE GPU CONTEXT
@@ -955,6 +961,20 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
} }
} }
// ============================================================================
// Per-inference total timing breakdown (mutex wait + preprocess + GPU)
// ============================================================================
{
double totalMs = std::chrono::duration<double, std::milli>(
std::chrono::steady_clock::now() - _mutexWaitStart).count();
double gpuMs = totalMs - _mutexWaitMs; // Everything after mutex acquired
// Log every inference that takes >100ms total (including mutex wait)
if (totalMs > 100.0) {
ANS_DBG("TRT_Timing", "total=%.1fms (mutex=%.1fms gpu=%.1fms) batch=%d active=%d",
totalMs, _mutexWaitMs, gpuMs, batchSize, s_globalActiveInf.load());
}
}
// ============================================================================ // ============================================================================
// SM=100% DETECTOR — end-of-inference timing // SM=100% DETECTOR — end-of-inference timing
// ============================================================================ // ============================================================================

View File

@@ -23,6 +23,29 @@ void Engine<T>::transformOutput(std::vector<std::vector<std::vector<T>>> &input,
} }
output = std::move(input[0][0]); output = std::move(input[0][0]);
} }
// CPU letterbox resize, intended as the host-side counterpart of
// resizeKeepAspectRatioPadRightBottom (the GpuMat overload).
//
// Scales `input` by a single factor so it fits inside a width x height canvas
// without changing its aspect ratio, places the scaled image in the top-left
// corner, and fills the remaining right/bottom area with `bgcolor`.
//
// Used in Preprocess to resize BEFORE GPU upload, so only the model-sized
// image crosses PCIe: ~25 MB for a 4K BGR frame vs ~1.2 MB for 640x640
// (about 20x less bandwidth).
//
// @param input    Source image; its type (depth/channels) is preserved.
// @param height   Target canvas height in pixels.
// @param width    Target canvas width in pixels.
// @param bgcolor  Fill value for the padded region.
// @return         A height x width image of input.type(), or an empty Mat when
//                 `input` is empty or either target dimension is zero.
template <typename T>
cv::Mat Engine<T>::cpuResizeKeepAspectRatioPadRightBottom(const cv::Mat& input,
                                                          size_t height, size_t width,
                                                          const cv::Scalar& bgcolor) {
    // Nothing sensible can be produced for an empty source or an empty canvas.
    if (input.empty() || height == 0 || width == 0) return cv::Mat();

    const int targetW = static_cast<int>(width);
    const int targetH = static_cast<int>(height);

    // One scale factor for both axes: the tighter of the two fits.
    const float r = std::min(static_cast<float>(width) / input.cols,
                             static_cast<float>(height) / input.rows);

    // Truncation can produce 0 for extreme aspect ratios (e.g. 10000x1 into
    // 640x640), and cv::resize rejects a zero-sized dsize. Clamp to at least
    // one pixel, and to the canvas size so the ROI below always fits even if
    // float rounding overshoots.
    const int unpad_w = std::min(targetW, std::max(1, static_cast<int>(r * input.cols)));
    const int unpad_h = std::min(targetH, std::max(1, static_cast<int>(r * input.rows)));

    cv::Mat re;
    cv::resize(input, re, cv::Size(unpad_w, unpad_h), 0, 0, cv::INTER_LINEAR);

    // Canvas pre-filled with the background colour; the scaled image is
    // copied into the top-left corner, leaving padding on the right/bottom.
    cv::Mat out(targetH, targetW, input.type(), bgcolor);
    re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
    return out;
}
template <typename T> template <typename T>
cv::cuda::GpuMat Engine<T>::resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat& input, cv::cuda::GpuMat Engine<T>::resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat& input,
size_t height, size_t width, size_t height, size_t width,

View File

@@ -674,26 +674,22 @@ namespace ANSCENTER {
} }
try { try {
// Fix #8: Use pooled GPU buffers to avoid per-frame allocation // CPU preprocessing: resize + BGR→RGB before GPU upload
m_gpuImg.upload(inputImage, m_gpuStream); // Reduces PCIe transfer and eliminates GPU cvtColor/resize overhead
cv::Mat srcImg = inputImage;
// Handle grayscale conversion on GPU if (srcImg.channels() == 1) {
if (inputImage.channels() == 1) { cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
cv::cuda::cvtColor(m_gpuImg, m_gpuRgb, cv::COLOR_GRAY2BGR, 0, m_gpuStream);
std::swap(m_gpuImg, m_gpuRgb);
} }
cv::Mat cpuResized;
// Resize on GPU if needed if (srcImg.cols != GPU_FACE_WIDTH || srcImg.rows != GPU_FACE_HEIGHT) {
if (inputImage.cols != GPU_FACE_WIDTH || inputImage.rows != GPU_FACE_HEIGHT) { cv::resize(srcImg, cpuResized, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT), 0, 0, cv::INTER_LINEAR);
cv::cuda::resize(m_gpuImg, m_gpuResized, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT), } else {
0, 0, cv::INTER_LINEAR, m_gpuStream); cpuResized = srcImg;
} }
else { cv::Mat cpuRGB;
m_gpuResized = m_gpuImg; cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
} m_gpuRgb.upload(cpuRGB, m_gpuStream);
m_gpuStream.waitForCompletion();
// BGR to RGB conversion on GPU
cv::cuda::cvtColor(m_gpuResized, m_gpuRgb, cv::COLOR_BGR2RGB, 0, m_gpuStream);
// Prepare inference inputs // Prepare inference inputs
std::vector<cv::cuda::GpuMat> inputVec; std::vector<cv::cuda::GpuMat> inputVec;
@@ -781,33 +777,39 @@ namespace ANSCENTER {
batchGpu.reserve(chunkEnd - chunkStart); batchGpu.reserve(chunkEnd - chunkStart);
for (size_t i = chunkStart; i < chunkEnd; i++) { for (size_t i = chunkStart; i < chunkEnd; i++) {
cv::cuda::GpuMat d_img;
// Use GPU-resident face if available (NV12 affine warp path), // Use GPU-resident face if available (NV12 affine warp path),
// otherwise upload from CPU (standard path) // otherwise do CPU resize + BGR→RGB before upload
if (i < gpuFaceROIs.size() && !gpuFaceROIs[i].empty()) { if (i < gpuFaceROIs.size() && !gpuFaceROIs[i].empty()) {
d_img = gpuFaceROIs[i]; // already on GPU — skip upload cv::cuda::GpuMat d_img = gpuFaceROIs[i]; // already on GPU
} else {
const auto& roi = faceROIs[i];
if (roi.empty()) continue;
d_img.upload(roi, m_gpuStream);
if (roi.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, m_gpuStream);
d_img = d_bgr;
}
}
if (d_img.cols != GPU_FACE_WIDTH || d_img.rows != GPU_FACE_HEIGHT) { if (d_img.cols != GPU_FACE_WIDTH || d_img.rows != GPU_FACE_HEIGHT) {
cv::cuda::GpuMat d_resized; cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, m_gpuStream); cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, m_gpuStream);
d_img = d_resized; d_img = d_resized;
} }
cv::cuda::GpuMat d_rgb; cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, m_gpuStream); cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, m_gpuStream);
batchGpu.emplace_back(std::move(d_rgb)); batchGpu.emplace_back(std::move(d_rgb));
} else {
const auto& roi = faceROIs[i];
if (roi.empty()) continue;
// CPU preprocessing: resize + BGR→RGB before upload
cv::Mat srcImg = roi;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
cv::Mat cpuResized;
if (srcImg.cols != GPU_FACE_WIDTH || srcImg.rows != GPU_FACE_HEIGHT) {
cv::resize(srcImg, cpuResized, targetSize, 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb;
d_rgb.upload(cpuRGB, m_gpuStream);
batchGpu.emplace_back(std::move(d_rgb));
}
} }
FR_END_TIMER(gpu_preproc, "RunArcFaceBatch GPU preprocess (" + std::to_string(batchGpu.size()) + " faces)"); FR_END_TIMER(gpu_preproc, "RunArcFaceBatch GPU preprocess (" + std::to_string(batchGpu.size()) + " faces)");

View File

@@ -303,31 +303,27 @@ namespace ANSCENTER {
return embedding; return embedding;
} }
// GPU preprocessing pipeline // CPU preprocessing: resize + color convert, then upload small image
cv::cuda::Stream stream; cv::cuda::Stream stream;
cv::cuda::GpuMat d_img;
// Upload to GPU cv::Mat srcImg = inputImage;
d_img.upload(inputImage, stream); if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
// Handle grayscale conversion on GPU
if (inputImage.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
} }
// Resize on GPU if needed cv::Mat cpuResized;
if (inputImage.cols != FACE_WIDTH || inputImage.rows != FACE_HEIGHT) { if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
cv::cuda::GpuMat d_resized; cv::resize(srcImg, cpuResized, cv::Size(FACE_WIDTH, FACE_HEIGHT), 0, 0, cv::INTER_LINEAR);
cv::cuda::resize(d_img, d_resized, cv::Size(FACE_WIDTH, FACE_HEIGHT), } else {
0, 0, cv::INTER_LINEAR, stream); cpuResized = srcImg;
d_img = d_resized;
} }
// BGR to RGB conversion on GPU cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb; cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream); d_rgb.upload(cpuRGB, stream);
stream.waitForCompletion();
// Prepare inference inputs // Prepare inference inputs
std::vector<cv::cuda::GpuMat> inputVec; std::vector<cv::cuda::GpuMat> inputVec;
@@ -404,27 +400,24 @@ namespace ANSCENTER {
continue; continue;
} }
// Upload to GPU // CPU preprocessing: resize + color convert, then upload small image
cv::cuda::GpuMat d_img; cv::Mat srcImg = roi;
d_img.upload(roi, stream); if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
// Handle grayscale conversion on GPU
if (roi.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
} }
// Resize on GPU if needed cv::Mat cpuResized;
if (roi.cols != FACE_WIDTH || roi.rows != FACE_HEIGHT) { if (srcImg.rows != FACE_HEIGHT || srcImg.cols != FACE_WIDTH) {
cv::cuda::GpuMat d_resized; cv::resize(srcImg, cpuResized, targetSize, 0, 0, cv::INTER_LINEAR);
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, stream); } else {
d_img = d_resized; cpuResized = srcImg;
} }
// BGR to RGB conversion on GPU cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb; cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream); d_rgb.upload(cpuRGB, stream);
batchGpu.emplace_back(std::move(d_rgb)); batchGpu.emplace_back(std::move(d_rgb));
} }

View File

@@ -178,10 +178,10 @@ std::vector<TextBox> RTOCRDetector::Detect(const cv::Mat& image,
} }
if (!usedNV12) { if (!usedNV12) {
// Fallback: standard BGR upload // Fallback: CPU resize then upload small image to GPU
cv::cuda::GpuMat gpuImg; cv::Mat cpuResized;
gpuImg.upload(image); cv::resize(image, cpuResized, resizeShape, 0, 0, cv::INTER_LINEAR);
cv::cuda::resize(gpuImg, gpuResized, resizeShape); gpuResized.upload(cpuResized);
} }
// Keep BGR order (PaddleOCR official does NOT convert BGR->RGB) // Keep BGR order (PaddleOCR official does NOT convert BGR->RGB)

View File

@@ -462,50 +462,46 @@ namespace ANSCENTER {
// Early-out if CUDA context is dead (sticky error from CUVID crash etc.) // Early-out if CUDA context is dead (sticky error from CUVID crash etc.)
if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) return {}; if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) return {};
cv::cuda::Stream stream; // --- CPU preprocessing: resize + BGR→RGB before GPU upload ---
cv::cuda::GpuMat gpuImg; // Reduces PCIe transfer from 25 MB (4K BGR) to 1.2 MB (640×640 RGB).
// With 12 AI tasks uploading concurrently, this eliminates the WDDM
// Resolve source Mat (handle grayscale → BGR on CPU first) // SRW lock convoy that causes 400-580ms preprocess spikes.
if (inputImage.channels() == 1) { cv::Mat srcImg = inputImage;
cv::Mat img3Channel; if (srcImg.channels() == 1) {
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR); cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
gpuImg.upload(img3Channel, stream);
} else {
gpuImg.upload(inputImage, stream);
} }
// GPU: BGR → RGB outMeta.imgHeight = static_cast<float>(srcImg.rows);
cv::cuda::GpuMat gpuRGB; outMeta.imgWidth = static_cast<float>(srcImg.cols);
cv::cuda::cvtColor(gpuImg, gpuRGB, cv::COLOR_BGR2RGB, 0, stream);
outMeta.imgHeight = static_cast<float>(gpuRGB.rows);
outMeta.imgWidth = static_cast<float>(gpuRGB.cols);
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) { if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min( outMeta.ratio = 1.f / std::min(
inputDims[0].d[2] / static_cast<float>(gpuRGB.cols), inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(gpuRGB.rows)); inputDims[0].d[1] / static_cast<float>(srcImg.rows));
// Check if model is classification (output ndims <= 2)
const auto& outputDims = m_trtEngine->getOutputDims(); const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2; const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
cv::cuda::GpuMat gpuResized; // CPU resize to model input size
if (gpuRGB.rows != inputH || gpuRGB.cols != inputW) { cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) { if (isClassification) {
// Classification: direct resize (no letterbox padding) cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
cv::cuda::resize(gpuRGB, gpuResized, cv::Size(inputW, inputH), } else {
0, 0, cv::INTER_LINEAR, stream); cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
else {
// Detection/Seg/Pose/OBB: letterbox resize + right-bottom pad (on GPU)
gpuResized = Engine<float>::resizeKeepAspectRatioPadRightBottom(
gpuRGB, inputH, inputW);
} }
} else { } else {
gpuResized = gpuRGB; cpuResized = srcImg;
} }
// CPU BGR → RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU (1.2 MB instead of 25 MB for 4K)
cv::cuda::Stream stream;
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
stream.waitForCompletion(); stream.waitForCompletion();
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
@@ -878,26 +874,18 @@ namespace ANSCENTER {
"Empty input image at index " + std::to_string(i), __FILE__, __LINE__); "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
return {}; return {};
} }
cv::cuda::GpuMat img; // CPU preprocessing: resize + BGR→RGB before GPU upload
if (inputImage.channels() == 1) { cv::Mat srcImg = inputImage;
cv::Mat img3Channel; if (srcImg.channels() == 1) {
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR); cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream); } else if (srcImg.channels() != 3) {
}
else if (inputImage.channels() == 3) {
img.upload(inputImage, stream);
}
else {
_logger.LogError("ANSRTYOLO::PreprocessBatch", _logger.LogError("ANSRTYOLO::PreprocessBatch",
"Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__); "Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__);
return {}; return {};
} }
cv::cuda::GpuMat imgRGB; outMetadata.imgHeights[i] = srcImg.rows;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream); outMetadata.imgWidths[i] = srcImg.cols;
outMetadata.imgHeights[i] = imgRGB.rows;
outMetadata.imgWidths[i] = imgRGB.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogError("ANSRTYOLO::PreprocessBatch", _logger.LogError("ANSRTYOLO::PreprocessBatch",
"Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__); "Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__);
@@ -907,23 +895,27 @@ namespace ANSCENTER {
const auto& outputDims = m_trtEngine->getOutputDims(); const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2; const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
const float scaleW = inputW / static_cast<float>(imgRGB.cols); const float scaleW = inputW / static_cast<float>(srcImg.cols);
const float scaleH = inputH / static_cast<float>(imgRGB.rows); const float scaleH = inputH / static_cast<float>(srcImg.rows);
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH); outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
cv::cuda::GpuMat resized; cv::Mat cpuResized;
if (imgRGB.rows != inputH || imgRGB.cols != inputW) { if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) { if (isClassification) {
cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream); cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else { } else {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW); cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
} }
} } else {
else { cpuResized = srcImg;
resized = imgRGB;
} }
batchProcessed.push_back(std::move(resized)); cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();
@@ -1804,10 +1796,10 @@ namespace ANSCENTER {
std::vector<Object> ANSRTYOLO::DetectObjects(const cv::Mat& inputImage, std::vector<Object> ANSRTYOLO::DetectObjects(const cv::Mat& inputImage,
const std::string& camera_id) { const std::string& camera_id) {
try { try {
// --- Debug timer helper (zero-cost when _debugFlag == false) --- // --- Debug timer helper ---
using Clock = std::chrono::steady_clock; using Clock = std::chrono::steady_clock;
const bool dbg = _debugFlag; const bool dbg = _debugFlag;
auto t0 = dbg ? Clock::now() : Clock::time_point{}; auto t0 = Clock::now(); // Always set — used by ANS_DBG timing output
auto tPrev = t0; auto tPrev = t0;
auto elapsed = [&]() -> double { auto elapsed = [&]() -> double {
auto now = Clock::now(); auto now = Clock::now();
@@ -2045,14 +2037,22 @@ namespace ANSCENTER {
} }
// --- 6. Total pipeline time --- // --- 6. Total pipeline time ---
if (dbg) { {
double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count(); double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
if (dbg) {
_logger.LogInfo("ANSRTYOLO::DetectObjects", _logger.LogInfo("ANSRTYOLO::DetectObjects",
"[DEBUG] " + camera_id + " | TOTAL=" + std::to_string(msTotal) + "[DEBUG] " + camera_id + " | TOTAL=" + std::to_string(msTotal) +
"ms (" + std::to_string(inputImage.cols) + "x" + std::to_string(inputImage.rows) + "ms (" + std::to_string(inputImage.cols) + "x" + std::to_string(inputImage.rows) +
") Results=" + std::to_string(results.size()), ") Results=" + std::to_string(results.size()),
__FILE__, __LINE__); __FILE__, __LINE__);
} }
// DebugView output — controlled by ANSCORE_DEBUGVIEW
double msPreproc = std::chrono::duration<double, std::milli>(_trtStart - t0).count();
ANS_DBG("YOLO_Timing", "cam=%s total=%.1fms preproc=%.1fms inf=%.1fms %dx%d det=%zu %s",
camera_id.c_str(), msTotal, msPreproc, _trtMs,
inputImage.cols, inputImage.rows, results.size(),
usedNV12 ? "NV12" : "BGR");
}
return results; return results;
} }
@@ -2101,7 +2101,7 @@ namespace ANSCENTER {
// --- Debug timer helper --- // --- Debug timer helper ---
using Clock = std::chrono::steady_clock; using Clock = std::chrono::steady_clock;
const bool dbg = _debugFlag; const bool dbg = _debugFlag;
auto t0 = dbg ? Clock::now() : Clock::time_point{}; auto t0 = Clock::now(); // Always set — used by ANS_DBG timing output
auto tPrev = t0; auto tPrev = t0;
auto elapsed = [&]() -> double { auto elapsed = [&]() -> double {
auto now = Clock::now(); auto now = Clock::now();
@@ -2350,9 +2350,10 @@ namespace ANSCENTER {
} }
} }
if (dbg) { {
double msPostprocess = elapsed(); double msPostprocess = dbg ? elapsed() : 0;
double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count(); double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
if (dbg) {
_logger.LogInfo("ANSRTYOLO::DetectObjectsBatch", _logger.LogInfo("ANSRTYOLO::DetectObjectsBatch",
"[DEBUG] " + camera_id + "[DEBUG] " + camera_id +
" batch=" + std::to_string(realCount) + " batch=" + std::to_string(realCount) +
@@ -2364,6 +2365,9 @@ namespace ANSCENTER {
"ms TOTAL=" + std::to_string(msTotal) + "ms", "ms TOTAL=" + std::to_string(msTotal) + "ms",
__FILE__, __LINE__); __FILE__, __LINE__);
} }
ANS_DBG("YOLO_Timing", "cam=%s batch=%d total=%.1fms preproc=%.1fms inf=%.1fms",
camera_id.c_str(), realCount, msTotal, msPreprocess, msInference);
}
return batchDetections; return batchDetections;
} }

View File

@@ -534,27 +534,15 @@ namespace ANSCENTER
const int inputH = inputDims[0].d[1]; const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2]; const int inputW = inputDims[0].d[2];
// Upload the image to GPU memory // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::cuda::Stream stream; // Create a custom stream cv::Mat srcImg = inputImage;
cv::cuda::GpuMat img; if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
if (inputImage.channels() == 1) {
// Convert grayscale to 3-channel BGR before uploading
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
} }
else {
img.upload(inputImage, stream);
}
// Convert BGR to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
// These parameters will be used in the post-processing stage // These parameters will be used in the post-processing stage
outMeta.imgHeight = imgRGB.rows; outMeta.imgHeight = srcImg.rows;
outMeta.imgWidth = imgRGB.cols; outMeta.imgWidth = srcImg.cols;
if (outMeta.imgHeight <= 0 || outMeta.imgWidth <= 0) { if (outMeta.imgHeight <= 0 || outMeta.imgWidth <= 0) {
_logger.LogFatal("TENSORRTCL::Preprocess", "Image height or width is zero", __FILE__, __LINE__); _logger.LogFatal("TENSORRTCL::Preprocess", "Image height or width is zero", __FILE__, __LINE__);
@@ -564,19 +552,26 @@ namespace ANSCENTER
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) { if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f; outMeta.ratio = 1.f;
cv::cuda::GpuMat resized = imgRGB; // Classification: direct CPU resize (no letterbox padding)
cv::Mat cpuResized;
// Classification: direct resize (no letterbox padding) — matches ANSONNXCL if (srcImg.rows != inputH || srcImg.cols != inputW) {
// Must use explicit stream to avoid conflict with CUDA Graph capture on null stream cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) { } else {
cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream); cpuResized = srcImg;
} }
// Wait for all GPU ops to complete before returning GpuMats // CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream;
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
stream.waitForCompletion(); stream.waitForCompletion();
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -811,25 +806,17 @@ namespace ANSCENTER
return {}; return {};
} }
// Upload to GPU // CPU preprocessing: resize + BGR->RGB before GPU upload
cv::cuda::GpuMat img; cv::Mat srcImg = inputImage;
if (inputImage.channels() == 1) { if (srcImg.channels() == 1) {
// Convert grayscale to BGR
cv::Mat img3Channel; cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR); cv::cvtColor(srcImg, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream); srcImg = img3Channel;
} }
else {
img.upload(inputImage, stream);
}
// Convert BGR to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
// Store original dimensions // Store original dimensions
int imgHeight = imgRGB.rows; int imgHeight = srcImg.rows;
int imgWidth = imgRGB.cols; int imgWidth = srcImg.cols;
if (imgHeight <= 0 || imgWidth <= 0) { if (imgHeight <= 0 || imgWidth <= 0) {
_logger.LogFatal("TENSORRTCL::PreprocessBatch", _logger.LogFatal("TENSORRTCL::PreprocessBatch",
@@ -841,26 +828,25 @@ namespace ANSCENTER
outMetadata.imgHeights.push_back(imgHeight); outMetadata.imgHeights.push_back(imgHeight);
outMetadata.imgWidths.push_back(imgWidth); outMetadata.imgWidths.push_back(imgWidth);
// Calculate resize ratio // Classification: ratio is always 1.0
float ratio = 1.f / std::min( outMetadata.ratios.push_back(1.f);
inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows)
);
outMetadata.ratios.push_back(ratio);
// Resize maintaining aspect ratio with padding // Classification: direct CPU resize (no letterbox padding)
cv::cuda::GpuMat resized; cv::Mat cpuResized;
if (imgRGB.rows != inputDims[0].d[1] || imgRGB.cols != inputDims[0].d[2]) { if (srcImg.rows != inputH || srcImg.cols != inputW) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom( cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
imgRGB, inputDims[0].d[1], inputDims[0].d[2] } else {
); cpuResized = srcImg;
}
else {
resized = imgRGB;
} }
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
// Add to batch // Add to batch
batchedImages.push_back(std::move(resized)); batchedImages.push_back(std::move(gpuResized));
} }
// Wait for all GPU operations to complete // Wait for all GPU operations to complete

View File

@@ -508,41 +508,46 @@ namespace ANSCENTER
const auto& inputDims = m_trtEngine->getInputDims(); const auto& inputDims = m_trtEngine->getInputDims();
const int inputH = inputDims[0].d[1]; const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2]; const int inputW = inputDims[0].d[2];
// Upload the image to GPU memory // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
// Set image size parameters from ORIGINAL image
outMeta.imgHeight = srcImg.rows;
outMeta.imgWidth = srcImg.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
}
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream; cv::cuda::Stream stream;
cv::cuda::GpuMat img; cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
if (inputImage.channels() == 1) {
// Convert grayscale to 3-channel BGR before uploading
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
}
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
stream.waitForCompletion(); stream.waitForCompletion();
// Set image size parameters
outMeta.imgHeight = imgRGB.rows;
outMeta.imgWidth = imgRGB.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
cv::cuda::GpuMat resized = imgRGB;
// Resize to the model's expected input size while maintaining aspect ratio with padding
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
}
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -793,19 +798,13 @@ namespace ANSCENTER
"Empty input image at index " + std::to_string(i), __FILE__, __LINE__); "Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
return {}; return {};
} }
cv::cuda::GpuMat img; // CPU preprocessing: resize + BGR->RGB before GPU upload
if (inputImage.channels() == 1) { cv::Mat srcImg = inputImage;
cv::Mat img3Channel; if (srcImg.channels() == 1) {
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR); cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
} }
else { outMetadata.imgHeights[i] = srcImg.rows;
img.upload(inputImage, stream); outMetadata.imgWidths[i] = srcImg.cols;
}
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
outMetadata.imgHeights[i] = imgRGB.rows;
outMetadata.imgWidths[i] = imgRGB.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch", _logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch",
"Image " + std::to_string(i) + " has invalid dimensions (Width: " + "Image " + std::to_string(i) + " has invalid dimensions (Width: " +
@@ -813,13 +812,30 @@ namespace ANSCENTER
std::to_string(outMetadata.imgHeights[i]) + ")", __FILE__, __LINE__); std::to_string(outMetadata.imgHeights[i]) + ")", __FILE__, __LINE__);
return {}; return {};
} }
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
inputH / static_cast<float>(imgRGB.rows)); const auto& outputDims = m_trtEngine->getOutputDims();
cv::cuda::GpuMat resized = imgRGB; const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
if (resized.rows != inputH || resized.cols != inputW) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW); outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
inputH / static_cast<float>(srcImg.rows));
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
} }
batchProcessed.push_back(std::move(resized)); } else {
cpuResized = srcImg;
}
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();
std::vector<std::vector<cv::cuda::GpuMat>> inputs; std::vector<std::vector<cv::cuda::GpuMat>> inputs;

View File

@@ -561,41 +561,46 @@ namespace ANSCENTER
const auto& inputDims = m_trtEngine->getInputDims(); const auto& inputDims = m_trtEngine->getInputDims();
const int inputH = inputDims[0].d[1]; const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2]; const int inputW = inputDims[0].d[2];
// Upload the image to GPU memory // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
// Set image size parameters from ORIGINAL image
outMeta.imgHeight = srcImg.rows;
outMeta.imgWidth = srcImg.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
}
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream; cv::cuda::Stream stream;
cv::cuda::GpuMat img; cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
if (inputImage.channels() == 1) {
// Convert grayscale to 3-channel BGR before uploading
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
}
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
stream.waitForCompletion(); stream.waitForCompletion();
// Set image size parameters
outMeta.imgHeight = imgRGB.rows;
outMeta.imgWidth = imgRGB.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
cv::cuda::GpuMat resized = imgRGB;
// Resize to the model's expected input size while maintaining aspect ratio with padding
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
}
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -891,26 +896,15 @@ namespace ANSCENTER
return {}; return {};
} }
// Upload to GPU // CPU preprocessing: resize + BGR->RGB before GPU upload
cv::cuda::GpuMat img; cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
// Convert grayscale to BGR if needed cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
if (inputImage.channels() == 1) {
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
} }
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
// Store original dimensions // Store original dimensions
outMetadata.imgHeights[i] = imgRGB.rows; outMetadata.imgHeights[i] = srcImg.rows;
outMetadata.imgWidths[i] = imgRGB.cols; outMetadata.imgWidths[i] = srcImg.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogFatal("TENSORRTSEG::PreprocessBatch", _logger.LogFatal("TENSORRTSEG::PreprocessBatch",
@@ -921,17 +915,31 @@ namespace ANSCENTER
return {}; return {};
} }
// Calculate ratio for this image const auto& outputDims = m_trtEngine->getOutputDims();
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols), const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
inputH / static_cast<float>(imgRGB.rows));
// Resize with padding // Calculate ratio for this image
cv::cuda::GpuMat resized = imgRGB; outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
if (resized.rows != inputH || resized.cols != inputW) { inputH / static_cast<float>(srcImg.rows));
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
} }
batchProcessed.push_back(std::move(resized)); cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();

View File

@@ -587,41 +587,46 @@ namespace ANSCENTER
const auto& inputDims = m_trtEngine->getInputDims(); const auto& inputDims = m_trtEngine->getInputDims();
const int inputH = inputDims[0].d[1]; const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2]; const int inputW = inputDims[0].d[2];
// Upload the image to GPU memory // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
// Set image size parameters from ORIGINAL image (before resize)
outMeta.imgHeight = static_cast<float>(srcImg.rows);
outMeta.imgWidth = static_cast<float>(srcImg.cols);
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
}
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream; cv::cuda::Stream stream;
cv::cuda::GpuMat img; cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
if (inputImage.channels() == 1) {
// Convert grayscale to 3-channel BGR before uploading
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
}
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
stream.waitForCompletion(); stream.waitForCompletion();
// Set image size parameters into per-call metadata (not shared members)
outMeta.imgHeight = static_cast<float>(imgRGB.rows);
outMeta.imgWidth = static_cast<float>(imgRGB.cols);
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
cv::cuda::GpuMat resized = imgRGB;
// Resize to the model's expected input size while maintaining aspect ratio with padding
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
}
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -1174,29 +1179,20 @@ namespace ANSCENTER
return {}; return {};
} }
cv::cuda::GpuMat img; // CPU preprocessing: resize + BGR->RGB before GPU upload
cv::Mat srcImg = inputImage;
if (inputImage.channels() == 1) { if (srcImg.channels() == 1) {
cv::Mat img3Channel; cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR); } else if (srcImg.channels() != 3) {
img.upload(img3Channel, stream);
}
else if (inputImage.channels() == 3) {
img.upload(inputImage, stream);
}
else {
_logger.LogError("TENSORRTOD::PreprocessBatch", _logger.LogError("TENSORRTOD::PreprocessBatch",
"Unsupported channel count at index " + std::to_string(i), "Unsupported channel count at index " + std::to_string(i),
__FILE__, __LINE__); __FILE__, __LINE__);
return {}; return {};
} }
cv::cuda::GpuMat imgRGB; // Store in output metadata from ORIGINAL image
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream); outMetadata.imgHeights[i] = srcImg.rows;
outMetadata.imgWidths[i] = srcImg.cols;
// Store in output metadata
outMetadata.imgHeights[i] = imgRGB.rows;
outMetadata.imgWidths[i] = imgRGB.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogError("TENSORRTOD::PreprocessBatch", _logger.LogError("TENSORRTOD::PreprocessBatch",
@@ -1205,20 +1201,30 @@ namespace ANSCENTER
return {}; return {};
} }
const float scaleW = inputW / static_cast<float>(imgRGB.cols); const auto& outputDims = m_trtEngine->getOutputDims();
const float scaleH = inputH / static_cast<float>(imgRGB.rows); const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
outMetadata.ratios[i] = 1.f / std::min(scaleW, scaleH);
cv::cuda::GpuMat resized; const float scaleW = inputW / static_cast<float>(srcImg.cols);
if (imgRGB.rows != inputH || imgRGB.cols != inputW) { const float scaleH = inputH / static_cast<float>(srcImg.rows);
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom( outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
imgRGB, inputH, inputW);
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
} }
else { } else {
resized = imgRGB; cpuResized = srcImg;
} }
batchProcessed.push_back(std::move(resized)); cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();

View File

@@ -519,46 +519,46 @@ namespace ANSCENTER
const int inputH = inputDims[0].d[1]; const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2]; const int inputW = inputDims[0].d[2];
// Upload input image to GPU // --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::cuda::Stream stream; cv::Mat srcImg = inputImage;
cv::cuda::GpuMat img; if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
if (inputImage.empty()) {
_logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "Empty input image", __FILE__, __LINE__);
return {};
} }
// Convert grayscale to BGR if needed outMeta.imgHeight = srcImg.rows;
if (inputImage.channels() == 1) { outMeta.imgWidth = srcImg.cols;
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
}
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
stream.waitForCompletion();
outMeta.imgHeight = imgRGB.rows;
outMeta.imgWidth = imgRGB.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) { if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols), outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows)); inputDims[0].d[1] / static_cast<float>(srcImg.rows));
cv::cuda::GpuMat resized = imgRGB; const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
// Resize to the model's expected input size while maintaining aspect ratio with padding // CPU resize to model input size
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) { cv::Mat cpuResized;
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]); if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
} }
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream;
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
stream.waitForCompletion();
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -1058,26 +1058,15 @@ namespace ANSCENTER
return {}; return {};
} }
// Upload to GPU // CPU preprocessing: resize + BGR->RGB before GPU upload
cv::cuda::GpuMat img; cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
// Convert grayscale to BGR if needed cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
if (inputImage.channels() == 1) {
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
} }
else {
img.upload(inputImage, stream);
}
// Convert to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
// Store original dimensions // Store original dimensions
outMetadata.imgHeights[i] = imgRGB.rows; outMetadata.imgHeights[i] = srcImg.rows;
outMetadata.imgWidths[i] = imgRGB.cols; outMetadata.imgWidths[i] = srcImg.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch", _logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch",
@@ -1088,17 +1077,31 @@ namespace ANSCENTER
return {}; return {};
} }
// Calculate ratio for this image const auto& outputDims = m_trtEngine->getOutputDims();
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols), const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
inputH / static_cast<float>(imgRGB.rows));
// Resize with padding // Calculate ratio for this image
cv::cuda::GpuMat resized = imgRGB; outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
if (resized.rows != inputH || resized.cols != inputW) { inputH / static_cast<float>(srcImg.rows));
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
} }
batchProcessed.push_back(std::move(resized)); cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();

View File

@@ -484,39 +484,47 @@ namespace ANSCENTER
} }
const auto& inputDims = m_trtEngine->getInputDims(); const auto& inputDims = m_trtEngine->getInputDims();
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
outMeta.imgHeight = srcImg.rows;
outMeta.imgWidth = srcImg.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
const auto& outputDims = m_trtEngine->getOutputDims();
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
// CPU resize to model input size
cv::Mat cpuResized;
const int inputH = inputDims[0].d[1];
const int inputW = inputDims[0].d[2];
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
}
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small image to GPU
cv::cuda::Stream stream; cv::cuda::Stream stream;
cv::cuda::GpuMat img; cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
// Upload to GPU
if (inputImage.channels() == 1) {
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
}
else {
img.upload(inputImage, stream);
}
// Convert BGR to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
stream.waitForCompletion(); stream.waitForCompletion();
outMeta.imgHeight = imgRGB.rows;
outMeta.imgWidth = imgRGB.cols;
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
cv::cuda::GpuMat resized = imgRGB;
// Resize to the model's expected input size while maintaining aspect ratio with padding
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
}
// Convert to format expected by our inference engine // Convert to format expected by our inference engine
std::vector<cv::cuda::GpuMat> input{ std::move(resized) }; std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) }; std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
return inputs; return inputs;
} }
@@ -744,26 +752,15 @@ namespace ANSCENTER
return {}; return {};
} }
// Upload to GPU // CPU preprocessing: resize + BGR->RGB before GPU upload
cv::cuda::GpuMat img; cv::Mat srcImg = inputImage;
if (srcImg.channels() == 1) {
// Convert grayscale to BGR if needed cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
if (inputImage.channels() == 1) {
cv::Mat img3Channel;
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
img.upload(img3Channel, stream);
} }
else {
img.upload(inputImage, stream);
}
// Convert BGR to RGB
cv::cuda::GpuMat imgRGB;
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
// Store original dimensions // Store original dimensions
outMetadata.imgHeights[i] = imgRGB.rows; outMetadata.imgHeights[i] = srcImg.rows;
outMetadata.imgWidths[i] = imgRGB.cols; outMetadata.imgWidths[i] = srcImg.cols;
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) { if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
_logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch", _logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch",
@@ -774,17 +771,31 @@ namespace ANSCENTER
return {}; return {};
} }
// Calculate ratio for this image const auto& outputDims = m_trtEngine->getOutputDims();
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols), const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
inputH / static_cast<float>(imgRGB.rows));
// Resize with padding // Calculate ratio for this image
cv::cuda::GpuMat resized = imgRGB; outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
if (resized.rows != inputH || resized.cols != inputW) { inputH / static_cast<float>(srcImg.rows));
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
// CPU resize to model input size
cv::Mat cpuResized;
if (srcImg.rows != inputH || srcImg.cols != inputW) {
if (isClassification) {
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
}
} else {
cpuResized = srcImg;
} }
batchProcessed.push_back(std::move(resized)); cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat gpuResized;
gpuResized.upload(cpuRGB, stream);
batchProcessed.push_back(std::move(gpuResized));
} }
stream.waitForCompletion(); stream.waitForCompletion();

View File

@@ -665,38 +665,37 @@ namespace ANSCENTER {
} }
if (!usedNV12) { if (!usedNV12) {
// Standard BGR upload + resize + center-pad path // CPU center-padded letterbox + BGR->RGB, then upload small image
cv::cuda::Stream stream; cv::Mat srcImg;
cv::cuda::GpuMat d_bgr;
if (input.channels() == 1) { if (input.channels() == 1) {
cv::Mat img3Channel; cv::cvtColor(input, srcImg, cv::COLOR_GRAY2BGR);
cv::cvtColor(input, img3Channel, cv::COLOR_GRAY2BGR); } else if (input.channels() == 3) {
d_bgr.upload(img3Channel, stream); srcImg = input;
} } else {
else if (input.channels() == 3) {
d_bgr.upload(input, stream);
}
else {
this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__); this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__);
return {}; return {};
} }
cv::cuda::GpuMat d_rgb; // CPU resize to unpadded size
cv::cuda::GpuMat d_resized; cv::Mat cpuResized;
if (srcImg.rows != new_unpad_h || srcImg.cols != new_unpad_w) {
cv::resize(srcImg, cpuResized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
// CPU center-pad to net_w x net_h
cv::Mat cpuPadded(net_h, net_w, CV_8UC3, cv::Scalar(0, 0, 0));
cpuResized.copyTo(cpuPadded(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
// CPU BGR -> RGB
cv::Mat cpuRGB;
cv::cvtColor(cpuPadded, cpuRGB, cv::COLOR_BGR2RGB);
// Upload small padded image to GPU
cv::cuda::Stream stream;
cv::cuda::GpuMat d_padded; cv::cuda::GpuMat d_padded;
d_padded.upload(cpuRGB, stream);
cv::cuda::cvtColor(d_bgr, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
cv::cuda::resize(d_rgb, d_resized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR, stream);
d_padded.create(net_h, net_w, d_resized.type());
d_padded.setTo(cv::Scalar(0, 0, 0), stream);
cv::Rect roi(dw, dh, new_unpad_w, new_unpad_h > 0 ? new_unpad_h : 0);
roi.width = new_unpad_w;
roi.height = new_unpad_h;
d_resized.copyTo(d_padded(roi), stream);
stream.waitForCompletion(); stream.waitForCompletion();
std::vector<cv::cuda::GpuMat> inputVec; std::vector<cv::cuda::GpuMat> inputVec;

View File

@@ -173,6 +173,8 @@ public:
// to the original reference frame. // to the original reference frame.
static cv::cuda::GpuMat resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat &input, size_t height, size_t width, static cv::cuda::GpuMat resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat &input, size_t height, size_t width,
const cv::Scalar &bgcolor = cv::Scalar(0, 0, 0)); const cv::Scalar &bgcolor = cv::Scalar(0, 0, 0));
// Host-side (cv::Mat) helper declared alongside the GpuMat resizeKeepAspectRatioPadRightBottom:
// takes a CPU image plus the target height and width and returns a cv::Mat.
// NOTE(review): presumably performs the same aspect-preserving resize with right/bottom padding
// as the GpuMat version -- confirm against the definition, which is not visible from here.
// NOTE(review): the default bgcolor here is (114, 114, 114), whereas the GpuMat version defaults
// to (0, 0, 0). Call sites that omit bgcolor will therefore pad with a different colour than the
// GPU path did -- confirm this change in padding value is intended for the affected models.
static cv::Mat cpuResizeKeepAspectRatioPadRightBottom(const cv::Mat &input, size_t height, size_t width,
const cv::Scalar &bgcolor = cv::Scalar(114, 114, 114));
[[nodiscard]] const std::vector<nvinfer1::Dims3> &getInputDims() const override { return m_inputDims; }; [[nodiscard]] const std::vector<nvinfer1::Dims3> &getInputDims() const override { return m_inputDims; };
[[nodiscard]] const std::vector<nvinfer1::Dims> &getOutputDims() const override { return m_outputDims; }; [[nodiscard]] const std::vector<nvinfer1::Dims> &getOutputDims() const override { return m_outputDims; };