Use CPU resize before upload to GPU to remove PCIe bottleneck
This commit is contained in:
@@ -462,50 +462,46 @@ namespace ANSCENTER {
|
||||
// Early-out if CUDA context is dead (sticky error from CUVID crash etc.)
|
||||
if (!m_nv12Helper.isCudaContextHealthy(_logger, "ANSRTYOLO")) return {};
|
||||
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuImg;
|
||||
|
||||
// Resolve source Mat (handle grayscale → BGR on CPU first)
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
gpuImg.upload(img3Channel, stream);
|
||||
} else {
|
||||
gpuImg.upload(inputImage, stream);
|
||||
// --- CPU preprocessing: resize + BGR→RGB before GPU upload ---
|
||||
// Reduces PCIe transfer from 25 MB (4K BGR) to 1.2 MB (640×640 RGB).
|
||||
// With 12 AI tasks uploading concurrently, this eliminates the WDDM
|
||||
// SRW lock convoy that causes 400-580ms preprocess spikes.
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// GPU: BGR → RGB
|
||||
cv::cuda::GpuMat gpuRGB;
|
||||
cv::cuda::cvtColor(gpuImg, gpuRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
outMeta.imgHeight = static_cast<float>(gpuRGB.rows);
|
||||
outMeta.imgWidth = static_cast<float>(gpuRGB.cols);
|
||||
outMeta.imgHeight = static_cast<float>(srcImg.rows);
|
||||
outMeta.imgWidth = static_cast<float>(srcImg.cols);
|
||||
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(
|
||||
inputDims[0].d[2] / static_cast<float>(gpuRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(gpuRGB.rows));
|
||||
inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
// Check if model is classification (output ndims <= 2)
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
if (gpuRGB.rows != inputH || gpuRGB.cols != inputW) {
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
// Classification: direct resize (no letterbox padding)
|
||||
cv::cuda::resize(gpuRGB, gpuResized, cv::Size(inputW, inputH),
|
||||
0, 0, cv::INTER_LINEAR, stream);
|
||||
}
|
||||
else {
|
||||
// Detection/Seg/Pose/OBB: letterbox resize + right-bottom pad (on GPU)
|
||||
gpuResized = Engine<float>::resizeKeepAspectRatioPadRightBottom(
|
||||
gpuRGB, inputH, inputW);
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
gpuResized = gpuRGB;
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR → RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU (1.2 MB instead of 25 MB for 4K)
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
@@ -878,26 +874,18 @@ namespace ANSCENTER {
|
||||
"Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
cv::cuda::GpuMat img;
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else if (inputImage.channels() == 3) {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
else {
|
||||
// CPU preprocessing: resize + BGR→RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
} else if (srcImg.channels() != 3) {
|
||||
_logger.LogError("ANSRTYOLO::PreprocessBatch",
|
||||
"Unsupported channel count at index " + std::to_string(i), __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogError("ANSRTYOLO::PreprocessBatch",
|
||||
"Invalid dimensions for image " + std::to_string(i), __FILE__, __LINE__);
|
||||
@@ -907,23 +895,27 @@ namespace ANSCENTER {
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
const float scaleW = inputW / static_cast<float>(imgRGB.cols);
|
||||
const float scaleH = inputH / static_cast<float>(imgRGB.rows);
|
||||
const float scaleW = inputW / static_cast<float>(srcImg.cols);
|
||||
const float scaleH = inputH / static_cast<float>(srcImg.rows);
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
|
||||
|
||||
cv::cuda::GpuMat resized;
|
||||
if (imgRGB.rows != inputH || imgRGB.cols != inputW) {
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream);
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
}
|
||||
else {
|
||||
resized = imgRGB;
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
stream.waitForCompletion();
|
||||
|
||||
@@ -1804,10 +1796,10 @@ namespace ANSCENTER {
|
||||
std::vector<Object> ANSRTYOLO::DetectObjects(const cv::Mat& inputImage,
|
||||
const std::string& camera_id) {
|
||||
try {
|
||||
// --- Debug timer helper (zero-cost when _debugFlag == false) ---
|
||||
// --- Debug timer helper ---
|
||||
using Clock = std::chrono::steady_clock;
|
||||
const bool dbg = _debugFlag;
|
||||
auto t0 = dbg ? Clock::now() : Clock::time_point{};
|
||||
auto t0 = Clock::now(); // Always set — used by ANS_DBG timing output
|
||||
auto tPrev = t0;
|
||||
auto elapsed = [&]() -> double {
|
||||
auto now = Clock::now();
|
||||
@@ -2045,13 +2037,21 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
// --- 6. Total pipeline time ---
|
||||
if (dbg) {
|
||||
{
|
||||
double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
|
||||
_logger.LogInfo("ANSRTYOLO::DetectObjects",
|
||||
"[DEBUG] " + camera_id + " | TOTAL=" + std::to_string(msTotal) +
|
||||
"ms (" + std::to_string(inputImage.cols) + "x" + std::to_string(inputImage.rows) +
|
||||
") Results=" + std::to_string(results.size()),
|
||||
__FILE__, __LINE__);
|
||||
if (dbg) {
|
||||
_logger.LogInfo("ANSRTYOLO::DetectObjects",
|
||||
"[DEBUG] " + camera_id + " | TOTAL=" + std::to_string(msTotal) +
|
||||
"ms (" + std::to_string(inputImage.cols) + "x" + std::to_string(inputImage.rows) +
|
||||
") Results=" + std::to_string(results.size()),
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
// DebugView output — controlled by ANSCORE_DEBUGVIEW
|
||||
double msPreproc = std::chrono::duration<double, std::milli>(_trtStart - t0).count();
|
||||
ANS_DBG("YOLO_Timing", "cam=%s total=%.1fms preproc=%.1fms inf=%.1fms %dx%d det=%zu %s",
|
||||
camera_id.c_str(), msTotal, msPreproc, _trtMs,
|
||||
inputImage.cols, inputImage.rows, results.size(),
|
||||
usedNV12 ? "NV12" : "BGR");
|
||||
}
|
||||
|
||||
return results;
|
||||
@@ -2101,7 +2101,7 @@ namespace ANSCENTER {
|
||||
// --- Debug timer helper ---
|
||||
using Clock = std::chrono::steady_clock;
|
||||
const bool dbg = _debugFlag;
|
||||
auto t0 = dbg ? Clock::now() : Clock::time_point{};
|
||||
auto t0 = Clock::now(); // Always set — used by ANS_DBG timing output
|
||||
auto tPrev = t0;
|
||||
auto elapsed = [&]() -> double {
|
||||
auto now = Clock::now();
|
||||
@@ -2350,19 +2350,23 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
|
||||
if (dbg) {
|
||||
double msPostprocess = elapsed();
|
||||
{
|
||||
double msPostprocess = dbg ? elapsed() : 0;
|
||||
double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
|
||||
_logger.LogInfo("ANSRTYOLO::DetectObjectsBatch",
|
||||
"[DEBUG] " + camera_id +
|
||||
" batch=" + std::to_string(realCount) +
|
||||
" | SetDev=" + std::to_string(msSetDevice) +
|
||||
"ms Pad=" + std::to_string(msPad) +
|
||||
"ms Preproc=" + std::to_string(msPreprocess) +
|
||||
"ms Inf=" + std::to_string(msInference) +
|
||||
"ms Postproc=" + std::to_string(msPostprocess) +
|
||||
"ms TOTAL=" + std::to_string(msTotal) + "ms",
|
||||
__FILE__, __LINE__);
|
||||
if (dbg) {
|
||||
_logger.LogInfo("ANSRTYOLO::DetectObjectsBatch",
|
||||
"[DEBUG] " + camera_id +
|
||||
" batch=" + std::to_string(realCount) +
|
||||
" | SetDev=" + std::to_string(msSetDevice) +
|
||||
"ms Pad=" + std::to_string(msPad) +
|
||||
"ms Preproc=" + std::to_string(msPreprocess) +
|
||||
"ms Inf=" + std::to_string(msInference) +
|
||||
"ms Postproc=" + std::to_string(msPostprocess) +
|
||||
"ms TOTAL=" + std::to_string(msTotal) + "ms",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
ANS_DBG("YOLO_Timing", "cam=%s batch=%d total=%.1fms preproc=%.1fms inf=%.1fms",
|
||||
camera_id.c_str(), realCount, msTotal, msPreprocess, msInference);
|
||||
}
|
||||
|
||||
return batchDetections;
|
||||
|
||||
@@ -534,27 +534,15 @@ namespace ANSCENTER
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
|
||||
// Upload the image to GPU memory
|
||||
cv::cuda::Stream stream; // Create a custom stream
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.channels() == 1) {
|
||||
// Convert grayscale to 3-channel BGR before uploading
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert BGR to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// These parameters will be used in the post-processing stage
|
||||
outMeta.imgHeight = imgRGB.rows;
|
||||
outMeta.imgWidth = imgRGB.cols;
|
||||
outMeta.imgHeight = srcImg.rows;
|
||||
outMeta.imgWidth = srcImg.cols;
|
||||
|
||||
if (outMeta.imgHeight <= 0 || outMeta.imgWidth <= 0) {
|
||||
_logger.LogFatal("TENSORRTCL::Preprocess", "Image height or width is zero", __FILE__, __LINE__);
|
||||
@@ -564,19 +552,26 @@ namespace ANSCENTER
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f;
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
|
||||
// Classification: direct resize (no letterbox padding) — matches ANSONNXCL
|
||||
// Must use explicit stream to avoid conflict with CUDA Graph capture on null stream
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream);
|
||||
// Classification: direct CPU resize (no letterbox padding)
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// Wait for all GPU ops to complete before returning GpuMats
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -811,25 +806,17 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Upload to GPU
|
||||
cv::cuda::GpuMat img;
|
||||
if (inputImage.channels() == 1) {
|
||||
// Convert grayscale to BGR
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
cv::cvtColor(srcImg, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
srcImg = img3Channel;
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert BGR to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// Store original dimensions
|
||||
int imgHeight = imgRGB.rows;
|
||||
int imgWidth = imgRGB.cols;
|
||||
int imgHeight = srcImg.rows;
|
||||
int imgWidth = srcImg.cols;
|
||||
|
||||
if (imgHeight <= 0 || imgWidth <= 0) {
|
||||
_logger.LogFatal("TENSORRTCL::PreprocessBatch",
|
||||
@@ -841,26 +828,25 @@ namespace ANSCENTER
|
||||
outMetadata.imgHeights.push_back(imgHeight);
|
||||
outMetadata.imgWidths.push_back(imgWidth);
|
||||
|
||||
// Calculate resize ratio
|
||||
float ratio = 1.f / std::min(
|
||||
inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows)
|
||||
);
|
||||
outMetadata.ratios.push_back(ratio);
|
||||
// Classification: ratio is always 1.0
|
||||
outMetadata.ratios.push_back(1.f);
|
||||
|
||||
// Resize maintaining aspect ratio with padding
|
||||
cv::cuda::GpuMat resized;
|
||||
if (imgRGB.rows != inputDims[0].d[1] || imgRGB.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(
|
||||
imgRGB, inputDims[0].d[1], inputDims[0].d[2]
|
||||
);
|
||||
}
|
||||
else {
|
||||
resized = imgRGB;
|
||||
// Classification: direct CPU resize (no letterbox padding)
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
|
||||
// Add to batch
|
||||
batchedImages.push_back(std::move(resized));
|
||||
batchedImages.push_back(std::move(gpuResized));
|
||||
}
|
||||
|
||||
// Wait for all GPU operations to complete
|
||||
|
||||
@@ -508,41 +508,46 @@ namespace ANSCENTER
|
||||
const auto& inputDims = m_trtEngine->getInputDims();
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
// Upload the image to GPU memory
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.channels() == 1) {
|
||||
// Convert grayscale to 3-channel BGR before uploading
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Set image size parameters
|
||||
outMeta.imgHeight = imgRGB.rows;
|
||||
outMeta.imgWidth = imgRGB.cols;
|
||||
// Set image size parameters from ORIGINAL image
|
||||
outMeta.imgHeight = srcImg.rows;
|
||||
outMeta.imgWidth = srcImg.cols;
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize to the model's expected input size while maintaining aspect ratio with padding
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -793,19 +798,13 @@ namespace ANSCENTER
|
||||
"Empty input image at index " + std::to_string(i), __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
cv::cuda::GpuMat img;
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogFatal("ANSTENSORRTPOSE::PreprocessBatch",
|
||||
"Image " + std::to_string(i) + " has invalid dimensions (Width: " +
|
||||
@@ -813,13 +812,30 @@ namespace ANSCENTER
|
||||
std::to_string(outMetadata.imgHeights[i]) + ")", __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
|
||||
inputH / static_cast<float>(imgRGB.rows));
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
if (resized.rows != inputH || resized.cols != inputW) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
|
||||
inputH / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
stream.waitForCompletion();
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs;
|
||||
|
||||
@@ -561,41 +561,46 @@ namespace ANSCENTER
|
||||
const auto& inputDims = m_trtEngine->getInputDims();
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
// Upload the image to GPU memory
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.channels() == 1) {
|
||||
// Convert grayscale to 3-channel BGR before uploading
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Set image size parameters
|
||||
outMeta.imgHeight = imgRGB.rows;
|
||||
outMeta.imgWidth = imgRGB.cols;
|
||||
// Set image size parameters from ORIGINAL image
|
||||
outMeta.imgHeight = srcImg.rows;
|
||||
outMeta.imgWidth = srcImg.cols;
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize to the model's expected input size while maintaining aspect ratio with padding
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -891,26 +896,15 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Upload to GPU
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
// Convert grayscale to BGR if needed
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// Store original dimensions
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogFatal("TENSORRTSEG::PreprocessBatch",
|
||||
@@ -921,17 +915,31 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
|
||||
inputH / static_cast<float>(imgRGB.rows));
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize with padding
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
if (resized.rows != inputH || resized.cols != inputW) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
|
||||
inputH / static_cast<float>(srcImg.rows));
|
||||
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
@@ -587,41 +587,46 @@ namespace ANSCENTER
|
||||
const auto& inputDims = m_trtEngine->getInputDims();
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
// Upload the image to GPU memory
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.channels() == 1) {
|
||||
// Convert grayscale to 3-channel BGR before uploading
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Set image size parameters into per-call metadata (not shared members)
|
||||
outMeta.imgHeight = static_cast<float>(imgRGB.rows);
|
||||
outMeta.imgWidth = static_cast<float>(imgRGB.cols);
|
||||
// Set image size parameters from ORIGINAL image (before resize)
|
||||
outMeta.imgHeight = static_cast<float>(srcImg.rows);
|
||||
outMeta.imgWidth = static_cast<float>(srcImg.cols);
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize to the model's expected input size while maintaining aspect ratio with padding
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -1174,29 +1179,20 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else if (inputImage.channels() == 3) {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
else {
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
} else if (srcImg.channels() != 3) {
|
||||
_logger.LogError("TENSORRTOD::PreprocessBatch",
|
||||
"Unsupported channel count at index " + std::to_string(i),
|
||||
__FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// Store in output metadata
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
// Store in output metadata from ORIGINAL image
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogError("TENSORRTOD::PreprocessBatch",
|
||||
@@ -1205,20 +1201,30 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
const float scaleW = inputW / static_cast<float>(imgRGB.cols);
|
||||
const float scaleH = inputH / static_cast<float>(imgRGB.rows);
|
||||
outMetadata.ratios[i] = 1.f / std::min(scaleW, scaleH);
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
cv::cuda::GpuMat resized;
|
||||
if (imgRGB.rows != inputH || imgRGB.cols != inputW) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(
|
||||
imgRGB, inputH, inputW);
|
||||
}
|
||||
else {
|
||||
resized = imgRGB;
|
||||
const float scaleW = inputW / static_cast<float>(srcImg.cols);
|
||||
const float scaleH = inputH / static_cast<float>(srcImg.rows);
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(scaleW, scaleH);
|
||||
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
@@ -519,46 +519,46 @@ namespace ANSCENTER
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
|
||||
// Upload input image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
if (inputImage.empty()) {
|
||||
_logger.LogFatal("ANSYOLOV10RTOD::Preprocess", "Empty input image", __FILE__, __LINE__);
|
||||
return {};
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Convert grayscale to BGR if needed
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
outMeta.imgHeight = imgRGB.rows;
|
||||
outMeta.imgWidth = imgRGB.cols;
|
||||
outMeta.imgHeight = srcImg.rows;
|
||||
outMeta.imgWidth = srcImg.cols;
|
||||
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize to the model's expected input size while maintaining aspect ratio with padding
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -1058,26 +1058,15 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Upload to GPU
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
// Convert grayscale to BGR if needed
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// Store original dimensions
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogFatal("ANSYOLOV10RTOD::PreprocessBatch",
|
||||
@@ -1088,17 +1077,31 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
|
||||
inputH / static_cast<float>(imgRGB.rows));
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize with padding
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
if (resized.rows != inputH || resized.cols != inputW) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
|
||||
inputH / static_cast<float>(srcImg.rows));
|
||||
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
@@ -484,39 +484,47 @@ namespace ANSCENTER
|
||||
}
|
||||
|
||||
const auto& inputDims = m_trtEngine->getInputDims();
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
// Upload to GPU
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
// --- CPU preprocessing: resize + BGR->RGB before GPU upload ---
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
// Convert BGR to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
outMeta.imgHeight = imgRGB.rows;
|
||||
outMeta.imgWidth = imgRGB.cols;
|
||||
outMeta.imgHeight = srcImg.rows;
|
||||
outMeta.imgWidth = srcImg.cols;
|
||||
if (outMeta.imgHeight > 0 && outMeta.imgWidth > 0) {
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(imgRGB.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(imgRGB.rows));
|
||||
outMeta.ratio = 1.f / std::min(inputDims[0].d[2] / static_cast<float>(srcImg.cols),
|
||||
inputDims[0].d[1] / static_cast<float>(srcImg.rows));
|
||||
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize to the model's expected input size while maintaining aspect ratio with padding
|
||||
if (resized.rows != inputDims[0].d[1] || resized.cols != inputDims[0].d[2]) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputDims[0].d[1], inputDims[0].d[2]);
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
const int inputH = inputDims[0].d[1];
|
||||
const int inputW = inputDims[0].d[2];
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
// Convert to format expected by our inference engine
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(resized) };
|
||||
std::vector<cv::cuda::GpuMat> input{ std::move(gpuResized) };
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(input) };
|
||||
return inputs;
|
||||
}
|
||||
@@ -744,26 +752,15 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Upload to GPU
|
||||
cv::cuda::GpuMat img;
|
||||
|
||||
// Convert grayscale to BGR if needed
|
||||
if (inputImage.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(inputImage, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
img.upload(img3Channel, stream);
|
||||
// CPU preprocessing: resize + BGR->RGB before GPU upload
|
||||
cv::Mat srcImg = inputImage;
|
||||
if (srcImg.channels() == 1) {
|
||||
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
else {
|
||||
img.upload(inputImage, stream);
|
||||
}
|
||||
|
||||
// Convert BGR to RGB
|
||||
cv::cuda::GpuMat imgRGB;
|
||||
cv::cuda::cvtColor(img, imgRGB, cv::COLOR_BGR2RGB, 0, stream);
|
||||
|
||||
// Store original dimensions
|
||||
outMetadata.imgHeights[i] = imgRGB.rows;
|
||||
outMetadata.imgWidths[i] = imgRGB.cols;
|
||||
outMetadata.imgHeights[i] = srcImg.rows;
|
||||
outMetadata.imgWidths[i] = srcImg.cols;
|
||||
|
||||
if (outMetadata.imgHeights[i] <= 0 || outMetadata.imgWidths[i] <= 0) {
|
||||
_logger.LogFatal("ANSYOLOV12RTOD::PreprocessBatch",
|
||||
@@ -774,17 +771,31 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = 1.f / std::min(inputW / static_cast<float>(imgRGB.cols),
|
||||
inputH / static_cast<float>(imgRGB.rows));
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
// Resize with padding
|
||||
cv::cuda::GpuMat resized = imgRGB;
|
||||
if (resized.rows != inputH || resized.cols != inputW) {
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
// Calculate ratio for this image
|
||||
outMetadata.ratios[i] = isClassification ? 1.f : 1.f / std::min(inputW / static_cast<float>(srcImg.cols),
|
||||
inputH / static_cast<float>(srcImg.rows));
|
||||
|
||||
// CPU resize to model input size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != inputH || srcImg.cols != inputW) {
|
||||
if (isClassification) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = Engine<float>::cpuResizeKeepAspectRatioPadRightBottom(srcImg, inputH, inputW);
|
||||
}
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
batchProcessed.push_back(std::move(resized));
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
cv::cuda::GpuMat gpuResized;
|
||||
gpuResized.upload(cpuRGB, stream);
|
||||
batchProcessed.push_back(std::move(gpuResized));
|
||||
}
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
@@ -665,38 +665,37 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
if (!usedNV12) {
|
||||
// Standard BGR upload + resize + center-pad path
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat d_bgr;
|
||||
|
||||
// CPU center-padded letterbox + BGR->RGB, then upload small image
|
||||
cv::Mat srcImg;
|
||||
if (input.channels() == 1) {
|
||||
cv::Mat img3Channel;
|
||||
cv::cvtColor(input, img3Channel, cv::COLOR_GRAY2BGR);
|
||||
d_bgr.upload(img3Channel, stream);
|
||||
}
|
||||
else if (input.channels() == 3) {
|
||||
d_bgr.upload(input, stream);
|
||||
}
|
||||
else {
|
||||
cv::cvtColor(input, srcImg, cv::COLOR_GRAY2BGR);
|
||||
} else if (input.channels() == 3) {
|
||||
srcImg = input;
|
||||
} else {
|
||||
this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
|
||||
cv::cuda::GpuMat d_rgb;
|
||||
cv::cuda::GpuMat d_resized;
|
||||
// CPU resize to unpadded size
|
||||
cv::Mat cpuResized;
|
||||
if (srcImg.rows != new_unpad_h || srcImg.cols != new_unpad_w) {
|
||||
cv::resize(srcImg, cpuResized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR);
|
||||
} else {
|
||||
cpuResized = srcImg;
|
||||
}
|
||||
|
||||
// CPU center-pad to net_w x net_h
|
||||
cv::Mat cpuPadded(net_h, net_w, CV_8UC3, cv::Scalar(0, 0, 0));
|
||||
cpuResized.copyTo(cpuPadded(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
|
||||
|
||||
// CPU BGR -> RGB
|
||||
cv::Mat cpuRGB;
|
||||
cv::cvtColor(cpuPadded, cpuRGB, cv::COLOR_BGR2RGB);
|
||||
|
||||
// Upload small padded image to GPU
|
||||
cv::cuda::Stream stream;
|
||||
cv::cuda::GpuMat d_padded;
|
||||
|
||||
cv::cuda::cvtColor(d_bgr, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
|
||||
cv::cuda::resize(d_rgb, d_resized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR, stream);
|
||||
|
||||
d_padded.create(net_h, net_w, d_resized.type());
|
||||
d_padded.setTo(cv::Scalar(0, 0, 0), stream);
|
||||
|
||||
cv::Rect roi(dw, dh, new_unpad_w, new_unpad_h > 0 ? new_unpad_h : 0);
|
||||
roi.width = new_unpad_w;
|
||||
roi.height = new_unpad_h;
|
||||
d_resized.copyTo(d_padded(roi), stream);
|
||||
|
||||
d_padded.upload(cpuRGB, stream);
|
||||
stream.waitForCompletion();
|
||||
|
||||
std::vector<cv::cuda::GpuMat> inputVec;
|
||||
|
||||
@@ -173,6 +173,8 @@ public:
|
||||
// to the original reference frame.
|
||||
static cv::cuda::GpuMat resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat &input, size_t height, size_t width,
|
||||
const cv::Scalar &bgcolor = cv::Scalar(0, 0, 0));
|
||||
// Host-side (cv::Mat) counterpart of resizeKeepAspectRatioPadRightBottom, which is
// declared just above and operates on cv::cuda::GpuMat.
// Takes a host image plus the target height and width and returns a new cv::Mat.
// Presumably resizes while preserving aspect ratio and fills the right/bottom
// remainder with bgcolor, as the name suggests — the definition is not visible
// here, so confirm against the implementation.
// NOTE(review): the default bgcolor here is (114, 114, 114), whereas the GpuMat
// variant above defaults to (0, 0, 0). Call sites that move from the GPU helper to
// this one without passing bgcolor will pad with a different colour — confirm
// this change in padding value is intended for the models being fed.
static cv::Mat cpuResizeKeepAspectRatioPadRightBottom(const cv::Mat &input, size_t height, size_t width,
|
||||
const cv::Scalar &bgcolor = cv::Scalar(114, 114, 114));
|
||||
|
||||
[[nodiscard]] const std::vector<nvinfer1::Dims3> &getInputDims() const override { return m_inputDims; };
|
||||
[[nodiscard]] const std::vector<nvinfer1::Dims> &getOutputDims() const override { return m_outputDims; };
|
||||
|
||||
Reference in New Issue
Block a user