Use software decoder by default
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include "ANSRTYOLO.h"
|
||||
#include "Utility.h"
|
||||
#include "ANSLicense.h" // ANS_DBG macro for DebugView
|
||||
#include <future>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
@@ -903,7 +904,6 @@ namespace ANSCENTER {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Check if model is classification (output ndims <= 2)
|
||||
const auto& outputDims = m_trtEngine->getOutputDims();
|
||||
const bool isClassification = !outputDims.empty() && outputDims[0].nbDims <= 2;
|
||||
|
||||
@@ -914,11 +914,8 @@ namespace ANSCENTER {
|
||||
cv::cuda::GpuMat resized;
|
||||
if (imgRGB.rows != inputH || imgRGB.cols != inputW) {
|
||||
if (isClassification) {
|
||||
// Classification: direct resize (no letterbox padding)
|
||||
// Must use explicit stream to avoid conflict with CUDA Graph capture on null stream
|
||||
cv::cuda::resize(imgRGB, resized, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR, stream);
|
||||
} else {
|
||||
// Detection/Seg/Pose/OBB: letterbox resize + right-bottom pad
|
||||
resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(imgRGB, inputH, inputW);
|
||||
}
|
||||
}
|
||||
@@ -1831,8 +1828,7 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
// --- 2. Preprocess under lock ---
|
||||
// Try NV12 fast path first (12MB upload vs 24MB BGR for 4K)
|
||||
// Falls back to standard GPU preprocessing if no NV12 data available.
|
||||
ANS_DBG("YOLO", "Preprocess START %dx%d", inputImage.cols, inputImage.rows);
|
||||
ImageMetadata meta;
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> input;
|
||||
bool usedNV12 = false;
|
||||
@@ -1874,11 +1870,22 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
// --- 3. TRT Inference (mutex released for concurrent GPU slots) ---
|
||||
ANS_DBG("YOLO", "TRT inference START nv12=%d inputSize=%dx%d",
|
||||
(int)usedNV12,
|
||||
input.empty() ? 0 : (input[0].empty() ? 0 : input[0][0].cols),
|
||||
input.empty() ? 0 : (input[0].empty() ? 0 : input[0][0].rows));
|
||||
auto _trtStart = std::chrono::steady_clock::now();
|
||||
std::vector<std::vector<std::vector<float>>> featureVectors;
|
||||
if (!m_trtEngine->runInference(input, featureVectors)) {
|
||||
ANS_DBG("YOLO", "ERROR: TRT runInference FAILED");
|
||||
_logger.LogError("ANSRTYOLO::DetectObjects", "Error running inference", __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
auto _trtEnd = std::chrono::steady_clock::now();
|
||||
double _trtMs = std::chrono::duration<double, std::milli>(_trtEnd - _trtStart).count();
|
||||
if (_trtMs > 500.0) {
|
||||
ANS_DBG("YOLO", "SLOW TRT inference: %.1fms", _trtMs);
|
||||
}
|
||||
double msInference = dbg ? elapsed() : 0;
|
||||
|
||||
// --- 4. Transform output ---
|
||||
|
||||
Reference in New Issue
Block a user