Add logs to custom models and model optimisation
This commit is contained in:
@@ -2411,6 +2411,12 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
const std::array<float, 3>& divVals,
|
||||
bool normalize)
|
||||
{
|
||||
ANS_DBG("TRT_Build", "buildWithRetry ENTRY onnx=%s normalize=%d optMaxHW=%dx%d optOptHW=%dx%d optMinHW=%dx%d",
|
||||
onnxModelPath.c_str(), (int)normalize,
|
||||
m_options.maxInputHeight, m_options.maxInputWidth,
|
||||
m_options.optInputHeight, m_options.optInputWidth,
|
||||
m_options.minInputHeight, m_options.minInputWidth);
|
||||
|
||||
// -- Quick pre-analysis: detect dynamic spatial dims in ONNX ---------------
|
||||
bool hasDynamicSpatial = false;
|
||||
int onnxFixedH = 0, onnxFixedW = 0; // 0 = dynamic (-1 in ONNX)
|
||||
@@ -2423,6 +2429,10 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
nvonnxparser::createParser(*tempNetwork, m_logger));
|
||||
|
||||
std::ifstream onnxFile(onnxModelPath, std::ios::binary | std::ios::ate);
|
||||
if (!onnxFile.good()) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry WARN cannot open ONNX for pre-parse path=%s",
|
||||
onnxModelPath.c_str());
|
||||
}
|
||||
if (onnxFile.good()) {
|
||||
std::streamsize onnxSize = onnxFile.tellg();
|
||||
onnxFile.seekg(0, std::ios::beg);
|
||||
@@ -2432,6 +2442,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
bool retryParsed = parseOnnxModelSafe(tempParser.get(),
|
||||
onnxBuffer.data(), onnxBuffer.size(), &sehRetryParse);
|
||||
if (sehRetryParse != 0) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry WARN SEH=0x%lx during pre-parse — fall through to single build()",
|
||||
sehRetryParse);
|
||||
// hasDynamicSpatial stays false → single build() attempt
|
||||
}
|
||||
else if (retryParsed && tempNetwork->getNbInputs() > 0) {
|
||||
@@ -2441,19 +2453,39 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
hasDynamicSpatial = true;
|
||||
onnxFixedH = (dims.d[2] != -1) ? dims.d[2] : 0;
|
||||
onnxFixedW = (dims.d[3] != -1) ? dims.d[3] : 0;
|
||||
ANS_DBG("TRT_Build", "buildWithRetry pre-parse OK nbDims=%d dims=[%d,%d,%d,%d] dynSpatial=%d fixedHW=%dx%d",
|
||||
dims.nbDims,
|
||||
(int)dims.d[0], (int)dims.d[1], (int)dims.d[2], (int)dims.d[3],
|
||||
(int)hasDynamicSpatial, onnxFixedH, onnxFixedW);
|
||||
} else {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry WARN nbDims=%d (<4) — treating as fixed", dims.nbDims);
|
||||
}
|
||||
} else {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry WARN parse failed or no inputs (parsed=%d nbInputs=%d)",
|
||||
(int)retryParsed,
|
||||
retryParsed ? tempNetwork->getNbInputs() : -1);
|
||||
}
|
||||
} else {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry WARN ONNX read failed bytes=%zd",
|
||||
(long long)onnxSize);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry SKIP pre-parse (maxInputHW=%dx%d not both >0)",
|
||||
m_options.maxInputHeight, m_options.maxInputWidth);
|
||||
}
|
||||
|
||||
// -- Fixed-spatial or no dynamic dims: single build attempt ----------------
|
||||
if (!hasDynamicSpatial) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry FIXED_SPATIAL → single buildSafe() attempt");
|
||||
unsigned long sehBuild = 0;
|
||||
bool ok = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehBuild);
|
||||
if (sehBuild != 0) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry FAIL fixed-spatial SEH=0x%lx", sehBuild);
|
||||
return false;
|
||||
}
|
||||
ANS_DBG("TRT_Build", "buildWithRetry %s fixed-spatial",
|
||||
ok ? "SUCCESS" : "FAIL");
|
||||
return ok;
|
||||
}
|
||||
|
||||
@@ -2482,6 +2514,16 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
if (candidates.back() > 640) candidates.push_back(640);
|
||||
if (candidates.back() > 320) candidates.push_back(320);
|
||||
|
||||
{
|
||||
std::ostringstream oss;
|
||||
for (size_t i = 0; i < candidates.size(); ++i) {
|
||||
if (i) oss << ",";
|
||||
oss << candidates[i];
|
||||
}
|
||||
ANS_DBG("TRT_Build", "buildWithRetry DYNAMIC_SPATIAL dynH=%d dynW=%d maxDynDim=%d candidates=[%s]",
|
||||
(int)dynamicH, (int)dynamicW, maxDynDim, oss.str().c_str());
|
||||
}
|
||||
|
||||
// Helper: configure m_options for a given candidate
|
||||
auto setCandidateOptions = [&](int candidate) {
|
||||
float scale = static_cast<float>(candidate) / maxDynDim;
|
||||
@@ -2506,16 +2548,28 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
for (size_t attempt = 0; attempt < candidates.size(); ++attempt) {
|
||||
setCandidateOptions(candidates[attempt]);
|
||||
|
||||
ANS_DBG("TRT_Build", "buildWithRetry ATTEMPT %zu/%zu candidate=%d maxHW=%dx%d optHW=%dx%d minHW=%dx%d",
|
||||
attempt + 1, candidates.size(), candidates[attempt],
|
||||
m_options.maxInputHeight, m_options.maxInputWidth,
|
||||
m_options.optInputHeight, m_options.optInputWidth,
|
||||
m_options.minInputHeight, m_options.minInputWidth);
|
||||
|
||||
{
|
||||
unsigned long sehAttempt = 0;
|
||||
bool attemptOk = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehAttempt);
|
||||
if (sehAttempt != 0) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry FAIL SEH=0x%lx on candidate=%d — abort (CUDA may be corrupted)",
|
||||
sehAttempt, candidates[attempt]);
|
||||
// CUDA context may be corrupted — no point retrying
|
||||
return false;
|
||||
}
|
||||
if (attemptOk) {
|
||||
ANS_DBG("TRT_Build", "buildWithRetry SUCCESS at attempt %zu/%zu candidate=%d (final maxHW=%dx%d)",
|
||||
attempt + 1, candidates.size(), candidates[attempt],
|
||||
m_options.maxInputHeight, m_options.maxInputWidth);
|
||||
return true;
|
||||
}
|
||||
ANS_DBG("TRT_Build", "buildWithRetry attempt %zu FAILED — trying smaller", attempt + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2527,6 +2581,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
m_options.minInputHeight = origMinH;
|
||||
m_options.minInputWidth = origMinW;
|
||||
|
||||
ANS_DBG("TRT_Build", "buildWithRetry FAIL all %zu candidates exhausted — restored origMaxHW=%dx%d",
|
||||
candidates.size(), origMaxH, origMaxW);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2553,6 +2609,9 @@ bool Engine<T>::buildLoadNetwork(
|
||||
int maxSlotsPerGpu,
|
||||
double memSafetyFactor)
|
||||
{
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) ENTRY onnx=%s normalize=%d maxSlotsPerGpu=%d memSafety=%.3f",
|
||||
onnxModelPath.c_str(), (int)normalize, maxSlotsPerGpu, memSafetyFactor);
|
||||
|
||||
// Force single-GPU when: maxSlotsPerGpu==0 (optimizer bypass),
|
||||
// per-instance forceNoPool, global bypass (OptimizeModelStr),
|
||||
// exported g_forceNoPool, OR single-GPU system with maxSlotsPerGpu==1.
|
||||
@@ -2563,18 +2622,32 @@ bool Engine<T>::buildLoadNetwork(
|
||||
{
|
||||
extern std::atomic<bool> g_forceNoPool;
|
||||
int gpuCount = 0;
|
||||
cudaGetDeviceCount(&gpuCount);
|
||||
cudaError_t cudaErr = cudaGetDeviceCount(&gpuCount);
|
||||
if (cudaErr != cudaSuccess) {
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) WARN cudaGetDeviceCount err=%d (%s) — assuming gpuCount=0",
|
||||
(int)cudaErr, cudaGetErrorString(cudaErr));
|
||||
}
|
||||
bool singleGpuNoElastic = (gpuCount <= 1 && maxSlotsPerGpu == 1);
|
||||
bool gForceNoPool = g_forceNoPool.load(std::memory_order_relaxed);
|
||||
bool globalBypass = TRTEngineCache::globalBypass().load(std::memory_order_relaxed);
|
||||
bool noPool = (maxSlotsPerGpu == 0) || m_forceNoPool ||
|
||||
g_forceNoPool.load(std::memory_order_relaxed) ||
|
||||
TRTEngineCache::globalBypass().load(std::memory_order_relaxed) ||
|
||||
singleGpuNoElastic;
|
||||
gForceNoPool || globalBypass || singleGpuNoElastic;
|
||||
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) DECISION gpuCount=%d maxSlots==0:%d m_forceNoPool=%d g_forceNoPool=%d globalBypass=%d singleGpuNoElastic=%d → noPool=%d",
|
||||
gpuCount, (int)(maxSlotsPerGpu == 0), (int)m_forceNoPool,
|
||||
(int)gForceNoPool, (int)globalBypass, (int)singleGpuNoElastic,
|
||||
(int)noPool);
|
||||
|
||||
if (noPool) {
|
||||
std::cout << "Info: buildLoadNetwork -- single-GPU forced (maxSlots=" << maxSlotsPerGpu
|
||||
<< ", forceNoPool=" << m_forceNoPool
|
||||
<< ", g_forceNoPool=" << g_forceNoPool.load()
|
||||
<< ", gpuCount=" << gpuCount << ")" << std::endl;
|
||||
return buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → single-GPU 4-param overload");
|
||||
bool ok = buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (single-GPU path)",
|
||||
ok ? "SUCCESS" : "FAIL");
|
||||
return ok;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2583,11 +2656,16 @@ bool Engine<T>::buildLoadNetwork(
|
||||
std::cout << "Info: buildLoadNetwork -- activating multi-GPU pool"
|
||||
<< " (maxSlotsPerGpu=" << maxSlotsPerGpu
|
||||
<< ", memSafetyFactor=" << memSafetyFactor << ")" << std::endl;
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → loadSlots multi-GPU pool maxSlotsPerGpu=%d memSafety=%.3f",
|
||||
maxSlotsPerGpu, memSafetyFactor);
|
||||
|
||||
return loadSlots(m_options, onnxModelPath,
|
||||
subVals, divVals, normalize,
|
||||
/*fromOnnx=*/true,
|
||||
maxSlotsPerGpu, memSafetyFactor);
|
||||
bool ok = loadSlots(m_options, onnxModelPath,
|
||||
subVals, divVals, normalize,
|
||||
/*fromOnnx=*/true,
|
||||
maxSlotsPerGpu, memSafetyFactor);
|
||||
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (multi-GPU pool path)",
|
||||
ok ? "SUCCESS" : "FAIL");
|
||||
return ok;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
Reference in New Issue
Block a user