Add logs to custom models and model optimisation

This commit is contained in:
2026-04-25 13:17:36 +10:00
parent 8a95ed6b8c
commit ef2a122fec
2 changed files with 231 additions and 19 deletions

View File

@@ -2411,6 +2411,12 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
const std::array<float, 3>& divVals,
bool normalize)
{
ANS_DBG("TRT_Build", "buildWithRetry ENTRY onnx=%s normalize=%d optMaxHW=%dx%d optOptHW=%dx%d optMinHW=%dx%d",
onnxModelPath.c_str(), (int)normalize,
m_options.maxInputHeight, m_options.maxInputWidth,
m_options.optInputHeight, m_options.optInputWidth,
m_options.minInputHeight, m_options.minInputWidth);
// -- Quick pre-analysis: detect dynamic spatial dims in ONNX ---------------
bool hasDynamicSpatial = false;
int onnxFixedH = 0, onnxFixedW = 0; // 0 = dynamic (-1 in ONNX)
@@ -2423,6 +2429,10 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
nvonnxparser::createParser(*tempNetwork, m_logger));
std::ifstream onnxFile(onnxModelPath, std::ios::binary | std::ios::ate);
if (!onnxFile.good()) {
ANS_DBG("TRT_Build", "buildWithRetry WARN cannot open ONNX for pre-parse path=%s",
onnxModelPath.c_str());
}
if (onnxFile.good()) {
std::streamsize onnxSize = onnxFile.tellg();
onnxFile.seekg(0, std::ios::beg);
@@ -2432,6 +2442,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
bool retryParsed = parseOnnxModelSafe(tempParser.get(),
onnxBuffer.data(), onnxBuffer.size(), &sehRetryParse);
if (sehRetryParse != 0) {
ANS_DBG("TRT_Build", "buildWithRetry WARN SEH=0x%lx during pre-parse — fall through to single build()",
sehRetryParse);
// hasDynamicSpatial stays false → single build() attempt
}
else if (retryParsed && tempNetwork->getNbInputs() > 0) {
@@ -2441,19 +2453,39 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
hasDynamicSpatial = true;
onnxFixedH = (dims.d[2] != -1) ? dims.d[2] : 0;
onnxFixedW = (dims.d[3] != -1) ? dims.d[3] : 0;
ANS_DBG("TRT_Build", "buildWithRetry pre-parse OK nbDims=%d dims=[%d,%d,%d,%d] dynSpatial=%d fixedHW=%dx%d",
dims.nbDims,
(int)dims.d[0], (int)dims.d[1], (int)dims.d[2], (int)dims.d[3],
(int)hasDynamicSpatial, onnxFixedH, onnxFixedW);
} else {
ANS_DBG("TRT_Build", "buildWithRetry WARN nbDims=%d (<4) — treating as fixed", dims.nbDims);
}
} else {
ANS_DBG("TRT_Build", "buildWithRetry WARN parse failed or no inputs (parsed=%d nbInputs=%d)",
(int)retryParsed,
retryParsed ? tempNetwork->getNbInputs() : -1);
}
} else {
// The argument is explicitly cast to long long, so the conversion must be
// %lld. %zd expects a signed size_t-width argument and is a format/argument
// mismatch wherever that width differs from long long.
ANS_DBG("TRT_Build", "buildWithRetry WARN ONNX read failed bytes=%lld",
(long long)onnxSize);
}
}
} else {
ANS_DBG("TRT_Build", "buildWithRetry SKIP pre-parse (maxInputHW=%dx%d not both >0)",
m_options.maxInputHeight, m_options.maxInputWidth);
}
// -- Fixed-spatial or no dynamic dims: single build attempt ----------------
if (!hasDynamicSpatial) {
ANS_DBG("TRT_Build", "buildWithRetry FIXED_SPATIAL → single buildSafe() attempt");
unsigned long sehBuild = 0;
bool ok = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehBuild);
if (sehBuild != 0) {
ANS_DBG("TRT_Build", "buildWithRetry FAIL fixed-spatial SEH=0x%lx", sehBuild);
return false;
}
ANS_DBG("TRT_Build", "buildWithRetry %s fixed-spatial",
ok ? "SUCCESS" : "FAIL");
return ok;
}
@@ -2482,6 +2514,16 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
if (candidates.back() > 640) candidates.push_back(640);
if (candidates.back() > 320) candidates.push_back(320);
{
std::ostringstream oss;
for (size_t i = 0; i < candidates.size(); ++i) {
if (i) oss << ",";
oss << candidates[i];
}
ANS_DBG("TRT_Build", "buildWithRetry DYNAMIC_SPATIAL dynH=%d dynW=%d maxDynDim=%d candidates=[%s]",
(int)dynamicH, (int)dynamicW, maxDynDim, oss.str().c_str());
}
// Helper: configure m_options for a given candidate
auto setCandidateOptions = [&](int candidate) {
float scale = static_cast<float>(candidate) / maxDynDim;
@@ -2506,16 +2548,28 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
for (size_t attempt = 0; attempt < candidates.size(); ++attempt) {
setCandidateOptions(candidates[attempt]);
ANS_DBG("TRT_Build", "buildWithRetry ATTEMPT %zu/%zu candidate=%d maxHW=%dx%d optHW=%dx%d minHW=%dx%d",
attempt + 1, candidates.size(), candidates[attempt],
m_options.maxInputHeight, m_options.maxInputWidth,
m_options.optInputHeight, m_options.optInputWidth,
m_options.minInputHeight, m_options.minInputWidth);
{
unsigned long sehAttempt = 0;
bool attemptOk = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehAttempt);
if (sehAttempt != 0) {
ANS_DBG("TRT_Build", "buildWithRetry FAIL SEH=0x%lx on candidate=%d — abort (CUDA may be corrupted)",
sehAttempt, candidates[attempt]);
// CUDA context may be corrupted — no point retrying
return false;
}
if (attemptOk) {
ANS_DBG("TRT_Build", "buildWithRetry SUCCESS at attempt %zu/%zu candidate=%d (final maxHW=%dx%d)",
attempt + 1, candidates.size(), candidates[attempt],
m_options.maxInputHeight, m_options.maxInputWidth);
return true;
}
ANS_DBG("TRT_Build", "buildWithRetry attempt %zu FAILED — trying smaller", attempt + 1);
}
}
@@ -2527,6 +2581,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
m_options.minInputHeight = origMinH;
m_options.minInputWidth = origMinW;
ANS_DBG("TRT_Build", "buildWithRetry FAIL all %zu candidates exhausted — restored origMaxHW=%dx%d",
candidates.size(), origMaxH, origMaxW);
return false;
}
@@ -2553,6 +2609,9 @@ bool Engine<T>::buildLoadNetwork(
int maxSlotsPerGpu,
double memSafetyFactor)
{
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) ENTRY onnx=%s normalize=%d maxSlotsPerGpu=%d memSafety=%.3f",
onnxModelPath.c_str(), (int)normalize, maxSlotsPerGpu, memSafetyFactor);
// Force single-GPU when: maxSlotsPerGpu==0 (optimizer bypass),
// per-instance forceNoPool, global bypass (OptimizeModelStr),
// exported g_forceNoPool, OR single-GPU system with maxSlotsPerGpu==1.
@@ -2563,18 +2622,32 @@ bool Engine<T>::buildLoadNetwork(
{
extern std::atomic<bool> g_forceNoPool;
int gpuCount = 0;
cudaGetDeviceCount(&gpuCount);
cudaError_t cudaErr = cudaGetDeviceCount(&gpuCount);
if (cudaErr != cudaSuccess) {
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) WARN cudaGetDeviceCount err=%d (%s) — assuming gpuCount=0",
(int)cudaErr, cudaGetErrorString(cudaErr));
}
bool singleGpuNoElastic = (gpuCount <= 1 && maxSlotsPerGpu == 1);
bool gForceNoPool = g_forceNoPool.load(std::memory_order_relaxed);
bool globalBypass = TRTEngineCache::globalBypass().load(std::memory_order_relaxed);
bool noPool = (maxSlotsPerGpu == 0) || m_forceNoPool ||
g_forceNoPool.load(std::memory_order_relaxed) ||
TRTEngineCache::globalBypass().load(std::memory_order_relaxed) ||
singleGpuNoElastic;
gForceNoPool || globalBypass || singleGpuNoElastic;
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) DECISION gpuCount=%d maxSlots==0:%d m_forceNoPool=%d g_forceNoPool=%d globalBypass=%d singleGpuNoElastic=%d → noPool=%d",
gpuCount, (int)(maxSlotsPerGpu == 0), (int)m_forceNoPool,
(int)gForceNoPool, (int)globalBypass, (int)singleGpuNoElastic,
(int)noPool);
if (noPool) {
std::cout << "Info: buildLoadNetwork -- single-GPU forced (maxSlots=" << maxSlotsPerGpu
<< ", forceNoPool=" << m_forceNoPool
<< ", g_forceNoPool=" << g_forceNoPool.load()
<< ", gpuCount=" << gpuCount << ")" << std::endl;
return buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → single-GPU 4-param overload");
bool ok = buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (single-GPU path)",
ok ? "SUCCESS" : "FAIL");
return ok;
}
}
@@ -2583,11 +2656,16 @@ bool Engine<T>::buildLoadNetwork(
std::cout << "Info: buildLoadNetwork -- activating multi-GPU pool"
<< " (maxSlotsPerGpu=" << maxSlotsPerGpu
<< ", memSafetyFactor=" << memSafetyFactor << ")" << std::endl;
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → loadSlots multi-GPU pool maxSlotsPerGpu=%d memSafety=%.3f",
maxSlotsPerGpu, memSafetyFactor);
return loadSlots(m_options, onnxModelPath,
subVals, divVals, normalize,
/*fromOnnx=*/true,
maxSlotsPerGpu, memSafetyFactor);
bool ok = loadSlots(m_options, onnxModelPath,
subVals, divVals, normalize,
/*fromOnnx=*/true,
maxSlotsPerGpu, memSafetyFactor);
ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (multi-GPU pool path)",
ok ? "SUCCESS" : "FAIL");
return ok;
}
template <typename T>