Add logs to custom models and model optimisation

2026-04-25 13:17:36 +10:00
parent 8a95ed6b8c
commit ef2a122fec
2 changed files with 231 additions and 19 deletions
--- a/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl
+++ b/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl
@@ -2411,6 +2411,12 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
                                const std::array<float, 3>& divVals,
                                bool normalize)
 {
+    ANS_DBG("TRT_Build", "buildWithRetry ENTRY onnx=%s normalize=%d optMaxHW=%dx%d optOptHW=%dx%d optMinHW=%dx%d",
+        onnxModelPath.c_str(), (int)normalize,
+        m_options.maxInputHeight, m_options.maxInputWidth,
+        m_options.optInputHeight, m_options.optInputWidth,
+        m_options.minInputHeight, m_options.minInputWidth);
+
    // -- Quick pre-analysis: detect dynamic spatial dims in ONNX ---------------
    bool hasDynamicSpatial = false;
    int onnxFixedH = 0, onnxFixedW = 0;  // 0 = dynamic (-1 in ONNX)
@@ -2423,6 +2429,10 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
            nvonnxparser::createParser(*tempNetwork, m_logger));

        std::ifstream onnxFile(onnxModelPath, std::ios::binary | std::ios::ate);
+        if (!onnxFile.good()) {
+            ANS_DBG("TRT_Build", "buildWithRetry WARN cannot open ONNX for pre-parse path=%s",
+                onnxModelPath.c_str());
+        }
        if (onnxFile.good()) {
            std::streamsize onnxSize = onnxFile.tellg();
            onnxFile.seekg(0, std::ios::beg);
@@ -2432,6 +2442,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
                bool retryParsed = parseOnnxModelSafe(tempParser.get(),
                    onnxBuffer.data(), onnxBuffer.size(), &sehRetryParse);
                if (sehRetryParse != 0) {
+                    ANS_DBG("TRT_Build", "buildWithRetry WARN SEH=0x%lx during pre-parse — fall through to single build()",
+                        sehRetryParse);
                    // hasDynamicSpatial stays false → single build() attempt
                }
                else if (retryParsed && tempNetwork->getNbInputs() > 0) {
@@ -2441,19 +2453,39 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
                            hasDynamicSpatial = true;
                        onnxFixedH = (dims.d[2] != -1) ? dims.d[2] : 0;
                        onnxFixedW = (dims.d[3] != -1) ? dims.d[3] : 0;
+                        ANS_DBG("TRT_Build", "buildWithRetry pre-parse OK nbDims=%d dims=[%d,%d,%d,%d] dynSpatial=%d fixedHW=%dx%d",
+                            dims.nbDims,
+                            (int)dims.d[0], (int)dims.d[1], (int)dims.d[2], (int)dims.d[3],
+                            (int)hasDynamicSpatial, onnxFixedH, onnxFixedW);
+                    } else {
+                        ANS_DBG("TRT_Build", "buildWithRetry WARN nbDims=%d (<4) — treating as fixed", dims.nbDims);
                    }
+                } else {
+                    ANS_DBG("TRT_Build", "buildWithRetry WARN parse failed or no inputs (parsed=%d nbInputs=%d)",
+                        (int)retryParsed,
+                        retryParsed ? tempNetwork->getNbInputs() : -1);
                }
+            } else {
+                ANS_DBG("TRT_Build", "buildWithRetry WARN ONNX read failed bytes=%lld",
+                    (long long)onnxSize);
            }
        }
+    } else {
+        ANS_DBG("TRT_Build", "buildWithRetry SKIP pre-parse (maxInputHW=%dx%d not both >0)",
+            m_options.maxInputHeight, m_options.maxInputWidth);
    }

    // -- Fixed-spatial or no dynamic dims: single build attempt ----------------
    if (!hasDynamicSpatial) {
+        ANS_DBG("TRT_Build", "buildWithRetry FIXED_SPATIAL → single buildSafe() attempt");
        unsigned long sehBuild = 0;
        bool ok = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehBuild);
        if (sehBuild != 0) {
+            ANS_DBG("TRT_Build", "buildWithRetry FAIL fixed-spatial SEH=0x%lx", sehBuild);
            return false;
        }
+        ANS_DBG("TRT_Build", "buildWithRetry %s fixed-spatial",
+            ok ? "SUCCESS" : "FAIL");
        return ok;
    }

@@ -2482,6 +2514,16 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
    if (candidates.back() > 640) candidates.push_back(640);
    if (candidates.back() > 320) candidates.push_back(320);

+    {
+        std::ostringstream oss;
+        for (size_t i = 0; i < candidates.size(); ++i) {
+            if (i) oss << ",";
+            oss << candidates[i];
+        }
+        ANS_DBG("TRT_Build", "buildWithRetry DYNAMIC_SPATIAL dynH=%d dynW=%d maxDynDim=%d candidates=[%s]",
+            (int)dynamicH, (int)dynamicW, maxDynDim, oss.str().c_str());
+    }
+
    // Helper: configure m_options for a given candidate
    auto setCandidateOptions = [&](int candidate) {
        float scale = static_cast<float>(candidate) / maxDynDim;
@@ -2506,16 +2548,28 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
    for (size_t attempt = 0; attempt < candidates.size(); ++attempt) {
        setCandidateOptions(candidates[attempt]);

+        ANS_DBG("TRT_Build", "buildWithRetry ATTEMPT %zu/%zu candidate=%d maxHW=%dx%d optHW=%dx%d minHW=%dx%d",
+            attempt + 1, candidates.size(), candidates[attempt],
+            m_options.maxInputHeight, m_options.maxInputWidth,
+            m_options.optInputHeight, m_options.optInputWidth,
+            m_options.minInputHeight, m_options.minInputWidth);
+
        {
            unsigned long sehAttempt = 0;
            bool attemptOk = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehAttempt);
            if (sehAttempt != 0) {
+                ANS_DBG("TRT_Build", "buildWithRetry FAIL SEH=0x%lx on candidate=%d — abort (CUDA may be corrupted)",
+                    sehAttempt, candidates[attempt]);
                // CUDA context may be corrupted — no point retrying
                return false;
            }
            if (attemptOk) {
+                ANS_DBG("TRT_Build", "buildWithRetry SUCCESS at attempt %zu/%zu candidate=%d (final maxHW=%dx%d)",
+                    attempt + 1, candidates.size(), candidates[attempt],
+                    m_options.maxInputHeight, m_options.maxInputWidth);
                return true;
            }
+            ANS_DBG("TRT_Build", "buildWithRetry attempt %zu FAILED — trying smaller", attempt + 1);
        }
    }

@@ -2527,6 +2581,8 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
    m_options.minInputHeight = origMinH;
    m_options.minInputWidth  = origMinW;

+    ANS_DBG("TRT_Build", "buildWithRetry FAIL all %zu candidates exhausted — restored origMaxHW=%dx%d",
+        candidates.size(), origMaxH, origMaxW);
    return false;
 }

@@ -2553,6 +2609,9 @@ bool Engine<T>::buildLoadNetwork(
        int                          maxSlotsPerGpu,
        double                       memSafetyFactor)
 {
+    ANS_DBG("TRT_Build", "buildLoadNetwork(6p) ENTRY onnx=%s normalize=%d maxSlotsPerGpu=%d memSafety=%.3f",
+        onnxModelPath.c_str(), (int)normalize, maxSlotsPerGpu, memSafetyFactor);
+
    // Force single-GPU when: maxSlotsPerGpu==0 (optimizer bypass),
    // per-instance forceNoPool, global bypass (OptimizeModelStr),
    // exported g_forceNoPool, OR single-GPU system with maxSlotsPerGpu==1.
@@ -2563,18 +2622,32 @@ bool Engine<T>::buildLoadNetwork(
    {
        extern std::atomic<bool> g_forceNoPool;
        int gpuCount = 0;
-        cudaGetDeviceCount(&gpuCount);
+        cudaError_t cudaErr = cudaGetDeviceCount(&gpuCount);
+        if (cudaErr != cudaSuccess) {
+            ANS_DBG("TRT_Build", "buildLoadNetwork(6p) WARN cudaGetDeviceCount err=%d (%s) — assuming gpuCount=0",
+                (int)cudaErr, cudaGetErrorString(cudaErr));
+        }
        bool singleGpuNoElastic = (gpuCount <= 1 && maxSlotsPerGpu == 1);
+        bool gForceNoPool      = g_forceNoPool.load(std::memory_order_relaxed);
+        bool globalBypass      = TRTEngineCache::globalBypass().load(std::memory_order_relaxed);
        bool noPool = (maxSlotsPerGpu == 0) || m_forceNoPool ||
-                      g_forceNoPool.load(std::memory_order_relaxed) ||
-                      TRTEngineCache::globalBypass().load(std::memory_order_relaxed) ||
-                      singleGpuNoElastic;
+                      gForceNoPool || globalBypass || singleGpuNoElastic;
+
+        ANS_DBG("TRT_Build", "buildLoadNetwork(6p) DECISION gpuCount=%d maxSlots==0:%d m_forceNoPool=%d g_forceNoPool=%d globalBypass=%d singleGpuNoElastic=%d → noPool=%d",
+            gpuCount, (int)(maxSlotsPerGpu == 0), (int)m_forceNoPool,
+            (int)gForceNoPool, (int)globalBypass, (int)singleGpuNoElastic,
+            (int)noPool);
+
        if (noPool) {
            std::cout << "Info: buildLoadNetwork -- single-GPU forced (maxSlots=" << maxSlotsPerGpu
                      << ", forceNoPool=" << m_forceNoPool
                      << ", g_forceNoPool=" << g_forceNoPool.load()
                      << ", gpuCount=" << gpuCount << ")" << std::endl;
-            return buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
+            ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → single-GPU 4-param overload");
+            bool ok = buildLoadNetwork(onnxModelPath, subVals, divVals, normalize);
+            ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (single-GPU path)",
+                ok ? "SUCCESS" : "FAIL");
+            return ok;
        }
    }

@@ -2583,11 +2656,16 @@ bool Engine<T>::buildLoadNetwork(
    std::cout << "Info: buildLoadNetwork -- activating multi-GPU pool"
              << " (maxSlotsPerGpu=" << maxSlotsPerGpu
              << ", memSafetyFactor=" << memSafetyFactor << ")" << std::endl;
+    ANS_DBG("TRT_Build", "buildLoadNetwork(6p) → loadSlots multi-GPU pool maxSlotsPerGpu=%d memSafety=%.3f",
+        maxSlotsPerGpu, memSafetyFactor);

-    return loadSlots(m_options, onnxModelPath,
-                     subVals, divVals, normalize,
-                     /*fromOnnx=*/true,
-                     maxSlotsPerGpu, memSafetyFactor);
+    bool ok = loadSlots(m_options, onnxModelPath,
+                        subVals, divVals, normalize,
+                        /*fromOnnx=*/true,
+                        maxSlotsPerGpu, memSafetyFactor);
+    ANS_DBG("TRT_Build", "buildLoadNetwork(6p) %s (multi-GPU pool path)",
+        ok ? "SUCCESS" : "FAIL");
+    return ok;
 }

 template <typename T>