Improve ANSCV

2026-04-21 09:26:02 +10:00
parent 9f0a10a4c8
commit 7e772f76bc
15 changed files with 749 additions and 421 deletions
--- a/modules/ANSODEngine/engine.h
+++ b/modules/ANSODEngine/engine.h
@@ -720,8 +720,24 @@ void Engine<T>::lockGpuClocks(int deviceIndex, int requestedMHz) {
    if (rc == nvml_types::SUCCESS) {
        m_clocksLocked  = true;
        m_nvmlDeviceIdx = static_cast<unsigned int>(deviceIndex);
+        // Always emit to DebugView so operators can confirm the lock took
+        // effect without needing to read engine-level verbose output.
+        ANS_DBG("TRT_Clock",
+            "GPU clocks LOCKED at %u MHz (device %d) — P-state will stay high, "
+            "no WDDM down-clock between inferences",
+            targetMHz, deviceIndex);
        if (m_verbose) std::cout << "Info: GPU clocks locked at " << targetMHz << " MHz (device " << deviceIndex << ")" << std::endl;
    } else {
+        // Surface the failure reason + remediation in DebugView. Most common
+        // failure is access-denied (requires Administrator) or the driver
+        // refusing the requested frequency. Users see this in the log and
+        // know to elevate, set NVCP 'Prefer maximum performance', or run
+        // `nvidia-smi -lgc <MHz>,<MHz>` before launching.
+        ANS_DBG("TRT_Clock",
+            "GPU clock lock FAILED (nvml rc=%s) — expect 2-3x inference latency from "
+            "WDDM down-clocking. Fix: run as Admin, OR set NVCP 'Prefer maximum "
+            "performance' for this app, OR: nvidia-smi -lgc %u,%u",
+            errName(rc), targetMHz, targetMHz);
        if (m_verbose) {
            std::cout << "Warning: nvmlDeviceSetGpuLockedClocks failed: " << errName(rc) << std::endl;
            std::cout << "  (Run as Administrator, or use: nvidia-smi -lgc " << targetMHz << "," << targetMHz << ")" << std::endl;