Improve ANSCV

This commit is contained in:
2026-04-21 09:26:02 +10:00
parent 9f0a10a4c8
commit 7e772f76bc
15 changed files with 749 additions and 421 deletions

View File

@@ -720,8 +720,24 @@ void Engine<T>::lockGpuClocks(int deviceIndex, int requestedMHz) {
if (rc == nvml_types::SUCCESS) {
m_clocksLocked = true;
m_nvmlDeviceIdx = static_cast<unsigned int>(deviceIndex);
// Always emit to DebugView so operators can confirm the lock took
// effect without needing to read engine-level verbose output.
ANS_DBG("TRT_Clock",
"GPU clocks LOCKED at %u MHz (device %d) — P-state will stay high, "
"no WDDM down-clock between inferences",
targetMHz, deviceIndex);
if (m_verbose) std::cout << "Info: GPU clocks locked at " << targetMHz << " MHz (device " << deviceIndex << ")" << std::endl;
} else {
// Surface the failure reason + remediation in DebugView. Most common
// failure is access-denied (requires Administrator) or the driver
// refusing the requested frequency. Users see this in the log and
// know to elevate, set NVCP 'Prefer maximum performance', or run
// `nvidia-smi -lgc <MHz>,<MHz>` before launching.
ANS_DBG("TRT_Clock",
"GPU clock lock FAILED (nvml rc=%s) — expect 2-3x inference latency from "
"WDDM down-clocking. Fix: run as Admin, OR set NVCP 'Prefer maximum "
"performance' for this app, OR: nvidia-smi -lgc %u,%u",
errName(rc), targetMHz, targetMHz);
if (m_verbose) {
std::cout << "Warning: nvmlDeviceSetGpuLockedClocks failed: " << errName(rc) << std::endl;
std::cout << " (Run as Administrator, or use: nvidia-smi -lgc " << targetMHz << "," << targetMHz << ")" << std::endl;