Remove [Engine] and [EnginePoolManager] debug log messages
Clean up verbose engine telemetry emitted to stdout/stderr and the Windows Event Viewer. Remove logEngineEvent/logEvent calls (and their diagnostic-only locals) across the TensorRT engine load, build, run, multi-GPU, and pool-manager paths, plus the now-unused logEvent helper in EnginePoolManager.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -99,8 +99,6 @@ public:
|
||||
// Note: maxSlotsPerGpu==1 is now the normal "1 slot per GPU" multi-GPU
|
||||
// round-robin mode, so it goes through the pool path below.
|
||||
if (maxSlotsPerGpu == 0) {
|
||||
logEvent("[EnginePoolManager] BYPASS (maxSlots=0): " + key.modelPath
|
||||
+ " — creating non-shared engine");
|
||||
auto engine = std::make_shared<Engine<T>>(options);
|
||||
bool ok = engine->buildLoadNetwork(modelPath, subVals, divVals, normalize);
|
||||
return ok ? engine : nullptr;
|
||||
@@ -114,8 +112,6 @@ public:
|
||||
it->second.evictTime = TimePoint{}; // cancel pending eviction
|
||||
int refs = it->second.refcount;
|
||||
auto engine = it->second.engine;
|
||||
logEvent("[EnginePoolManager] HIT: " + key.modelPath
|
||||
+ " refs=" + std::to_string(refs));
|
||||
|
||||
// Demand-driven growth: only in elastic mode (maxSlotsPerGpu <= 0
|
||||
// or > 1). With maxSlotsPerGpu==1 (round-robin default), the pool
|
||||
@@ -134,19 +130,9 @@ public:
|
||||
constexpr size_t kMinVramForGrowth = 6ULL * 1024 * 1024 * 1024; // 6 GB
|
||||
if (totalVram >= kMinVramForGrowth) {
|
||||
lock.unlock(); // release PoolManager lock before growing
|
||||
std::thread([engine, alive, refs, modelPath = key.modelPath]() {
|
||||
int created = engine->growPool(1);
|
||||
if (created > 0) {
|
||||
logEngineEvent("[EnginePoolManager] DEMAND GROWTH: " + modelPath
|
||||
+ " grew from " + std::to_string(alive)
|
||||
+ " to " + std::to_string(engine->getTotalCapacity())
|
||||
+ " slots (refs=" + std::to_string(refs) + ")");
|
||||
}
|
||||
std::thread([engine]() {
|
||||
engine->growPool(1);
|
||||
}).detach();
|
||||
} else {
|
||||
logEvent("[EnginePoolManager] SKIP GROWTH: " + key.modelPath
|
||||
+ " (GPU VRAM " + std::to_string(totalVram >> 20)
|
||||
+ " MiB < 6 GB threshold, refs=" + std::to_string(refs) + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -155,31 +141,12 @@ public:
|
||||
}
|
||||
|
||||
// Cache miss — create new Engine pool
|
||||
logEvent("[EnginePoolManager] MISS: Creating pool for " + key.modelPath + "...");
|
||||
|
||||
// Log VRAM before attempting to create probe
|
||||
{
|
||||
size_t freeMem = 0, totalMem = 0;
|
||||
cudaSetDevice(options.deviceIndex);
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
logEvent("[EnginePoolManager] GPU[" + std::to_string(options.deviceIndex)
|
||||
+ "] VRAM: " + std::to_string(freeMem >> 20) + " MiB free / "
|
||||
+ std::to_string(totalMem >> 20) + " MiB total (before probe)");
|
||||
}
|
||||
|
||||
auto engine = std::make_shared<Engine<T>>(options);
|
||||
bool ok = engine->buildLoadNetwork(modelPath, subVals, divVals, normalize, maxSlotsPerGpu);
|
||||
if (!ok) {
|
||||
// Step 1: Force-evict all pools with refcount=0 to reclaim VRAM
|
||||
int evicted = forceEvictPending();
|
||||
if (evicted > 0) {
|
||||
size_t freeMem2 = 0, totalMem2 = 0;
|
||||
cudaSetDevice(options.deviceIndex);
|
||||
cudaMemGetInfo(&freeMem2, &totalMem2);
|
||||
logEvent("[EnginePoolManager] RETRY EVICT: Force-evicted " + std::to_string(evicted)
|
||||
+ " pending pool(s), now " + std::to_string(freeMem2 >> 20)
|
||||
+ " MiB free. Retrying " + key.modelPath + "...");
|
||||
|
||||
engine = std::make_shared<Engine<T>>(options);
|
||||
ok = engine->buildLoadNetwork(modelPath, subVals, divVals, normalize, maxSlotsPerGpu);
|
||||
}
|
||||
@@ -189,13 +156,6 @@ public:
|
||||
// consumes ~300-500 MB vs ~50-100 MB for a simple loadNetwork.
|
||||
// Lightweight mode: tasks queue for a single shared slot — slower but works.
|
||||
if (!ok) {
|
||||
size_t freeMem3 = 0, totalMem3 = 0;
|
||||
cudaSetDevice(options.deviceIndex);
|
||||
cudaMemGetInfo(&freeMem3, &totalMem3);
|
||||
logEvent("[EnginePoolManager] RETRY LIGHTWEIGHT: Elastic probe failed, "
|
||||
+ std::to_string(freeMem3 >> 20) + " MiB free. "
|
||||
"Retrying with single-slot mode for " + key.modelPath + "...");
|
||||
|
||||
engine = std::make_shared<Engine<T>>(options);
|
||||
ok = engine->buildLoadNetwork(modelPath, subVals, divVals, normalize);
|
||||
}
|
||||
@@ -208,13 +168,6 @@ public:
|
||||
// Evidence: FireSmoke/detector.onnx failed at 3740 MiB free, then
|
||||
// succeeded 4 seconds later at 3154 MiB free (less VRAM!).
|
||||
if (!ok) {
|
||||
size_t freeMem4 = 0, totalMem4 = 0;
|
||||
cudaSetDevice(options.deviceIndex);
|
||||
cudaMemGetInfo(&freeMem4, &totalMem4);
|
||||
logEvent("[EnginePoolManager] RETRY DELAYED: All attempts failed with "
|
||||
+ std::to_string(freeMem4 >> 20) + " MiB free. "
|
||||
"Waiting 3s before final retry for " + key.modelPath + "...");
|
||||
|
||||
// Release mutex during sleep so other tasks can proceed
|
||||
// (they may complete pool creation that resolves our issue)
|
||||
lock.unlock();
|
||||
@@ -226,29 +179,15 @@ public:
|
||||
if (it2 != m_pools.end()) {
|
||||
it2->second.refcount++;
|
||||
it2->second.evictTime = TimePoint{};
|
||||
logEvent("[EnginePoolManager] HIT (after delay): " + key.modelPath
|
||||
+ " refs=" + std::to_string(it2->second.refcount));
|
||||
return it2->second.engine;
|
||||
}
|
||||
|
||||
// Final retry — try lightweight again after delay
|
||||
cudaSetDevice(options.deviceIndex);
|
||||
cudaMemGetInfo(&freeMem4, &totalMem4);
|
||||
logEvent("[EnginePoolManager] RETRY FINAL: " + std::to_string(freeMem4 >> 20)
|
||||
+ " MiB free. Last attempt for " + key.modelPath + "...");
|
||||
|
||||
engine = std::make_shared<Engine<T>>(options);
|
||||
ok = engine->buildLoadNetwork(modelPath, subVals, divVals, normalize);
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
size_t freeMem = 0, totalMem = 0;
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
logEvent("[EnginePoolManager] FAILED: Could not load engine for "
|
||||
+ key.modelPath + " | GPU[" + std::to_string(options.deviceIndex)
|
||||
+ "] VRAM: " + std::to_string(freeMem >> 20) + " MiB free / "
|
||||
+ std::to_string(totalMem >> 20) + " MiB total"
|
||||
+ " (after 4 attempts: elastic, evict, lightweight, delayed)", true);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@@ -261,7 +200,6 @@ public:
|
||||
// Start the lazy-eviction sweeper if not already running
|
||||
startSweeperIfNeeded();
|
||||
|
||||
logEvent("[EnginePoolManager] CREATED: " + key.modelPath + " refs=1");
|
||||
return engine;
|
||||
}
|
||||
|
||||
@@ -280,14 +218,10 @@ public:
|
||||
if (it->second.refcount <= 0) return;
|
||||
|
||||
it->second.refcount--;
|
||||
logEvent("[EnginePoolManager] RELEASE: " + key.modelPath
|
||||
+ " refs=" + std::to_string(it->second.refcount));
|
||||
|
||||
if (it->second.refcount <= 0) {
|
||||
// Mark for lazy eviction — don't destroy yet
|
||||
it->second.evictTime = Clock::now() + std::chrono::seconds(kEvictGraceSec);
|
||||
logEvent("[EnginePoolManager] PENDING EVICT: " + key.modelPath
|
||||
+ " (will evict in " + std::to_string(kEvictGraceSec) + "s if not re-acquired)");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,7 +229,6 @@ public:
|
||||
void clearAll() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
logEvent("[EnginePoolManager] CLEAR ALL (" + std::to_string(m_pools.size()) + " pools)");
|
||||
m_pools.clear();
|
||||
}
|
||||
stopSweeper();
|
||||
@@ -361,17 +294,6 @@ private:
|
||||
using Clock = std::chrono::steady_clock;
|
||||
using TimePoint = std::chrono::time_point<Clock>;
|
||||
|
||||
// Log to stdout/stderr only — no Windows Event Viewer.
|
||||
// Event Viewer logging is handled by logEngineEvent() in engine.h for
|
||||
// critical engine-level errors. EnginePoolManager messages are
|
||||
// informational (HIT/MISS/EVICT) and don't need Event Viewer entries.
|
||||
// Writes `msg` as one line to the console: std::cerr when `isError` is
// true, std::cout otherwise. std::endl ends the line and flushes the stream.
static void logEvent(const std::string& msg, bool isError = false) {
    // Pick the destination once, then emit through a single code path.
    std::ostream& sink = isError ? std::cerr : std::cout;
    sink << msg << std::endl;
}
|
||||
|
||||
struct PoolEntry {
|
||||
std::shared_ptr<Engine<T>> engine;
|
||||
int refcount = 0;
|
||||
@@ -408,7 +330,6 @@ private:
|
||||
int evicted = 0;
|
||||
for (auto it = m_pools.begin(); it != m_pools.end(); ) {
|
||||
if (it->second.refcount <= 0) {
|
||||
logEvent("[EnginePoolManager] FORCE EVICT (VRAM recovery): " + it->first.modelPath);
|
||||
it = m_pools.erase(it);
|
||||
evicted++;
|
||||
} else {
|
||||
@@ -428,7 +349,6 @@ private:
|
||||
&& entry.evictTime != TimePoint{}
|
||||
&& now >= entry.evictTime)
|
||||
{
|
||||
logEvent("[EnginePoolManager] EVICT (expired): " + it->first.modelPath);
|
||||
it = m_pools.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
|
||||
Reference in New Issue
Block a user