Remove [Engine] and [EnginePoolManager] debug log messages
Clean up verbose engine telemetry emitted to stdout/stderr and the Windows Event Viewer. Remove logEngineEvent/logEvent calls (and their diagnostic-only locals) across the TensorRT engine load, build, run, multi-GPU, and pool-manager paths, plus the now-unused logEvent helper in EnginePoolManager. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -267,7 +267,6 @@ bool Engine<T>::buildLoadNetwork(std::string onnxModelPath, const std::array<flo
|
||||
|
||||
if (FileExist(engineName)) {
|
||||
if (m_verbose) { std::cout << "Engine file found: " << engineName << std::endl; }
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Loading cached engine: " + engineName);
|
||||
bool loadOk = loadNetwork(engineName, subVals, divVals, normalize);
|
||||
if (loadOk) {
|
||||
return true;
|
||||
@@ -280,10 +279,6 @@ bool Engine<T>::buildLoadNetwork(std::string onnxModelPath, const std::array<flo
|
||||
if (m_skipOnnxRebuild) {
|
||||
// Elastic growth / non-critical path — don't delete and rebuild.
|
||||
// Just fail gracefully; the pool continues with existing slots.
|
||||
size_t freeMem = 0, totalMem = 0;
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Load failed (skip rebuild, "
|
||||
+ std::to_string(freeMem >> 20) + " MiB free): " + engineName, true);
|
||||
return false;
|
||||
}
|
||||
// Check if the failure was due to VRAM exhaustion vs. corrupt file.
|
||||
@@ -301,17 +296,11 @@ bool Engine<T>::buildLoadNetwork(std::string onnxModelPath, const std::array<flo
|
||||
cudaMemGetInfo(&freeCheck, &totalCheck);
|
||||
constexpr size_t kMinFreeBytes = 256ULL * 1024 * 1024;
|
||||
if (m_lastLoadFailedVRAM || freeCheck < kMinFreeBytes) {
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Load failed due to LOW VRAM ("
|
||||
+ std::to_string(freeCheck / (1024 * 1024)) + " MiB free / "
|
||||
+ std::to_string(totalCheck / (1024 * 1024)) + " MiB total"
|
||||
+ ", vramFlag=" + std::to_string(m_lastLoadFailedVRAM)
|
||||
+ "). Preserving engine file (not corrupt): " + engineName, true);
|
||||
return false; // Don't delete the file, don't try ONNX rebuild
|
||||
}
|
||||
}
|
||||
// Enough VRAM AND loadNetwork didn't flag VRAM as cause → file is
|
||||
// likely corrupt/incompatible. Delete and rebuild from ONNX.
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Cached engine INVALID, deleting and rebuilding: " + engineName, true);
|
||||
try { std::filesystem::remove(engineName); } catch (...) {}
|
||||
// Fall through to ONNX build path below
|
||||
}
|
||||
@@ -321,14 +310,11 @@ bool Engine<T>::buildLoadNetwork(std::string onnxModelPath, const std::array<flo
|
||||
// Demand-driven growth: if no cached engine exists, bail out rather
|
||||
// than triggering a full ONNX→TRT build (30-60s, massive VRAM).
|
||||
if (m_skipOnnxBuild) {
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Engine file NOT found, skipping ONNX build (demand growth): " + engineName);
|
||||
return false;
|
||||
}
|
||||
logEngineEvent("[Engine] buildLoadNetwork: Engine file NOT found, will build from ONNX: " + engineName);
|
||||
}
|
||||
if (!FileExist(onnxModelPath)) {
|
||||
// ONNX model does not exist, try to find alternative precision engine
|
||||
logEngineEvent("[Engine] buildLoadNetwork: ONNX model also not found: " + onnxModelPath, true);
|
||||
std::cout << "Searching for alternative precision engine..." << std::endl;
|
||||
|
||||
size_t lastDot = engineName.find_last_of('.');
|
||||
@@ -411,9 +397,7 @@ bool Engine<T>::buildLoadNetwork(std::string onnxModelPath, const std::array<flo
|
||||
bool preParsed = parseOnnxModelSafe(tempParser.get(),
|
||||
onnxBuffer.data(), onnxBuffer.size(), &sehPreAnalysis);
|
||||
if (sehPreAnalysis != 0) {
|
||||
std::cout << "[Engine] WARNING: ONNX pre-analysis parse crashed ("
|
||||
<< formatCrashCode(sehPreAnalysis)
|
||||
<< "). Skipping pre-analysis, proceeding with build..." << std::endl;
|
||||
// Skipping pre-analysis, proceeding with build...
|
||||
}
|
||||
else if (preParsed) {
|
||||
auto numInputs = tempNetwork->getNbInputs();
|
||||
@@ -718,7 +702,6 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
// ============================================================================
|
||||
|
||||
if (!Util::doesFileExist(trtModelPath)) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: Engine file not found: " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -727,13 +710,11 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
{
|
||||
std::ifstream file(trtModelPath, std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open()) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: Cannot open engine file: " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::streamsize size = file.tellg();
|
||||
if (size <= 0) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: Engine file is empty (0 bytes): " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -741,7 +722,6 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
|
||||
std::vector<char> buffer(size);
|
||||
if (!file.read(buffer.data(), size)) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: Read error on engine file: " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -761,7 +741,6 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
|
||||
m_runtime = std::shared_ptr<nvinfer1::IRuntime>{ nvinfer1::createInferRuntime(m_logger) };
|
||||
if (!m_runtime) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: createInferRuntime returned null for " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -830,17 +809,8 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
constexpr size_t kMinFreeBytes = 256ULL * 1024 * 1024; // 256 MiB minimum
|
||||
if (memErr != cudaSuccess) {
|
||||
// cudaMemGetInfo failed — CUDA context may not be initialized on this thread.
|
||||
// Log but don't reject: let TRT try to deserialize (it may succeed).
|
||||
logEngineEvent("[Engine] loadNetwork WARNING: cudaMemGetInfo failed ("
|
||||
+ std::string(cudaGetErrorString(memErr)) + ") on GPU["
|
||||
+ std::to_string(m_options.deviceIndex) + "] — skipping VRAM check for "
|
||||
+ trtModelPath, true);
|
||||
// Don't reject: let TRT try to deserialize (it may succeed).
|
||||
} else if (freeVRAM < kMinFreeBytes) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: GPU[" + std::to_string(m_options.deviceIndex)
|
||||
+ "] only " + std::to_string(freeVRAM / (1024 * 1024))
|
||||
+ " MiB free / " + std::to_string(totalVRAM / (1024 * 1024))
|
||||
+ " MiB total (need " + std::to_string(kMinFreeBytes / (1024 * 1024))
|
||||
+ " MiB) for " + trtModelPath, true);
|
||||
m_lastLoadFailedVRAM = true; // signal to buildLoadNetwork: engine file is NOT corrupt
|
||||
return false;
|
||||
}
|
||||
@@ -861,13 +831,9 @@ bool Engine<T>::loadNetwork(std::string trtModelPath, const std::array<float, 3>
|
||||
deserializeCudaEngineSafe(m_runtime.get(), buffer.data(),
|
||||
buffer.size(), &sehCodeDeserialize));
|
||||
if (sehCodeDeserialize != 0) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: deserializeCudaEngine CRASHED (SEH "
|
||||
+ formatCrashCode(sehCodeDeserialize) + ") for " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
if (!m_engine) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: deserializeCudaEngine returned null for "
|
||||
+ trtModelPath + " (file size=" + std::to_string(size / (1024*1024)) + " MiB)", true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1018,8 +984,6 @@ trt_cache_create_context:
|
||||
m_context = std::unique_ptr<nvinfer1::IExecutionContext>(m_engine->createExecutionContext());
|
||||
if (!m_context) {
|
||||
ANS_DBG("TRT_Load", "ERROR: createExecutionContext returned null");
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: createExecutionContext returned null for "
|
||||
+ trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1106,9 +1070,6 @@ trt_cache_create_context:
|
||||
// Allocate GPU memory
|
||||
cudaError_t err = cudaMalloc(&m_buffers[i], requestedMemory);
|
||||
if (err != cudaSuccess) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: cudaMalloc input buffer ("
|
||||
+ std::to_string(requestedMemory / (1024*1024)) + " MiB): "
|
||||
+ cudaGetErrorString(err) + " for " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1179,9 +1140,6 @@ trt_cache_create_context:
|
||||
// Allocate GPU memory
|
||||
cudaError_t err = cudaMalloc(&m_buffers[i], requestedMemory);
|
||||
if (err != cudaSuccess) {
|
||||
logEngineEvent("[Engine] loadNetwork FAIL: cudaMalloc output buffer ("
|
||||
+ std::to_string(requestedMemory / (1024*1024)) + " MiB): "
|
||||
+ cudaGetErrorString(err) + " for " + trtModelPath, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1534,9 +1492,6 @@ bool Engine<T>::build(std::string onnxModelPath, const std::array<float, 3>& sub
|
||||
auto parsed = parseOnnxModelSafe(parser.get(), buffer.data(),
|
||||
buffer.size(), &sehCodeParse);
|
||||
if (sehCodeParse != 0) {
|
||||
std::cout << "[Engine] FATAL: ONNX parser crashed ("
|
||||
<< formatCrashCode(sehCodeParse) << ")" << std::endl;
|
||||
std::cout << "[Engine] This may indicate a corrupt ONNX file or driver issue." << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (!parsed) {
|
||||
@@ -2317,12 +2272,6 @@ bool Engine<T>::build(std::string onnxModelPath, const std::array<float, 3>& sub
|
||||
auto endTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if (sehCodeBuild != 0) {
|
||||
std::cout << "\n========================================" << std::endl;
|
||||
std::cout << "Build CRASHED!" << std::endl;
|
||||
std::cout << "========================================" << std::endl;
|
||||
std::cout << "[Engine] FATAL: buildSerializedNetwork crashed ("
|
||||
<< formatCrashCode(sehCodeBuild) << ")" << std::endl;
|
||||
std::cout << "[Engine] This typically indicates insufficient GPU memory or a driver crash." << std::endl;
|
||||
Util::checkCudaErrorCode(cudaStreamDestroy(profileStream));
|
||||
return false;
|
||||
}
|
||||
@@ -2478,9 +2427,6 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
bool retryParsed = parseOnnxModelSafe(tempParser.get(),
|
||||
onnxBuffer.data(), onnxBuffer.size(), &sehRetryParse);
|
||||
if (sehRetryParse != 0) {
|
||||
std::cout << "[Engine] WARNING: ONNX pre-analysis parse crashed in "
|
||||
<< "buildWithRetry (" << formatCrashCode(sehRetryParse)
|
||||
<< "). Skipping spatial analysis." << std::endl;
|
||||
// hasDynamicSpatial stays false → single build() attempt
|
||||
}
|
||||
else if (retryParsed && tempNetwork->getNbInputs() > 0) {
|
||||
@@ -2501,8 +2447,6 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
unsigned long sehBuild = 0;
|
||||
bool ok = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehBuild);
|
||||
if (sehBuild != 0) {
|
||||
std::cout << "[Engine] FATAL: build() crashed in buildWithRetry ("
|
||||
<< formatCrashCode(sehBuild) << ")" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return ok;
|
||||
@@ -2557,40 +2501,17 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
for (size_t attempt = 0; attempt < candidates.size(); ++attempt) {
|
||||
setCandidateOptions(candidates[attempt]);
|
||||
|
||||
std::cout << "[Engine] buildWithRetry attempt " << (attempt + 1)
|
||||
<< "/" << candidates.size() << " (max "
|
||||
<< m_options.maxInputHeight << "x"
|
||||
<< m_options.maxInputWidth << ")" << std::endl;
|
||||
|
||||
{
|
||||
unsigned long sehAttempt = 0;
|
||||
bool attemptOk = buildSafe(onnxModelPath, subVals, divVals, normalize, &sehAttempt);
|
||||
if (sehAttempt != 0) {
|
||||
std::cout << "[Engine] Build crashed ("
|
||||
<< formatCrashCode(sehAttempt) << ") at max "
|
||||
<< m_options.maxInputHeight << "x"
|
||||
<< m_options.maxInputWidth << std::endl;
|
||||
// CUDA context may be corrupted — no point retrying
|
||||
return false;
|
||||
}
|
||||
if (attemptOk) {
|
||||
if (attempt > 0) {
|
||||
std::cout << "[Engine] Built with reduced max "
|
||||
<< m_options.maxInputHeight << "x"
|
||||
<< m_options.maxInputWidth
|
||||
<< " (requested " << origMaxH << "x" << origMaxW
|
||||
<< " exceeded GPU capacity)" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (attempt + 1 < candidates.size()) {
|
||||
std::cout << "[Engine] Build failed at max "
|
||||
<< m_options.maxInputHeight << "x"
|
||||
<< m_options.maxInputWidth
|
||||
<< ", trying smaller..." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// All candidates exhausted — restore original options for error reporting
|
||||
@@ -2601,10 +2522,6 @@ bool Engine<T>::buildWithRetry(std::string onnxModelPath,
|
||||
m_options.minInputHeight = origMinH;
|
||||
m_options.minInputWidth = origMinW;
|
||||
|
||||
std::cout << "[Engine] buildWithRetry: all spatial dimension fallbacks "
|
||||
<< "exhausted (tried " << candidates.size() << " candidates from "
|
||||
<< candidates.front() << " down to " << candidates.back() << ")"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user