From 01eabf76bdb2deb0aea5a96def860e0f96286abd Mon Sep 17 00:00:00 2001 From: Tuan Nghia Nguyen Date: Mon, 30 Mar 2026 09:24:04 +1100 Subject: [PATCH] Fix ANSOCR TensorRT Release --- .claude/settings.local.json | 5 +++- .../include/engine/EnginePoolManager.h | 12 +++++++++ .../include/engine/TRTEngineCache.h | 12 +++++++++ modules/ANSOCR/dllmain.cpp | 27 +++++++++++++++++++ tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp | 4 +-- 5 files changed, 57 insertions(+), 3 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 480850e..c74e35c 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -2,7 +2,10 @@ "permissions": { "allow": [ "Bash(find C:ProjectsCLionProjectsANSCORE -type f \\\\\\(-name *.h -o -name *.hpp -o -name *.cpp -o -name *.cc \\\\\\))", - "Bash(xargs grep:*)" + "Bash(xargs grep:*)", + "Bash(grep -n \"~Engine\\\\|TRTEngineCache::release\\\\|EnginePoolManager::release\\\\|destructor\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h)", + "Bash(grep -l \"EnginePoolManager\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.cpp)", + "Bash(grep -n \"g_processExiting\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.h /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/engine.h)" ] } } diff --git a/engines/TensorRTAPI/include/engine/EnginePoolManager.h b/engines/TensorRTAPI/include/engine/EnginePoolManager.h index e9a7565..4f8b9b8 100644 --- a/engines/TensorRTAPI/include/engine/EnginePoolManager.h +++ b/engines/TensorRTAPI/include/engine/EnginePoolManager.h @@ -323,7 +323,19 @@ private: // unreliable. Don't destroy Engine objects (their destructors // call cudaFree, thread::join, etc. which deadlock or crash). // The OS reclaims all memory, VRAM, and handles at process exit. 
+ // + // Intentionally leak Engine shared_ptrs: after the explicit dtor + // body returns, the compiler still runs implicit member dtors for + // m_pools. That would destroy the Engine shared_ptrs, triggering + // TRT/CUDA cleanup on a dead context. Detach the shared_ptrs + // first so the map destructor only frees empty entries. m_sweeperRunning.store(false); + // Leak Engine objects: move each shared_ptr into a heap holder that + // is never freed, so the implicit m_pools destruction deletes nothing. + for (auto& [_, entry] : m_pools) { + auto* leaked = new std::shared_ptr>(std::move(entry.engine)); + (void)leaked; // intentional leak — OS reclaims at exit + } return; } // Normal FreeLibrary path: threads are alive, safe to clean up. diff --git a/engines/TensorRTAPI/include/engine/TRTEngineCache.h b/engines/TensorRTAPI/include/engine/TRTEngineCache.h index 32e3d0a..3a07049 100644 --- a/engines/TensorRTAPI/include/engine/TRTEngineCache.h +++ b/engines/TensorRTAPI/include/engine/TRTEngineCache.h @@ -150,6 +150,18 @@ public: private: TRTEngineCache() = default; + ~TRTEngineCache() { + if (g_processExiting().load(std::memory_order_relaxed)) { + // ExitProcess path: CUDA context is dead. Leak ICudaEngine and + // IRuntime shared_ptrs so their destructors don't call into a + // destroyed CUDA driver. The OS reclaims everything at exit. 
+ for (auto& [_, entry] : m_cache) { + auto* le = new std::shared_ptr(std::move(entry.engine)); + auto* lr = new std::shared_ptr(std::move(entry.runtime)); + (void)le; (void)lr; // intentional leak + } + } + } TRTEngineCache(const TRTEngineCache&) = delete; TRTEngineCache& operator=(const TRTEngineCache&) = delete; diff --git a/modules/ANSOCR/dllmain.cpp b/modules/ANSOCR/dllmain.cpp index 7e6fa03..7dae22f 100644 --- a/modules/ANSOCR/dllmain.cpp +++ b/modules/ANSOCR/dllmain.cpp @@ -8,6 +8,8 @@ #include "ANSGpuFrameRegistry.h" #include #include "NV12PreprocessHelper.h" +#include "engine/TRTEngineCache.h" +#include "engine/EnginePoolManager.h" #include #include #include @@ -93,7 +95,32 @@ BOOL APIENTRY DllMain(HMODULE hModule, case DLL_PROCESS_ATTACH: case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: + break; case DLL_PROCESS_DETACH: + // When lpReserved != NULL, the process is terminating via ExitProcess. + // The OS has already killed all worker threads (idle timers, CUDA + // threads, etc.). Set the global flag so atexit destructors skip + // thread joins and CUDA/TRT cleanup that would fail on a dead context. + if (lpReserved != nullptr) { + g_processExiting().store(true, std::memory_order_relaxed); + break; + } + + // Dynamic FreeLibrary — threads are still alive, safe to clean up. + try { + std::vector leakedHandles; + { + std::lock_guard lk(OCRHandleRegistryMutex()); + for (auto& [h, _] : OCRHandleRegistry()) + leakedHandles.push_back(h); + OCRHandleRegistry().clear(); + } + for (auto* h : leakedHandles) { + try { h->Destroy(); delete h; } catch (...) {} + } + try { EnginePoolManager::instance().clearAll(); } catch (...) {} + try { TRTEngineCache::instance().clearAll(); } catch (...) {} + } catch (...) 
{} break; } return TRUE; diff --git a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp index a2520b6..6b35813 100644 --- a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp +++ b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp @@ -304,10 +304,10 @@ int TestOCRv5mage() { std::cout << "Current working directory: " << currentPath << std::endl; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip"; - std::string imagePath = "C:\\Programs\\ModelTraining\\JALPR\\data\\20260329_174127_834.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp"; + std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\0b9b013343f0bd8c7809653dfab16eac_jpeg.rf.1438e1237023ad7a254605942193df99.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp"; int language = 0; // CUSTOM - int engine = 0;// GPU + int engine = 1;// GPU // For high-resolution images with PP-OCRv5 server models, use higher limitSideLen // (default 960 downscales large images too aggressively, missing small text)