diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 51de336..2cb833f 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -24,7 +24,31 @@ "Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short engines/ONNXEngine/ONNXEngine.cpp)", "Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" diff engines/ONNXEngine/ONNXEngine.cpp)", "Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short)", - "Bash(grep -E \"\\\\.\\(cpp|h\\)$\")" + "Bash(grep -E \"\\\\.\\(cpp|h\\)$\")", + "Bash(awk '{print \"MEDIA_Leak heartbeats: \"$1}')", + "Bash(awk '{print \"ANSMOT heartbeats: \"$1}')", + "Bash(awk '{print \"PROC_MEM samples: \"$1}')", + "Bash(awk 'NR==1 || NR%15==0 { *)", + "Bash(awk 'NR==1 || NR%10==0 || NR==78 {print}')", + "Bash(sort -t= -k2 -n)", + "Bash(awk ' *)", + "Bash(sort -k3 -t= -n)", + "Bash(awk '{print \"MEDIA_Leak: \"$1}')", + "Bash(awk '{print \"BYTETracker heartbeats: \"$1}')", + "Bash(awk '{print \"PROC_MEM: \"$1}')", + "Bash(sort -k2 -t= -n)", + "Bash(awk 'NR==1 || NR%30==0 || NR==571 { *)", + "Bash(awk 'NR==2 || NR==80 || NR==160 || NR==240 || NR==285 {print}')", + "Bash(grep -n \"^\\\\s*return\\\\s\\\\+\\\\\\(true\\\\|false\\\\\\);$\\\\|^}\\\\s*$\" C:/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/EngineRunInference.inl)", + "Bash(awk '$1 > 267 && $1 < 1100')", + "Bash(awk 'NR>267 && /^}/ {print NR\": \"$0; c++; if\\(c>=3\\)exit}' C:/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/EngineRunInference.inl)", + "Bash(awk '{print \"TRT_Leak: \"$1}')", + "Bash(awk '{print \"Pool_Leak: \"$1}')", + "Bash(awk '{print \"BYTETracker: \"$1}')", + "Bash(awk 'NR==1 || NR%70==0 || NR==889 { *)", + "Read(//c/Users/nghia/Downloads/**)", + "Bash(awk '{print \"OCR_Leak: \"$1}')", + "Bash(awk 'NR==1 || NR%20==0 || NR==157 { *)" ] } } diff --git a/MediaClient/media/video_player.cpp b/MediaClient/media/video_player.cpp index 0b2ccce..59f2734 100644 --- a/MediaClient/media/video_player.cpp +++ b/MediaClient/media/video_player.cpp @@ -46,8 +46,11 @@ std::atomic g_queueClones{0}; std::atomic g_queueFrees{0}; std::atomic g_nv12Clones{0}; std::atomic g_nv12Frees{0}; +std::atomic g_nv12Escapes{0}; std::atomic g_cudaHWClones{0}; std::atomic g_cudaHWFrees{0}; +std::atomic g_cudaHWEscapes{0}; +std::atomic g_avframePendingReturns{0}; extern std::atomic g_contiguousAllocs; extern std::atomic g_contiguousFrees; @@ -1621,7 +1624,9 @@ AVFrame* CVideoPlayer::getNV12Frame() { // (Previously used ownership transfer — only the first caller got NV12, // and the second caller fell back to BGR.) std::lock_guard lock(_mutex); - return m_currentNV12Frame ? av_frame_clone(m_currentNV12Frame) : nullptr; + AVFrame* clone = m_currentNV12Frame ? av_frame_clone(m_currentNV12Frame) : nullptr; + if (clone) g_nv12Escapes.fetch_add(1, std::memory_order_relaxed); + return clone; } AVFrame* CVideoPlayer::getCudaHWFrame() { @@ -1632,7 +1637,9 @@ AVFrame* CVideoPlayer::getCudaHWFrame() { // extra_hw_frames=2 in the decoder provides surface pool headroom // for the 3 concurrent clones (decoder + player + registry). std::lock_guard lock(_mutex); - return m_currentCudaHWFrame ? av_frame_clone(m_currentCudaHWFrame) : nullptr; + AVFrame* clone = m_currentCudaHWFrame ? av_frame_clone(m_currentCudaHWFrame) : nullptr; + if (clone) g_cudaHWEscapes.fetch_add(1, std::memory_order_relaxed); + return clone; } bool CVideoPlayer::isCudaHWAccel() const { @@ -2516,20 +2523,29 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) { const int64_t qF = g_queueFrees.load(std::memory_order_relaxed); const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed); const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed); + const int64_t nvE = g_nv12Escapes.load(std::memory_order_relaxed); const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed); const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed); + const int64_t cuE = g_cudaHWEscapes.load(std::memory_order_relaxed); + const int64_t pR = g_avframePendingReturns.load(std::memory_order_relaxed); const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed); const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed); const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed); + // escapeBalance = (nv12Esc + cuHWEsc) - pendingReturns. + // Positive growing value = external callers hold clones they + // never returned to the pendingFree drain → escape-path leak. + const int64_t escBal = (nvE + cuE) - pR; ANS_DBG("MEDIA_Leak", "queue(C=%lld F=%lld net=%lld depth=%zu) " - "nv12(C=%lld F=%lld net=%lld) " - "cudaHW(C=%lld F=%lld net=%lld) " + "nv12(C=%lld F=%lld net=%lld esc=%lld) " + "cudaHW(C=%lld F=%lld net=%lld esc=%lld) " + "pendingReturns=%lld escBal=%lld " "contig(A=%lld F=%lld net=%lld bytesMB=%.1f)", (long long)qA, (long long)qF, (long long)(qA - qF), g_frameQueue.size(), - (long long)nvA, (long long)nvF, (long long)(nvA - nvF), - (long long)cuA, (long long)cuF, (long long)(cuA - cuF), + (long long)nvA, (long long)nvF, (long long)(nvA - nvF), (long long)nvE, + (long long)cuA, (long long)cuF, (long long)(cuA - cuF), (long long)cuE, + (long long)pR, (long long)escBal, (long long)cgA, (long long)cgF, (long long)(cgA - cgF), (double)cgB / (1024.0 * 1024.0)); } diff --git a/MediaClient/media/video_player.h b/MediaClient/media/video_player.h index 7ad3b8d..8ebcb95 100644 --- a/MediaClient/media/video_player.h +++ b/MediaClient/media/video_player.h @@ -22,10 +22,17 @@ // Defined in video_player.cpp; also incremented from FrameQueue here. extern std::atomic g_queueClones; // av_frame_clone from FrameQueue extern std::atomic g_queueFrees; // av_frame_free from FrameQueue -extern std::atomic g_nv12Clones; // m_currentNV12Frame = av_frame_clone -extern std::atomic g_nv12Frees; // av_frame_free(&m_currentNV12Frame) -extern std::atomic g_cudaHWClones; // m_currentCudaHWFrame = clone -extern std::atomic g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame) +extern std::atomic g_nv12Clones; // m_currentNV12Frame = av_frame_clone (INTERNAL replace-on-update) +extern std::atomic g_nv12Frees; // av_frame_free(&m_currentNV12Frame) (INTERNAL) +extern std::atomic g_nv12Escapes; // getNV12Frame() clones handed to callers (EXTERNAL — should be balanced by caller's av_frame_free) +extern std::atomic g_cudaHWClones; // m_currentCudaHWFrame = clone (INTERNAL) +extern std::atomic g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame) (INTERNAL) +extern std::atomic g_cudaHWEscapes; // getCudaHWFrame() clones handed to callers (EXTERNAL) +// Inbound side: AVFrames pushed to pendingFree (drained by gpu_frame_evict_stale). +// Incremented in ANSGpuFrameRegistry::pushPendingFree_locked — counts AVFrames that +// callers relinquished back to the media layer for deferred freeing. If escapes grow +// faster than pendingReturns, the escape path is leaking. +extern std::atomic g_avframePendingReturns; typedef struct { diff --git a/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl b/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl index b25d5e8..c9797d4 100644 --- a/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl +++ b/engines/TensorRTAPI/include/engine/EngineBuildLoadNetwork.inl @@ -1167,7 +1167,12 @@ trt_cache_create_context: // -- Pinned output buffers (CUDA graph prerequisite) ----------------------- // Invalidate any graphs captured by a previous loadNetwork() call on this instance. - for (auto& [bs, ge] : m_graphExecs) { if (ge) cudaGraphExecDestroy(ge); } + for (auto& [bs, ge] : m_graphExecs) { + if (ge) { + cudaGraphExecDestroy(ge); + m_trtGraphDestroys.fetch_add(1, std::memory_order_relaxed); + } + } m_graphExecs.clear(); // Free any previously allocated pinned buffers. for (T* p : m_pinnedOutputBuffers) { if (p) cudaFreeHost(p); } diff --git a/engines/TensorRTAPI/include/engine/EngineRunInference.inl b/engines/TensorRTAPI/include/engine/EngineRunInference.inl index b0619ba..1944486 100644 --- a/engines/TensorRTAPI/include/engine/EngineRunInference.inl +++ b/engines/TensorRTAPI/include/engine/EngineRunInference.inl @@ -731,7 +731,10 @@ bool Engine::runInference(const std::vector>& i if (!m_graphExecs.empty()) { size_t destroyed = m_graphExecs.size(); for (auto& [bs, ge] : m_graphExecs) { - if (ge) cudaGraphExecDestroy(ge); + if (ge) { + cudaGraphExecDestroy(ge); + m_trtGraphDestroys.fetch_add(1, std::memory_order_relaxed); + } } m_graphExecs.clear(); ANS_DBG("TRT_Engine", "INVALIDATED %zu cached CUDA graphs after shape change (batch=%d)", @@ -901,8 +904,10 @@ bool Engine::runInference(const std::vector>& i if (captureOk) { cudaGraphExec_t exec = nullptr; - if (cudaGraphInstantiate(&exec, graph, nullptr, nullptr, 0) == cudaSuccess) + if (cudaGraphInstantiate(&exec, graph, nullptr, nullptr, 0) == cudaSuccess) { graphExec = exec; + m_trtGraphCreates.fetch_add(1, std::memory_order_relaxed); + } cudaGraphDestroy(graph); ANS_DBG("TRT_Engine", "CUDA graph CAPTURED OK for batch=%d exec=%p", batchSize, (void*)graphExec); @@ -1053,5 +1058,32 @@ bool Engine::runInference(const std::vector>& i s_globalActiveInf.fetch_sub(1); } + // Leak diagnostic — one [TRT_Leak] line per engine instance per 60 s. + // Reports CUDA graph create/destroy balance and current cache size. + // If (creates - destroys) climbs monotonically, graph execs are being + // leaked on every shape change; each leaked exec is tens of MB. + // Lock-free window claim via compare_exchange — concurrent inference + // threads race to log but only one wins per 60-s window. + { + using clk = std::chrono::steady_clock; + const long long tick = clk::now().time_since_epoch().count(); + long long expected = m_trtLeakNextLogTick.load(std::memory_order_relaxed); + if (tick >= expected) { + const long long deadline = tick + + std::chrono::duration_cast( + std::chrono::seconds(60)).count(); + if (m_trtLeakNextLogTick.compare_exchange_strong( + expected, deadline, std::memory_order_relaxed)) { + const int64_t cr = m_trtGraphCreates.load(std::memory_order_relaxed); + const int64_t ds = m_trtGraphDestroys.load(std::memory_order_relaxed); + ANS_DBG("TRT_Leak", + "engine=%p creates=%lld destroys=%lld net=%lld cached=%zu", + (void*)this, + (long long)cr, (long long)ds, (long long)(cr - ds), + m_graphExecs.size()); + } + } + } + return true; } diff --git a/modules/ANSCV/ANSFilePlayer.cpp b/modules/ANSCV/ANSFilePlayer.cpp index ec8f0e1..b390f86 100644 --- a/modules/ANSCV/ANSFilePlayer.cpp +++ b/modules/ANSCV/ANSFilePlayer.cpp @@ -57,8 +57,11 @@ namespace ANSCENTER { _logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__); } } + // Destructor calls close() exactly once — do not call close() explicitly + // beforehand. CFilePlayer::close() is not safe to call twice (it re-enters + // decoder Stop/flush on an already-torn-down decoder). if (clientToClose) { - clientToClose->close(); + clientToClose.reset(); } } void ANSFILEPLAYER::CheckLicense() { @@ -102,7 +105,8 @@ namespace ANSCENTER { std::lock_guard lock(_mutex); _isPlaying = false; } - _playerClient->close(); + // CFilePlayer::open() calls close() internally at the top — no need + // to close explicitly here (doing so would double-close the decoder). std::lock_guard lock(_mutex); Setup(); return Start(); diff --git a/modules/ANSCV/ANSGpuFrameOps.h b/modules/ANSCV/ANSGpuFrameOps.h index 6cec22e..eca90bd 100644 --- a/modules/ANSCV/ANSGpuFrameOps.h +++ b/modules/ANSCV/ANSGpuFrameOps.h @@ -26,11 +26,19 @@ extern "C" { #include #include #include +#include #ifdef _WIN32 #include #endif +// Leak diagnostic — counts AVFrames handed back to the media layer for +// deferred freeing. Defined in video_player.cpp. Paired with g_nv12Escapes / +// g_cudaHWEscapes in the [MEDIA_Leak] heartbeat: if escapes > pendingReturns +// and the delta grows, external callers (via getNV12Frame/getCudaHWFrame) +// are holding clones instead of returning them. +extern std::atomic g_avframePendingReturns; + // Debug logging macro for GPU frame operations. // Define ANSCORE_GPU_DEBUG=1 to enable verbose per-frame GPU logging. #ifndef GPU_FRAME_DBG @@ -172,6 +180,7 @@ inline void gpu_frame_attach(cv::Mat* mat, AVFrame* nv12, int gpuIdx, int64_t pt auto& reg = ANSGpuFrameRegistry::instance(); auto lk = reg.acquire_lock(); reg.pushPendingFree_locked(old); + g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed); } // NOTE: No drain_pending() here (hot path). Freed by evict_stale. @@ -378,6 +387,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, auto& reg = ANSGpuFrameRegistry::instance(); auto lk = reg.acquire_lock(); reg.pushPendingFree_locked(cudaFrame); + g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed); } data.avframe = nullptr; } @@ -386,6 +396,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, auto& reg = ANSGpuFrameRegistry::instance(); auto lk = reg.acquire_lock(); reg.pushPendingFree_locked(cpuNV12); + g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed); } data.cpuAvframe = nullptr; @@ -399,6 +410,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, auto& reg = ANSGpuFrameRegistry::instance(); auto lk = reg.acquire_lock(); reg.pushPendingFree_locked(old); + g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed); } // NOTE: No drain_pending() here (hot path). AVFrames accumulate in diff --git a/modules/ANSCV/ANSVideoPlayer.cpp b/modules/ANSCV/ANSVideoPlayer.cpp index d45f1b5..b7f492c 100644 --- a/modules/ANSCV/ANSVideoPlayer.cpp +++ b/modules/ANSCV/ANSVideoPlayer.cpp @@ -47,9 +47,6 @@ namespace ANSCENTER { { std::lock_guard lock(_mutex); try { - if (_hwPlayer) { - try { _hwPlayer->stop(); } catch (...) {} - } hwPlayerToClose = std::move(_hwPlayer); _hwDecodeActive = false; _hwGpuIndex = -1; @@ -80,9 +77,10 @@ namespace ANSCENTER { } } // end lock scope - // CUDA cleanup happens here, outside the mutex + // CUDA cleanup happens here, outside the mutex. + // Destructor calls close() once — do not call stop()/close() explicitly + // beforehand (double-close re-enters torn-down decoder state). if (hwPlayerToClose) { - try { hwPlayerToClose->close(); } catch (...) {} hwPlayerToClose.reset(); } } @@ -201,13 +199,10 @@ namespace ANSCENTER { { std::lock_guard lock(_mutex); _isPlaying = false; // GetImage() returns cached frame while we reconnect - if (_hwPlayer) { - try { _hwPlayer->stop(); } catch (...) {} - hwPlayerToClose = std::move(_hwPlayer); - } + hwPlayerToClose = std::move(_hwPlayer); } + // Destructor calls close() exactly once — single teardown. if (hwPlayerToClose) { - try { hwPlayerToClose->close(); } catch (...) {} hwPlayerToClose.reset(); } @@ -241,11 +236,24 @@ namespace ANSCENTER { bool ANSVIDEOPLAYER::Start() { std::lock_guard lock(_mutex); try { + // Re-initialize after a prior Stop(): _hwPlayer was released and + // cap was closed. Setup() reopens whichever backend applies. + // Why: CFilePlayer::stop() == close(), which frees m_pFormatContext. + // Calling play() on a closed player dereferences NULL and crashes. + if (!_hwPlayer && !cap.isOpened()) { + if (!Setup()) { + this->_logger.LogError("ANSVIDEOPLAYER::Start. Exception occurred:", + "Setup() failed on restart", __FILE__, __LINE__); + return false; + } + } + // --- HW decode path --- if (_hwDecodeActive && _hwPlayer) { _hwPlayer->play(); // starts read/video/audio threads _hwEOF = false; _hwFrameCount = 0; + _hwLastPts = 0; _isPlaying = true; // Wait for first frame outside the mutex to let decode threads run @@ -284,15 +292,26 @@ namespace ANSCENTER { } } bool ANSVIDEOPLAYER::Stop() { - decltype(_hwPlayer.get()) hwPlayer = nullptr; + // Move HW player out of lock scope — CFilePlayer::stop() == close(), + // which does CUDA cleanup that must not run under _mutex to avoid + // deadlocking with the nvcuda64 SRW lock held by inference. + decltype(_hwPlayer) hwPlayerToClose; { std::lock_guard lock(_mutex); try { // --- HW decode path --- if (_hwDecodeActive && _hwPlayer) { _isPlaying = false; - hwPlayer = _hwPlayer.get(); - // stop() called outside the lock below; skip cap path + // Release the player completely — CFilePlayer::stop() == close(), + // which frees m_pFormatContext. Keeping the unique_ptr alive after + // this point is a landmine: a later play() would deref NULL. + hwPlayerToClose = std::move(_hwPlayer); + _hwDecodeActive = false; + _hwGpuIndex = -1; + _hwCudaAccel = false; + _hwEOF = false; + _hwFrameCount = 0; + _hwLastPts = 0; } else { // --- cv::VideoCapture fallback --- @@ -322,8 +341,12 @@ namespace ANSCENTER { return false; } } - if (hwPlayer) { - hwPlayer->stop(); + // CUDA cleanup happens here, outside the mutex. + // Rely on the destructor to call close() exactly once. Calling stop() + // (== close()) explicitly would double-close the CFilePlayer, which + // re-enters decoder Stop/flush on an already-torn-down decoder. + if (hwPlayerToClose) { + hwPlayerToClose.reset(); } return true; } diff --git a/modules/ANSCV/GpuNV12SlotPool.cpp b/modules/ANSCV/GpuNV12SlotPool.cpp index 71f5f65..0b0a6f7 100644 --- a/modules/ANSCV/GpuNV12SlotPool.cpp +++ b/modules/ANSCV/GpuNV12SlotPool.cpp @@ -7,8 +7,11 @@ #define NOMINMAX #include #include "GpuNV12SlotPool.h" +#include "ANSLicense.h" // ANS_DBG macro for [Pool_Leak] heartbeat #include +#include +#include // ANSCV.dll owns the process-wide singleton. GpuNV12SlotPool* GpuNV12SlotPool::resolveProcessWide() { @@ -40,6 +43,41 @@ void GpuNV12SlotPool::drainCooledSlots_locked() { GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) { std::lock_guard lock(m_mutex); + // Leak diagnostic — [Pool_Leak] heartbeat fires at most once per 60 s. + // Reports current slot count and rough VRAM footprint. Slot count is + // bounded by GPU_NV12_POOL_MAX_SLOTS; if it persists near the cap we + // also see ACTIVE/COOLING state distribution which can hint at slots + // not being released. + { + using clk = std::chrono::steady_clock; + static std::atomic s_nextLog{0}; + const long long tick = clk::now().time_since_epoch().count(); + long long expected = s_nextLog.load(std::memory_order_relaxed); + if (tick >= expected) { + const long long deadline = tick + + std::chrono::duration_cast( + std::chrono::seconds(60)).count(); + if (s_nextLog.compare_exchange_strong(expected, deadline, + std::memory_order_relaxed)) { + size_t totalBytes = 0; + size_t active = 0, cooling = 0, free_ = 0; + for (const auto& sp : m_slots) { + totalBytes += sp->pitchY * sp->height + + sp->pitchUV * (sp->height / 2); + const int st = sp->state.load(std::memory_order_relaxed); + if (st == GpuNV12Slot::STATE_ACTIVE) ++active; + else if (st == GpuNV12Slot::STATE_COOLING) ++cooling; + else ++free_; + } + ANS_DBG("Pool_Leak", + "NV12Pool slots=%zu (active=%zu cooling=%zu free=%zu) bytesMB=%.1f (max=%d)", + m_slots.size(), active, cooling, free_, + (double)totalBytes / (1024.0 * 1024.0), + GPU_NV12_POOL_MAX_SLOTS); + } + } + } + // 1. Drain cooled-down slots to make them available drainCooledSlots_locked(); diff --git a/modules/ANSLPR/ANSLPR_OCR.cpp b/modules/ANSLPR/ANSLPR_OCR.cpp index 874f5f4..d4b53ea 100644 --- a/modules/ANSLPR/ANSLPR_OCR.cpp +++ b/modules/ANSLPR/ANSLPR_OCR.cpp @@ -6,6 +6,7 @@ #include #include +#include #include // --------------------------------------------------------------------------- @@ -1063,6 +1064,34 @@ namespace ANSCENTER std::lock_guard plateLock(_plateIdentitiesMutex); auto& identities = _plateIdentities[cameraId]; + // Leak diagnostic — [OCR_Leak] heartbeat, at most once per 60 s + // process-wide. Same fields as the ANSALPR_OD variant for direct + // comparison: cams, ids_tot, clr, imgtrk. If any of these climb + // monotonically, the corresponding state container is the leak. + { + using clk = std::chrono::steady_clock; + static std::atomic s_nextLog{0}; + const long long tick = clk::now().time_since_epoch().count(); + long long expected = s_nextLog.load(std::memory_order_relaxed); + if (tick >= expected) { + const long long deadline = tick + + std::chrono::duration_cast( + std::chrono::seconds(60)).count(); + if (s_nextLog.compare_exchange_strong(expected, deadline, + std::memory_order_relaxed)) { + size_t ids_tot = 0; + for (const auto& [cam, v] : _plateIdentities) ids_tot += v.size(); + ANS_DBG("OCR_Leak", + "ANSALPR_OCR this=%p cams=%zu ids_tot=%zu clr=%zu imgtrk=%zu", + (void*)this, + _plateIdentities.size(), + ids_tot, + _colourCache.size(), + _imageSizeTrackers.size()); + } + } + } + // Auto-detect mode by detection count. // 1 detection → pipeline/single-crop mode → no dedup needed. // 2+ detections → full-frame mode → apply accumulated scoring. diff --git a/modules/ANSLPR/ANSLPR_OD.cpp b/modules/ANSLPR/ANSLPR_OD.cpp index 452de04..0bfb586 100644 --- a/modules/ANSLPR/ANSLPR_OD.cpp +++ b/modules/ANSLPR/ANSLPR_OD.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include // --------------------------------------------------------------------------- // Check ONNX model opset version by reading the protobuf header directly. @@ -3121,6 +3122,41 @@ namespace ANSCENTER { std::lock_guard plateLock(_plateIdentitiesMutex); auto& identities = _plateIdentities[cameraId]; + // Leak diagnostic — [OCR_Leak] heartbeat fires at most once per 60 s + // process-wide. Reports the three per-camera state containers that + // _could_ accumulate: _plateIdentities (keyed by cameraId), its sum + // of inner-vector sizes, _colourCache, _imageSizeTrackers. All three + // have stated bounds; heartbeat confirms they actually hold. + // cams — number of distinct cameraId keys in _plateIdentities + // ids_tot — sum of per-camera identity-vector sizes (should plateau) + // clr — _colourCache size (bounded at COLOUR_CACHE_MAX_SIZE=200) + // imgtrk — _imageSizeTrackers size (one entry per cameraId) + // All size() reads outside locks are diagnostic snapshots; brief + // races are acceptable (we're looking at trends over minutes). + { + using clk = std::chrono::steady_clock; + static std::atomic s_nextLog{0}; + const long long tick = clk::now().time_since_epoch().count(); + long long expected = s_nextLog.load(std::memory_order_relaxed); + if (tick >= expected) { + const long long deadline = tick + + std::chrono::duration_cast( + std::chrono::seconds(60)).count(); + if (s_nextLog.compare_exchange_strong(expected, deadline, + std::memory_order_relaxed)) { + size_t ids_tot = 0; + for (const auto& [cam, v] : _plateIdentities) ids_tot += v.size(); + ANS_DBG("OCR_Leak", + "ANSALPR_OD this=%p cams=%zu ids_tot=%zu clr=%zu imgtrk=%zu", + (void*)this, + _plateIdentities.size(), + ids_tot, + _colourCache.size(), + _imageSizeTrackers.size()); + } + } + } + // Option B: Auto-detect mode by counting detections. // 1 detection → crop/pipeline mode → return instant result, no accumulated scoring // 2+ detections → full-frame mode → use accumulated scoring for dedup diff --git a/modules/ANSMOT/ByteTrack/src/BYTETracker.cpp b/modules/ANSMOT/ByteTrack/src/BYTETracker.cpp index 792cf65..31c6263 100644 --- a/modules/ANSMOT/ByteTrack/src/BYTETracker.cpp +++ b/modules/ANSMOT/ByteTrack/src/BYTETracker.cpp @@ -318,13 +318,28 @@ std::vector ByteTrack::BYTETracker::update(co lost_stracks_ = subStracks(jointStracks(subStracks(lost_stracks_, tracked_stracks_), current_lost_stracks), removed_stracks_); removed_stracks_ = jointStracks(removed_stracks_, current_removed_stracks); + // Cap removed_stracks_ to prevent unbounded growth. Its only job is to + // block re-entry into lost_stracks_ for tracks that have already timed + // out (see subStracks(..., removed_stracks_) on the previous line). A + // track that's been removed for more than a few hundred frames cannot + // plausibly re-appear as "lost" — by then it's been reaped elsewhere + // and any new detection would get a fresh track_id. 1 000 entries is + // ~100 s at 10 fps per camera, well beyond any re-identification + // window. Older entries (front of vector) are dropped first. + static constexpr size_t kRemovedCap = 1000; + if (removed_stracks_.size() > kRemovedCap) { + const size_t drop = removed_stracks_.size() - kRemovedCap; + removed_stracks_.erase(removed_stracks_.begin(), + removed_stracks_.begin() + drop); + } + std::vector tracked_stracks_out, lost_stracks_out; removeDuplicateStracks(tracked_stracks_, lost_stracks_, tracked_stracks_out, lost_stracks_out); tracked_stracks_ = tracked_stracks_out; lost_stracks_ = lost_stracks_out; // Diagnostic: report tracker state size at most once every 60 s per instance. - // removed_stracks_ is append-only in this implementation — watch it grow. + // With the cap above, removed_stracks_ should plateau at <= kRemovedCap. { static thread_local std::chrono::steady_clock::time_point s_nextLog{}; auto now = std::chrono::steady_clock::now(); diff --git a/modules/ANSODEngine/engine.h b/modules/ANSODEngine/engine.h index 1492924..d00228e 100644 --- a/modules/ANSODEngine/engine.h +++ b/modules/ANSODEngine/engine.h @@ -434,6 +434,16 @@ private: // the first time each batch size is seen; subsequent calls reuse it. std::unordered_map m_graphExecs; + // Leak diagnostics — per-engine-instance counters for CUDA graph + // create/destroy balance. Incremented in EngineRunInference.inl and + // EngineBuildLoadNetwork.inl. Read by the [TRT_Leak] heartbeat in + // runInference (fires ≤1×/60s per engine instance). + // m_trtLeakNextLogTick stores a steady_clock epoch count for lock-free + // compare_exchange window claim across concurrent inference threads. + std::atomic m_trtGraphCreates{0}; + std::atomic m_trtGraphDestroys{0}; + std::atomic m_trtLeakNextLogTick{0}; + Logger m_logger; bool m_verbose{ true }; // false for non-probe pool slots bool m_disableGraphs{ true }; // DISABLED by default — concurrent graph launches + uploads cause GPU deadlock on WDDM @@ -569,7 +579,12 @@ template Engine::~Engine() { // Destroy cached CUDA graphs try { - for (auto& [bs, ge] : m_graphExecs) { if (ge) cudaGraphExecDestroy(ge); } + for (auto& [bs, ge] : m_graphExecs) { + if (ge) { + cudaGraphExecDestroy(ge); + m_trtGraphDestroys.fetch_add(1, std::memory_order_relaxed); + } + } m_graphExecs.clear(); } catch (...) {} diff --git a/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp b/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp index 28874e4..7afdff7 100644 --- a/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp +++ b/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp @@ -48,40 +48,6 @@ cv::Mat JpegStringToMat(const std::string& jpegStr) { return emptyImage; } } -int VideoTestClient() { - int width = 0; - int height = 0; - int64_t pts = 0; - ANSVIDEOPLAYER* filePlayerClient; - std::string testVideoFile = "C:\\Programs\\DemoAssets\\Videos\\FireNSmoke\\SimFire.mp4"; - CreateANSVideoPlayerHandle(&filePlayerClient, "", testVideoFile.c_str()); - - StartVideoPlayer(&filePlayerClient); - cv::namedWindow("Image", cv::WINDOW_NORMAL); // Create a resizable window. - cv::resizeWindow("Image", 1920, 1080); // Set initial size of the window. - std::string jpegImage; - int index = 0; - while (true) { - index++; - GetVideoPlayerStrImage(&filePlayerClient, width, height, pts, jpegImage); - if (jpegImage.empty()) { - sleep(1); - continue; // Skip the rest of the loop if image is empty - } - cv::Mat image = JpegStringToMat(jpegImage); - cv::Mat resizedImage; - cv::resize(image, resizedImage, cv::Size(width, height)); - cv::imshow("Image", resizedImage); // Show the resized image inside the window. - if (cv::waitKey(30) == 27) { - break; - } - } - cv::destroyAllWindows(); // Destroy all OpenCV windows - StopVideoPlayer(&filePlayerClient); - ReleaseANSVideoPlayerHandle(&filePlayerClient); - return 0; - -} int FilePlayerTestClient() { int width = 0; @@ -1473,6 +1439,203 @@ int OpenCVFunctionTest() { } +int VideoTestClient() { + int width = 0; + int height = 0; + int64_t pts = 0; + ANSVIDEOPLAYER* filePlayerClient; + std::string testVideoFile = "C:\\Programs\\DemoAssets\\Videos\\FireNSmoke\\SimFire.mp4"; + CreateANSVideoPlayerHandle(&filePlayerClient, "", testVideoFile.c_str()); + + StartVideoPlayer(&filePlayerClient); + cv::namedWindow("Image", cv::WINDOW_NORMAL); // Create a resizable window. + cv::resizeWindow("Image", 1920, 1080); // Set initial size of the window. + std::string jpegImage; + int index = 0; + while (true) { + index++; + GetVideoPlayerStrImage(&filePlayerClient, width, height, pts, jpegImage); + if (jpegImage.empty()) { + sleep(1); + continue; // Skip the rest of the loop if image is empty + } + cv::Mat image = JpegStringToMat(jpegImage); + cv::Mat resizedImage; + cv::resize(image, resizedImage, cv::Size(width, height)); + cv::imshow("Image", resizedImage); // Show the resized image inside the window. + if (cv::waitKey(30) == 27) { + break; + } + } + cv::destroyAllWindows(); // Destroy all OpenCV windows + StopVideoPlayer(&filePlayerClient); + ReleaseANSVideoPlayerHandle(&filePlayerClient); + return 0; + +} + + +int VideoPlayerClientTest() { + int width = 0; + int height = 0; + int64_t pts = 0; + ANSVIDEOPLAYER* videoClient; + + std::string testVideoFile = "E:\\Programs\\DemoAssets\\Videos\\classroom.mp4"; + CreateANSVideoPlayerHandle(&videoClient, "", testVideoFile.c_str()); + + StartVideoPlayer(&videoClient); + cv::namedWindow("Image", cv::WINDOW_NORMAL); // Create a resizable window. + cv::resizeWindow("Image", 1920, 1080); // Set initial size of the window (landscape). + int index = 0; + while (true) { + index++; + std::cout << "Index=" << index << std::endl; + if ((index == 200) || (index == 800) || (index == 1200)) { StopVideoPlayer(&videoClient); } + if ((index == 400) || (index == 1000) || (index == 1500)) { StartVideoPlayer(&videoClient); } + if ((index == 1800) || (index == 2200) || (index == 2500)) { StopVideoPlayer(&videoClient); } + if ((index == 2000) || (index == 2300) || (index == 2700)) { StartVideoPlayer(&videoClient); } + if (index > 20000) break; + auto start = std::chrono::system_clock::now(); + cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image + GetVideoPlayerCVImage(&videoClient, width, height, pts,&image); + auto end1 = std::chrono::system_clock::now(); + auto elapsed1 = std::chrono::duration_cast(end1 - start); + if (elapsed1.count() > 0)std::cout << "Time to get image:" << elapsed1.count() << "ms" << std::endl; + + // ✅ Check if the image is valid BEFORE accessing it + if (!image || image->empty()) { + ANSCV_ReleaseImage_S(&image); + std::this_thread::sleep_for(std::chrono::seconds(1)); + continue; // Skip processing if the image is empty + } + + // High-quality downscale for display: INTER_LANCZOS4 preserves sharpness and edges + cv::Mat displayImage; + if (image->cols > 1920) { + double scale = 1920.0 / image->cols; + cv::resize(*image, displayImage, cv::Size(), scale, scale, cv::INTER_LANCZOS4); + } + else { + displayImage = *image; + } + cv::imshow("Image", displayImage); + + ANSCV_ReleaseImage_S(&image); + //std::cout << "Index="<empty()) { + ANSCV_ReleaseImage_S(&image); + std::this_thread::sleep_for(std::chrono::seconds(1)); + continue; + } + + cv::Mat displayImage; + if (image->cols > 1920) { + double scale = 1920.0 / image->cols; + cv::resize(*image, displayImage, cv::Size(), scale, scale, cv::INTER_LANCZOS4); + } + else { + displayImage = *image; + } + cv::imshow("Image", displayImage); + + ANSCV_ReleaseImage_S(&image); + if (cv::waitKey(30) == 27) { + std::cout << "Break" << std::endl; + break; + } + } + cv::destroyAllWindows(); + StopFilePlayer(&filePlayerClient); + ReleaseANSFilePlayerHandle(&filePlayerClient); + return 0; +} + + +int FilePlayerClientDoubleDestroy() { + + ANSFILEPLAYER* filePlayerClient; + std::string testVideoFile = "E:\\Programs\\DemoAssets\\Videos\\classroom.mp4"; + std::cout << "create File Player" << std::endl; + CreateANSFilePlayerHandle(&filePlayerClient, "", testVideoFile.c_str()); + std::cout << "Start 1" << std::endl; + StartFilePlayer(&filePlayerClient); + std::cout << "Stop 1" << std::endl; + StopFilePlayer(&filePlayerClient); + std::cout << "Start 2" << std::endl; + StartFilePlayer(&filePlayerClient); + std::cout << "Stop 2" << std::endl; + StopFilePlayer(&filePlayerClient); + std::cout << "released" << std::endl; + ReleaseANSFilePlayerHandle(&filePlayerClient); + return 0; + +} + + int main() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); @@ -1481,15 +1644,18 @@ int main() // resolved inside ANSCV.dll (which is linked against libavcodec etc.), // so this works without the unit test having to link FFmpeg itself. //ANSCV_PrintFFmpegLicense_S(); - - + //FilePlayerClientDoubleDestroy(); + FilePlayerClientCVTest(); + //VideoPlayerClientTest(); + //VideoPlayerClientDoubleDestroy(); + // VideoPlayerClientTest(); //OpenCVFunctionTest(); //GenerateVideo(); //VideoTestClient(); // TestGetImage(); //PureOpenCV(); // RSTPTestClient(); - RSTPTestCVClient(); + //RSTPTestCVClient(); //TestCreateImageFromJpegStringFile(); //TestCreateImageFromFile(); //for (int i = 0; i < 100; i++) {