Fix double stop in ANSVideoPlayer
This commit is contained in:
@@ -57,8 +57,11 @@ namespace ANSCENTER {
|
||||
_logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
// Destructor calls close() exactly once — do not call close() explicitly
|
||||
// beforehand. CFilePlayer::close() is not safe to call twice (it re-enters
|
||||
// decoder Stop/flush on an already-torn-down decoder).
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
clientToClose.reset();
|
||||
}
|
||||
}
|
||||
void ANSFILEPLAYER::CheckLicense() {
|
||||
@@ -102,7 +105,8 @@ namespace ANSCENTER {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
// CFilePlayer::open() calls close() internally at the top — no need
|
||||
// to close explicitly here (doing so would double-close the decoder).
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
return Start();
|
||||
|
||||
@@ -26,11 +26,19 @@ extern "C" {
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <cstdio>
|
||||
#include <atomic>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Leak diagnostic — counts AVFrames handed back to the media layer for
|
||||
// deferred freeing. Defined in video_player.cpp. Paired with g_nv12Escapes /
|
||||
// g_cudaHWEscapes in the [MEDIA_Leak] heartbeat: if escapes > pendingReturns
|
||||
// and the delta grows, external callers (via getNV12Frame/getCudaHWFrame)
|
||||
// are holding clones instead of returning them.
|
||||
extern std::atomic<int64_t> g_avframePendingReturns;
|
||||
|
||||
// Debug logging macro for GPU frame operations.
|
||||
// Define ANSCORE_GPU_DEBUG=1 to enable verbose per-frame GPU logging.
|
||||
#ifndef GPU_FRAME_DBG
|
||||
@@ -172,6 +180,7 @@ inline void gpu_frame_attach(cv::Mat* mat, AVFrame* nv12, int gpuIdx, int64_t pt
|
||||
auto& reg = ANSGpuFrameRegistry::instance();
|
||||
auto lk = reg.acquire_lock();
|
||||
reg.pushPendingFree_locked(old);
|
||||
g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// NOTE: No drain_pending() here (hot path). Freed by evict_stale.
|
||||
@@ -378,6 +387,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
||||
auto& reg = ANSGpuFrameRegistry::instance();
|
||||
auto lk = reg.acquire_lock();
|
||||
reg.pushPendingFree_locked(cudaFrame);
|
||||
g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
data.avframe = nullptr;
|
||||
}
|
||||
@@ -386,6 +396,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
||||
auto& reg = ANSGpuFrameRegistry::instance();
|
||||
auto lk = reg.acquire_lock();
|
||||
reg.pushPendingFree_locked(cpuNV12);
|
||||
g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
data.cpuAvframe = nullptr;
|
||||
|
||||
@@ -399,6 +410,7 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
||||
auto& reg = ANSGpuFrameRegistry::instance();
|
||||
auto lk = reg.acquire_lock();
|
||||
reg.pushPendingFree_locked(old);
|
||||
g_avframePendingReturns.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// NOTE: No drain_pending() here (hot path). AVFrames accumulate in
|
||||
|
||||
@@ -47,9 +47,6 @@ namespace ANSCENTER {
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
if (_hwPlayer) {
|
||||
try { _hwPlayer->stop(); } catch (...) {}
|
||||
}
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
_hwDecodeActive = false;
|
||||
_hwGpuIndex = -1;
|
||||
@@ -80,9 +77,10 @@ namespace ANSCENTER {
|
||||
}
|
||||
} // end lock scope
|
||||
|
||||
// CUDA cleanup happens here, outside the mutex
|
||||
// CUDA cleanup happens here, outside the mutex.
|
||||
// Destructor calls close() once — do not call stop()/close() explicitly
|
||||
// beforehand (double-close re-enters torn-down decoder state).
|
||||
if (hwPlayerToClose) {
|
||||
try { hwPlayerToClose->close(); } catch (...) {}
|
||||
hwPlayerToClose.reset();
|
||||
}
|
||||
}
|
||||
@@ -201,13 +199,10 @@ namespace ANSCENTER {
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false; // GetImage() returns cached frame while we reconnect
|
||||
if (_hwPlayer) {
|
||||
try { _hwPlayer->stop(); } catch (...) {}
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
}
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
}
|
||||
// Destructor calls close() exactly once — single teardown.
|
||||
if (hwPlayerToClose) {
|
||||
try { hwPlayerToClose->close(); } catch (...) {}
|
||||
hwPlayerToClose.reset();
|
||||
}
|
||||
|
||||
@@ -241,11 +236,24 @@ namespace ANSCENTER {
|
||||
bool ANSVIDEOPLAYER::Start() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
// Re-initialize after a prior Stop(): _hwPlayer was released and
|
||||
// cap was closed. Setup() reopens whichever backend applies.
|
||||
// Why: CFilePlayer::stop() == close(), which frees m_pFormatContext.
|
||||
// Calling play() on a closed player dereferences NULL and crashes.
|
||||
if (!_hwPlayer && !cap.isOpened()) {
|
||||
if (!Setup()) {
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Start. Exception occurred:",
|
||||
"Setup() failed on restart", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// --- HW decode path ---
|
||||
if (_hwDecodeActive && _hwPlayer) {
|
||||
_hwPlayer->play(); // starts read/video/audio threads
|
||||
_hwEOF = false;
|
||||
_hwFrameCount = 0;
|
||||
_hwLastPts = 0;
|
||||
_isPlaying = true;
|
||||
|
||||
// Wait for first frame outside the mutex to let decode threads run
|
||||
@@ -284,15 +292,26 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
bool ANSVIDEOPLAYER::Stop() {
|
||||
decltype(_hwPlayer.get()) hwPlayer = nullptr;
|
||||
// Move HW player out of lock scope — CFilePlayer::stop() == close(),
|
||||
// which does CUDA cleanup that must not run under _mutex to avoid
|
||||
// deadlocking with the nvcuda64 SRW lock held by inference.
|
||||
decltype(_hwPlayer) hwPlayerToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
// --- HW decode path ---
|
||||
if (_hwDecodeActive && _hwPlayer) {
|
||||
_isPlaying = false;
|
||||
hwPlayer = _hwPlayer.get();
|
||||
// stop() called outside the lock below; skip cap path
|
||||
// Release the player completely — CFilePlayer::stop() == close(),
|
||||
// which frees m_pFormatContext. Keeping the unique_ptr alive after
|
||||
// this point is a landmine: a later play() would deref NULL.
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
_hwDecodeActive = false;
|
||||
_hwGpuIndex = -1;
|
||||
_hwCudaAccel = false;
|
||||
_hwEOF = false;
|
||||
_hwFrameCount = 0;
|
||||
_hwLastPts = 0;
|
||||
}
|
||||
else {
|
||||
// --- cv::VideoCapture fallback ---
|
||||
@@ -322,8 +341,12 @@ namespace ANSCENTER {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (hwPlayer) {
|
||||
hwPlayer->stop();
|
||||
// CUDA cleanup happens here, outside the mutex.
|
||||
// Rely on the destructor to call close() exactly once. Calling stop()
|
||||
// (== close()) explicitly would double-close the CFilePlayer, which
|
||||
// re-enters decoder Stop/flush on an already-torn-down decoder.
|
||||
if (hwPlayerToClose) {
|
||||
hwPlayerToClose.reset();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -7,8 +7,11 @@
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#include "GpuNV12SlotPool.h"
|
||||
#include "ANSLicense.h" // ANS_DBG macro for [Pool_Leak] heartbeat
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
// ANSCV.dll owns the process-wide singleton.
|
||||
GpuNV12SlotPool* GpuNV12SlotPool::resolveProcessWide() {
|
||||
@@ -40,6 +43,41 @@ void GpuNV12SlotPool::drainCooledSlots_locked() {
|
||||
GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) {
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
// Leak diagnostic — [Pool_Leak] heartbeat fires at most once per 60 s.
|
||||
// Reports current slot count and rough VRAM footprint. Slot count is
|
||||
// bounded by GPU_NV12_POOL_MAX_SLOTS; if it persists near the cap we
|
||||
// also see ACTIVE/COOLING state distribution which can hint at slots
|
||||
// not being released.
|
||||
{
|
||||
using clk = std::chrono::steady_clock;
|
||||
static std::atomic<long long> s_nextLog{0};
|
||||
const long long tick = clk::now().time_since_epoch().count();
|
||||
long long expected = s_nextLog.load(std::memory_order_relaxed);
|
||||
if (tick >= expected) {
|
||||
const long long deadline = tick +
|
||||
std::chrono::duration_cast<clk::duration>(
|
||||
std::chrono::seconds(60)).count();
|
||||
if (s_nextLog.compare_exchange_strong(expected, deadline,
|
||||
std::memory_order_relaxed)) {
|
||||
size_t totalBytes = 0;
|
||||
size_t active = 0, cooling = 0, free_ = 0;
|
||||
for (const auto& sp : m_slots) {
|
||||
totalBytes += sp->pitchY * sp->height
|
||||
+ sp->pitchUV * (sp->height / 2);
|
||||
const int st = sp->state.load(std::memory_order_relaxed);
|
||||
if (st == GpuNV12Slot::STATE_ACTIVE) ++active;
|
||||
else if (st == GpuNV12Slot::STATE_COOLING) ++cooling;
|
||||
else ++free_;
|
||||
}
|
||||
ANS_DBG("Pool_Leak",
|
||||
"NV12Pool slots=%zu (active=%zu cooling=%zu free=%zu) bytesMB=%.1f (max=%d)",
|
||||
m_slots.size(), active, cooling, free_,
|
||||
(double)totalBytes / (1024.0 * 1024.0),
|
||||
GPU_NV12_POOL_MAX_SLOTS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 1. Drain cooled-down slots to make them available
|
||||
drainCooledSlots_locked();
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <json.hpp>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -1063,6 +1064,34 @@ namespace ANSCENTER
|
||||
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
|
||||
auto& identities = _plateIdentities[cameraId];
|
||||
|
||||
// Leak diagnostic — [OCR_Leak] heartbeat, at most once per 60 s
|
||||
// process-wide. Same fields as the ANSALPR_OD variant for direct
|
||||
// comparison: cams, ids_tot, clr, imgtrk. If any of these climb
|
||||
// monotonically, the corresponding state container is the leak.
|
||||
{
|
||||
using clk = std::chrono::steady_clock;
|
||||
static std::atomic<long long> s_nextLog{0};
|
||||
const long long tick = clk::now().time_since_epoch().count();
|
||||
long long expected = s_nextLog.load(std::memory_order_relaxed);
|
||||
if (tick >= expected) {
|
||||
const long long deadline = tick +
|
||||
std::chrono::duration_cast<clk::duration>(
|
||||
std::chrono::seconds(60)).count();
|
||||
if (s_nextLog.compare_exchange_strong(expected, deadline,
|
||||
std::memory_order_relaxed)) {
|
||||
size_t ids_tot = 0;
|
||||
for (const auto& [cam, v] : _plateIdentities) ids_tot += v.size();
|
||||
ANS_DBG("OCR_Leak",
|
||||
"ANSALPR_OCR this=%p cams=%zu ids_tot=%zu clr=%zu imgtrk=%zu",
|
||||
(void*)this,
|
||||
_plateIdentities.size(),
|
||||
ids_tot,
|
||||
_colourCache.size(),
|
||||
_imageSizeTrackers.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-detect mode by detection count.
|
||||
// 1 detection → pipeline/single-crop mode → no dedup needed.
|
||||
// 2+ detections → full-frame mode → apply accumulated scoring.
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <unordered_map>
|
||||
// ---------------------------------------------------------------------------
|
||||
// Check ONNX model opset version by reading the protobuf header directly.
|
||||
@@ -3121,6 +3122,41 @@ namespace ANSCENTER {
|
||||
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
|
||||
auto& identities = _plateIdentities[cameraId];
|
||||
|
||||
// Leak diagnostic — [OCR_Leak] heartbeat fires at most once per 60 s
|
||||
// process-wide. Reports the three per-camera state containers that
|
||||
// _could_ accumulate: _plateIdentities (keyed by cameraId), its sum
|
||||
// of inner-vector sizes, _colourCache, _imageSizeTrackers. All three
|
||||
// have stated bounds; heartbeat confirms they actually hold.
|
||||
// cams — number of distinct cameraId keys in _plateIdentities
|
||||
// ids_tot — sum of per-camera identity-vector sizes (should plateau)
|
||||
// clr — _colourCache size (bounded at COLOUR_CACHE_MAX_SIZE=200)
|
||||
// imgtrk — _imageSizeTrackers size (one entry per cameraId)
|
||||
// All size() reads outside locks are diagnostic snapshots; brief
|
||||
// races are acceptable (we're looking at trends over minutes).
|
||||
{
|
||||
using clk = std::chrono::steady_clock;
|
||||
static std::atomic<long long> s_nextLog{0};
|
||||
const long long tick = clk::now().time_since_epoch().count();
|
||||
long long expected = s_nextLog.load(std::memory_order_relaxed);
|
||||
if (tick >= expected) {
|
||||
const long long deadline = tick +
|
||||
std::chrono::duration_cast<clk::duration>(
|
||||
std::chrono::seconds(60)).count();
|
||||
if (s_nextLog.compare_exchange_strong(expected, deadline,
|
||||
std::memory_order_relaxed)) {
|
||||
size_t ids_tot = 0;
|
||||
for (const auto& [cam, v] : _plateIdentities) ids_tot += v.size();
|
||||
ANS_DBG("OCR_Leak",
|
||||
"ANSALPR_OD this=%p cams=%zu ids_tot=%zu clr=%zu imgtrk=%zu",
|
||||
(void*)this,
|
||||
_plateIdentities.size(),
|
||||
ids_tot,
|
||||
_colourCache.size(),
|
||||
_imageSizeTrackers.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Option B: Auto-detect mode by counting detections.
|
||||
// 1 detection → crop/pipeline mode → return instant result, no accumulated scoring
|
||||
// 2+ detections → full-frame mode → use accumulated scoring for dedup
|
||||
|
||||
@@ -318,13 +318,28 @@ std::vector<ByteTrack::BYTETracker::STrackPtr> ByteTrack::BYTETracker::update(co
|
||||
lost_stracks_ = subStracks(jointStracks(subStracks(lost_stracks_, tracked_stracks_), current_lost_stracks), removed_stracks_);
|
||||
removed_stracks_ = jointStracks(removed_stracks_, current_removed_stracks);
|
||||
|
||||
// Cap removed_stracks_ to prevent unbounded growth. Its only job is to
|
||||
// block re-entry into lost_stracks_ for tracks that have already timed
|
||||
// out (see subStracks(..., removed_stracks_) on the previous line). A
|
||||
// track that's been removed for more than a few hundred frames cannot
|
||||
// plausibly re-appear as "lost" — by then it's been reaped elsewhere
|
||||
// and any new detection would get a fresh track_id. At about one removal per frame, 1 000 entries is
|
||||
// ~100 s at 10 fps per camera, well beyond any re-identification
|
||||
// window. Older entries (front of vector) are dropped first.
|
||||
static constexpr size_t kRemovedCap = 1000;
|
||||
if (removed_stracks_.size() > kRemovedCap) {
|
||||
const size_t drop = removed_stracks_.size() - kRemovedCap;
|
||||
removed_stracks_.erase(removed_stracks_.begin(),
|
||||
removed_stracks_.begin() + drop);
|
||||
}
|
||||
|
||||
std::vector<STrackPtr> tracked_stracks_out, lost_stracks_out;
|
||||
removeDuplicateStracks(tracked_stracks_, lost_stracks_, tracked_stracks_out, lost_stracks_out);
|
||||
tracked_stracks_ = tracked_stracks_out;
|
||||
lost_stracks_ = lost_stracks_out;
|
||||
|
||||
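// Diagnostic: report tracker state size at most once every 60 s per thread (s_nextLog below is static thread_local, so tracker instances updated on the same thread share one window).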
|
||||
// removed_stracks_ is append-only in this implementation — watch it grow.
|
||||
// With the cap above, removed_stracks_ should plateau at <= kRemovedCap.
|
||||
{
|
||||
static thread_local std::chrono::steady_clock::time_point s_nextLog{};
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
|
||||
@@ -434,6 +434,16 @@ private:
|
||||
// the first time each batch size is seen; subsequent calls reuse it.
|
||||
std::unordered_map<int, cudaGraphExec_t> m_graphExecs;
|
||||
|
||||
// Leak diagnostics — per-engine-instance counters for CUDA graph
|
||||
// create/destroy balance. Incremented in EngineRunInference.inl and
|
||||
// EngineBuildLoadNetwork.inl (m_trtGraphDestroys is also bumped in ~Engine()). Read by the [TRT_Leak] heartbeat in
|
||||
// runInference (fires ≤1×/60s per engine instance).
|
||||
// m_trtLeakNextLogTick stores a steady_clock epoch count for lock-free
|
||||
// compare_exchange window claim across concurrent inference threads.
|
||||
std::atomic<int64_t> m_trtGraphCreates{0};
|
||||
std::atomic<int64_t> m_trtGraphDestroys{0};
|
||||
std::atomic<long long> m_trtLeakNextLogTick{0};
|
||||
|
||||
Logger m_logger;
|
||||
bool m_verbose{ true }; // false for non-probe pool slots
|
||||
bool m_disableGraphs{ true }; // DISABLED by default — concurrent graph launches + uploads cause GPU deadlock on WDDM
|
||||
@@ -569,7 +579,12 @@ template <typename T> Engine<T>::~Engine() {
|
||||
|
||||
// Destroy cached CUDA graphs
|
||||
try {
|
||||
for (auto& [bs, ge] : m_graphExecs) { if (ge) cudaGraphExecDestroy(ge); }
|
||||
for (auto& [bs, ge] : m_graphExecs) {
|
||||
if (ge) {
|
||||
cudaGraphExecDestroy(ge);
|
||||
m_trtGraphDestroys.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
m_graphExecs.clear();
|
||||
} catch (...) {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user