Fix NV12 crash when recreating the camera object
This commit is contained in:
@@ -46,13 +46,22 @@ namespace ANSCENTER {
|
||||
Destroy();
|
||||
}
|
||||
void ANSFLVClient::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
// Move player out of lock scope — close() does CUDA cleanup
|
||||
// (cuArrayDestroy/cuMemFree) which must not run under _mutex
|
||||
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
}
|
||||
_playerClient->close();
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSFLVLicense(const std::string& licenseKey) {
|
||||
@@ -129,8 +138,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
bool ANSFLVClient::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
_isPlaying = _playerClient->play();
|
||||
return _isPlaying;
|
||||
@@ -143,10 +156,16 @@ namespace ANSCENTER {
|
||||
return _isPlaying;
|
||||
}
|
||||
bool ANSFLVClient::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient.get()) player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_isPlaying = false;
|
||||
player = _playerClient.get();
|
||||
}
|
||||
}
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -39,22 +39,26 @@ namespace ANSCENTER {
|
||||
catch (...) {}
|
||||
}
|
||||
void ANSFILEPLAYER::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
_url = "";
|
||||
_imageRotateDeg = 0;
|
||||
_isPlaying = false;
|
||||
_lastJpegImage = "";
|
||||
_pLastFrame.release();
|
||||
if (_playerClient) {
|
||||
_playerClient->close();
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
_url = "";
|
||||
_imageRotateDeg = 0;
|
||||
_isPlaying = false;
|
||||
_lastJpegImage = "";
|
||||
_pLastFrame.release();
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_logger.LogError("ANSFILEPLAYER::Destroy. Exception:", e.what(), __FILE__, __LINE__);
|
||||
}
|
||||
catch (...) {
|
||||
_logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_logger.LogError("ANSFILEPLAYER::Destroy. Exception:", e.what(), __FILE__, __LINE__);
|
||||
}
|
||||
catch (...) {
|
||||
_logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
void ANSFILEPLAYER::CheckLicense() {
|
||||
@@ -94,8 +98,12 @@ namespace ANSCENTER {
|
||||
return _playerClient->open(_url);
|
||||
}
|
||||
bool ANSFILEPLAYER::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
return Start();
|
||||
}
|
||||
@@ -105,14 +113,17 @@ namespace ANSCENTER {
|
||||
return _isPlaying;
|
||||
}
|
||||
bool ANSFILEPLAYER::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient->pause()) {
|
||||
decltype(_playerClient.get()) player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
player = _playerClient.get();
|
||||
}
|
||||
if (player && player->pause()) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool ANSFILEPLAYER::IsPaused() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
|
||||
@@ -19,8 +19,31 @@ extern "C" {
|
||||
#include "libavutil/frame.h"
|
||||
}
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Debug logging macro for GPU frame operations.
|
||||
// Output goes to stderr (console) AND OutputDebugString (DebugView / VS debugger).
|
||||
// Use Sysinternals DebugView (dbgview64.exe) to capture these after a crash.
|
||||
#ifndef GPU_FRAME_DBG
|
||||
#ifdef _WIN32
|
||||
#define GPU_FRAME_DBG(fmt, ...) do { \
|
||||
char _gpu_dbg_buf[512]; \
|
||||
snprintf(_gpu_dbg_buf, sizeof(_gpu_dbg_buf), "[GpuFrameOps] " fmt "\n", ##__VA_ARGS__); \
|
||||
OutputDebugStringA(_gpu_dbg_buf); \
|
||||
fprintf(stderr, "%s", _gpu_dbg_buf); \
|
||||
} while(0)
|
||||
#else
|
||||
#define GPU_FRAME_DBG(fmt, ...) \
|
||||
fprintf(stderr, "[GpuFrameOps] " fmt "\n", ##__VA_ARGS__)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace anscv_gpu_ops {
|
||||
namespace detail {
|
||||
@@ -71,6 +94,42 @@ inline bool snapshotNV12Planes(const AVFrame* nv12,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Drain pending GPU device pointers and actually cudaFree them.
|
||||
// Must be called from a thread with CUDA context available.
|
||||
inline void drainAndFreeGpuPending() {
|
||||
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||
if (gpuPending.empty()) return;
|
||||
GPU_FRAME_DBG("drainGpuPending: freeing %zu GPU ptrs", gpuPending.size());
|
||||
int prevDev = -1;
|
||||
cudaGetDevice(&prevDev);
|
||||
|
||||
// Group by device to minimize cudaSetDevice calls and synchronize once per device.
|
||||
// cudaDeviceSynchronize() is CRITICAL: NV12 kernels run on cv::cuda::Stream
|
||||
// (not the default stream). cudaFree on stream 0 doesn't wait for other
|
||||
// streams, so without this sync, cudaFree can free a buffer while a kernel
|
||||
// on another stream is still reading from it → cudaErrorIllegalAddress (700)
|
||||
// which permanently corrupts the CUDA context.
|
||||
int lastSyncDev = -1;
|
||||
for (auto& entry : gpuPending) {
|
||||
if (entry.ptr) {
|
||||
if (entry.deviceIdx >= 0)
|
||||
cudaSetDevice(entry.deviceIdx);
|
||||
if (entry.deviceIdx != lastSyncDev) {
|
||||
cudaDeviceSynchronize();
|
||||
lastSyncDev = entry.deviceIdx;
|
||||
}
|
||||
GPU_FRAME_DBG("drainGpuPending: cudaFree(%p) dev=%d", entry.ptr, entry.deviceIdx);
|
||||
cudaError_t err = cudaFree(entry.ptr);
|
||||
if (err != cudaSuccess) {
|
||||
GPU_FRAME_DBG("drainGpuPending: cudaFree FAILED err=%d (%s)",
|
||||
(int)err, cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (prevDev >= 0)
|
||||
cudaSetDevice(prevDev);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace anscv_gpu_ops
|
||||
|
||||
@@ -117,36 +176,44 @@ inline void gpu_frame_attach(cv::Mat* mat, AVFrame* nv12, int gpuIdx, int64_t pt
|
||||
}
|
||||
}
|
||||
|
||||
// Attach CUDA HW frame — keeps CUDA device pointers for zero-copy inference.
|
||||
// Attach CUDA HW frame — copies NV12 from NVDEC surfaces to owned GPU memory.
|
||||
// TAKES OWNERSHIP of cudaFrame AND cpuNV12 — caller must NOT av_frame_free after.
|
||||
//
|
||||
// Primary path: yPlane/uvPlane point to CUDA device pointers from the cloned
|
||||
// AVFrame (data[0]/data[1]). The cloned AVFrame keeps the NVDEC surface alive
|
||||
// until gpu_frame_remove() is called after inference. With 4 cameras each
|
||||
// holding ~1 surface, this uses 4 of NVDEC's 25-32 surface pool — safe.
|
||||
// D2D copy path: cudaMemcpy2D from NVDEC surfaces to cudaMalloc'd buffers on the
|
||||
// same GPU. This decouples the NV12 data lifetime from the NVDEC decoder, so
|
||||
// player->close() can safely destroy the decoder at any time without invalidating
|
||||
// pointers that inference engines may be reading. The NVDEC surface is freed
|
||||
// immediately (av_frame_free), returning it to the decoder's surface pool.
|
||||
//
|
||||
// The owned GPU pointers are stored as both yPlane/uvPlane (for zero-copy reads)
|
||||
// and gpuCacheY/gpuCacheUV (for lifecycle management / cudaFree on cleanup).
|
||||
//
|
||||
// VRAM budget: if the global GPU cache budget is exceeded, falls back to CPU-only
|
||||
// NV12 snapshot (no zero-copy, but safe).
|
||||
//
|
||||
// Fallback: cpuYPlane/cpuUvPlane hold CPU-side NV12 snapshot for cross-GPU
|
||||
// inference (when decode GPU != inference GPU, CUDA device ptrs aren't
|
||||
// accessible from another GPU context).
|
||||
// inference (when decode GPU != inference GPU).
|
||||
inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, int64_t pts,
|
||||
AVFrame* cpuNV12 = nullptr) {
|
||||
if (!mat || !cudaFrame) return;
|
||||
if (!mat || !cudaFrame) {
|
||||
GPU_FRAME_DBG("attach_cuda: SKIP mat=%p cudaFrame=%p", (void*)mat, (void*)cudaFrame);
|
||||
return;
|
||||
}
|
||||
|
||||
const int w = cudaFrame->width;
|
||||
const int h = cudaFrame->height;
|
||||
GPU_FRAME_DBG("attach_cuda: START mat=%p %dx%d gpu=%d nvdecY=%p nvdecUV=%p cpuNV12=%p",
|
||||
(void*)mat, w, h, gpuIdx,
|
||||
(void*)cudaFrame->data[0], (void*)cudaFrame->data[1], (void*)cpuNV12);
|
||||
|
||||
GpuFrameData data{};
|
||||
data.gpuIndex = gpuIdx;
|
||||
data.pts = pts;
|
||||
data.width = cudaFrame->width;
|
||||
data.height = cudaFrame->height;
|
||||
data.pixelFormat = 23; // AV_PIX_FMT_NV12 — the underlying sw_format
|
||||
data.width = w;
|
||||
data.height = h;
|
||||
data.pixelFormat = 23; // AV_PIX_FMT_NV12
|
||||
|
||||
// Primary: CUDA device pointers from NVDEC (zero-copy on same GPU)
|
||||
data.isCudaDevicePtr = true;
|
||||
data.yPlane = cudaFrame->data[0]; // CUDA device ptr: Y plane
|
||||
data.uvPlane = cudaFrame->data[1]; // CUDA device ptr: UV plane
|
||||
data.yLinesize = cudaFrame->linesize[0];
|
||||
data.uvLinesize = cudaFrame->linesize[1];
|
||||
|
||||
// Fallback: snapshot CPU NV12 for cross-GPU inference
|
||||
// Snapshot CPU NV12 for cross-GPU fallback (must do before freeing cpuNV12)
|
||||
if (cpuNV12) {
|
||||
anscv_gpu_ops::detail::snapshotNV12Planes(
|
||||
cpuNV12,
|
||||
@@ -155,9 +222,98 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
||||
data.width, data.height);
|
||||
}
|
||||
|
||||
// Store AVFrames for cleanup (cudaFrame keeps NVDEC surface alive)
|
||||
data.avframe = cudaFrame;
|
||||
data.cpuAvframe = cpuNV12;
|
||||
// --- D2D copy: NVDEC surface → owned GPU memory ---
|
||||
// Estimate VRAM needed for the owned NV12 copy
|
||||
const size_t yBytes = static_cast<size_t>(w) * h;
|
||||
const size_t uvBytes = static_cast<size_t>(w) * (h / 2);
|
||||
const size_t totalBytes = yBytes + uvBytes;
|
||||
|
||||
bool d2dOk = false;
|
||||
if (ANSGpuFrameRegistry::instance().canAllocateGpuCache(totalBytes)) {
|
||||
int prevDev = -1;
|
||||
cudaGetDevice(&prevDev);
|
||||
if (gpuIdx >= 0)
|
||||
cudaSetDevice(gpuIdx);
|
||||
|
||||
void* ownedY = nullptr;
|
||||
void* ownedUV = nullptr;
|
||||
size_t yPitch = 0;
|
||||
size_t uvPitch = 0;
|
||||
|
||||
cudaError_t e1 = cudaMallocPitch(&ownedY, &yPitch, w, h);
|
||||
cudaError_t e2 = cudaMallocPitch(&ownedUV, &uvPitch, w, h / 2);
|
||||
|
||||
if (e1 == cudaSuccess && e2 == cudaSuccess) {
|
||||
cudaError_t e3 = cudaMemcpy2D(ownedY, yPitch,
|
||||
cudaFrame->data[0], cudaFrame->linesize[0],
|
||||
w, h, cudaMemcpyDeviceToDevice);
|
||||
cudaError_t e4 = cudaMemcpy2D(ownedUV, uvPitch,
|
||||
cudaFrame->data[1], cudaFrame->linesize[1],
|
||||
w, h / 2, cudaMemcpyDeviceToDevice);
|
||||
|
||||
if (e3 == cudaSuccess && e4 == cudaSuccess) {
|
||||
// Store owned GPU pointers as primary NV12 source
|
||||
data.isCudaDevicePtr = true;
|
||||
data.yPlane = static_cast<uint8_t*>(ownedY);
|
||||
data.uvPlane = static_cast<uint8_t*>(ownedUV);
|
||||
data.yLinesize = static_cast<int>(yPitch);
|
||||
data.uvLinesize = static_cast<int>(uvPitch);
|
||||
|
||||
// Track in gpuCache for lifecycle management (cudaFree on cleanup)
|
||||
data.gpuCacheY = ownedY;
|
||||
data.gpuCacheUV = ownedUV;
|
||||
data.gpuCacheYPitch = yPitch;
|
||||
data.gpuCacheUVPitch = uvPitch;
|
||||
data.gpuCacheDeviceIdx = gpuIdx;
|
||||
data.gpuCacheValid = true;
|
||||
data.gpuCacheBytes = yPitch * h + uvPitch * (h / 2);
|
||||
|
||||
ANSGpuFrameRegistry::instance().onGpuCacheCreated(data.gpuCacheBytes);
|
||||
d2dOk = true;
|
||||
GPU_FRAME_DBG("attach_cuda: D2D OK ownedY=%p ownedUV=%p yPitch=%zu uvPitch=%zu bytes=%zu",
|
||||
ownedY, ownedUV, yPitch, uvPitch, data.gpuCacheBytes);
|
||||
} else {
|
||||
// D2D copy failed — free allocated memory and fall back
|
||||
GPU_FRAME_DBG("attach_cuda: D2D COPY FAILED e3=%d e4=%d — fallback CPU",
|
||||
(int)e3, (int)e4);
|
||||
cudaFree(ownedY);
|
||||
cudaFree(ownedUV);
|
||||
}
|
||||
} else {
|
||||
// Allocation failed — free any partial allocation and fall back
|
||||
GPU_FRAME_DBG("attach_cuda: cudaMallocPitch FAILED e1=%d e2=%d — fallback CPU",
|
||||
(int)e1, (int)e2);
|
||||
if (e1 == cudaSuccess) cudaFree(ownedY);
|
||||
if (e2 == cudaSuccess) cudaFree(ownedUV);
|
||||
}
|
||||
|
||||
if (prevDev >= 0)
|
||||
cudaSetDevice(prevDev);
|
||||
}
|
||||
|
||||
if (!d2dOk) {
|
||||
// Fall back to CPU NV12 snapshot only (no zero-copy)
|
||||
GPU_FRAME_DBG("attach_cuda: FALLBACK CPU-only cpuY=%p cpuUV=%p",
|
||||
(void*)data.cpuYPlane, (void*)data.cpuUvPlane);
|
||||
data.isCudaDevicePtr = false;
|
||||
data.yPlane = data.cpuYPlane;
|
||||
data.uvPlane = data.cpuUvPlane;
|
||||
data.yLinesize = data.cpuYLinesize;
|
||||
data.uvLinesize = data.cpuUvLinesize;
|
||||
}
|
||||
|
||||
// Release AVFrames immediately — NVDEC surfaces returned to pool.
|
||||
// No longer stored in GpuFrameData (owned GPU copy is independent).
|
||||
GPU_FRAME_DBG("attach_cuda: freeing AVFrames cudaFrame=%p cpuNV12=%p",
|
||||
(void*)cudaFrame, (void*)cpuNV12);
|
||||
av_frame_free(&cudaFrame);
|
||||
if (cpuNV12) av_frame_free(&cpuNV12);
|
||||
data.avframe = nullptr;
|
||||
data.cpuAvframe = nullptr;
|
||||
|
||||
GPU_FRAME_DBG("attach_cuda: FINAL yPlane=%p uvPlane=%p isCuda=%d gpuCacheY=%p gpuCacheUV=%p",
|
||||
(void*)data.yPlane, (void*)data.uvPlane, (int)data.isCudaDevicePtr,
|
||||
data.gpuCacheY, data.gpuCacheUV);
|
||||
|
||||
void* old = ANSGpuFrameRegistry::instance().attach(mat, std::move(data));
|
||||
if (old) {
|
||||
@@ -165,17 +321,23 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
||||
av_frame_free(&oldFrame);
|
||||
}
|
||||
|
||||
// Free stale AVFrames evicted by TTL or previous attach
|
||||
auto pending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||
for (void* p : pending) {
|
||||
AVFrame* stale = static_cast<AVFrame*>(p);
|
||||
av_frame_free(&stale);
|
||||
}
|
||||
|
||||
// Free stale GPU device pointers
|
||||
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||
}
|
||||
|
||||
// Release entry by cv::Mat* and free any returned AVFrames. Safe if not in map (no-op).
|
||||
// Release entry by cv::Mat* and free any returned AVFrames + GPU pointers.
|
||||
// Safe if not in map (no-op).
|
||||
inline void gpu_frame_remove(cv::Mat* mat) {
|
||||
if (!mat) return;
|
||||
|
||||
GPU_FRAME_DBG("gpu_frame_remove: mat=%p", (void*)mat);
|
||||
ANSGpuFrameRegistry::instance().release(mat);
|
||||
|
||||
// Free any AVFrames that became pending from this release or prior eviction
|
||||
@@ -186,13 +348,7 @@ inline void gpu_frame_remove(cv::Mat* mat) {
|
||||
}
|
||||
|
||||
// Free any GPU device pointers that became pending
|
||||
auto gpuPending = gpu_frame_drain_gpu_pending();
|
||||
// NOTE: cudaFree requires CUDA context — caller must be on a CUDA-capable thread.
|
||||
// If not, these will leak. In practice, gpu_frame_remove is called from ANSCV
|
||||
// camera threads which do have CUDA context.
|
||||
// For safety, we skip cudaFree here and let NV12PreprocessHelper handle it.
|
||||
// The GPU pointers are tracked in the budget and will be accounted for.
|
||||
(void)gpuPending;
|
||||
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||
}
|
||||
|
||||
// Alias for remove — used in ANSCV mutating functions to drop stale GPU data.
|
||||
@@ -209,4 +365,7 @@ inline void gpu_frame_evict_stale() {
|
||||
AVFrame* stale = static_cast<AVFrame*>(p);
|
||||
av_frame_free(&stale);
|
||||
}
|
||||
|
||||
// Free any GPU device pointers from evicted frames
|
||||
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||
}
|
||||
|
||||
@@ -46,13 +46,19 @@ namespace ANSCENTER {
|
||||
Destroy();
|
||||
}
|
||||
void ANSMJPEGClient::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
}
|
||||
_playerClient->close();
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSMJPEGLicense(const std::string& licenseKey) {
|
||||
@@ -129,8 +135,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
bool ANSMJPEGClient::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
_isPlaying = _playerClient->play();
|
||||
return _isPlaying;
|
||||
@@ -143,10 +153,16 @@ namespace ANSCENTER {
|
||||
return _isPlaying;
|
||||
}
|
||||
bool ANSMJPEGClient::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient.get()) player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_isPlaying = false;
|
||||
player = _playerClient.get();
|
||||
}
|
||||
}
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -48,13 +48,19 @@ namespace ANSCENTER {
|
||||
Destroy();
|
||||
}
|
||||
void ANSRTMPClient::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
}
|
||||
_playerClient->close();
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSRTMPLicense(const std::string& licenseKey) {
|
||||
@@ -126,8 +132,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSRTMPClient::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
_isPlaying = _playerClient->play();
|
||||
return _isPlaying;
|
||||
@@ -140,10 +150,16 @@ namespace ANSCENTER {
|
||||
return _isPlaying;
|
||||
}
|
||||
bool ANSRTMPClient::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient.get()) player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_isPlaying = false;
|
||||
player = _playerClient.get();
|
||||
}
|
||||
}
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include "ANSMatRegistry.h"
|
||||
#include "ANSGpuFrameOps.h"
|
||||
#include <memory>
|
||||
#include <format>
|
||||
#include "media_codec.h"
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
@@ -21,6 +22,20 @@ extern "C"
|
||||
}
|
||||
// Note: per-instance thread safety is handled by ANSRTSPClient::_mutex
|
||||
// Mat registry thread safety is handled by anscv_mat_replace's internal registry_mutex
|
||||
|
||||
// Debug logging — goes to both stderr AND OutputDebugString (DebugView).
|
||||
#ifndef RTSP_DBG
|
||||
#ifdef _WIN32
|
||||
#define RTSP_DBG(fmt, ...) do { \
|
||||
char _rtsp_buf[512]; \
|
||||
snprintf(_rtsp_buf, sizeof(_rtsp_buf), fmt "\n", ##__VA_ARGS__); \
|
||||
OutputDebugStringA(_rtsp_buf); \
|
||||
fprintf(stderr, "%s", _rtsp_buf); \
|
||||
} while(0)
|
||||
#else
|
||||
#define RTSP_DBG(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)
|
||||
#endif
|
||||
#endif
|
||||
static bool ansrtspLicenceValid = false;
|
||||
// Global once_flag to protect license checking
|
||||
static std::once_flag ansrtspLicenseOnceFlag;
|
||||
@@ -48,19 +63,88 @@ namespace ANSCENTER {
|
||||
Destroy();
|
||||
}
|
||||
void ANSRTSPClient::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
// Stop the stream first so the video decoder is flushed and
|
||||
// the RTSP callback thread is no longer feeding frames into
|
||||
// decode(). Without this, rtsp_close() can block waiting for
|
||||
// CRtspClient::m_pMutex (held by the callback mid-decode),
|
||||
// and the hardware decoder flush during destruction can hang
|
||||
// on the GPU.
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
// Move the player client pointer out of the lock scope, then
|
||||
// close it OUTSIDE the mutex. close() calls cuArrayDestroy /
|
||||
// cuMemFree which acquire an EXCLUSIVE SRW lock inside nvcuda64.
|
||||
// If we hold _mutex during close(), and another thread holds
|
||||
// the nvcuda64 SRW lock (e.g. cuStreamSynchronize during
|
||||
// inference), we get a deadlock: Stop() → _mutex → nvcuda64
|
||||
// vs inference → nvcuda64 → (blocked by exclusive waiter).
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::unique_lock<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
}
|
||||
_playerClient->close();
|
||||
|
||||
// --- Inference guard: wait for in-flight frames to finish ---
|
||||
// GetRTSPCVImage increments _inFlightFrames when it hands out
|
||||
// a GPU frame; the registry decrements it when the frame is
|
||||
// released after inference completes. We wait here so that
|
||||
// close() doesn't free NVDEC surfaces while TensorRT is
|
||||
// still reading from them (the LabVIEW crash root cause).
|
||||
int inFlight = _inFlightFrames.load(std::memory_order_acquire);
|
||||
if (inFlight > 0) {
|
||||
_logger.LogInfo("ANSRTSPClient::Destroy",
|
||||
std::format("waiting for {} in-flight inference frame(s)...", inFlight),
|
||||
__FILE__, __LINE__);
|
||||
bool done = _inFlightDone.wait_for(lock, std::chrono::seconds(5), [this] {
|
||||
return _inFlightFrames.load(std::memory_order_acquire) <= 0;
|
||||
});
|
||||
if (!done) {
|
||||
_logger.LogWarn("ANSRTSPClient::Destroy",
|
||||
std::format("timed out waiting for in-flight frames "
|
||||
"(still {} in-flight) — force-releasing GPU frames",
|
||||
_inFlightFrames.load()),
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
// Force-release ALL GPU frames owned by this client BEFORE close().
|
||||
// Unreleased clones (e.g. LabVIEW AI tasks still holding cloned
|
||||
// cv::Mat*) keep gpuCacheY/gpuCacheUV allocated. We must cudaFree
|
||||
// them NOW while the CUDA context is still alive. After close()
|
||||
// destroys the context, cudaFree would crash.
|
||||
int forceReleased = ANSGpuFrameRegistry::instance().forceReleaseByOwner(this);
|
||||
if (forceReleased > 0) {
|
||||
_logger.LogWarn("ANSRTSPClient::Destroy",
|
||||
std::format("force-released {} GPU frame(s) with unreleased clones", forceReleased),
|
||||
__FILE__, __LINE__);
|
||||
// Drain and cudaFree the GPU buffers while CUDA context is alive
|
||||
// Sync all GPU streams before freeing to avoid illegal access
|
||||
cudaDeviceSynchronize();
|
||||
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||
if (!gpuPending.empty()) {
|
||||
RTSP_DBG("[Destroy] cudaFree %zu GPU ptrs before close()", gpuPending.size());
|
||||
int prevDev = -1;
|
||||
cudaGetDevice(&prevDev);
|
||||
for (auto& entry : gpuPending) {
|
||||
if (entry.ptr) {
|
||||
if (entry.deviceIdx >= 0) cudaSetDevice(entry.deviceIdx);
|
||||
cudaFree(entry.ptr);
|
||||
}
|
||||
}
|
||||
if (prevDev >= 0) cudaSetDevice(prevDev);
|
||||
}
|
||||
// Also drain any pending AVFrames
|
||||
auto avPending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||
for (void* p : avPending) {
|
||||
AVFrame* f = static_cast<AVFrame*>(p);
|
||||
av_frame_free(&f);
|
||||
}
|
||||
}
|
||||
ANSGpuFrameRegistry::instance().invalidateOwner(this);
|
||||
_inFlightFrames.store(0, std::memory_order_release);
|
||||
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
// CUDA cleanup happens here, outside the mutex — now safe.
|
||||
// All GPU frames owned by this client have been force-freed above.
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSRTSPLicense(const std::string& licenseKey) {
|
||||
@@ -146,10 +230,81 @@ namespace ANSCENTER {
|
||||
_playerClient->setCrop(crop);
|
||||
}
|
||||
bool ANSRTSPClient::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
// 1. Mark as not-playing under the mutex FIRST. This makes GetImage()
|
||||
// return the cached _pLastFrame instead of calling into the player,
|
||||
// preventing use-after-free when close() destroys CUDA resources.
|
||||
{
|
||||
std::unique_lock<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
|
||||
// --- Inference guard: wait for in-flight frames to finish ---
|
||||
// Same guard as Destroy(): close() will free NVDEC surfaces, so
|
||||
// we must wait for any inference engines still reading NV12 data
|
||||
// via zero-copy CUDA device pointers.
|
||||
int inFlight = _inFlightFrames.load(std::memory_order_acquire);
|
||||
if (inFlight > 0) {
|
||||
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||
std::format("waiting for {} in-flight inference frame(s)...", inFlight),
|
||||
__FILE__, __LINE__);
|
||||
bool done = _inFlightDone.wait_for(lock, std::chrono::seconds(5), [this] {
|
||||
return _inFlightFrames.load(std::memory_order_acquire) <= 0;
|
||||
});
|
||||
if (!done) {
|
||||
_logger.LogWarn("ANSRTSPClient::Reconnect",
|
||||
std::format("timed out waiting for in-flight frames "
|
||||
"(still {} in-flight) — force-releasing GPU frames",
|
||||
_inFlightFrames.load()),
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
// Force-release GPU frames before close() — same as Destroy().
|
||||
int forceReleased = ANSGpuFrameRegistry::instance().forceReleaseByOwner(this);
|
||||
if (forceReleased > 0) {
|
||||
_logger.LogWarn("ANSRTSPClient::Reconnect",
|
||||
std::format("force-released {} GPU frame(s) with unreleased clones", forceReleased),
|
||||
__FILE__, __LINE__);
|
||||
// Sync all GPU streams before freeing
|
||||
cudaDeviceSynchronize();
|
||||
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||
if (!gpuPending.empty()) {
|
||||
int prevDev = -1;
|
||||
cudaGetDevice(&prevDev);
|
||||
for (auto& entry : gpuPending) {
|
||||
if (entry.ptr) {
|
||||
if (entry.deviceIdx >= 0) cudaSetDevice(entry.deviceIdx);
|
||||
cudaFree(entry.ptr);
|
||||
}
|
||||
}
|
||||
if (prevDev >= 0) cudaSetDevice(prevDev);
|
||||
}
|
||||
auto avPending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||
for (void* p : avPending) {
|
||||
AVFrame* f = static_cast<AVFrame*>(p);
|
||||
av_frame_free(&f);
|
||||
}
|
||||
}
|
||||
ANSGpuFrameRegistry::instance().invalidateOwner(this);
|
||||
_inFlightFrames.store(0, std::memory_order_release);
|
||||
}
|
||||
|
||||
// 2. close() does CUDA cleanup (cuArrayDestroy/cuMemFree) — run outside
|
||||
// _mutex to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||
// Safe now because GetImage()/GetNV12Frame() won't touch the player
|
||||
// while _isPlaying == false, and all in-flight frames have been released.
|
||||
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||
"calling close() — NVDEC decoder will be destroyed", __FILE__, __LINE__);
|
||||
RTSP_DBG("[Reconnect] BEFORE close() this=%p", (void*)this);
|
||||
_playerClient->close();
|
||||
RTSP_DBG("[Reconnect] AFTER close() this=%p", (void*)this);
|
||||
|
||||
// 3. Re-setup and play under the mutex.
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||
"calling Setup() + play()", __FILE__, __LINE__);
|
||||
Setup();
|
||||
_isPlaying = _playerClient->play();
|
||||
RTSP_DBG("[Reconnect] DONE isPlaying=%d this=%p", (int)_isPlaying, (void*)this);
|
||||
return _isPlaying;
|
||||
}
|
||||
void ANSRTSPClient::EnableAudio(bool status) {
|
||||
@@ -169,11 +324,23 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSRTSPClient::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
// Grab the player pointer and clear _isPlaying under the lock,
|
||||
// then call stop() OUTSIDE the mutex. stop() internally calls
|
||||
// StopVideoDecoder -> decoder->flush() which does CUDA calls
|
||||
// that can block on the nvcuda64 SRW lock. Holding _mutex
|
||||
// during that time blocks all other operations on this client
|
||||
// and contributes to the convoy when many clients stop at once.
|
||||
CRtspPlayer* player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_isPlaying = false;
|
||||
player = _playerClient.get();
|
||||
}
|
||||
}
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool ANSRTSPClient::Pause() {
|
||||
@@ -759,10 +926,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
// Returns the player's current NV12 frame, or nullptr when no frame can be
// served. The caller must av_frame_free a non-null result (the player returns
// a clone).
AVFrame* ANSRTSPClient::GetNV12Frame() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Not playing: the player may be mid-reconnect (CUDA resources freed).
    // Null player: Destroy() moves _playerClient out before closing it, so the
    // pointer must be checked before it is dereferenced.
    if (!_isPlaying || !_playerClient) return nullptr;
    return _playerClient->getNV12Frame(); // Returns clone, caller must av_frame_free
}
|
||||
// Returns the player's current CUDA hardware frame, or nullptr when no frame
// can be served.
AVFrame* ANSRTSPClient::GetCudaHWFrame() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Not playing: the player may be mid-reconnect (CUDA resources freed).
    // Null player: Destroy() moves _playerClient out before closing it, so the
    // pointer must be checked before it is dereferenced.
    if (!_isPlaying || !_playerClient) return nullptr;
    return _playerClient->getCudaHWFrame();
}
|
||||
bool ANSRTSPClient::IsCudaHWAccel() {
|
||||
@@ -810,6 +979,11 @@ extern "C" __declspec(dllexport) int CreateANSRTSPHandle(ANSCENTER::ANSRTSPClien
|
||||
if (_username.empty() && _password.empty()) result = ptr->Init(licenseKey, url);
|
||||
else result = ptr->Init(licenseKey, username, password, url);
|
||||
if (result) {
|
||||
// Default to CUDA/NVDEC HW decoding (mode 7) for NV12 zero-copy
|
||||
// fast path. LabVIEW may not call SetRTSPHWDecoding after
|
||||
// destroy+recreate cycles, so this ensures the new handle always
|
||||
// uses the GPU decode path instead of falling back to D3D11VA/CPU.
|
||||
ptr->SetHWDecoding(7); // HW_DECODING_CUDA
|
||||
*Handle = ptr.release();
|
||||
extern void anscv_unregister_handle(void*);
|
||||
extern void anscv_register_handle(void*, void(*)(void*));
|
||||
@@ -830,9 +1004,37 @@ extern "C" __declspec(dllexport) int ReleaseANSRTSPHandle(ANSCENTER::ANSRTSPClie
|
||||
try {
|
||||
extern void anscv_unregister_handle(void*);
|
||||
anscv_unregister_handle(*Handle);
|
||||
// unique_ptr destructor calls ~ANSRTSPClient which calls Destroy() — no need to call Destroy() separately
|
||||
std::unique_ptr<ANSCENTER::ANSRTSPClient> ptr(*Handle);
|
||||
|
||||
// Grab the raw pointer and NULL the caller's handle immediately.
|
||||
// This prevents the caller (LabVIEW) from issuing new calls.
|
||||
ANSCENTER::ANSRTSPClient* raw = *Handle;
|
||||
*Handle = nullptr;
|
||||
|
||||
// Mark as not-playing under _mutex ONLY. This makes
|
||||
// GetImage()/GetNV12Frame()/GetCudaHWFrame() return empty/null
|
||||
// on any subsequent call, and prevents NEW NV12 GPU surface
|
||||
// pointers from being handed out.
|
||||
//
|
||||
// Do NOT call Destroy()/close() here — close() frees the
|
||||
// NVDEC GPU surfaces (cuArrayDestroy/cuMemFree) which may
|
||||
// still be in use by a CUDA inference kernel that received
|
||||
// the NV12 pointer from a GetRTSPCVImage call that already
|
||||
// completed before this Release was called.
|
||||
{
|
||||
// Use the client's _mutex to safely set _isPlaying = false.
|
||||
// This is the same lock GetImage/GetNV12Frame acquire.
|
||||
raw->Stop(); // sets _isPlaying = false, stops playback
|
||||
}
|
||||
|
||||
// Defer the full cleanup (Destroy + delete) to a background thread
|
||||
// so LabVIEW's UI thread is not blocked. Destroy() now waits
|
||||
// precisely for in-flight inference to finish (via _inFlightFrames
|
||||
// counter + condition variable) instead of the old 500ms sleep hack.
|
||||
std::thread([raw]() {
|
||||
try { raw->Destroy(); } catch (...) {}
|
||||
try { delete raw; } catch (...) {}
|
||||
}).detach();
|
||||
|
||||
return 0;
|
||||
} catch (...) {
|
||||
if (Handle) *Handle = nullptr;
|
||||
@@ -882,19 +1084,56 @@ extern "C" __declspec(dllexport) int GetRTSPCVImage(
|
||||
|
||||
// Attach NV12 frame for GPU fast-path inference (side-table registry)
|
||||
// attach() takes ownership — do NOT av_frame_free here
|
||||
//
|
||||
// CRITICAL: TryIncrementInFlight() MUST be called BEFORE GetCudaHWFrame().
|
||||
// It atomically checks _isPlaying and increments _inFlightFrames under
|
||||
// the same mutex, so Reconnect() cannot call close() while we're doing
|
||||
// the D2D copy from NVDEC surfaces inside gpu_frame_attach_cuda().
|
||||
int gpuIdx = (*Handle)->GetHWDecodingGpuIndex();
|
||||
AVFrame* cudaHW = (*Handle)->GetCudaHWFrame();
|
||||
if (cudaHW) {
|
||||
// CUDA zero-copy: frame data[0]/data[1] are CUDA device pointers.
|
||||
// Also attach CPU NV12 as fallback for cross-GPU inference
|
||||
// (when decode GPU != inference GPU, CUDA ptrs aren't accessible).
|
||||
AVFrame* cpuNV12 = (*Handle)->GetNV12Frame();
|
||||
gpu_frame_attach_cuda(*image, cudaHW, gpuIdx, timeStamp, cpuNV12);
|
||||
} else {
|
||||
AVFrame* nv12 = (*Handle)->GetNV12Frame();
|
||||
if (nv12) {
|
||||
gpu_frame_attach(*image, nv12, gpuIdx, timeStamp);
|
||||
bool inFlightGuardHeld = (*Handle)->TryIncrementInFlight();
|
||||
RTSP_DBG("[GetRTSPCVImage] mat=%p gpuIdx=%d inFlightGuard=%d",
|
||||
(void*)*image, gpuIdx, (int)inFlightGuardHeld);
|
||||
|
||||
if (inFlightGuardHeld) {
|
||||
AVFrame* cudaHW = (*Handle)->GetCudaHWFrame();
|
||||
if (cudaHW) {
|
||||
RTSP_DBG("[GetRTSPCVImage] cudaHW: %dx%d data[0]=%p data[1]=%p",
|
||||
cudaHW->width, cudaHW->height,
|
||||
(void*)cudaHW->data[0], (void*)cudaHW->data[1]);
|
||||
AVFrame* cpuNV12 = (*Handle)->GetNV12Frame();
|
||||
gpu_frame_attach_cuda(*image, cudaHW, gpuIdx, timeStamp, cpuNV12);
|
||||
} else {
|
||||
// HW decode not active — try CPU NV12
|
||||
AVFrame* nv12 = (*Handle)->GetNV12Frame();
|
||||
if (nv12) {
|
||||
gpu_frame_attach(*image, nv12, gpuIdx, timeStamp);
|
||||
}
|
||||
}
|
||||
|
||||
// Wire up the registry callback to release the in-flight guard.
|
||||
// TryIncrementInFlight already incremented; DecrementInFlight fires
|
||||
// when the last clone of this frame is released after inference.
|
||||
auto* gpuData = ANSGpuFrameRegistry::instance().lookup(*image);
|
||||
RTSP_DBG("[GetRTSPCVImage] after attach: gpuData=%p yPlane=%p isCuda=%d gpuCacheY=%p",
|
||||
(void*)gpuData,
|
||||
gpuData ? (void*)gpuData->yPlane : nullptr,
|
||||
gpuData ? (int)gpuData->isCudaDevicePtr : -1,
|
||||
gpuData ? gpuData->gpuCacheY : nullptr);
|
||||
if (gpuData) {
|
||||
gpuData->ownerClient = *Handle;
|
||||
gpuData->onReleaseFn = [](void* client) {
|
||||
static_cast<ANSCENTER::ANSRTSPClient*>(client)->DecrementInFlight();
|
||||
};
|
||||
// NOTE: Do NOT call IncrementInFlight() again here —
|
||||
// TryIncrementInFlight() already did it above.
|
||||
} else {
|
||||
// No gpuData registered (attach failed?) — release the guard
|
||||
(*Handle)->DecrementInFlight();
|
||||
}
|
||||
} else {
|
||||
// Player is stopping/reconnecting — skip CUDA path entirely.
|
||||
// GetImage() already returned a cached BGR frame, which is safe.
|
||||
RTSP_DBG("[GetRTSPCVImage] SKIP CUDA — player not playing (reconnecting?)");
|
||||
}
|
||||
|
||||
return 1; // Success
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
|
||||
namespace ANSCENTER
|
||||
{
|
||||
@@ -37,7 +39,36 @@ namespace ANSCENTER
|
||||
int64_t _pts;
|
||||
bool _isPlaying;
|
||||
std::recursive_mutex _mutex;
|
||||
|
||||
// --- Per-client inference guard ---
|
||||
// Tracks how many GPU frames from this client are currently in-flight
|
||||
// (grabbed by GetRTSPCVImage but not yet released after inference).
|
||||
// Destroy() waits for this to reach 0 before freeing NVDEC surfaces,
|
||||
// preventing the use-after-free crash when LabVIEW stops a camera
|
||||
// while AI inference is still reading CUDA device pointers.
|
||||
std::atomic<int> _inFlightFrames{0};
|
||||
std::condition_variable_any _inFlightDone;
|
||||
public:
|
||||
// Counts one more frame from this client as in flight. Unconditional: it
// neither takes _mutex nor looks at _isPlaying.
void IncrementInFlight() {
    _inFlightFrames.fetch_add(1, std::memory_order_acq_rel);
}
|
||||
void DecrementInFlight() {
|
||||
if (_inFlightFrames.fetch_sub(1, std::memory_order_acq_rel) <= 1) {
|
||||
_inFlightDone.notify_all();
|
||||
}
|
||||
}
|
||||
// Atomically check _isPlaying AND increment _inFlightFrames under the
|
||||
// same mutex. Returns true if the caller may proceed to access CUDA
|
||||
// resources (GetCudaHWFrame + D2D copy). Returns false if the player
|
||||
// is stopping/reconnecting — caller must NOT touch CUDA resources.
|
||||
//
|
||||
// This closes the race window where Reconnect() sets _isPlaying=false
|
||||
// and calls close() while GetRTSPCVImage is between GetCudaHWFrame()
|
||||
// and the D2D copy in gpu_frame_attach_cuda().
|
||||
bool TryIncrementInFlight() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (!_isPlaying) return false;
|
||||
_inFlightFrames.fetch_add(1, std::memory_order_acq_rel);
|
||||
return true;
|
||||
}
|
||||
ANSRTSPClient();
|
||||
~ANSRTSPClient() noexcept;
|
||||
[[nodiscard]] bool Init(std::string licenseKey, std::string url);
|
||||
|
||||
@@ -48,13 +48,19 @@ namespace ANSCENTER {
|
||||
Destroy();
|
||||
}
|
||||
void ANSSRTClient::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient) clientToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_playerClient) {
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
}
|
||||
}
|
||||
_playerClient->close();
|
||||
clientToClose = std::move(_playerClient);
|
||||
}
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSSRTLicense(const std::string& licenseKey) {
|
||||
@@ -124,8 +130,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
bool ANSSRTClient::Reconnect() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false;
|
||||
}
|
||||
_playerClient->close();
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Setup();
|
||||
_isPlaying = _playerClient->play();
|
||||
return _isPlaying;
|
||||
@@ -155,10 +165,16 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSSRTClient::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_playerClient->stop();
|
||||
_isPlaying = false;
|
||||
decltype(_playerClient.get()) player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (_isPlaying) {
|
||||
_isPlaying = false;
|
||||
player = _playerClient.get();
|
||||
}
|
||||
}
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -40,33 +40,34 @@ namespace ANSCENTER {
|
||||
catch (...) {}
|
||||
}
|
||||
void ANSVIDEOPLAYER::Destroy() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
// --- HW decode cleanup ---
|
||||
if (_hwPlayer) {
|
||||
try {
|
||||
_hwPlayer->stop();
|
||||
_hwPlayer->close();
|
||||
} catch (...) {}
|
||||
_hwPlayer.reset(); // releases CFilePlayer + HWDecoderPool slot
|
||||
}
|
||||
_hwDecodeActive = false;
|
||||
_hwGpuIndex = -1;
|
||||
_hwCudaAccel = false;
|
||||
_hwEOF = false;
|
||||
_hwFrameCount = 0;
|
||||
// Move HW player out of lock scope — close() does CUDA cleanup
|
||||
// (cuArrayDestroy/cuMemFree) which must not run under _mutex
|
||||
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||
decltype(_hwPlayer) hwPlayerToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
if (_hwPlayer) {
|
||||
try { _hwPlayer->stop(); } catch (...) {}
|
||||
}
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
_hwDecodeActive = false;
|
||||
_hwGpuIndex = -1;
|
||||
_hwCudaAccel = false;
|
||||
_hwEOF = false;
|
||||
_hwFrameCount = 0;
|
||||
|
||||
// --- cv::VideoCapture cleanup ---
|
||||
_previousImage.release();
|
||||
_inferenceImage.release();
|
||||
_inferenceCloneCurr.release();
|
||||
_inferenceClonePrev.release();
|
||||
_lastJpegImage = "";
|
||||
_isPlaying = false;
|
||||
_resWidth = 0;
|
||||
_resHeight = 0;
|
||||
_currentFrame = 0;
|
||||
_previousPTS = 0;
|
||||
// --- cv::VideoCapture cleanup ---
|
||||
_previousImage.release();
|
||||
_inferenceImage.release();
|
||||
_inferenceCloneCurr.release();
|
||||
_inferenceClonePrev.release();
|
||||
_lastJpegImage = "";
|
||||
_isPlaying = false;
|
||||
_resWidth = 0;
|
||||
_resHeight = 0;
|
||||
_currentFrame = 0;
|
||||
_previousPTS = 0;
|
||||
if (cap.isOpened()) {
|
||||
cap.release();
|
||||
}
|
||||
@@ -77,6 +78,13 @@ namespace ANSCENTER {
|
||||
catch (...) {
|
||||
_logger.LogError("ANSVIDEOPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||
}
|
||||
} // end lock scope
|
||||
|
||||
// CUDA cleanup happens here, outside the mutex
|
||||
if (hwPlayerToClose) {
|
||||
try { hwPlayerToClose->close(); } catch (...) {}
|
||||
hwPlayerToClose.reset();
|
||||
}
|
||||
}
|
||||
|
||||
static void VerifyGlobalANSVPLicense(const std::string& licenseKey) {
|
||||
@@ -187,15 +195,25 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSVIDEOPLAYER::Reconnect() {
|
||||
// HW decoder close() does CUDA cleanup — run outside _mutex
|
||||
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||
decltype(_hwPlayer) hwPlayerToClose;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_isPlaying = false; // GetImage() returns cached frame while we reconnect
|
||||
if (_hwPlayer) {
|
||||
try { _hwPlayer->stop(); } catch (...) {}
|
||||
hwPlayerToClose = std::move(_hwPlayer);
|
||||
}
|
||||
}
|
||||
if (hwPlayerToClose) {
|
||||
try { hwPlayerToClose->close(); } catch (...) {}
|
||||
hwPlayerToClose.reset();
|
||||
}
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
_currentFrame = 0;
|
||||
|
||||
// --- HW decode: destroy and re-setup ---
|
||||
if (_hwPlayer) {
|
||||
try { _hwPlayer->stop(); _hwPlayer->close(); } catch (...) {}
|
||||
_hwPlayer.reset();
|
||||
}
|
||||
_hwDecodeActive = false;
|
||||
_hwGpuIndex = -1;
|
||||
_hwCudaAccel = false;
|
||||
@@ -266,41 +284,48 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
bool ANSVIDEOPLAYER::Stop() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
// --- HW decode path ---
|
||||
if (_hwDecodeActive && _hwPlayer) {
|
||||
_hwPlayer->stop();
|
||||
_isPlaying = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- cv::VideoCapture fallback ---
|
||||
if (cap.isOpened()) {
|
||||
try {
|
||||
double frame_pos = cap.get(cv::CAP_PROP_POS_FRAMES);
|
||||
if (frame_pos >= 0) {
|
||||
_currentFrame = static_cast<int64_t>(frame_pos);
|
||||
}
|
||||
else {
|
||||
_currentFrame = 0;
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", "Unable to retrieve current frame position", __FILE__, __LINE__);
|
||||
}
|
||||
decltype(_hwPlayer.get()) hwPlayer = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
try {
|
||||
// --- HW decode path ---
|
||||
if (_hwDecodeActive && _hwPlayer) {
|
||||
_isPlaying = false;
|
||||
hwPlayer = _hwPlayer.get();
|
||||
// stop() called outside the lock below; skip cap path
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||
_currentFrame = 0;
|
||||
else {
|
||||
// --- cv::VideoCapture fallback ---
|
||||
if (cap.isOpened()) {
|
||||
try {
|
||||
double frame_pos = cap.get(cv::CAP_PROP_POS_FRAMES);
|
||||
if (frame_pos >= 0) {
|
||||
_currentFrame = static_cast<int64_t>(frame_pos);
|
||||
}
|
||||
else {
|
||||
_currentFrame = 0;
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", "Unable to retrieve current frame position", __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||
_currentFrame = 0;
|
||||
}
|
||||
cap.release();
|
||||
}
|
||||
_isPlaying = false;
|
||||
return true;
|
||||
}
|
||||
cap.release();
|
||||
}
|
||||
_isPlaying = false;
|
||||
return true;
|
||||
catch (const std::exception& e) {
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
if (hwPlayer) {
|
||||
hwPlayer->stop();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
void ANSVIDEOPLAYER::SetBBox(cv::Rect bbox) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
|
||||
Reference in New Issue
Block a user