Fix NV12 crash when recreating the camera object

This commit is contained in:
2026-04-02 22:07:27 +11:00
parent 4bedf3a3a2
commit 958cab6ae3
25 changed files with 1459 additions and 393 deletions

View File

@@ -275,6 +275,26 @@ namespace ANSCENTER {
gpuData->gpuIndex == inferenceGpu;
const bool useZeroCopy = isCudaDevice && gpuMatch;
// --- Debug: log pointer state before reading ---
{
    // Debug trace: format the frame's pointer/flag state into a fixed-size
    // buffer and emit it before the plane pointers are used below.
    // snprintf bounds the write to sizeof(_nv12_dbg).
    char _nv12_dbg[512];
    snprintf(_nv12_dbg, sizeof(_nv12_dbg),
        "[NV12Helper] tryNV12: gpuData=%p yPlane=%p uvPlane=%p isCuda=%d "
        "gpuIdx=%d infGpu=%d gpuMatch=%d zeroCopy=%d "
        "gpuCacheY=%p gpuCacheUV=%p gpuCacheValid=%d refcount=%d %dx%d\n",
        (void*)gpuData, (void*)gpuData->yPlane, (void*)gpuData->uvPlane,
        (int)isCudaDevice, gpuData->gpuIndex, inferenceGpu,
        (int)gpuMatch, (int)useZeroCopy,
        // %p requires a void* argument; cast like the other pointer fields.
        (void*)gpuData->gpuCacheY, (void*)gpuData->gpuCacheUV,
        (int)gpuData->gpuCacheValid,
        // Cast so the argument matches %d whatever integer type the counter uses.
        (int)gpuData->refcount.load(),
        frameW, frameH);
#ifdef _WIN32
    // Mirror the message to the Windows debugger output.
    OutputDebugStringA(_nv12_dbg);
#endif
    fprintf(stderr, "%s", _nv12_dbg);
}
// Effective plane pointers — for zero-copy, use CUDA device ptrs;
// for CPU upload, use the CPU snapshot buffers.
uint8_t* effYPlane;
@@ -283,7 +303,7 @@ namespace ANSCENTER {
int effUvLinesize;
if (useZeroCopy) {
// Same GPU: wrap NVDEC device pointers directly
// Same GPU: wrap owned CUDA device pointers directly
effYPlane = gpuData->yPlane;
effUvPlane = gpuData->uvPlane;
effYLinesize = gpuData->yLinesize;
@@ -435,6 +455,18 @@ namespace ANSCENTER {
gpuResized.create(inputH, inputW, CV_8UC3);
cudaStream_t rawStream = cv::cuda::StreamAccessor::getStream(stream);
{
    // Debug trace emitted right before the launcher(...) call that follows:
    // records the data pointer and cols x rows of both source planes, the
    // target input size, and whether the zero-copy path is active.
    char launchMsg[256];
    const int zeroCopyFlag = (int)useZeroCopy;
    snprintf(launchMsg, sizeof(launchMsg),
        "[NV12Helper] KERNEL LAUNCH: gpuY=%p(%dx%d) gpuUV=%p(%dx%d) -> %dx%d zeroCopy=%d\n",
        (void*)gpuY.data, gpuY.cols, gpuY.rows,
        (void*)gpuUV.data, gpuUV.cols, gpuUV.rows,
        inputW, inputH, zeroCopyFlag);
#ifdef _WIN32
    // Also route the line to the Windows debugger output.
    OutputDebugStringA(launchMsg);
#endif
    fputs(launchMsg, stderr);
}
launcher(gpuY, gpuUV, gpuResized, frameW, frameH, inputW, inputH, rawStream);
stream.waitForCompletion();
@@ -945,7 +977,15 @@ namespace ANSCENTER {
inputW, inputH, frameW, frameH, stream);
}
cudaStreamSynchronize(stream);
// Use polling sync instead of cudaStreamSynchronize to avoid
// holding nvcuda64 SRW lock continuously (WDDM deadlock prevention).
{
    // Wait for all work queued on `stream` by polling cudaStreamQuery
    // instead of blocking inside the driver.
    cudaError_t err = cudaStreamQuery(stream);
    while (err == cudaErrorNotReady) {
        // Yield between polls so the wait does not monopolise the core.
        // NOTE(review): Sleep is a Win32 call and is not guarded by _WIN32
        // the way OutputDebugStringA is in this file — confirm this path is
        // only built for Windows.
        Sleep(0);
        err = cudaStreamQuery(stream);
    }
    // The loop exits on any status other than cudaErrorNotReady, so a real
    // failure ends the wait too. Report it rather than dropping it silently;
    // control flow is unchanged.
    if (err != cudaSuccess) {
        char _nv12_err[256];
        snprintf(_nv12_err, sizeof(_nv12_err),
            "[NV12Helper] stream sync failed: %s (%d)\n",
            cudaGetErrorString(err), (int)err);
#ifdef _WIN32
        OutputDebugStringA(_nv12_err);
#endif
        fprintf(stderr, "%s", _nv12_err);
    }
}
// (No registry lock to release — data kept alive by refcount)