Disable NV12 path for ANSCV by default. Currently use cv::Mat** directly

This commit is contained in:
2026-04-04 10:09:47 +11:00
parent 445abefebe
commit 3a21026790
19 changed files with 575 additions and 232 deletions

View File

@@ -132,6 +132,13 @@ struct GpuFrameData {
// freed while any consumer is still reading it.
GpuNV12Slot* poolSlot = nullptr;
// --- Async D2D copy stream ---
// The CUDA stream used for the async D2D copy from NVDEC surface to pool buffer.
// Inference MUST call cudaStreamSynchronize on this before reading yPlane/uvPlane
// to ensure the copy has completed. Stored as void* to avoid cuda_runtime.h here.
// nullptr means D2D was synchronous (legacy path) or no D2D copy was done.
void* d2dCopyStream = nullptr;
// Default constructor
GpuFrameData() = default;
@@ -151,6 +158,7 @@ struct GpuFrameData {
, refcount(o.refcount.load()), createdAt(o.createdAt)
, ownerClient(o.ownerClient), onReleaseFn(o.onReleaseFn)
, poolSlot(o.poolSlot)
, d2dCopyStream(o.d2dCopyStream)
{
// Null out source to prevent double-free of owned pointers
o.cpuYPlane = nullptr;
@@ -165,6 +173,7 @@ struct GpuFrameData {
o.ownerClient = nullptr;
o.onReleaseFn = nullptr;
o.poolSlot = nullptr;
o.d2dCopyStream = nullptr;
}
// No copy
@@ -360,6 +369,12 @@ public:
return result;
}
// Queue an AVFrame* (passed as void*) onto the pending-free list so it can be
// freed later rather than immediately. Null pointers are ignored.
// The caller MUST already hold the lock obtained via acquire_lock().
void pushPendingFree_locked(void* ptr) {
    if (ptr == nullptr) {
        return;  // nothing to defer
    }
    m_pendingFree.push_back(ptr);
}
// --- Drain pending GPU device pointers for caller to cudaFree ---
// Each entry includes the device index for cudaSetDevice before cudaFree.
// If minAgeMs > 0, only drain entries older than minAgeMs milliseconds.

View File

@@ -97,7 +97,8 @@ struct GpuNV12Slot {
// first — causing 1-2 second stalls. Using a dedicated non-blocking
// stream avoids this implicit sync entirely.
// Stored as void* to avoid cuda_runtime.h in the header.
void* copyStream = nullptr; // cudaStream_t
void* copyStream = nullptr; // cudaStream_t
};
class GpuNV12SlotPool {
@@ -119,6 +120,7 @@ public:
// Returns nullptr if pool full — caller falls back to CPU path.
GpuNV12Slot* acquire(int gpuIdx, int w, int h);
// Deferred release: moves slot from ACTIVE → COOLING.
// Called from freeOwnedBuffers_locked() when GpuFrameData refcount → 0.
// The slot becomes FREE after SLOT_COOLDOWN_MS elapses (checked in acquire).