Disable NV12 path for ANSCV by default. Currently use cv::Mat** directly
This commit is contained in:
@@ -23,6 +23,7 @@ GpuNV12SlotPool* GpuNV12SlotPool_GetInstance() {
|
||||
}
|
||||
|
||||
// Transition all COOLING slots past the cooldown threshold to FREE.
|
||||
// Collects pending AVFrames for the caller to av_frame_free.
|
||||
void GpuNV12SlotPool::drainCooledSlots_locked() {
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
auto threshold = std::chrono::milliseconds(SLOT_COOLDOWN_MS);
|
||||
@@ -67,7 +68,7 @@ GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Allocate CUDA buffers on the target GPU
|
||||
// Allocate CUDA buffers + stream + event on the target GPU
|
||||
int prevDev = -1;
|
||||
cudaGetDevice(&prevDev);
|
||||
if (gpuIdx >= 0) cudaSetDevice(gpuIdx);
|
||||
@@ -76,10 +77,7 @@ GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) {
|
||||
cudaError_t e1 = cudaMallocPitch(&slot->bufY, &slot->pitchY, w, h);
|
||||
cudaError_t e2 = cudaMallocPitch(&slot->bufUV, &slot->pitchUV, w, h / 2);
|
||||
|
||||
// Non-blocking stream avoids NULL-stream implicit sync with inference.
|
||||
// On WDDM, the NULL stream must wait for ALL other streams to finish
|
||||
// before executing — this caused 1-2 second stalls when inference
|
||||
// kernels were running. A non-blocking stream runs independently.
|
||||
// Non-blocking stream: avoids NULL-stream implicit sync with inference.
|
||||
cudaStream_t stream = nullptr;
|
||||
cudaError_t e3 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
|
||||
@@ -88,7 +86,6 @@ GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) {
|
||||
if (e1 != cudaSuccess || e2 != cudaSuccess) {
|
||||
NV12POOL_DBG("acquire: cudaMallocPitch FAILED %dx%d gpu=%d e1=%d e2=%d",
|
||||
w, h, gpuIdx, (int)e1, (int)e2);
|
||||
// Clean up partial allocation
|
||||
int prev2 = -1; cudaGetDevice(&prev2);
|
||||
if (gpuIdx >= 0) cudaSetDevice(gpuIdx);
|
||||
if (e1 == cudaSuccess && slot->bufY) cudaFree(slot->bufY);
|
||||
@@ -107,21 +104,18 @@ GpuNV12Slot* GpuNV12SlotPool::acquire(int gpuIdx, int w, int h) {
|
||||
GpuNV12Slot* raw = slot.get();
|
||||
m_slots.push_back(std::move(slot));
|
||||
|
||||
// Always log new slot allocation to DebugView (rare event — once per resolution per camera).
|
||||
// Always log new slot allocation to DebugView (rare event).
|
||||
{
|
||||
char _buf[256];
|
||||
snprintf(_buf, sizeof(_buf),
|
||||
"[NV12Pool] NEW slot #%zu: %dx%d gpu=%d Y=%p UV=%p pitchY=%zu stream=%p\n",
|
||||
m_slots.size(), w, h, gpuIdx, raw->bufY, raw->bufUV, raw->pitchY, raw->copyStream);
|
||||
m_slots.size(), w, h, gpuIdx, raw->bufY, raw->bufUV, raw->pitchY,
|
||||
raw->copyStream);
|
||||
#ifdef _WIN32
|
||||
OutputDebugStringA(_buf);
|
||||
#endif
|
||||
fprintf(stderr, "%s", _buf);
|
||||
}
|
||||
|
||||
// Also log POOL FULL to DebugView (important diagnostic).
|
||||
NV12POOL_DBG("acquire: NEW slot Y=%p UV=%p pitchY=%zu pitchUV=%zu %dx%d gpu=%d stream=%p (total=%zu)",
|
||||
raw->bufY, raw->bufUV, raw->pitchY, raw->pitchUV,
|
||||
w, h, gpuIdx, raw->copyStream, m_slots.size());
|
||||
return raw;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user