Disable NV12 path for ANSCV by default. Currently use cv::Mat** directly

2026-04-04 10:09:47 +11:00
parent 445abefebe
commit 3a21026790
19 changed files with 575 additions and 232 deletions
--- a/modules/ANSODEngine/NV12PreprocessHelper.cpp
+++ b/modules/ANSODEngine/NV12PreprocessHelper.cpp
@@ -269,6 +269,15 @@ namespace ANSCENTER {
            return result;
        }

+        // Ensure async D2D copy (NVDEC → pool buffer) has completed before
+        // reading yPlane/uvPlane.  The copy was queued in gpu_frame_attach_cuda()
+        // on a non-blocking stream.  By the time inference runs (~50-200ms later),
+        // the copy (~0.3ms) has long finished, so this sync returns immediately.
+        if (gpuData->d2dCopyStream) {
+            cudaStreamSynchronize(static_cast<cudaStream_t>(gpuData->d2dCopyStream));
+            gpuData->d2dCopyStream = nullptr;  // Only sync once per frame
+        }
+
        const bool isCudaDevice = gpuData->isCudaDevicePtr;
        const bool gpuMatch = !isCudaDevice ||
                              gpuData->gpuIndex < 0 ||
@@ -367,7 +376,6 @@ namespace ANSCENTER {
        cv::cuda::GpuMat gpuY, gpuUV;

        if (useZeroCopy) {
-            // CUDA zero-copy: wrap pool buffer device pointers directly
            gpuY  = cv::cuda::GpuMat(frameH,     frameW, CV_8UC1,
                        effYPlane,  static_cast<size_t>(effYLinesize));
            gpuUV = cv::cuda::GpuMat(frameH / 2, frameW, CV_8UC1,
@@ -641,6 +649,12 @@ namespace ANSCENTER {
            return result;
        }

+        // Ensure async D2D copy has completed before reading NV12 buffers
+        if (gpuData->d2dCopyStream) {
+            cudaStreamSynchronize(static_cast<cudaStream_t>(gpuData->d2dCopyStream));
+            gpuData->d2dCopyStream = nullptr;
+        }
+
        const bool isCudaDevice = gpuData->isCudaDevicePtr;
        const bool gpuMatch = !isCudaDevice ||
                              gpuData->gpuIndex < 0 ||
@@ -775,6 +789,12 @@ namespace ANSCENTER {
        if (!gpuData->isCudaDevicePtr || !gpuData->yPlane || !gpuData->uvPlane)
            return result;  // NV12 not on GPU

+        // Ensure async D2D copy has completed before reading NV12 buffers
+        if (gpuData->d2dCopyStream) {
+            cudaStreamSynchronize(static_cast<cudaStream_t>(gpuData->d2dCopyStream));
+            gpuData->d2dCopyStream = nullptr;
+        }
+
        const int frameW = gpuData->width;
        const int frameH = gpuData->height;

@@ -890,6 +910,12 @@ namespace ANSCENTER {
            return result;
        }

+        // Ensure async D2D copy has completed before reading NV12 buffers
+        if (gpuData->d2dCopyStream) {
+            cudaStreamSynchronize(static_cast<cudaStream_t>(gpuData->d2dCopyStream));
+            gpuData->d2dCopyStream = nullptr;
+        }
+
        const bool isCudaDevice = gpuData->isCudaDevicePtr;
        const bool gpuMatch = !isCudaDevice ||
                              gpuData->gpuIndex < 0 ||