Fix NV12 crash when recreating the camera object
This commit is contained in:
@@ -12,7 +12,29 @@
|
|||||||
"Bash(grep -n \"struct Object\\\\|class Object\" /c/Projects/CLionProjects/ANSCORE/modules/ANSLPR/*.h /c/Projects/CLionProjects/ANSCORE/modules/ANSLPR/include/*.h)",
|
"Bash(grep -n \"struct Object\\\\|class Object\" /c/Projects/CLionProjects/ANSCORE/modules/ANSLPR/*.h /c/Projects/CLionProjects/ANSCORE/modules/ANSLPR/include/*.h)",
|
||||||
"Bash(grep -n \"cudaStream\\\\|cudaMalloc\\\\|cudaFree\\\\|queue\\\\|Task\\\\|mutex\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)",
|
"Bash(grep -n \"cudaStream\\\\|cudaMalloc\\\\|cudaFree\\\\|queue\\\\|Task\\\\|mutex\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)",
|
||||||
"Bash(grep -n \"~Engine\\\\|destructor\\\\|cleanup\\\\|~\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)",
|
"Bash(grep -n \"~Engine\\\\|destructor\\\\|cleanup\\\\|~\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)",
|
||||||
"Bash(grep -n \"for.*cudaFree\\\\|m_buffers\\\\[\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)"
|
"Bash(grep -n \"for.*cudaFree\\\\|m_buffers\\\\[\" /c/Projects/CLionProjects/ANSCORE/engines/TensorRTAPI/include/engine/*.inl)",
|
||||||
|
"Bash(find /c/Projects/CLionProjects/ANSCORE -name ANSGpuFrameRegistry* -type f)",
|
||||||
|
"Bash(ls -la /c/Projects/CLionProjects/ANSCORE/modules/ANSLPR/*.h)",
|
||||||
|
"Bash(\"C:\\\\Users\\\\nghia\\\\AppData\\\\Local\\\\Programs\\\\CLion 2026.1\\\\bin\\\\cmake\\\\win\\\\x64\\\\bin\\\\cmake.exe\" --build cmake-build-release --target all -j 30)",
|
||||||
|
"Bash(cmake --build build --target ANSLPR-UnitTest --config Release)",
|
||||||
|
"Bash(ls -d C:/Projects/CLionProjects/ANSCORE/cmake-build-*)",
|
||||||
|
"Bash(ls -d C:/Projects/CLionProjects/ANSCORE/out/*)",
|
||||||
|
"Bash(cmake --build C:/Projects/CLionProjects/ANSCORE/cmake-build-release --target ANSLPR-UnitTest --config Release)",
|
||||||
|
"Bash(cmake --build C:/Projects/CLionProjects/ANSCORE/cmake-build-release --target ANSLPR-UnitTest)",
|
||||||
|
"Bash('C:/Program Files/Microsoft Visual Studio/2022/Community/Common7/Tools/VsDevCmd.bat' -arch=amd64)",
|
||||||
|
"Bash(cmake -B C:/Projects/CLionProjects/ANSCORE/cmake-build-release -S C:/Projects/CLionProjects/ANSCORE -G Ninja -DCMAKE_BUILD_TYPE=Release)",
|
||||||
|
"Bash(cmd //C \"call \"\"C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\VC\\\\Auxiliary\\\\Build\\\\vcvarsall.bat\"\" amd64 >nul 2>&1 && cmake --build C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release --target ANSLPR-UnitTest\")",
|
||||||
|
"Bash(1 EOF cmd /C C:tmpbuild.bat)",
|
||||||
|
"Read(//tmp/**)",
|
||||||
|
"Bash(\"C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Auxiliary/Build/vcvarsall.bat\" amd64)",
|
||||||
|
"Bash(export INCLUDE=\"C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/ucrt;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/um;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/shared;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/winrt;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/cppwinrt\" export LIB=\"C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/lib/x64;C:/Program Files \\(x86\\)/Windows Kits/10/Lib/10.0.26100.0/ucrt/x64;C:/Program Files \\(x86\\)/Windows Kits/10/Lib/10.0.26100.0/um/x64\" cmake --build \"C:/Projects/CLionProjects/ANSCORE/cmake-build-release\" --target ANSLPR-UnitTest)",
|
||||||
|
"Bash(tasklist)",
|
||||||
|
"Bash(taskkill /F /IM ANSLPR-UnitTest.exe)",
|
||||||
|
"Bash(export \"INCLUDE=C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/ucrt;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/um;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/shared;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/winrt;C:/Program Files \\(x86\\)/Windows Kits/10/Include/10.0.26100.0/cppwinrt\")",
|
||||||
|
"Bash(export \"LIB=C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/lib/x64;C:/Program Files \\(x86\\)/Windows Kits/10/Lib/10.0.26100.0/ucrt/x64;C:/Program Files \\(x86\\)/Windows Kits/10/Lib/10.0.26100.0/um/x64\")",
|
||||||
|
"Bash(grep -E \"\\\\.\\(cpp|h|hpp\\)$\")",
|
||||||
|
"Bash(find /c/Projects/CLionProjects/ANSCORE -name *Logger* -type f)",
|
||||||
|
"Bash(find /c/Projects/CLionProjects/ANSCORE -name *SPDLogger* -o -name *ANSLogger*)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -575,8 +575,13 @@ void CVideoDecoder::Start() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void CVideoDecoder::Stop() {
|
void CVideoDecoder::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// Atomically signal the decoder to stop WITHOUT acquiring _mutex.
|
||||||
m_bRunning = FALSE;
|
// decode() holds _mutex while inside avcodec_send_packet / CUDA calls
|
||||||
|
// that can block on the nvcuda64 SRW lock for a long time.
|
||||||
|
// If we waited for _mutex here, Stop() would deadlock whenever a
|
||||||
|
// concurrent decode() is stuck waiting for a CUDA operation held by
|
||||||
|
// an inference thread.
|
||||||
|
m_bRunning.store(FALSE, std::memory_order_release);
|
||||||
log_print(HT_LOG_INFO, "%s, Video decoder stopped\r\n", __FUNCTION__);
|
log_print(HT_LOG_INFO, "%s, Video decoder stopped\r\n", __FUNCTION__);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include "sys_inc.h"
|
#include "sys_inc.h"
|
||||||
#include "media_format.h"
|
#include "media_format.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <atomic>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
extern "C"
|
extern "C"
|
||||||
@@ -152,7 +153,7 @@ private:
|
|||||||
int hwDecoderInit(AVCodecContext* ctx, int hwMode, int preferredGpu = -1);
|
int hwDecoderInit(AVCodecContext* ctx, int hwMode, int preferredGpu = -1);
|
||||||
private:
|
private:
|
||||||
BOOL m_bInited;
|
BOOL m_bInited;
|
||||||
BOOL m_bRunning;
|
std::atomic<BOOL> m_bRunning;
|
||||||
BOOL m_bHardwareDecoderEnabled; // Track if hardware decoder is enabled
|
BOOL m_bHardwareDecoderEnabled; // Track if hardware decoder is enabled
|
||||||
bool m_bCudaHWAccel; // true when using AV_HWDEVICE_TYPE_CUDA
|
bool m_bCudaHWAccel; // true when using AV_HWDEVICE_TYPE_CUDA
|
||||||
int m_hwGpuIndex; // GPU index assigned by HWDecoderPool (-1 = legacy)
|
int m_hwGpuIndex; // GPU index assigned by HWDecoderPool (-1 = legacy)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <semaphore>
|
||||||
#include "TRTCompat.h"
|
#include "TRTCompat.h"
|
||||||
|
|
||||||
// Per-device mutex for CUDA graph capture.
|
// Per-device mutex for CUDA graph capture.
|
||||||
@@ -15,6 +16,95 @@ static std::mutex& graphCaptureMutex() {
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// GPU INFERENCE THROTTLE
|
||||||
|
// ============================================================================
|
||||||
|
// Global counting semaphore that limits how many Engine instances can execute
|
||||||
|
// CUDA inference simultaneously. Without this, N separate Engine instances
|
||||||
|
// (one per camera) all submit GPU work at once, causing:
|
||||||
|
// 1. SM 100% saturation → each inference takes 5-10x longer
|
||||||
|
// 2. GPU thermal throttling at 85°C → further slowdown
|
||||||
|
// 3. cudaStreamSynchronize blocking indefinitely → system freeze
|
||||||
|
//
|
||||||
|
// Auto-computed from GPU VRAM:
|
||||||
|
// ≤ 4 GB → 2 concurrent 8 GB → 4 concurrent
|
||||||
|
// 6 GB → 3 concurrent 12+ GB → 6 concurrent
|
||||||
|
// Multi-GPU: sum across all GPUs
|
||||||
|
//
|
||||||
|
// Excess threads wait on CPU (nearly zero cost) while the bounded set
|
||||||
|
// runs efficiently on the GPU without thermal throttling.
|
||||||
|
/// Returns the process-wide semaphore that bounds concurrent GPU inferences.
///
/// The slot count is computed exactly once, on the first call, by probing the
/// total VRAM of every visible CUDA device (~1 slot per 2 GB, clamped to
/// [2, 6] per GPU and [2, 64] overall). If no device can be enumerated the
/// count falls back to 4. The chosen value is printed to stdout once.
///
/// The probe has to switch the calling thread's current CUDA device; the
/// previously selected device is restored before returning so that the first
/// caller (normally an inference thread) keeps running on its own GPU.
static std::counting_semaphore<64>& gpuInferenceSemaphore() {
    static const int maxConcurrent = []() {
        constexpr unsigned long long kGiB = 1024ULL * 1024ULL * 1024ULL;

        int gpuCount = 0;
        if (cudaGetDeviceCount(&gpuCount) != cudaSuccess || gpuCount <= 0) {
            return 4; // fallback
        }

        // Remember which device the caller had selected: the loop below
        // changes it, and leaving it on the last GPU would silently redirect
        // the caller's subsequent CUDA work.
        int previousDevice = 0;
        const bool havePreviousDevice = (cudaGetDevice(&previousDevice) == cudaSuccess);

        int totalSlots = 0;
        for (int i = 0; i < gpuCount; ++i) {
            size_t freeMem = 0, totalMem = 0;
            if (cudaSetDevice(i) != cudaSuccess ||
                cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess) {
                // Unknown capacity: treat as 0 so this GPU gets the minimum.
                totalMem = 0;
            }

            // Round to the nearest GiB. Drivers usually report slightly less
            // than the nominal size (e.g. 8191 MiB on an "8 GB" card); plain
            // truncation would count that as 7 GB and hand out 3 slots
            // instead of the intended 4.
            const int gbTotal = static_cast<int>((totalMem + kGiB / 2) / kGiB);

            // Scale concurrency with VRAM: ~1 slot per 2 GB, min 2, max 6 per GPU
            totalSlots += std::clamp(gbTotal / 2, 2, 6);
        }

        if (havePreviousDevice) {
            cudaSetDevice(previousDevice);
        }

        totalSlots = std::clamp(totalSlots, 2, 64);
        std::cout << "Info [GPU Throttle]: max concurrent inferences = "
                  << totalSlots << " (across " << gpuCount << " GPU(s))" << std::endl;
        return totalSlots;
    }();

    static std::counting_semaphore<64> sem(maxConcurrent);
    return sem;
}
|
||||||
|
|
||||||
|
// RAII guard for the GPU inference semaphore
|
||||||
|
struct GpuInferenceGuard {
|
||||||
|
GpuInferenceGuard() { gpuInferenceSemaphore().acquire(); }
|
||||||
|
~GpuInferenceGuard() { gpuInferenceSemaphore().release(); }
|
||||||
|
GpuInferenceGuard(const GpuInferenceGuard&) = delete;
|
||||||
|
GpuInferenceGuard& operator=(const GpuInferenceGuard&) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// WDDM-SAFE STREAM SYNCHRONIZATION
|
||||||
|
// ============================================================================
|
||||||
|
// Under Windows WDDM, cudaStreamSynchronize calls cuStreamQuery in a tight
|
||||||
|
// loop with SwitchToThread, holding nvcuda64's internal SRW lock the entire
|
||||||
|
// time. When the GPU is busy with inference, this spin blocks ALL other CUDA
|
||||||
|
// operations — including HW video decode (nvcuvid), cuMemAlloc, cuArrayDestroy.
|
||||||
|
// If a camera Reconnect or decode buffer allocation needs an exclusive SRW lock
|
||||||
|
// while inference is spinning, the entire system deadlocks.
|
||||||
|
//
|
||||||
|
// This function replaces cudaStreamSynchronize with a polling loop that
|
||||||
|
// explicitly releases the SRW lock between queries by sleeping briefly.
|
||||||
|
// This allows other CUDA operations to interleave with the sync wait.
|
||||||
|
/// Polling replacement for cudaStreamSynchronize.
///
/// Queries the stream with cudaStreamQuery and sleeps between queries instead
/// of blocking inside a single driver call, so that other CUDA operations get
/// a chance to run between polls.
///
/// @param stream  Stream to wait on.
/// @return The first cudaStreamQuery result other than cudaErrorNotReady
///         (cudaSuccess once the stream is idle, or the error reported).
///         Like cudaStreamSynchronize, this waits without a timeout.
static inline cudaError_t cudaStreamSynchronize_Safe(cudaStream_t stream) {
    // The first polls only yield (Sleep(0)) so sub-millisecond kernels finish
    // without paying for a timed sleep. The budget is deliberately small:
    // a long run of Sleep(0) polls previously starved concurrent cleanup.
    int yieldPollsLeft = 10;

    // Fast path: a stream that is already idle returns on this first query.
    cudaError_t status = cudaStreamQuery(stream);
    while (status == cudaErrorNotReady) {
        if (yieldPollsLeft > 0) {
            --yieldPollsLeft;
            Sleep(0);
        } else {
            // Timed sleep gives cleanup calls (cuArrayDestroy, cuMemFree) a
            // window between polls.
            // NOTE(review): the actual duration of Sleep(1) depends on the
            // system timer resolution — confirm it is acceptable per frame.
            Sleep(1);
        }
        status = cudaStreamQuery(stream);
    }
    return status;
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void Engine<T>::warmUp(int iterations) {
|
void Engine<T>::warmUp(int iterations) {
|
||||||
if (m_verbose) {
|
if (m_verbose) {
|
||||||
@@ -163,6 +253,16 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
|
|||||||
return runInferenceFromPool(inputs, featureVectors);
|
return runInferenceFromPool(inputs, featureVectors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// GPU INFERENCE THROTTLE
|
||||||
|
// ============================================================================
|
||||||
|
// Limit how many Engine instances can run CUDA inference simultaneously.
|
||||||
|
// Without this, 12 cameras each with their own Engine all submit GPU work
|
||||||
|
// at once → SM 100% → thermal throttle → cudaStreamSynchronize hangs.
|
||||||
|
// The semaphore lets excess threads wait on CPU (nearly zero cost) while
|
||||||
|
// a bounded number use the GPU efficiently.
|
||||||
|
GpuInferenceGuard gpuThrottle;
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// SINGLE-ENGINE SERIALISATION
|
// SINGLE-ENGINE SERIALISATION
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -376,7 +476,7 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
|
|||||||
std::cout << "Error: Failed to set optimization profile 0" << std::endl;
|
std::cout << "Error: Failed to set optimization profile 0" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
cudaError_t syncErr = cudaStreamSynchronize(m_inferenceStream);
|
cudaError_t syncErr = cudaStreamSynchronize_Safe(m_inferenceStream);
|
||||||
if (syncErr != cudaSuccess) {
|
if (syncErr != cudaSuccess) {
|
||||||
std::cout << "Error: Failed to sync after profile change: "
|
std::cout << "Error: Failed to sync after profile change: "
|
||||||
<< cudaGetErrorString(syncErr) << std::endl;
|
<< cudaGetErrorString(syncErr) << std::endl;
|
||||||
@@ -642,7 +742,7 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
|
|||||||
if (graphExec) {
|
if (graphExec) {
|
||||||
// Launch the pre-captured graph (single API call replaces many).
|
// Launch the pre-captured graph (single API call replaces many).
|
||||||
cudaGraphLaunch(graphExec, m_inferenceStream);
|
cudaGraphLaunch(graphExec, m_inferenceStream);
|
||||||
cudaStreamSynchronize(m_inferenceStream);
|
cudaStreamSynchronize_Safe(m_inferenceStream);
|
||||||
|
|
||||||
// CPU memcpy: pinned buffers -> featureVectors (interleaved by batch).
|
// CPU memcpy: pinned buffers -> featureVectors (interleaved by batch).
|
||||||
for (int batch = 0; batch < batchSize; ++batch) {
|
for (int batch = 0; batch < batchSize; ++batch) {
|
||||||
@@ -705,7 +805,7 @@ bool Engine<T>::runInference(const std::vector<std::vector<cv::cuda::GpuMat>>& i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t syncErr = cudaStreamSynchronize(m_inferenceStream);
|
cudaError_t syncErr = cudaStreamSynchronize_Safe(m_inferenceStream);
|
||||||
if (syncErr != cudaSuccess) {
|
if (syncErr != cudaSuccess) {
|
||||||
std::string errMsg = "[Engine] runInference FAIL: cudaStreamSynchronize: "
|
std::string errMsg = "[Engine] runInference FAIL: cudaStreamSynchronize: "
|
||||||
+ std::string(cudaGetErrorString(syncErr));
|
+ std::string(cudaGetErrorString(syncErr));
|
||||||
|
|||||||
@@ -34,15 +34,40 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <opencv2/core/mat.hpp>
|
#include <opencv2/core/mat.hpp>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// REG_DBG(fmt, ...): printf-style trace helper for registry operations.
// Every message is prefixed with "[Registry] " and terminated with a newline.
//   - Windows: formatted into a 512-byte stack buffer (longer messages are
//     truncated by snprintf), then sent to OutputDebugStringA and stderr.
//   - Elsewhere: written straight to stderr.
// A definition supplied before this header is included takes precedence.
#ifndef REG_DBG
#  if !defined(_WIN32)
#    define REG_DBG(fmt, ...) fprintf(stderr, "[Registry] " fmt "\n", ##__VA_ARGS__)
#  else
#    define REG_DBG(fmt, ...)                                                   \
        do {                                                                    \
            char reg_dbg_line_[512];                                            \
            snprintf(reg_dbg_line_, sizeof(reg_dbg_line_),                      \
                     "[Registry] " fmt "\n", ##__VA_ARGS__);                    \
            OutputDebugStringA(reg_dbg_line_);                                  \
            fprintf(stderr, "%s", reg_dbg_line_);                               \
        } while (0)
#  endif
#endif
|
||||||
|
|
||||||
// Safety constants
|
// Safety constants
|
||||||
static constexpr int MAX_FRAME_REFCOUNT = 64;
|
static constexpr int MAX_FRAME_REFCOUNT = 64;
|
||||||
static constexpr int FRAME_TTL_SECONDS = 3;
|
static constexpr int FRAME_TTL_SECONDS = 3;
|
||||||
static constexpr size_t GPU_CACHE_BUDGET_DEFAULT = 1ULL * 1024 * 1024 * 1024; // 1GB
|
static constexpr size_t GPU_CACHE_BUDGET_DEFAULT = 1ULL * 1024 * 1024 * 1024; // 1GB
|
||||||
static constexpr int EVICT_CHECK_INTERVAL_MS = 500;
|
static constexpr int EVICT_CHECK_INTERVAL_MS = 500;
|
||||||
|
|
||||||
|
// One deferred GPU deallocation request.
// Carries the device index alongside the pointer so the code that finally
// frees it can select the right device first (cudaSetDevice before cudaFree).
struct GpuPendingFreeEntry {
    void* ptr{nullptr};  // device pointer awaiting release
    int deviceIdx{-1};   // index of the GPU that owns ptr; -1 when not set
};
|
||||||
|
|
||||||
struct GpuFrameData {
|
struct GpuFrameData {
|
||||||
// --- CPU NV12 snapshot (OWNED malloc'd buffers, independent of decoder) ---
|
// --- CPU NV12 snapshot (OWNED malloc'd buffers, independent of decoder) ---
|
||||||
uint8_t* cpuYPlane = nullptr; // malloc'd Y plane copy
|
uint8_t* cpuYPlane = nullptr; // malloc'd Y plane copy
|
||||||
@@ -83,6 +108,14 @@ struct GpuFrameData {
|
|||||||
std::atomic<int> refcount{1};
|
std::atomic<int> refcount{1};
|
||||||
std::chrono::steady_clock::time_point createdAt;
|
std::chrono::steady_clock::time_point createdAt;
|
||||||
|
|
||||||
|
// --- Owner callback (for per-client inference guard) ---
|
||||||
|
// When the last reference to this frame drops, onReleaseFn is called
|
||||||
|
// with ownerClient to decrement the RTSP client's in-flight counter.
|
||||||
|
// This lets Destroy() wait for in-flight inference to finish before
|
||||||
|
// freeing NVDEC surfaces (fixes LabVIEW crash).
|
||||||
|
void* ownerClient = nullptr;
|
||||||
|
void (*onReleaseFn)(void*) = nullptr;
|
||||||
|
|
||||||
// Default constructor
|
// Default constructor
|
||||||
GpuFrameData() = default;
|
GpuFrameData() = default;
|
||||||
|
|
||||||
@@ -100,6 +133,7 @@ struct GpuFrameData {
|
|||||||
, yPlane(o.yPlane), uvPlane(o.uvPlane)
|
, yPlane(o.yPlane), uvPlane(o.uvPlane)
|
||||||
, yLinesize(o.yLinesize), uvLinesize(o.uvLinesize)
|
, yLinesize(o.yLinesize), uvLinesize(o.uvLinesize)
|
||||||
, refcount(o.refcount.load()), createdAt(o.createdAt)
|
, refcount(o.refcount.load()), createdAt(o.createdAt)
|
||||||
|
, ownerClient(o.ownerClient), onReleaseFn(o.onReleaseFn)
|
||||||
{
|
{
|
||||||
// Null out source to prevent double-free of owned pointers
|
// Null out source to prevent double-free of owned pointers
|
||||||
o.cpuYPlane = nullptr;
|
o.cpuYPlane = nullptr;
|
||||||
@@ -111,6 +145,8 @@ struct GpuFrameData {
|
|||||||
o.yPlane = nullptr;
|
o.yPlane = nullptr;
|
||||||
o.uvPlane = nullptr;
|
o.uvPlane = nullptr;
|
||||||
o.gpuCacheBytes = 0;
|
o.gpuCacheBytes = 0;
|
||||||
|
o.ownerClient = nullptr;
|
||||||
|
o.onReleaseFn = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// No copy
|
// No copy
|
||||||
@@ -140,32 +176,50 @@ public:
|
|||||||
if (!mat) return nullptr;
|
if (!mat) return nullptr;
|
||||||
void* oldAvframe = nullptr;
|
void* oldAvframe = nullptr;
|
||||||
|
|
||||||
|
// Capture old frame's owner callback to invoke OUTSIDE m_mutex
|
||||||
|
void* oldOwner = nullptr;
|
||||||
|
void (*oldReleaseFn)(void*) = nullptr;
|
||||||
|
|
||||||
data.createdAt = std::chrono::steady_clock::now();
|
data.createdAt = std::chrono::steady_clock::now();
|
||||||
data.refcount.store(1);
|
data.refcount.store(1);
|
||||||
|
|
||||||
auto* heapData = new GpuFrameData(std::move(data));
|
auto* heapData = new GpuFrameData(std::move(data));
|
||||||
|
REG_DBG("attach mat=%p new frame=%p yPlane=%p gpuCacheY=%p isCuda=%d %dx%d",
|
||||||
|
(void*)mat, (void*)heapData,
|
||||||
|
(void*)heapData->yPlane, heapData->gpuCacheY,
|
||||||
|
(int)heapData->isCudaDevicePtr,
|
||||||
|
heapData->width, heapData->height);
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
// If this Mat* already has an entry, release the old one
|
// If this Mat* already has an entry, release the old one
|
||||||
auto it = m_map.find(mat);
|
auto it = m_map.find(mat);
|
||||||
if (it != m_map.end()) {
|
if (it != m_map.end()) {
|
||||||
auto* oldFrame = it->second;
|
auto* oldFrame = it->second;
|
||||||
int oldRef = oldFrame->refcount.fetch_sub(1);
|
int oldRef = oldFrame->refcount.fetch_sub(1);
|
||||||
if (oldRef <= 1) {
|
if (oldRef <= 1) {
|
||||||
oldAvframe = oldFrame->avframe;
|
oldOwner = oldFrame->ownerClient;
|
||||||
if (oldFrame->cpuAvframe)
|
oldReleaseFn = oldFrame->onReleaseFn;
|
||||||
m_pendingFree.push_back(oldFrame->cpuAvframe);
|
oldAvframe = oldFrame->avframe;
|
||||||
freeOwnedBuffers_locked(oldFrame);
|
if (oldFrame->cpuAvframe)
|
||||||
m_frameSet.erase(oldFrame);
|
m_pendingFree.push_back(oldFrame->cpuAvframe);
|
||||||
delete oldFrame;
|
freeOwnedBuffers_locked(oldFrame);
|
||||||
|
m_frameSet.erase(oldFrame);
|
||||||
|
delete oldFrame;
|
||||||
|
}
|
||||||
|
// If oldRef > 1, other clones still reference it — just unlink this Mat*
|
||||||
|
m_map.erase(it);
|
||||||
}
|
}
|
||||||
// If oldRef > 1, other clones still reference it — just unlink this Mat*
|
|
||||||
m_map.erase(it);
|
m_map[mat] = heapData;
|
||||||
|
m_frameSet.insert(heapData);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_map[mat] = heapData;
|
// Notify old frame's owner OUTSIDE m_mutex
|
||||||
m_frameSet.insert(heapData);
|
if (oldReleaseFn && oldOwner) {
|
||||||
|
oldReleaseFn(oldOwner);
|
||||||
|
}
|
||||||
|
|
||||||
return oldAvframe; // Caller must av_frame_free if non-null
|
return oldAvframe; // Caller must av_frame_free if non-null
|
||||||
}
|
}
|
||||||
@@ -197,24 +251,46 @@ public:
|
|||||||
void release(cv::Mat* mat) {
|
void release(cv::Mat* mat) {
|
||||||
if (!mat) return;
|
if (!mat) return;
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
// Capture owner callback to invoke OUTSIDE m_mutex (deadlock safety)
|
||||||
|
void* owner = nullptr;
|
||||||
|
void (*releaseFn)(void*) = nullptr;
|
||||||
|
|
||||||
auto it = m_map.find(mat);
|
{
|
||||||
if (it == m_map.end()) return;
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
auto* frame = it->second;
|
auto it = m_map.find(mat);
|
||||||
m_map.erase(it);
|
if (it == m_map.end()) return;
|
||||||
|
|
||||||
int oldRef = frame->refcount.fetch_sub(1);
|
auto* frame = it->second;
|
||||||
if (oldRef <= 1) {
|
m_map.erase(it);
|
||||||
// Last reference — free everything
|
|
||||||
if (frame->avframe)
|
int oldRef = frame->refcount.fetch_sub(1);
|
||||||
m_pendingFree.push_back(frame->avframe);
|
REG_DBG("release mat=%p refcount %d->%d yPlane=%p gpuCacheY=%p owner=%p",
|
||||||
if (frame->cpuAvframe)
|
(void*)mat, oldRef, oldRef - 1,
|
||||||
m_pendingFree.push_back(frame->cpuAvframe);
|
(void*)frame->yPlane, frame->gpuCacheY, frame->ownerClient);
|
||||||
freeOwnedBuffers_locked(frame);
|
if (oldRef <= 1) {
|
||||||
m_frameSet.erase(frame);
|
// Capture owner callback before deleting frame
|
||||||
delete frame;
|
owner = frame->ownerClient;
|
||||||
|
releaseFn = frame->onReleaseFn;
|
||||||
|
REG_DBG("LAST REF — freeing frame=%p cpuY=%p gpuCacheY=%p gpuCacheUV=%p bytes=%zu",
|
||||||
|
(void*)frame, (void*)frame->cpuYPlane,
|
||||||
|
frame->gpuCacheY, frame->gpuCacheUV, frame->gpuCacheBytes);
|
||||||
|
// Last reference — free everything
|
||||||
|
if (frame->avframe)
|
||||||
|
m_pendingFree.push_back(frame->avframe);
|
||||||
|
if (frame->cpuAvframe)
|
||||||
|
m_pendingFree.push_back(frame->cpuAvframe);
|
||||||
|
freeOwnedBuffers_locked(frame);
|
||||||
|
m_frameSet.erase(frame);
|
||||||
|
delete frame;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Notify owner OUTSIDE m_mutex — prevents lock-ordering deadlock
|
||||||
|
// with ANSRTSPClient::_mutex (used by Destroy's condition_variable wait)
|
||||||
|
if (releaseFn && owner) {
|
||||||
|
REG_DBG("calling onReleaseFn owner=%p", owner);
|
||||||
|
releaseFn(owner);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,9 +343,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --- Drain pending GPU device pointers for caller to cudaFree ---
|
// --- Drain pending GPU device pointers for caller to cudaFree ---
|
||||||
std::vector<void*> drain_gpu_pending() {
|
// Each entry includes the device index for cudaSetDevice before cudaFree.
|
||||||
|
std::vector<GpuPendingFreeEntry> drain_gpu_pending() {
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
std::vector<void*> result;
|
std::vector<GpuPendingFreeEntry> result;
|
||||||
result.swap(m_pendingGpuFree);
|
result.swap(m_pendingGpuFree);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -287,31 +364,46 @@ public:
|
|||||||
m_lastEvictCheck = now;
|
m_lastEvictCheck = now;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
// Collect owner callbacks to invoke OUTSIDE m_mutex
|
||||||
for (auto it = m_frameSet.begin(); it != m_frameSet.end(); ) {
|
struct OwnerCallback { void* client; void (*fn)(void*); };
|
||||||
auto* frame = *it;
|
std::vector<OwnerCallback> callbacks;
|
||||||
auto age_s = std::chrono::duration_cast<std::chrono::seconds>(
|
|
||||||
now - frame->createdAt).count();
|
{
|
||||||
if (age_s > FRAME_TTL_SECONDS && frame->refcount.load() > 0) {
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
// Force cleanup — remove all Mat* keys pointing to this frame
|
for (auto it = m_frameSet.begin(); it != m_frameSet.end(); ) {
|
||||||
for (auto jt = m_map.begin(); jt != m_map.end(); ) {
|
auto* frame = *it;
|
||||||
if (jt->second == frame)
|
auto age_s = std::chrono::duration_cast<std::chrono::seconds>(
|
||||||
jt = m_map.erase(jt);
|
now - frame->createdAt).count();
|
||||||
else
|
if (age_s > FRAME_TTL_SECONDS && frame->refcount.load() > 0) {
|
||||||
++jt;
|
// Capture owner callback before deleting
|
||||||
|
if (frame->onReleaseFn && frame->ownerClient) {
|
||||||
|
callbacks.push_back({frame->ownerClient, frame->onReleaseFn});
|
||||||
|
}
|
||||||
|
// Force cleanup — remove all Mat* keys pointing to this frame
|
||||||
|
for (auto jt = m_map.begin(); jt != m_map.end(); ) {
|
||||||
|
if (jt->second == frame)
|
||||||
|
jt = m_map.erase(jt);
|
||||||
|
else
|
||||||
|
++jt;
|
||||||
|
}
|
||||||
|
// Push avframes to pendingFree
|
||||||
|
if (frame->avframe)
|
||||||
|
m_pendingFree.push_back(frame->avframe);
|
||||||
|
if (frame->cpuAvframe)
|
||||||
|
m_pendingFree.push_back(frame->cpuAvframe);
|
||||||
|
freeOwnedBuffers_locked(frame);
|
||||||
|
it = m_frameSet.erase(it);
|
||||||
|
delete frame;
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
}
|
}
|
||||||
// Push avframes to pendingFree
|
|
||||||
if (frame->avframe)
|
|
||||||
m_pendingFree.push_back(frame->avframe);
|
|
||||||
if (frame->cpuAvframe)
|
|
||||||
m_pendingFree.push_back(frame->cpuAvframe);
|
|
||||||
freeOwnedBuffers_locked(frame);
|
|
||||||
it = m_frameSet.erase(it);
|
|
||||||
delete frame;
|
|
||||||
} else {
|
|
||||||
++it;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Notify owners OUTSIDE m_mutex
|
||||||
|
for (auto& cb : callbacks) {
|
||||||
|
cb.fn(cb.client);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- VRAM budget management ---
|
// --- VRAM budget management ---
|
||||||
@@ -340,6 +432,70 @@ public:
|
|||||||
void setGpuCacheBudget(size_t bytes) { m_gpuCacheBudget = bytes; }
|
void setGpuCacheBudget(size_t bytes) { m_gpuCacheBudget = bytes; }
|
||||||
size_t gpuCacheBudget() const { return m_gpuCacheBudget; }
|
size_t gpuCacheBudget() const { return m_gpuCacheBudget; }
|
||||||
|
|
||||||
|
// --- Invalidate owner: nullify all callbacks for a client being destroyed ---
|
||||||
|
// Called by Destroy() on timeout to prevent callbacks into a deleted object.
|
||||||
|
void invalidateOwner(void* client) {
|
||||||
|
if (!client) return;
|
||||||
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
for (auto* frame : m_frameSet) {
|
||||||
|
if (frame->ownerClient == client) {
|
||||||
|
frame->ownerClient = nullptr;
|
||||||
|
frame->onReleaseFn = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Force-release all frames owned by a client ---
|
||||||
|
// Called by Destroy() BEFORE close() to free GPU buffers while the CUDA
|
||||||
|
// context is still alive. Without this, unreleased clones (e.g. 70 cloned
|
||||||
|
// images held by LabVIEW AI tasks that haven't finished) keep gpuCacheY/UV
|
||||||
|
// allocated. When close() destroys the CUDA context, those buffers become
|
||||||
|
// orphaned and later cudaFree calls crash.
|
||||||
|
//
|
||||||
|
// This force-frees ALL owned buffers for frames belonging to this client,
|
||||||
|
// removes all Mat* keys pointing to them, and deletes the GpuFrameData.
|
||||||
|
// Returns the number of frames force-released.
|
||||||
|
int forceReleaseByOwner(void* client) {
|
||||||
|
if (!client) return 0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
|
for (auto it = m_frameSet.begin(); it != m_frameSet.end(); ) {
|
||||||
|
auto* frame = *it;
|
||||||
|
if (frame->ownerClient == client) {
|
||||||
|
REG_DBG("forceReleaseByOwner: frame=%p refcount=%d gpuCacheY=%p gpuCacheUV=%p bytes=%zu",
|
||||||
|
(void*)frame, frame->refcount.load(),
|
||||||
|
frame->gpuCacheY, frame->gpuCacheUV, frame->gpuCacheBytes);
|
||||||
|
|
||||||
|
// Remove all Mat* keys pointing to this frame
|
||||||
|
for (auto jt = m_map.begin(); jt != m_map.end(); ) {
|
||||||
|
if (jt->second == frame)
|
||||||
|
jt = m_map.erase(jt);
|
||||||
|
else
|
||||||
|
++jt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free owned buffers (CPU + GPU pending)
|
||||||
|
if (frame->avframe)
|
||||||
|
m_pendingFree.push_back(frame->avframe);
|
||||||
|
if (frame->cpuAvframe)
|
||||||
|
m_pendingFree.push_back(frame->cpuAvframe);
|
||||||
|
freeOwnedBuffers_locked(frame);
|
||||||
|
it = m_frameSet.erase(it);
|
||||||
|
delete frame;
|
||||||
|
++count;
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count > 0) {
|
||||||
|
REG_DBG("forceReleaseByOwner: force-released %d frames for client=%p", count, client);
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ANSGpuFrameRegistry() = default;
|
ANSGpuFrameRegistry() = default;
|
||||||
|
|
||||||
@@ -350,6 +506,10 @@ private:
|
|||||||
// Free malloc'd CPU NV12 buffers and GPU cache (but NOT avframe/cpuAvframe —
|
// Free malloc'd CPU NV12 buffers and GPU cache (but NOT avframe/cpuAvframe —
|
||||||
// those go to pendingFree for the caller to av_frame_free).
|
// those go to pendingFree for the caller to av_frame_free).
|
||||||
void freeOwnedBuffers_locked(GpuFrameData* frame) {
|
void freeOwnedBuffers_locked(GpuFrameData* frame) {
|
||||||
|
REG_DBG("freeOwnedBuffers: frame=%p cpuY=%p cpuUV=%p gpuCacheY=%p gpuCacheUV=%p bytes=%zu dev=%d",
|
||||||
|
(void*)frame, (void*)frame->cpuYPlane, (void*)frame->cpuUvPlane,
|
||||||
|
frame->gpuCacheY, frame->gpuCacheUV,
|
||||||
|
frame->gpuCacheBytes, frame->gpuCacheDeviceIdx);
|
||||||
if (frame->cpuYPlane) {
|
if (frame->cpuYPlane) {
|
||||||
std::free(frame->cpuYPlane);
|
std::free(frame->cpuYPlane);
|
||||||
frame->cpuYPlane = nullptr;
|
frame->cpuYPlane = nullptr;
|
||||||
@@ -358,23 +518,17 @@ private:
|
|||||||
std::free(frame->cpuUvPlane);
|
std::free(frame->cpuUvPlane);
|
||||||
frame->cpuUvPlane = nullptr;
|
frame->cpuUvPlane = nullptr;
|
||||||
}
|
}
|
||||||
// GPU cache freed via CUDA — caller (ANSODEngine) must handle this
|
// GPU cache freed via CUDA — push to deferred list with device index
|
||||||
// since we can't call cudaFree from this FFmpeg-free header.
|
// so the caller (ANSGpuFrameOps.h) can cudaSetDevice + cudaFree.
|
||||||
// The gpuCacheBytes are tracked; actual deallocation happens in
|
|
||||||
// NV12PreprocessHelper or a GPU-aware cleanup path.
|
|
||||||
if (frame->gpuCacheBytes > 0) {
|
if (frame->gpuCacheBytes > 0) {
|
||||||
onGpuCacheFreed(frame->gpuCacheBytes);
|
onGpuCacheFreed(frame->gpuCacheBytes);
|
||||||
// Mark as invalid so no one reads stale pointers
|
|
||||||
frame->gpuCacheValid = false;
|
frame->gpuCacheValid = false;
|
||||||
frame->gpuCacheBytes = 0;
|
frame->gpuCacheBytes = 0;
|
||||||
// NOTE: gpuCacheY/gpuCacheUV device pointers are leaked here
|
int devIdx = frame->gpuCacheDeviceIdx;
|
||||||
// unless the caller handles GPU cleanup. This is addressed in
|
|
||||||
// Step 8 (NV12PreprocessHelper) where cudaFree is available.
|
|
||||||
// For now, push to a separate GPU-free list.
|
|
||||||
if (frame->gpuCacheY)
|
if (frame->gpuCacheY)
|
||||||
m_pendingGpuFree.push_back(frame->gpuCacheY);
|
m_pendingGpuFree.push_back({frame->gpuCacheY, devIdx});
|
||||||
if (frame->gpuCacheUV)
|
if (frame->gpuCacheUV)
|
||||||
m_pendingGpuFree.push_back(frame->gpuCacheUV);
|
m_pendingGpuFree.push_back({frame->gpuCacheUV, devIdx});
|
||||||
frame->gpuCacheY = nullptr;
|
frame->gpuCacheY = nullptr;
|
||||||
frame->gpuCacheUV = nullptr;
|
frame->gpuCacheUV = nullptr;
|
||||||
}
|
}
|
||||||
@@ -384,7 +538,7 @@ private:
|
|||||||
std::unordered_map<cv::Mat*, GpuFrameData*> m_map;
|
std::unordered_map<cv::Mat*, GpuFrameData*> m_map;
|
||||||
std::unordered_set<GpuFrameData*> m_frameSet; // All unique frames (for TTL scan)
|
std::unordered_set<GpuFrameData*> m_frameSet; // All unique frames (for TTL scan)
|
||||||
std::vector<void*> m_pendingFree; // AVFrame* pointers to av_frame_free
|
std::vector<void*> m_pendingFree; // AVFrame* pointers to av_frame_free
|
||||||
std::vector<void*> m_pendingGpuFree; // CUDA device pointers to cudaFree
|
std::vector<GpuPendingFreeEntry> m_pendingGpuFree; // CUDA device pointers to cudaFree
|
||||||
std::atomic<size_t> m_totalGpuCacheBytes{0};
|
std::atomic<size_t> m_totalGpuCacheBytes{0};
|
||||||
size_t m_gpuCacheBudget = GPU_CACHE_BUDGET_DEFAULT;
|
size_t m_gpuCacheBudget = GPU_CACHE_BUDGET_DEFAULT;
|
||||||
std::chrono::steady_clock::time_point m_lastEvictCheck;
|
std::chrono::steady_clock::time_point m_lastEvictCheck;
|
||||||
@@ -408,7 +562,7 @@ inline bool gpu_frame_addref(cv::Mat* src, cv::Mat* dst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Drain GPU device pointers that need cudaFree.
|
// Drain GPU device pointers that need cudaFree.
|
||||||
// Caller must cudaFree each returned pointer.
|
// Caller must cudaSetDevice(entry.deviceIdx) + cudaFree(entry.ptr) for each.
|
||||||
inline std::vector<void*> gpu_frame_drain_gpu_pending() {
|
inline std::vector<GpuPendingFreeEntry> gpu_frame_drain_gpu_pending() {
|
||||||
return ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
return ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,13 +46,22 @@ namespace ANSCENTER {
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
void ANSFLVClient::Destroy() {
|
void ANSFLVClient::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// Move player out of lock scope — close() does CUDA cleanup
|
||||||
if (_playerClient) {
|
// (cuArrayDestroy/cuMemFree) which must not run under _mutex
|
||||||
if (_isPlaying) {
|
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||||
_playerClient->stop();
|
decltype(_playerClient) clientToClose;
|
||||||
_isPlaying = false;
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
if (_playerClient) {
|
||||||
|
if (_isPlaying) {
|
||||||
|
_playerClient->stop();
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_playerClient->close();
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
if (clientToClose) {
|
||||||
|
clientToClose->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void VerifyGlobalANSFLVLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSFLVLicense(const std::string& licenseKey) {
|
||||||
@@ -129,8 +138,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSFLVClient::Reconnect() {
|
bool ANSFLVClient::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
Setup();
|
Setup();
|
||||||
_isPlaying = _playerClient->play();
|
_isPlaying = _playerClient->play();
|
||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
@@ -143,10 +156,16 @@ namespace ANSCENTER {
|
|||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
}
|
}
|
||||||
bool ANSFLVClient::Stop() {
|
bool ANSFLVClient::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient.get()) player = nullptr;
|
||||||
if (_isPlaying) {
|
{
|
||||||
_playerClient->stop();
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_isPlaying = false;
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (player) {
|
||||||
|
player->stop();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,22 +39,26 @@ namespace ANSCENTER {
|
|||||||
catch (...) {}
|
catch (...) {}
|
||||||
}
|
}
|
||||||
void ANSFILEPLAYER::Destroy() {
|
void ANSFILEPLAYER::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient) clientToClose;
|
||||||
try {
|
{
|
||||||
_url = "";
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_imageRotateDeg = 0;
|
try {
|
||||||
_isPlaying = false;
|
_url = "";
|
||||||
_lastJpegImage = "";
|
_imageRotateDeg = 0;
|
||||||
_pLastFrame.release();
|
_isPlaying = false;
|
||||||
if (_playerClient) {
|
_lastJpegImage = "";
|
||||||
_playerClient->close();
|
_pLastFrame.release();
|
||||||
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
catch (const std::exception& e) {
|
||||||
|
_logger.LogError("ANSFILEPLAYER::Destroy. Exception:", e.what(), __FILE__, __LINE__);
|
||||||
|
}
|
||||||
|
catch (...) {
|
||||||
|
_logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const std::exception& e) {
|
if (clientToClose) {
|
||||||
_logger.LogError("ANSFILEPLAYER::Destroy. Exception:", e.what(), __FILE__, __LINE__);
|
clientToClose->close();
|
||||||
}
|
|
||||||
catch (...) {
|
|
||||||
_logger.LogError("ANSFILEPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void ANSFILEPLAYER::CheckLicense() {
|
void ANSFILEPLAYER::CheckLicense() {
|
||||||
@@ -94,8 +98,12 @@ namespace ANSCENTER {
|
|||||||
return _playerClient->open(_url);
|
return _playerClient->open(_url);
|
||||||
}
|
}
|
||||||
bool ANSFILEPLAYER::Reconnect() {
|
bool ANSFILEPLAYER::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
Setup();
|
Setup();
|
||||||
return Start();
|
return Start();
|
||||||
}
|
}
|
||||||
@@ -105,14 +113,17 @@ namespace ANSCENTER {
|
|||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
}
|
}
|
||||||
bool ANSFILEPLAYER::Stop() {
|
bool ANSFILEPLAYER::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient.get()) player = nullptr;
|
||||||
if (_playerClient->pause()) {
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
if (player && player->pause()) {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
_isPlaying = false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else {
|
return false;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
bool ANSFILEPLAYER::IsPaused() {
|
bool ANSFILEPLAYER::IsPaused() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
|||||||
@@ -19,8 +19,31 @@ extern "C" {
|
|||||||
#include "libavutil/frame.h"
|
#include "libavutil/frame.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Debug logging macro for GPU frame operations.
// Output goes to stderr (console) AND, on Windows, OutputDebugString
// (DebugView / VS debugger).
// Use Sysinternals DebugView (dbgview64.exe) to capture these after a crash.
//
// The Windows variant formats into a fixed 512-byte buffer. A message that
// does not fit is truncated but keeps its trailing newline, and a formatting
// failure (negative snprintf result) emits an empty string rather than
// whatever happened to be in the buffer.
#ifndef GPU_FRAME_DBG
#ifdef _WIN32
#define GPU_FRAME_DBG(fmt, ...) do { \
    char gpuFrameDbgBuf_[512] = {0}; \
    const int gpuFrameDbgLen_ = snprintf(gpuFrameDbgBuf_, sizeof(gpuFrameDbgBuf_), \
                                         "[GpuFrameOps] " fmt "\n", ##__VA_ARGS__); \
    if (gpuFrameDbgLen_ < 0) { \
        gpuFrameDbgBuf_[0] = '\0'; \
    } else if (gpuFrameDbgLen_ >= static_cast<int>(sizeof(gpuFrameDbgBuf_))) { \
        gpuFrameDbgBuf_[sizeof(gpuFrameDbgBuf_) - 2] = '\n'; \
    } \
    OutputDebugStringA(gpuFrameDbgBuf_); \
    fprintf(stderr, "%s", gpuFrameDbgBuf_); \
} while(0)
#else
#define GPU_FRAME_DBG(fmt, ...) \
    fprintf(stderr, "[GpuFrameOps] " fmt "\n", ##__VA_ARGS__)
#endif
#endif
|
||||||
|
|
||||||
namespace anscv_gpu_ops {
|
namespace anscv_gpu_ops {
|
||||||
namespace detail {
|
namespace detail {
|
||||||
@@ -71,6 +94,42 @@ inline bool snapshotNV12Planes(const AVFrame* nv12,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Drain pending GPU device pointers and actually cudaFree them.
|
||||||
|
// Must be called from a thread with CUDA context available.
|
||||||
|
inline void drainAndFreeGpuPending() {
|
||||||
|
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||||
|
if (gpuPending.empty()) return;
|
||||||
|
GPU_FRAME_DBG("drainGpuPending: freeing %zu GPU ptrs", gpuPending.size());
|
||||||
|
int prevDev = -1;
|
||||||
|
cudaGetDevice(&prevDev);
|
||||||
|
|
||||||
|
// Group by device to minimize cudaSetDevice calls and synchronize once per device.
|
||||||
|
// cudaDeviceSynchronize() is CRITICAL: NV12 kernels run on cv::cuda::Stream
|
||||||
|
// (not the default stream). cudaFree on stream 0 doesn't wait for other
|
||||||
|
// streams, so without this sync, cudaFree can free a buffer while a kernel
|
||||||
|
// on another stream is still reading from it → cudaErrorIllegalAddress (700)
|
||||||
|
// which permanently corrupts the CUDA context.
|
||||||
|
int lastSyncDev = -1;
|
||||||
|
for (auto& entry : gpuPending) {
|
||||||
|
if (entry.ptr) {
|
||||||
|
if (entry.deviceIdx >= 0)
|
||||||
|
cudaSetDevice(entry.deviceIdx);
|
||||||
|
if (entry.deviceIdx != lastSyncDev) {
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
lastSyncDev = entry.deviceIdx;
|
||||||
|
}
|
||||||
|
GPU_FRAME_DBG("drainGpuPending: cudaFree(%p) dev=%d", entry.ptr, entry.deviceIdx);
|
||||||
|
cudaError_t err = cudaFree(entry.ptr);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
GPU_FRAME_DBG("drainGpuPending: cudaFree FAILED err=%d (%s)",
|
||||||
|
(int)err, cudaGetErrorString(err));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (prevDev >= 0)
|
||||||
|
cudaSetDevice(prevDev);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
} // namespace anscv_gpu_ops
|
} // namespace anscv_gpu_ops
|
||||||
|
|
||||||
@@ -117,36 +176,44 @@ inline void gpu_frame_attach(cv::Mat* mat, AVFrame* nv12, int gpuIdx, int64_t pt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attach CUDA HW frame — keeps CUDA device pointers for zero-copy inference.
|
// Attach CUDA HW frame — copies NV12 from NVDEC surfaces to owned GPU memory.
|
||||||
// TAKES OWNERSHIP of cudaFrame AND cpuNV12 — caller must NOT av_frame_free after.
|
// TAKES OWNERSHIP of cudaFrame AND cpuNV12 — caller must NOT av_frame_free after.
|
||||||
//
|
//
|
||||||
// Primary path: yPlane/uvPlane point to CUDA device pointers from the cloned
|
// D2D copy path: cudaMemcpy2D from NVDEC surfaces to cudaMalloc'd buffers on the
|
||||||
// AVFrame (data[0]/data[1]). The cloned AVFrame keeps the NVDEC surface alive
|
// same GPU. This decouples the NV12 data lifetime from the NVDEC decoder, so
|
||||||
// until gpu_frame_remove() is called after inference. With 4 cameras each
|
// player->close() can safely destroy the decoder at any time without invalidating
|
||||||
// holding ~1 surface, this uses 4 of NVDEC's 25-32 surface pool — safe.
|
// pointers that inference engines may be reading. The NVDEC surface is freed
|
||||||
|
// immediately (av_frame_free), returning it to the decoder's surface pool.
|
||||||
|
//
|
||||||
|
// The owned GPU pointers are stored as both yPlane/uvPlane (for zero-copy reads)
|
||||||
|
// and gpuCacheY/gpuCacheUV (for lifecycle management / cudaFree on cleanup).
|
||||||
|
//
|
||||||
|
// VRAM budget: if the global GPU cache budget is exceeded, falls back to CPU-only
|
||||||
|
// NV12 snapshot (no zero-copy, but safe).
|
||||||
//
|
//
|
||||||
// Fallback: cpuYPlane/cpuUvPlane hold CPU-side NV12 snapshot for cross-GPU
|
// Fallback: cpuYPlane/cpuUvPlane hold CPU-side NV12 snapshot for cross-GPU
|
||||||
// inference (when decode GPU != inference GPU, CUDA device ptrs aren't
|
// inference (when decode GPU != inference GPU).
|
||||||
// accessible from another GPU context).
|
|
||||||
inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, int64_t pts,
|
inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx, int64_t pts,
|
||||||
AVFrame* cpuNV12 = nullptr) {
|
AVFrame* cpuNV12 = nullptr) {
|
||||||
if (!mat || !cudaFrame) return;
|
if (!mat || !cudaFrame) {
|
||||||
|
GPU_FRAME_DBG("attach_cuda: SKIP mat=%p cudaFrame=%p", (void*)mat, (void*)cudaFrame);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int w = cudaFrame->width;
|
||||||
|
const int h = cudaFrame->height;
|
||||||
|
GPU_FRAME_DBG("attach_cuda: START mat=%p %dx%d gpu=%d nvdecY=%p nvdecUV=%p cpuNV12=%p",
|
||||||
|
(void*)mat, w, h, gpuIdx,
|
||||||
|
(void*)cudaFrame->data[0], (void*)cudaFrame->data[1], (void*)cpuNV12);
|
||||||
|
|
||||||
GpuFrameData data{};
|
GpuFrameData data{};
|
||||||
data.gpuIndex = gpuIdx;
|
data.gpuIndex = gpuIdx;
|
||||||
data.pts = pts;
|
data.pts = pts;
|
||||||
data.width = cudaFrame->width;
|
data.width = w;
|
||||||
data.height = cudaFrame->height;
|
data.height = h;
|
||||||
data.pixelFormat = 23; // AV_PIX_FMT_NV12 — the underlying sw_format
|
data.pixelFormat = 23; // AV_PIX_FMT_NV12
|
||||||
|
|
||||||
// Primary: CUDA device pointers from NVDEC (zero-copy on same GPU)
|
// Snapshot CPU NV12 for cross-GPU fallback (must do before freeing cpuNV12)
|
||||||
data.isCudaDevicePtr = true;
|
|
||||||
data.yPlane = cudaFrame->data[0]; // CUDA device ptr: Y plane
|
|
||||||
data.uvPlane = cudaFrame->data[1]; // CUDA device ptr: UV plane
|
|
||||||
data.yLinesize = cudaFrame->linesize[0];
|
|
||||||
data.uvLinesize = cudaFrame->linesize[1];
|
|
||||||
|
|
||||||
// Fallback: snapshot CPU NV12 for cross-GPU inference
|
|
||||||
if (cpuNV12) {
|
if (cpuNV12) {
|
||||||
anscv_gpu_ops::detail::snapshotNV12Planes(
|
anscv_gpu_ops::detail::snapshotNV12Planes(
|
||||||
cpuNV12,
|
cpuNV12,
|
||||||
@@ -155,9 +222,98 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
|||||||
data.width, data.height);
|
data.width, data.height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store AVFrames for cleanup (cudaFrame keeps NVDEC surface alive)
|
// --- D2D copy: NVDEC surface → owned GPU memory ---
|
||||||
data.avframe = cudaFrame;
|
// Estimate VRAM needed for the owned NV12 copy
|
||||||
data.cpuAvframe = cpuNV12;
|
const size_t yBytes = static_cast<size_t>(w) * h;
|
||||||
|
const size_t uvBytes = static_cast<size_t>(w) * (h / 2);
|
||||||
|
const size_t totalBytes = yBytes + uvBytes;
|
||||||
|
|
||||||
|
bool d2dOk = false;
|
||||||
|
if (ANSGpuFrameRegistry::instance().canAllocateGpuCache(totalBytes)) {
|
||||||
|
int prevDev = -1;
|
||||||
|
cudaGetDevice(&prevDev);
|
||||||
|
if (gpuIdx >= 0)
|
||||||
|
cudaSetDevice(gpuIdx);
|
||||||
|
|
||||||
|
void* ownedY = nullptr;
|
||||||
|
void* ownedUV = nullptr;
|
||||||
|
size_t yPitch = 0;
|
||||||
|
size_t uvPitch = 0;
|
||||||
|
|
||||||
|
cudaError_t e1 = cudaMallocPitch(&ownedY, &yPitch, w, h);
|
||||||
|
cudaError_t e2 = cudaMallocPitch(&ownedUV, &uvPitch, w, h / 2);
|
||||||
|
|
||||||
|
if (e1 == cudaSuccess && e2 == cudaSuccess) {
|
||||||
|
cudaError_t e3 = cudaMemcpy2D(ownedY, yPitch,
|
||||||
|
cudaFrame->data[0], cudaFrame->linesize[0],
|
||||||
|
w, h, cudaMemcpyDeviceToDevice);
|
||||||
|
cudaError_t e4 = cudaMemcpy2D(ownedUV, uvPitch,
|
||||||
|
cudaFrame->data[1], cudaFrame->linesize[1],
|
||||||
|
w, h / 2, cudaMemcpyDeviceToDevice);
|
||||||
|
|
||||||
|
if (e3 == cudaSuccess && e4 == cudaSuccess) {
|
||||||
|
// Store owned GPU pointers as primary NV12 source
|
||||||
|
data.isCudaDevicePtr = true;
|
||||||
|
data.yPlane = static_cast<uint8_t*>(ownedY);
|
||||||
|
data.uvPlane = static_cast<uint8_t*>(ownedUV);
|
||||||
|
data.yLinesize = static_cast<int>(yPitch);
|
||||||
|
data.uvLinesize = static_cast<int>(uvPitch);
|
||||||
|
|
||||||
|
// Track in gpuCache for lifecycle management (cudaFree on cleanup)
|
||||||
|
data.gpuCacheY = ownedY;
|
||||||
|
data.gpuCacheUV = ownedUV;
|
||||||
|
data.gpuCacheYPitch = yPitch;
|
||||||
|
data.gpuCacheUVPitch = uvPitch;
|
||||||
|
data.gpuCacheDeviceIdx = gpuIdx;
|
||||||
|
data.gpuCacheValid = true;
|
||||||
|
data.gpuCacheBytes = yPitch * h + uvPitch * (h / 2);
|
||||||
|
|
||||||
|
ANSGpuFrameRegistry::instance().onGpuCacheCreated(data.gpuCacheBytes);
|
||||||
|
d2dOk = true;
|
||||||
|
GPU_FRAME_DBG("attach_cuda: D2D OK ownedY=%p ownedUV=%p yPitch=%zu uvPitch=%zu bytes=%zu",
|
||||||
|
ownedY, ownedUV, yPitch, uvPitch, data.gpuCacheBytes);
|
||||||
|
} else {
|
||||||
|
// D2D copy failed — free allocated memory and fall back
|
||||||
|
GPU_FRAME_DBG("attach_cuda: D2D COPY FAILED e3=%d e4=%d — fallback CPU",
|
||||||
|
(int)e3, (int)e4);
|
||||||
|
cudaFree(ownedY);
|
||||||
|
cudaFree(ownedUV);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Allocation failed — free any partial allocation and fall back
|
||||||
|
GPU_FRAME_DBG("attach_cuda: cudaMallocPitch FAILED e1=%d e2=%d — fallback CPU",
|
||||||
|
(int)e1, (int)e2);
|
||||||
|
if (e1 == cudaSuccess) cudaFree(ownedY);
|
||||||
|
if (e2 == cudaSuccess) cudaFree(ownedUV);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prevDev >= 0)
|
||||||
|
cudaSetDevice(prevDev);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!d2dOk) {
|
||||||
|
// Fall back to CPU NV12 snapshot only (no zero-copy)
|
||||||
|
GPU_FRAME_DBG("attach_cuda: FALLBACK CPU-only cpuY=%p cpuUV=%p",
|
||||||
|
(void*)data.cpuYPlane, (void*)data.cpuUvPlane);
|
||||||
|
data.isCudaDevicePtr = false;
|
||||||
|
data.yPlane = data.cpuYPlane;
|
||||||
|
data.uvPlane = data.cpuUvPlane;
|
||||||
|
data.yLinesize = data.cpuYLinesize;
|
||||||
|
data.uvLinesize = data.cpuUvLinesize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release AVFrames immediately — NVDEC surfaces returned to pool.
|
||||||
|
// No longer stored in GpuFrameData (owned GPU copy is independent).
|
||||||
|
GPU_FRAME_DBG("attach_cuda: freeing AVFrames cudaFrame=%p cpuNV12=%p",
|
||||||
|
(void*)cudaFrame, (void*)cpuNV12);
|
||||||
|
av_frame_free(&cudaFrame);
|
||||||
|
if (cpuNV12) av_frame_free(&cpuNV12);
|
||||||
|
data.avframe = nullptr;
|
||||||
|
data.cpuAvframe = nullptr;
|
||||||
|
|
||||||
|
GPU_FRAME_DBG("attach_cuda: FINAL yPlane=%p uvPlane=%p isCuda=%d gpuCacheY=%p gpuCacheUV=%p",
|
||||||
|
(void*)data.yPlane, (void*)data.uvPlane, (int)data.isCudaDevicePtr,
|
||||||
|
data.gpuCacheY, data.gpuCacheUV);
|
||||||
|
|
||||||
void* old = ANSGpuFrameRegistry::instance().attach(mat, std::move(data));
|
void* old = ANSGpuFrameRegistry::instance().attach(mat, std::move(data));
|
||||||
if (old) {
|
if (old) {
|
||||||
@@ -165,17 +321,23 @@ inline void gpu_frame_attach_cuda(cv::Mat* mat, AVFrame* cudaFrame, int gpuIdx,
|
|||||||
av_frame_free(&oldFrame);
|
av_frame_free(&oldFrame);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Free stale AVFrames evicted by TTL or previous attach
|
||||||
auto pending = ANSGpuFrameRegistry::instance().drain_pending();
|
auto pending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||||
for (void* p : pending) {
|
for (void* p : pending) {
|
||||||
AVFrame* stale = static_cast<AVFrame*>(p);
|
AVFrame* stale = static_cast<AVFrame*>(p);
|
||||||
av_frame_free(&stale);
|
av_frame_free(&stale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Free stale GPU device pointers
|
||||||
|
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release entry by cv::Mat* and free any returned AVFrames. Safe if not in map (no-op).
|
// Release entry by cv::Mat* and free any returned AVFrames + GPU pointers.
|
||||||
|
// Safe if not in map (no-op).
|
||||||
inline void gpu_frame_remove(cv::Mat* mat) {
|
inline void gpu_frame_remove(cv::Mat* mat) {
|
||||||
if (!mat) return;
|
if (!mat) return;
|
||||||
|
|
||||||
|
GPU_FRAME_DBG("gpu_frame_remove: mat=%p", (void*)mat);
|
||||||
ANSGpuFrameRegistry::instance().release(mat);
|
ANSGpuFrameRegistry::instance().release(mat);
|
||||||
|
|
||||||
// Free any AVFrames that became pending from this release or prior eviction
|
// Free any AVFrames that became pending from this release or prior eviction
|
||||||
@@ -186,13 +348,7 @@ inline void gpu_frame_remove(cv::Mat* mat) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Free any GPU device pointers that became pending
|
// Free any GPU device pointers that became pending
|
||||||
auto gpuPending = gpu_frame_drain_gpu_pending();
|
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||||
// NOTE: cudaFree requires CUDA context — caller must be on a CUDA-capable thread.
|
|
||||||
// If not, these will leak. In practice, gpu_frame_remove is called from ANSCV
|
|
||||||
// camera threads which do have CUDA context.
|
|
||||||
// For safety, we skip cudaFree here and let NV12PreprocessHelper handle it.
|
|
||||||
// The GPU pointers are tracked in the budget and will be accounted for.
|
|
||||||
(void)gpuPending;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Alias for remove — used in ANSCV mutating functions to drop stale GPU data.
|
// Alias for remove — used in ANSCV mutating functions to drop stale GPU data.
|
||||||
@@ -209,4 +365,7 @@ inline void gpu_frame_evict_stale() {
|
|||||||
AVFrame* stale = static_cast<AVFrame*>(p);
|
AVFrame* stale = static_cast<AVFrame*>(p);
|
||||||
av_frame_free(&stale);
|
av_frame_free(&stale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Free any GPU device pointers from evicted frames
|
||||||
|
anscv_gpu_ops::detail::drainAndFreeGpuPending();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,13 +46,19 @@ namespace ANSCENTER {
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
void ANSMJPEGClient::Destroy() {
|
void ANSMJPEGClient::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient) clientToClose;
|
||||||
if (_playerClient) {
|
{
|
||||||
if (_isPlaying) {
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_playerClient->stop();
|
if (_playerClient) {
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_playerClient->stop();
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_playerClient->close();
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
if (clientToClose) {
|
||||||
|
clientToClose->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void VerifyGlobalANSMJPEGLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSMJPEGLicense(const std::string& licenseKey) {
|
||||||
@@ -129,8 +135,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSMJPEGClient::Reconnect() {
|
bool ANSMJPEGClient::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
Setup();
|
Setup();
|
||||||
_isPlaying = _playerClient->play();
|
_isPlaying = _playerClient->play();
|
||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
@@ -143,10 +153,16 @@ namespace ANSCENTER {
|
|||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
}
|
}
|
||||||
bool ANSMJPEGClient::Stop() {
|
bool ANSMJPEGClient::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient.get()) player = nullptr;
|
||||||
if (_isPlaying) {
|
{
|
||||||
_playerClient->stop();
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_isPlaying = false;
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (player) {
|
||||||
|
player->stop();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,13 +48,19 @@ namespace ANSCENTER {
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
void ANSRTMPClient::Destroy() {
|
void ANSRTMPClient::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient) clientToClose;
|
||||||
if (_playerClient) {
|
{
|
||||||
if (_isPlaying) {
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_playerClient->stop();
|
if (_playerClient) {
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_playerClient->stop();
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_playerClient->close();
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
if (clientToClose) {
|
||||||
|
clientToClose->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void VerifyGlobalANSRTMPLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSRTMPLicense(const std::string& licenseKey) {
|
||||||
@@ -126,8 +132,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ANSRTMPClient::Reconnect() {
|
bool ANSRTMPClient::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
Setup();
|
Setup();
|
||||||
_isPlaying = _playerClient->play();
|
_isPlaying = _playerClient->play();
|
||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
@@ -140,10 +150,16 @@ namespace ANSCENTER {
|
|||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
}
|
}
|
||||||
bool ANSRTMPClient::Stop() {
|
bool ANSRTMPClient::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient.get()) player = nullptr;
|
||||||
if (_isPlaying) {
|
{
|
||||||
_playerClient->stop();
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_isPlaying = false;
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (player) {
|
||||||
|
player->stop();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
#include "ANSMatRegistry.h"
|
#include "ANSMatRegistry.h"
|
||||||
#include "ANSGpuFrameOps.h"
|
#include "ANSGpuFrameOps.h"
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <format>
|
||||||
#include "media_codec.h"
|
#include "media_codec.h"
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
@@ -21,6 +22,20 @@ extern "C"
|
|||||||
}
|
}
|
||||||
// Note: per-instance thread safety is handled by ANSRTSPClient::_mutex
|
// Note: per-instance thread safety is handled by ANSRTSPClient::_mutex
|
||||||
// Mat registry thread safety is handled by anscv_mat_replace's internal registry_mutex
|
// Mat registry thread safety is handled by anscv_mat_replace's internal registry_mutex
|
||||||
|
|
||||||
|
// Debug logging — goes to stderr AND, on Windows, OutputDebugString (DebugView).
//
// The Windows variant formats into a fixed 512-byte buffer. A message that
// does not fit is truncated but keeps its trailing newline, and a formatting
// failure (negative snprintf result) emits an empty string rather than
// whatever happened to be in the buffer.
#ifndef RTSP_DBG
#ifdef _WIN32
#define RTSP_DBG(fmt, ...) do { \
    char rtspDbgBuf_[512] = {0}; \
    const int rtspDbgLen_ = snprintf(rtspDbgBuf_, sizeof(rtspDbgBuf_), fmt "\n", ##__VA_ARGS__); \
    if (rtspDbgLen_ < 0) { \
        rtspDbgBuf_[0] = '\0'; \
    } else if (rtspDbgLen_ >= static_cast<int>(sizeof(rtspDbgBuf_))) { \
        rtspDbgBuf_[sizeof(rtspDbgBuf_) - 2] = '\n'; \
    } \
    OutputDebugStringA(rtspDbgBuf_); \
    fprintf(stderr, "%s", rtspDbgBuf_); \
} while(0)
#else
#define RTSP_DBG(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)
#endif
#endif
|
||||||
static bool ansrtspLicenceValid = false;
|
static bool ansrtspLicenceValid = false;
|
||||||
// Global once_flag to protect license checking
|
// Global once_flag to protect license checking
|
||||||
static std::once_flag ansrtspLicenseOnceFlag;
|
static std::once_flag ansrtspLicenseOnceFlag;
|
||||||
@@ -48,19 +63,88 @@ namespace ANSCENTER {
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
void ANSRTSPClient::Destroy() {
|
void ANSRTSPClient::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// Move the player client pointer out of the lock scope, then
|
||||||
if (_playerClient) {
|
// close it OUTSIDE the mutex. close() calls cuArrayDestroy /
|
||||||
// Stop the stream first so the video decoder is flushed and
|
// cuMemFree which acquire an EXCLUSIVE SRW lock inside nvcuda64.
|
||||||
// the RTSP callback thread is no longer feeding frames into
|
// If we hold _mutex during close(), and another thread holds
|
||||||
// decode(). Without this, rtsp_close() can block waiting for
|
// the nvcuda64 SRW lock (e.g. cuStreamSynchronize during
|
||||||
// CRtspClient::m_pMutex (held by the callback mid-decode),
|
// inference), we get a deadlock: Stop() → _mutex → nvcuda64
|
||||||
// and the hardware decoder flush during destruction can hang
|
// vs inference → nvcuda64 → (blocked by exclusive waiter).
|
||||||
// on the GPU.
|
decltype(_playerClient) clientToClose;
|
||||||
if (_isPlaying) {
|
{
|
||||||
_playerClient->stop();
|
std::unique_lock<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
if (_playerClient) {
|
||||||
|
if (_isPlaying) {
|
||||||
|
_playerClient->stop();
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_playerClient->close();
|
|
||||||
|
// --- Inference guard: wait for in-flight frames to finish ---
|
||||||
|
// GetRTSPCVImage increments _inFlightFrames when it hands out
|
||||||
|
// a GPU frame; the registry decrements it when the frame is
|
||||||
|
// released after inference completes. We wait here so that
|
||||||
|
// close() doesn't free NVDEC surfaces while TensorRT is
|
||||||
|
// still reading from them (the LabVIEW crash root cause).
|
||||||
|
int inFlight = _inFlightFrames.load(std::memory_order_acquire);
|
||||||
|
if (inFlight > 0) {
|
||||||
|
_logger.LogInfo("ANSRTSPClient::Destroy",
|
||||||
|
std::format("waiting for {} in-flight inference frame(s)...", inFlight),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
bool done = _inFlightDone.wait_for(lock, std::chrono::seconds(5), [this] {
|
||||||
|
return _inFlightFrames.load(std::memory_order_acquire) <= 0;
|
||||||
|
});
|
||||||
|
if (!done) {
|
||||||
|
_logger.LogWarn("ANSRTSPClient::Destroy",
|
||||||
|
std::format("timed out waiting for in-flight frames "
|
||||||
|
"(still {} in-flight) — force-releasing GPU frames",
|
||||||
|
_inFlightFrames.load()),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force-release ALL GPU frames owned by this client BEFORE close().
|
||||||
|
// Unreleased clones (e.g. LabVIEW AI tasks still holding cloned
|
||||||
|
// cv::Mat*) keep gpuCacheY/gpuCacheUV allocated. We must cudaFree
|
||||||
|
// them NOW while the CUDA context is still alive. After close()
|
||||||
|
// destroys the context, cudaFree would crash.
|
||||||
|
int forceReleased = ANSGpuFrameRegistry::instance().forceReleaseByOwner(this);
|
||||||
|
if (forceReleased > 0) {
|
||||||
|
_logger.LogWarn("ANSRTSPClient::Destroy",
|
||||||
|
std::format("force-released {} GPU frame(s) with unreleased clones", forceReleased),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
// Drain and cudaFree the GPU buffers while CUDA context is alive
|
||||||
|
// Sync all GPU streams before freeing to avoid illegal access
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||||
|
if (!gpuPending.empty()) {
|
||||||
|
RTSP_DBG("[Destroy] cudaFree %zu GPU ptrs before close()", gpuPending.size());
|
||||||
|
int prevDev = -1;
|
||||||
|
cudaGetDevice(&prevDev);
|
||||||
|
for (auto& entry : gpuPending) {
|
||||||
|
if (entry.ptr) {
|
||||||
|
if (entry.deviceIdx >= 0) cudaSetDevice(entry.deviceIdx);
|
||||||
|
cudaFree(entry.ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (prevDev >= 0) cudaSetDevice(prevDev);
|
||||||
|
}
|
||||||
|
// Also drain any pending AVFrames
|
||||||
|
auto avPending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||||
|
for (void* p : avPending) {
|
||||||
|
AVFrame* f = static_cast<AVFrame*>(p);
|
||||||
|
av_frame_free(&f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ANSGpuFrameRegistry::instance().invalidateOwner(this);
|
||||||
|
_inFlightFrames.store(0, std::memory_order_release);
|
||||||
|
|
||||||
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
// CUDA cleanup happens here, outside the mutex — now safe.
|
||||||
|
// All GPU frames owned by this client have been force-freed above.
|
||||||
|
if (clientToClose) {
|
||||||
|
clientToClose->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void VerifyGlobalANSRTSPLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSRTSPLicense(const std::string& licenseKey) {
|
||||||
@@ -146,10 +230,81 @@ namespace ANSCENTER {
|
|||||||
_playerClient->setCrop(crop);
|
_playerClient->setCrop(crop);
|
||||||
}
|
}
|
||||||
bool ANSRTSPClient::Reconnect() {
|
bool ANSRTSPClient::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// 1. Mark as not-playing under the mutex FIRST. This makes GetImage()
|
||||||
|
// return the cached _pLastFrame instead of calling into the player,
|
||||||
|
// preventing use-after-free when close() destroys CUDA resources.
|
||||||
|
{
|
||||||
|
std::unique_lock<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
|
||||||
|
// --- Inference guard: wait for in-flight frames to finish ---
|
||||||
|
// Same guard as Destroy(): close() will free NVDEC surfaces, so
|
||||||
|
// we must wait for any inference engines still reading NV12 data
|
||||||
|
// via zero-copy CUDA device pointers.
|
||||||
|
int inFlight = _inFlightFrames.load(std::memory_order_acquire);
|
||||||
|
if (inFlight > 0) {
|
||||||
|
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||||
|
std::format("waiting for {} in-flight inference frame(s)...", inFlight),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
bool done = _inFlightDone.wait_for(lock, std::chrono::seconds(5), [this] {
|
||||||
|
return _inFlightFrames.load(std::memory_order_acquire) <= 0;
|
||||||
|
});
|
||||||
|
if (!done) {
|
||||||
|
_logger.LogWarn("ANSRTSPClient::Reconnect",
|
||||||
|
std::format("timed out waiting for in-flight frames "
|
||||||
|
"(still {} in-flight) — force-releasing GPU frames",
|
||||||
|
_inFlightFrames.load()),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force-release GPU frames before close() — same as Destroy().
|
||||||
|
int forceReleased = ANSGpuFrameRegistry::instance().forceReleaseByOwner(this);
|
||||||
|
if (forceReleased > 0) {
|
||||||
|
_logger.LogWarn("ANSRTSPClient::Reconnect",
|
||||||
|
std::format("force-released {} GPU frame(s) with unreleased clones", forceReleased),
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
// Sync all GPU streams before freeing
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
auto gpuPending = ANSGpuFrameRegistry::instance().drain_gpu_pending();
|
||||||
|
if (!gpuPending.empty()) {
|
||||||
|
int prevDev = -1;
|
||||||
|
cudaGetDevice(&prevDev);
|
||||||
|
for (auto& entry : gpuPending) {
|
||||||
|
if (entry.ptr) {
|
||||||
|
if (entry.deviceIdx >= 0) cudaSetDevice(entry.deviceIdx);
|
||||||
|
cudaFree(entry.ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (prevDev >= 0) cudaSetDevice(prevDev);
|
||||||
|
}
|
||||||
|
auto avPending = ANSGpuFrameRegistry::instance().drain_pending();
|
||||||
|
for (void* p : avPending) {
|
||||||
|
AVFrame* f = static_cast<AVFrame*>(p);
|
||||||
|
av_frame_free(&f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ANSGpuFrameRegistry::instance().invalidateOwner(this);
|
||||||
|
_inFlightFrames.store(0, std::memory_order_release);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. close() does CUDA cleanup (cuArrayDestroy/cuMemFree) — run outside
|
||||||
|
// _mutex to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||||
|
// Safe now because GetImage()/GetNV12Frame() won't touch the player
|
||||||
|
// while _isPlaying == false, and all in-flight frames have been released.
|
||||||
|
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||||
|
"calling close() — NVDEC decoder will be destroyed", __FILE__, __LINE__);
|
||||||
|
RTSP_DBG("[Reconnect] BEFORE close() this=%p", (void*)this);
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
RTSP_DBG("[Reconnect] AFTER close() this=%p", (void*)this);
|
||||||
|
|
||||||
|
// 3. Re-setup and play under the mutex.
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||||
|
"calling Setup() + play()", __FILE__, __LINE__);
|
||||||
Setup();
|
Setup();
|
||||||
_isPlaying = _playerClient->play();
|
_isPlaying = _playerClient->play();
|
||||||
|
RTSP_DBG("[Reconnect] DONE isPlaying=%d this=%p", (int)_isPlaying, (void*)this);
|
||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
}
|
}
|
||||||
void ANSRTSPClient::EnableAudio(bool status) {
|
void ANSRTSPClient::EnableAudio(bool status) {
|
||||||
@@ -169,11 +324,23 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ANSRTSPClient::Stop() {
|
bool ANSRTSPClient::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// Grab the player pointer and clear _isPlaying under the lock,
|
||||||
if (_isPlaying) {
|
// then call stop() OUTSIDE the mutex. stop() internally calls
|
||||||
_playerClient->stop();
|
// StopVideoDecoder -> decoder->flush() which does CUDA calls
|
||||||
_isPlaying = false;
|
// that can block on the nvcuda64 SRW lock. Holding _mutex
|
||||||
}
|
// during that time blocks all other operations on this client
|
||||||
|
// and contributes to the convoy when many clients stop at once.
|
||||||
|
CRtspPlayer* player = nullptr;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
if (_isPlaying) {
|
||||||
|
_isPlaying = false;
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (player) {
|
||||||
|
player->stop();
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool ANSRTSPClient::Pause() {
|
bool ANSRTSPClient::Pause() {
|
||||||
@@ -759,10 +926,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
// Returns the player's NV12 frame, or nullptr while the client is not playing.
// The returned frame is a clone; the caller must av_frame_free it.
AVFrame* ANSRTSPClient::GetNV12Frame() {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    // Not playing means the player may be mid-reconnect with its CUDA
    // resources already freed, so it must not be touched.
    return _isPlaying ? _playerClient->getNV12Frame() : nullptr;
}
|
||||||
// Returns the player's CUDA hardware frame, or nullptr while the client is
// not playing.
AVFrame* ANSRTSPClient::GetCudaHWFrame() {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    // Not playing means the player may be mid-reconnect with its CUDA
    // resources already freed, so it must not be touched.
    return _isPlaying ? _playerClient->getCudaHWFrame() : nullptr;
}
|
||||||
bool ANSRTSPClient::IsCudaHWAccel() {
|
bool ANSRTSPClient::IsCudaHWAccel() {
|
||||||
@@ -810,6 +979,11 @@ extern "C" __declspec(dllexport) int CreateANSRTSPHandle(ANSCENTER::ANSRTSPClien
|
|||||||
if (_username.empty() && _password.empty()) result = ptr->Init(licenseKey, url);
|
if (_username.empty() && _password.empty()) result = ptr->Init(licenseKey, url);
|
||||||
else result = ptr->Init(licenseKey, username, password, url);
|
else result = ptr->Init(licenseKey, username, password, url);
|
||||||
if (result) {
|
if (result) {
|
||||||
|
// Default to CUDA/NVDEC HW decoding (mode 7) for NV12 zero-copy
|
||||||
|
// fast path. LabVIEW may not call SetRTSPHWDecoding after
|
||||||
|
// destroy+recreate cycles, so this ensures the new handle always
|
||||||
|
// uses the GPU decode path instead of falling back to D3D11VA/CPU.
|
||||||
|
ptr->SetHWDecoding(7); // HW_DECODING_CUDA
|
||||||
*Handle = ptr.release();
|
*Handle = ptr.release();
|
||||||
extern void anscv_unregister_handle(void*);
|
extern void anscv_unregister_handle(void*);
|
||||||
extern void anscv_register_handle(void*, void(*)(void*));
|
extern void anscv_register_handle(void*, void(*)(void*));
|
||||||
@@ -830,9 +1004,37 @@ extern "C" __declspec(dllexport) int ReleaseANSRTSPHandle(ANSCENTER::ANSRTSPClie
|
|||||||
try {
|
try {
|
||||||
extern void anscv_unregister_handle(void*);
|
extern void anscv_unregister_handle(void*);
|
||||||
anscv_unregister_handle(*Handle);
|
anscv_unregister_handle(*Handle);
|
||||||
// unique_ptr destructor calls ~ANSRTSPClient which calls Destroy() — no need to call Destroy() separately
|
|
||||||
std::unique_ptr<ANSCENTER::ANSRTSPClient> ptr(*Handle);
|
// Grab the raw pointer and NULL the caller's handle immediately.
|
||||||
|
// This prevents the caller (LabVIEW) from issuing new calls.
|
||||||
|
ANSCENTER::ANSRTSPClient* raw = *Handle;
|
||||||
*Handle = nullptr;
|
*Handle = nullptr;
|
||||||
|
|
||||||
|
// Mark as not-playing under _mutex ONLY. This makes
|
||||||
|
// GetImage()/GetNV12Frame()/GetCudaHWFrame() return empty/null
|
||||||
|
// on any subsequent call, and prevents NEW NV12 GPU surface
|
||||||
|
// pointers from being handed out.
|
||||||
|
//
|
||||||
|
// Do NOT call Destroy()/close() here — close() frees the
|
||||||
|
// NVDEC GPU surfaces (cuArrayDestroy/cuMemFree) which may
|
||||||
|
// still be in use by a CUDA inference kernel that received
|
||||||
|
// the NV12 pointer from a GetRTSPCVImage call that already
|
||||||
|
// completed before this Release was called.
|
||||||
|
{
|
||||||
|
// Use the client's _mutex to safely set _isPlaying = false.
|
||||||
|
// This is the same lock GetImage/GetNV12Frame acquire.
|
||||||
|
raw->Stop(); // sets _isPlaying = false, stops playback
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defer the full cleanup (Destroy + delete) to a background thread
|
||||||
|
// so LabVIEW's UI thread is not blocked. Destroy() now waits
|
||||||
|
// precisely for in-flight inference to finish (via _inFlightFrames
|
||||||
|
// counter + condition variable) instead of the old 500ms sleep hack.
|
||||||
|
std::thread([raw]() {
|
||||||
|
try { raw->Destroy(); } catch (...) {}
|
||||||
|
try { delete raw; } catch (...) {}
|
||||||
|
}).detach();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
if (Handle) *Handle = nullptr;
|
if (Handle) *Handle = nullptr;
|
||||||
@@ -882,19 +1084,56 @@ extern "C" __declspec(dllexport) int GetRTSPCVImage(
|
|||||||
|
|
||||||
// Attach NV12 frame for GPU fast-path inference (side-table registry)
|
// Attach NV12 frame for GPU fast-path inference (side-table registry)
|
||||||
// attach() takes ownership — do NOT av_frame_free here
|
// attach() takes ownership — do NOT av_frame_free here
|
||||||
|
//
|
||||||
|
// CRITICAL: TryIncrementInFlight() MUST be called BEFORE GetCudaHWFrame().
|
||||||
|
// It atomically checks _isPlaying and increments _inFlightFrames under
|
||||||
|
// the same mutex, so Reconnect() cannot call close() while we're doing
|
||||||
|
// the D2D copy from NVDEC surfaces inside gpu_frame_attach_cuda().
|
||||||
int gpuIdx = (*Handle)->GetHWDecodingGpuIndex();
|
int gpuIdx = (*Handle)->GetHWDecodingGpuIndex();
|
||||||
AVFrame* cudaHW = (*Handle)->GetCudaHWFrame();
|
bool inFlightGuardHeld = (*Handle)->TryIncrementInFlight();
|
||||||
if (cudaHW) {
|
RTSP_DBG("[GetRTSPCVImage] mat=%p gpuIdx=%d inFlightGuard=%d",
|
||||||
// CUDA zero-copy: frame data[0]/data[1] are CUDA device pointers.
|
(void*)*image, gpuIdx, (int)inFlightGuardHeld);
|
||||||
// Also attach CPU NV12 as fallback for cross-GPU inference
|
|
||||||
// (when decode GPU != inference GPU, CUDA ptrs aren't accessible).
|
if (inFlightGuardHeld) {
|
||||||
AVFrame* cpuNV12 = (*Handle)->GetNV12Frame();
|
AVFrame* cudaHW = (*Handle)->GetCudaHWFrame();
|
||||||
gpu_frame_attach_cuda(*image, cudaHW, gpuIdx, timeStamp, cpuNV12);
|
if (cudaHW) {
|
||||||
} else {
|
RTSP_DBG("[GetRTSPCVImage] cudaHW: %dx%d data[0]=%p data[1]=%p",
|
||||||
AVFrame* nv12 = (*Handle)->GetNV12Frame();
|
cudaHW->width, cudaHW->height,
|
||||||
if (nv12) {
|
(void*)cudaHW->data[0], (void*)cudaHW->data[1]);
|
||||||
gpu_frame_attach(*image, nv12, gpuIdx, timeStamp);
|
AVFrame* cpuNV12 = (*Handle)->GetNV12Frame();
|
||||||
|
gpu_frame_attach_cuda(*image, cudaHW, gpuIdx, timeStamp, cpuNV12);
|
||||||
|
} else {
|
||||||
|
// HW decode not active — try CPU NV12
|
||||||
|
AVFrame* nv12 = (*Handle)->GetNV12Frame();
|
||||||
|
if (nv12) {
|
||||||
|
gpu_frame_attach(*image, nv12, gpuIdx, timeStamp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wire up the registry callback to release the in-flight guard.
|
||||||
|
// TryIncrementInFlight already incremented; DecrementInFlight fires
|
||||||
|
// when the last clone of this frame is released after inference.
|
||||||
|
auto* gpuData = ANSGpuFrameRegistry::instance().lookup(*image);
|
||||||
|
RTSP_DBG("[GetRTSPCVImage] after attach: gpuData=%p yPlane=%p isCuda=%d gpuCacheY=%p",
|
||||||
|
(void*)gpuData,
|
||||||
|
gpuData ? (void*)gpuData->yPlane : nullptr,
|
||||||
|
gpuData ? (int)gpuData->isCudaDevicePtr : -1,
|
||||||
|
gpuData ? gpuData->gpuCacheY : nullptr);
|
||||||
|
if (gpuData) {
|
||||||
|
gpuData->ownerClient = *Handle;
|
||||||
|
gpuData->onReleaseFn = [](void* client) {
|
||||||
|
static_cast<ANSCENTER::ANSRTSPClient*>(client)->DecrementInFlight();
|
||||||
|
};
|
||||||
|
// NOTE: Do NOT call IncrementInFlight() again here —
|
||||||
|
// TryIncrementInFlight() already did it above.
|
||||||
|
} else {
|
||||||
|
// No gpuData registered (attach failed?) — release the guard
|
||||||
|
(*Handle)->DecrementInFlight();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Player is stopping/reconnecting — skip CUDA path entirely.
|
||||||
|
// GetImage() already returned a cached BGR frame, which is safe.
|
||||||
|
RTSP_DBG("[GetRTSPCVImage] SKIP CUDA — player not playing (reconnecting?)");
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1; // Success
|
return 1; // Success
|
||||||
|
|||||||
@@ -16,6 +16,8 @@
|
|||||||
#include <opencv2/imgproc.hpp>
|
#include <opencv2/imgproc.hpp>
|
||||||
#include <opencv2/highgui.hpp>
|
#include <opencv2/highgui.hpp>
|
||||||
#include <opencv2/opencv.hpp>
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <atomic>
|
||||||
|
#include <condition_variable>
|
||||||
|
|
||||||
namespace ANSCENTER
|
namespace ANSCENTER
|
||||||
{
|
{
|
||||||
@@ -37,7 +39,36 @@ namespace ANSCENTER
|
|||||||
int64_t _pts;
|
int64_t _pts;
|
||||||
bool _isPlaying;
|
bool _isPlaying;
|
||||||
std::recursive_mutex _mutex;
|
std::recursive_mutex _mutex;
|
||||||
|
|
||||||
|
// --- Per-client inference guard ---
|
||||||
|
// Tracks how many GPU frames from this client are currently in-flight
|
||||||
|
// (grabbed by GetRTSPCVImage but not yet released after inference).
|
||||||
|
// Destroy() waits for this to reach 0 before freeing NVDEC surfaces,
|
||||||
|
// preventing the use-after-free crash when LabVIEW stops a camera
|
||||||
|
// while AI inference is still reading CUDA device pointers.
|
||||||
|
std::atomic<int> _inFlightFrames{0};
|
||||||
|
std::condition_variable_any _inFlightDone;
|
||||||
public:
|
public:
|
||||||
|
// Unconditionally adds one to the in-flight GPU frame counter.
// Unlike TryIncrementInFlight(), this does not check _isPlaying or take _mutex.
void IncrementInFlight() {
    _inFlightFrames.fetch_add(1, std::memory_order_acq_rel);
}
|
||||||
|
void DecrementInFlight() {
|
||||||
|
if (_inFlightFrames.fetch_sub(1, std::memory_order_acq_rel) <= 1) {
|
||||||
|
_inFlightDone.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Atomically check _isPlaying AND increment _inFlightFrames under the
|
||||||
|
// same mutex. Returns true if the caller may proceed to access CUDA
|
||||||
|
// resources (GetCudaHWFrame + D2D copy). Returns false if the player
|
||||||
|
// is stopping/reconnecting — caller must NOT touch CUDA resources.
|
||||||
|
//
|
||||||
|
// This closes the race window where Reconnect() sets _isPlaying=false
|
||||||
|
// and calls close() while GetRTSPCVImage is between GetCudaHWFrame()
|
||||||
|
// and the D2D copy in gpu_frame_attach_cuda().
|
||||||
|
bool TryIncrementInFlight() {
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
if (!_isPlaying) return false;
|
||||||
|
_inFlightFrames.fetch_add(1, std::memory_order_acq_rel);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
ANSRTSPClient();
|
ANSRTSPClient();
|
||||||
~ANSRTSPClient() noexcept;
|
~ANSRTSPClient() noexcept;
|
||||||
[[nodiscard]] bool Init(std::string licenseKey, std::string url);
|
[[nodiscard]] bool Init(std::string licenseKey, std::string url);
|
||||||
|
|||||||
@@ -48,13 +48,19 @@ namespace ANSCENTER {
|
|||||||
Destroy();
|
Destroy();
|
||||||
}
|
}
|
||||||
void ANSSRTClient::Destroy() {
|
void ANSSRTClient::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient) clientToClose;
|
||||||
if (_playerClient) {
|
{
|
||||||
if (_isPlaying) {
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_playerClient->stop();
|
if (_playerClient) {
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_playerClient->stop();
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_playerClient->close();
|
clientToClose = std::move(_playerClient);
|
||||||
|
}
|
||||||
|
if (clientToClose) {
|
||||||
|
clientToClose->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void VerifyGlobalANSSRTLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSSRTLicense(const std::string& licenseKey) {
|
||||||
@@ -124,8 +130,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSSRTClient::Reconnect() {
|
bool ANSSRTClient::Reconnect() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false;
|
||||||
|
}
|
||||||
_playerClient->close();
|
_playerClient->close();
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
Setup();
|
Setup();
|
||||||
_isPlaying = _playerClient->play();
|
_isPlaying = _playerClient->play();
|
||||||
return _isPlaying;
|
return _isPlaying;
|
||||||
@@ -155,10 +165,16 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ANSSRTClient::Stop() {
|
bool ANSSRTClient::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_playerClient.get()) player = nullptr;
|
||||||
if (_isPlaying) {
|
{
|
||||||
_playerClient->stop();
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_isPlaying = false;
|
if (_isPlaying) {
|
||||||
|
_isPlaying = false;
|
||||||
|
player = _playerClient.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (player) {
|
||||||
|
player->stop();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,33 +40,34 @@ namespace ANSCENTER {
|
|||||||
catch (...) {}
|
catch (...) {}
|
||||||
}
|
}
|
||||||
void ANSVIDEOPLAYER::Destroy() {
|
void ANSVIDEOPLAYER::Destroy() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// Move HW player out of lock scope — close() does CUDA cleanup
|
||||||
try {
|
// (cuArrayDestroy/cuMemFree) which must not run under _mutex
|
||||||
// --- HW decode cleanup ---
|
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||||
if (_hwPlayer) {
|
decltype(_hwPlayer) hwPlayerToClose;
|
||||||
try {
|
{
|
||||||
_hwPlayer->stop();
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
_hwPlayer->close();
|
try {
|
||||||
} catch (...) {}
|
if (_hwPlayer) {
|
||||||
_hwPlayer.reset(); // releases CFilePlayer + HWDecoderPool slot
|
try { _hwPlayer->stop(); } catch (...) {}
|
||||||
}
|
}
|
||||||
_hwDecodeActive = false;
|
hwPlayerToClose = std::move(_hwPlayer);
|
||||||
_hwGpuIndex = -1;
|
_hwDecodeActive = false;
|
||||||
_hwCudaAccel = false;
|
_hwGpuIndex = -1;
|
||||||
_hwEOF = false;
|
_hwCudaAccel = false;
|
||||||
_hwFrameCount = 0;
|
_hwEOF = false;
|
||||||
|
_hwFrameCount = 0;
|
||||||
|
|
||||||
// --- cv::VideoCapture cleanup ---
|
// --- cv::VideoCapture cleanup ---
|
||||||
_previousImage.release();
|
_previousImage.release();
|
||||||
_inferenceImage.release();
|
_inferenceImage.release();
|
||||||
_inferenceCloneCurr.release();
|
_inferenceCloneCurr.release();
|
||||||
_inferenceClonePrev.release();
|
_inferenceClonePrev.release();
|
||||||
_lastJpegImage = "";
|
_lastJpegImage = "";
|
||||||
_isPlaying = false;
|
_isPlaying = false;
|
||||||
_resWidth = 0;
|
_resWidth = 0;
|
||||||
_resHeight = 0;
|
_resHeight = 0;
|
||||||
_currentFrame = 0;
|
_currentFrame = 0;
|
||||||
_previousPTS = 0;
|
_previousPTS = 0;
|
||||||
if (cap.isOpened()) {
|
if (cap.isOpened()) {
|
||||||
cap.release();
|
cap.release();
|
||||||
}
|
}
|
||||||
@@ -77,6 +78,13 @@ namespace ANSCENTER {
|
|||||||
catch (...) {
|
catch (...) {
|
||||||
_logger.LogError("ANSVIDEOPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
_logger.LogError("ANSVIDEOPLAYER::Destroy.", "Unknown exception", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
|
} // end lock scope
|
||||||
|
|
||||||
|
// CUDA cleanup happens here, outside the mutex
|
||||||
|
if (hwPlayerToClose) {
|
||||||
|
try { hwPlayerToClose->close(); } catch (...) {}
|
||||||
|
hwPlayerToClose.reset();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyGlobalANSVPLicense(const std::string& licenseKey) {
|
static void VerifyGlobalANSVPLicense(const std::string& licenseKey) {
|
||||||
@@ -187,15 +195,25 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ANSVIDEOPLAYER::Reconnect() {
|
bool ANSVIDEOPLAYER::Reconnect() {
|
||||||
|
// HW decoder close() does CUDA cleanup — run outside _mutex
|
||||||
|
// to avoid deadlocking with nvcuda64 SRW lock held by inference.
|
||||||
|
decltype(_hwPlayer) hwPlayerToClose;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
_isPlaying = false; // GetImage() returns cached frame while we reconnect
|
||||||
|
if (_hwPlayer) {
|
||||||
|
try { _hwPlayer->stop(); } catch (...) {}
|
||||||
|
hwPlayerToClose = std::move(_hwPlayer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (hwPlayerToClose) {
|
||||||
|
try { hwPlayerToClose->close(); } catch (...) {}
|
||||||
|
hwPlayerToClose.reset();
|
||||||
|
}
|
||||||
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
try {
|
try {
|
||||||
_currentFrame = 0;
|
_currentFrame = 0;
|
||||||
|
|
||||||
// --- HW decode: destroy and re-setup ---
|
|
||||||
if (_hwPlayer) {
|
|
||||||
try { _hwPlayer->stop(); _hwPlayer->close(); } catch (...) {}
|
|
||||||
_hwPlayer.reset();
|
|
||||||
}
|
|
||||||
_hwDecodeActive = false;
|
_hwDecodeActive = false;
|
||||||
_hwGpuIndex = -1;
|
_hwGpuIndex = -1;
|
||||||
_hwCudaAccel = false;
|
_hwCudaAccel = false;
|
||||||
@@ -266,41 +284,48 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSVIDEOPLAYER::Stop() {
|
bool ANSVIDEOPLAYER::Stop() {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
decltype(_hwPlayer.get()) hwPlayer = nullptr;
|
||||||
try {
|
{
|
||||||
// --- HW decode path ---
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
if (_hwDecodeActive && _hwPlayer) {
|
try {
|
||||||
_hwPlayer->stop();
|
// --- HW decode path ---
|
||||||
_isPlaying = false;
|
if (_hwDecodeActive && _hwPlayer) {
|
||||||
return true;
|
_isPlaying = false;
|
||||||
}
|
hwPlayer = _hwPlayer.get();
|
||||||
|
// stop() called outside the lock below; skip cap path
|
||||||
// --- cv::VideoCapture fallback ---
|
|
||||||
if (cap.isOpened()) {
|
|
||||||
try {
|
|
||||||
double frame_pos = cap.get(cv::CAP_PROP_POS_FRAMES);
|
|
||||||
if (frame_pos >= 0) {
|
|
||||||
_currentFrame = static_cast<int64_t>(frame_pos);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
_currentFrame = 0;
|
|
||||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", "Unable to retrieve current frame position", __FILE__, __LINE__);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch (const std::exception& e) {
|
else {
|
||||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
// --- cv::VideoCapture fallback ---
|
||||||
_currentFrame = 0;
|
if (cap.isOpened()) {
|
||||||
|
try {
|
||||||
|
double frame_pos = cap.get(cv::CAP_PROP_POS_FRAMES);
|
||||||
|
if (frame_pos >= 0) {
|
||||||
|
_currentFrame = static_cast<int64_t>(frame_pos);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
_currentFrame = 0;
|
||||||
|
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", "Unable to retrieve current frame position", __FILE__, __LINE__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const std::exception& e) {
|
||||||
|
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||||
|
_currentFrame = 0;
|
||||||
|
}
|
||||||
|
cap.release();
|
||||||
|
}
|
||||||
|
_isPlaying = false;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
cap.release();
|
|
||||||
}
|
}
|
||||||
_isPlaying = false;
|
catch (const std::exception& e) {
|
||||||
return true;
|
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (const std::exception& e) {
|
if (hwPlayer) {
|
||||||
this->_logger.LogError("ANSVIDEOPLAYER::Stop. Exception occurred:", e.what(), __FILE__, __LINE__);
|
hwPlayer->stop();
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
void ANSVIDEOPLAYER::SetBBox(cv::Rect bbox) {
|
void ANSVIDEOPLAYER::SetBBox(cv::Rect bbox) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
|
|||||||
@@ -378,7 +378,7 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> ANSALPR_CPU::RunInference(const cv::Mat& input, const std::string &cameraId) {
|
std::vector<Object> ANSALPR_CPU::RunInference(const cv::Mat& input, const std::string &cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components have their own fine-grained locks.
|
||||||
std::vector<Object> output;
|
std::vector<Object> output;
|
||||||
output.clear();
|
output.clear();
|
||||||
// Initial validation
|
// Initial validation
|
||||||
@@ -419,17 +419,18 @@ namespace ANSCENTER {
|
|||||||
#ifdef FNS_DEBUG // Corrected preprocessor directive
|
#ifdef FNS_DEBUG // Corrected preprocessor directive
|
||||||
cv::Mat draw = input.clone();
|
cv::Mat draw = input.clone();
|
||||||
#endif
|
#endif
|
||||||
_detectedArea = cv::Rect(0, 0, frame.cols, frame.rows);
|
// Use local variable instead of shared _detectedArea for thread safety
|
||||||
if ((_detectedArea.width > 50) && (_detectedArea.height > 50)) {
|
cv::Rect detectedArea(0, 0, frame.cols, frame.rows);
|
||||||
|
if ((detectedArea.width > 50) && (detectedArea.height > 50)) {
|
||||||
#ifdef FNS_DEBUG // Corrected preprocessor directive
|
#ifdef FNS_DEBUG // Corrected preprocessor directive
|
||||||
cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2); // RED for detectedArea
|
cv::rectangle(draw, detectedArea, cv::Scalar(0, 0, 255), 2); // RED for detectedArea
|
||||||
#endif
|
#endif
|
||||||
// Ensure _lprDetector is valid
|
// Ensure _lprDetector is valid
|
||||||
if (!_lprDetector) {
|
if (!_lprDetector) {
|
||||||
this->_logger.LogFatal("ANSALPR_CPU::Inference", "_lprDetector is null", __FILE__, __LINE__);
|
this->_logger.LogFatal("ANSALPR_CPU::Inference", "_lprDetector is null", __FILE__, __LINE__);
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
cv::Mat activeFrame = frame(_detectedArea).clone();
|
cv::Mat activeFrame = frame(detectedArea).clone();
|
||||||
|
|
||||||
//std::vector<Object> lprOutputRaw = _lpDetector->RunInference(activeFrame, cameraId);
|
//std::vector<Object> lprOutputRaw = _lpDetector->RunInference(activeFrame, cameraId);
|
||||||
//std::vector<Object> lprOutput = AdjustLicensePlateBoundingBoxes(lprOutputRaw, _detectedArea, frame.size(), 3.0);
|
//std::vector<Object> lprOutput = AdjustLicensePlateBoundingBoxes(lprOutputRaw, _detectedArea, frame.size(), 3.0);
|
||||||
@@ -471,8 +472,12 @@ namespace ANSCENTER {
|
|||||||
lprObject.cameraId = cameraId;
|
lprObject.cameraId = cameraId;
|
||||||
lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
|
lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
|
||||||
|
|
||||||
// OCR inference
|
// OCR inference (ppocr is not thread-safe, use fine-grained lock)
|
||||||
std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(alignedLPR);
|
std::vector<PaddleOCR::OCRPredictResult> res_ocr;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> ocrLock(_ocrMutex);
|
||||||
|
res_ocr = ppocr->ocr(alignedLPR);
|
||||||
|
}
|
||||||
std::string ocrText;
|
std::string ocrText;
|
||||||
|
|
||||||
if (!res_ocr.empty() && res_ocr.size() < 3) {
|
if (!res_ocr.empty() && res_ocr.size() < 3) {
|
||||||
@@ -515,13 +520,13 @@ namespace ANSCENTER {
|
|||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
// Convenience overload: runs plate recognition with the fixed camera id
// "CustomCam". Returns false for an empty image or one smaller than 5 pixels
// in either dimension; otherwise forwards to the three-argument overload.
bool ANSALPR_CPU::Inference(const cv::Mat& input, std::string& lprResult) {
    // No coarse _mutex here — this only delegates to
    // Inference(input, lprResult, cameraId).
    const bool unusable = input.empty() || (input.cols < 5) || (input.rows < 5);
    if (unusable) {
        return false;
    }
    return Inference(input, lprResult, "CustomCam");
}
|
||||||
bool ANSALPR_CPU::Inference(const cv::Mat& input, std::string& lprResult, const std::string & cameraId) {
|
bool ANSALPR_CPU::Inference(const cv::Mat& input, std::string& lprResult, const std::string & cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components have fine-grained locks.
|
||||||
std::vector<Object> output;
|
std::vector<Object> output;
|
||||||
output.clear();
|
output.clear();
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -587,10 +592,15 @@ namespace ANSCENTER {
|
|||||||
cv::Mat lprImage = frame(lprPos).clone();
|
cv::Mat lprImage = frame(lprPos).clone();
|
||||||
lprObject.cameraId = cameraId;
|
lprObject.cameraId = cameraId;
|
||||||
lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
|
lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
|
||||||
std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(lprImage);
|
// ppocr is not thread-safe, use fine-grained lock
|
||||||
|
std::vector<PaddleOCR::OCRPredictResult> res_ocr;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> ocrLock(_ocrMutex);
|
||||||
|
res_ocr = ppocr->ocr(lprImage);
|
||||||
|
}
|
||||||
int detectionSize = res_ocr.size();
|
int detectionSize = res_ocr.size();
|
||||||
if ((detectionSize > 0) && (detectionSize < 3)) {
|
if ((detectionSize > 0) && (detectionSize < 3)) {
|
||||||
for (int n = 0; n < res_ocr.size(); n++) { // number of detections
|
for (int n = 0; n < res_ocr.size(); n++) { // number of detections
|
||||||
ocrText.append(res_ocr[n].text);
|
ocrText.append(res_ocr[n].text);
|
||||||
}
|
}
|
||||||
std::string rawText = AnalyseLicensePlateText(ocrText);
|
std::string rawText = AnalyseLicensePlateText(ocrText);
|
||||||
@@ -613,7 +623,7 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSALPR_CPU::Inference(const cv::Mat& input, const std::vector<cv::Rect> & Bbox, std::string& lprResult) {
|
bool ANSALPR_CPU::Inference(const cv::Mat& input, const std::vector<cv::Rect> & Bbox, std::string& lprResult) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — delegates to Inference(input, Bbox, lprResult, cameraId)
|
||||||
if (input.empty()) return false;
|
if (input.empty()) return false;
|
||||||
if ((input.cols < 5) || (input.rows < 5)) return false;
|
if ((input.cols < 5) || (input.rows < 5)) return false;
|
||||||
return Inference(input, Bbox, lprResult, "CustomCam");
|
return Inference(input, Bbox, lprResult, "CustomCam");
|
||||||
@@ -622,7 +632,7 @@ namespace ANSCENTER {
|
|||||||
bool ANSALPR_CPU::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,
|
bool ANSALPR_CPU::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,
|
||||||
std::string& lprResult, const std::string& cameraId)
|
std::string& lprResult, const std::string& cameraId)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components have fine-grained locks.
|
||||||
|
|
||||||
// Early validation
|
// Early validation
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -668,16 +678,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Convert grayscale to BGR if necessary
|
// Convert grayscale to BGR if necessary (use local buffer for thread safety)
|
||||||
const cv::Mat* framePtr;
|
cv::Mat localFrame;
|
||||||
if (input.channels() == 1) {
|
if (input.channels() == 1) {
|
||||||
cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
|
cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
|
||||||
framePtr = &this->_frameBuffer;
|
|
||||||
}
|
}
|
||||||
else {
|
const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
|
||||||
framePtr = &input;
|
|
||||||
}
|
|
||||||
const cv::Mat& frame = *framePtr;
|
|
||||||
|
|
||||||
const int frameWidth = frame.cols;
|
const int frameWidth = frame.cols;
|
||||||
const int frameHeight = frame.rows;
|
const int frameHeight = frame.rows;
|
||||||
@@ -794,7 +800,12 @@ namespace ANSCENTER {
|
|||||||
cv::Mat lprImage = frame(plateRect);
|
cv::Mat lprImage = frame(plateRect);
|
||||||
cv::Mat alignedLPR = enhanceForOCR(lprImage);
|
cv::Mat alignedLPR = enhanceForOCR(lprImage);
|
||||||
|
|
||||||
std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(alignedLPR);
|
// ppocr is not thread-safe, use fine-grained lock
|
||||||
|
std::vector<PaddleOCR::OCRPredictResult> res_ocr;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> ocrLock(_ocrMutex);
|
||||||
|
res_ocr = ppocr->ocr(alignedLPR);
|
||||||
|
}
|
||||||
|
|
||||||
const size_t detectionSize = res_ocr.size();
|
const size_t detectionSize = res_ocr.size();
|
||||||
if (detectionSize == 0 || detectionSize >= 3) {
|
if (detectionSize == 0 || detectionSize >= 3) {
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
#include <list>
|
#include <list>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <mutex>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <include/paddleocr.h>
|
#include <include/paddleocr.h>
|
||||||
@@ -157,6 +158,7 @@ namespace ANSCENTER
|
|||||||
"43B1", "68L1", "70G1", "36M1", "81N1", "90K1", "17B1", "64E1", "99D1", "60B2", "74L1", "60C1", "68M1", "63B7", "34B1", "69M1", "24B1", "15M1", "83Y1", "48C1", "95H1", "79X1", "17B6", "36E1", "38K1", "25N1", "25U1", "61B1", "36C1", "36B3", "38F1", "99G1", "69N1", "97D1", "92T1", "92B1", "88B1", "97G1", "14U1", "63A1", "26N1", "19D1", "93C1", "73B1", "84B1", "81K1", "18L1", "64D1", "35M1", "61N1", "83P1", "15S1", "82B1", "92U1", "43D1", "22L1", "63B5", "64G1", "27N1", "14X1", "62C1", "81D1", "38G1", "19F1", "34K1", "49P1", "89H1", "14T1", "19M1", "78D1", "76A1", "66K1", "66C1", "71C1", "37K1", "19G1", "15F1", "85C1", "49B1", "21B1", "89F1", "23M1", "66L1", "90B5", "93M1", "14P1", "77N1", "36B8", "86B1", "12U1", "63B3", "21L1", "36G5", "65G1", "82E1", "61H1", "65H1", "84A1", "23F1", "95C1", "99K1", "49G1", "92D1", "36K3", "92N1", "82X1", "83M1", "11N1", "14K1", "19H1", "93H1", "60A1", "79A1", "20D1", "90D1", "81C1", "66P1", "36K1", "92V1", "18B1", "37P1", "22Y1", "23H1", "26D1", "66G1", "78F1", "49C1", "26H1", "38P1", "47T1", "74H1", "63P1", "47D1", "15D1", "23D1", "68E1", "20B1", "49F1", "43K1", "65K1", "27Z1", "92S1", "79H1", "21E1", "35Y1", "14S1", "75E1", "24Y1", "12T1", "27P1", "77B1", "88H1", "60B3", "23P1", "61F1", "99H1", "23K1", "59A3", "26C1", "81B1", "74E1", "66B1", "22S1", "92P1", "93B1", "69B1", "81P1", "12H1", "62K1", "35A1", "77C1", "27V1", "68N1", "12D1", "64K1", "41A1", "12Z1", "76C1", "38B1", "78G1", "74K1", "69H1", "94A1", "61K1", "86B7", "82G1", "14N1", "82M1", "76E1", "18E1", "61C1", "15N1", "90A1", "77F1", "34D1", "47B1", "62S1", "43E1", "81M1", "92X1", "75B1", "34F1", "70H1", "62B1", "26B1", "60B4", "61A1", "12B1", "90T1", "92E1", "34C1", "47G1", "97B1", "25S1", "70E1", "93Y1", "47S1", "37F1", "28N1", "11K1", "38E1", "78M1", "74C1", "12S1", "75S1", "37A1", "28D1", "65L1", "22B1", "99B1", "74G1", "79K1", "76K1", "76H1", "23B1", "15R1", "36B1", "74D1", "62L1", "37E1", "78E1", "89K1", "26M1", "25F1", "48H1", "79D1", "43H1", "76F1", "36L1", "43L1", 
"21K1", "88L1", "27S1", "92K1", "77D1", "19N1", "66H1", "36H5", "62N1", "18G1", "75D1", "37L1", "68K1", "28C1", "26E1", "35N1", "85H1", "62D1", "27U1", "19E1", "99E1", "14Y1", "49L1", "66M1", "73F1", "70K1", "36F5", "97H1", "93E1", "68P1", "43F1", "48G1", "75K1", "62U1", "86B9", "65F1", "27L1", "70L1", "63B8", "78L1", "11Z1", "68C1", "18D1", "15L1", "99C1", "49E1", "84E1", "69E1", "38A1", "48D1", "68S1", "81E1", "84K1", "63B6", "24T1", "95A1", "86B4", "34M1", "84L1", "24V1", "14M1", "36H1", "15B1", "69F1", "47E1", "38H1", "88D1", "28E1", "60C2", "63B9", "75Y1", "21D1", "35H1", "68F1", "86B5", "15H1", "36B5", "83X1", "17B7", "12V1", "86B8", "95E1", "63B2", "74F1", "86C1", "48K1", "89M1", "85D1", "71C4", "34E1", "97C1", "88E1", "81F1", "60B5", "84M1", "92H1", "28L1", "34H1", "38X1", "82L1", "61E1", "82F1", "62P1", "93F1", "65B1", "93L1", "95B1", "15P1", "77G1", "28M1", "35B1", "68G1", "36C2", "68D1", "69K1", "14L1", "36M3", "24X1", "24Z1", "86A1", "88C1", "15E1", "77E1", "83E1", "47L1", "25T1", "89C1", "71C3", "49D1", "36L6", "48F1", "36B6", "34P1", "84D1", "15C1", "38M1", "85F1", "77K1", "86B3", "74B1", "78H1", "89G1", "64A2", "15K1", "85B1", "49K1", "21H1", "73C1", "47U1", "65E1", "18C1", "69D1", "63B1", "95G1", "19L1", "20G1", "76D1", "29A1", "68T1", "75L1", "12L1", "89L1", "37C1", "27B1", "19C1", "11H1", "81X1", "70B1", "11V1", "43G1", "22A1", "83C1", "75C1", "79C1", "22F1", "92F1", "81G1", "81T1", "28H1", "66N1", "71B1", "18H1", "76P1", "26F1", "81U1", "34N1", "64F1", "76N1", "24S1", "26P1", "63B4", "35T1", "36N1", "47F1", "81L1", "61G1", "77M1", "34G1", "26G1", "97F1", "62H1", "28F1", "62T1", "93G1", "73D1", "65A1", "47P1", "74P1", "82N1", "20E1", "36D1", "60B1", "49M1", "37H1", "37M1", "38D1", "84F1", "88F1", "36B2", "65C1", "92M1", "86B6", "75H1", "38L1", "20C1", "97E1", "85E1", "38N1", "26K1", "89B1", "99F1", "28B1", "34L1", "86B2", "66F1", "77L1", "27Y1", "68H1", "37D1", "92L1", "82K1", "99A1", "69L1", "76M1", "90B4", "48B1", "95D1", "20H1", "64H1", "79Z1", 
"92G1", "23G1", "21G1", "37G1", "35K1", "81H1", "83Z1", "76T1", "36F1", "36B4", "14B9", "47K1", "20K1", "62M1", "84H1", "62F1", "74A1", "18A1", "73H1", "37N1", "79N1", "61D1", "11P1", "15G1", "47N1", "19K1", "71C2", "81S1", "11M1", "60B7", "60B8", "62G1", "71A1", "24P1", "69A1", "38C1", "49N1", "21C1", "84G1", "37B1", "72A1", "88K1", "88G1", "83V1", "78C1", "73K1", "78K1", "73E189D1", "67A1", "27X1", "62A1", "18K1", "70F1", "36K5", "19B1", "49H1", "66S1", "12P1"};
|
"43B1", "68L1", "70G1", "36M1", "81N1", "90K1", "17B1", "64E1", "99D1", "60B2", "74L1", "60C1", "68M1", "63B7", "34B1", "69M1", "24B1", "15M1", "83Y1", "48C1", "95H1", "79X1", "17B6", "36E1", "38K1", "25N1", "25U1", "61B1", "36C1", "36B3", "38F1", "99G1", "69N1", "97D1", "92T1", "92B1", "88B1", "97G1", "14U1", "63A1", "26N1", "19D1", "93C1", "73B1", "84B1", "81K1", "18L1", "64D1", "35M1", "61N1", "83P1", "15S1", "82B1", "92U1", "43D1", "22L1", "63B5", "64G1", "27N1", "14X1", "62C1", "81D1", "38G1", "19F1", "34K1", "49P1", "89H1", "14T1", "19M1", "78D1", "76A1", "66K1", "66C1", "71C1", "37K1", "19G1", "15F1", "85C1", "49B1", "21B1", "89F1", "23M1", "66L1", "90B5", "93M1", "14P1", "77N1", "36B8", "86B1", "12U1", "63B3", "21L1", "36G5", "65G1", "82E1", "61H1", "65H1", "84A1", "23F1", "95C1", "99K1", "49G1", "92D1", "36K3", "92N1", "82X1", "83M1", "11N1", "14K1", "19H1", "93H1", "60A1", "79A1", "20D1", "90D1", "81C1", "66P1", "36K1", "92V1", "18B1", "37P1", "22Y1", "23H1", "26D1", "66G1", "78F1", "49C1", "26H1", "38P1", "47T1", "74H1", "63P1", "47D1", "15D1", "23D1", "68E1", "20B1", "49F1", "43K1", "65K1", "27Z1", "92S1", "79H1", "21E1", "35Y1", "14S1", "75E1", "24Y1", "12T1", "27P1", "77B1", "88H1", "60B3", "23P1", "61F1", "99H1", "23K1", "59A3", "26C1", "81B1", "74E1", "66B1", "22S1", "92P1", "93B1", "69B1", "81P1", "12H1", "62K1", "35A1", "77C1", "27V1", "68N1", "12D1", "64K1", "41A1", "12Z1", "76C1", "38B1", "78G1", "74K1", "69H1", "94A1", "61K1", "86B7", "82G1", "14N1", "82M1", "76E1", "18E1", "61C1", "15N1", "90A1", "77F1", "34D1", "47B1", "62S1", "43E1", "81M1", "92X1", "75B1", "34F1", "70H1", "62B1", "26B1", "60B4", "61A1", "12B1", "90T1", "92E1", "34C1", "47G1", "97B1", "25S1", "70E1", "93Y1", "47S1", "37F1", "28N1", "11K1", "38E1", "78M1", "74C1", "12S1", "75S1", "37A1", "28D1", "65L1", "22B1", "99B1", "74G1", "79K1", "76K1", "76H1", "23B1", "15R1", "36B1", "74D1", "62L1", "37E1", "78E1", "89K1", "26M1", "25F1", "48H1", "79D1", "43H1", "76F1", "36L1", "43L1", 
"21K1", "88L1", "27S1", "92K1", "77D1", "19N1", "66H1", "36H5", "62N1", "18G1", "75D1", "37L1", "68K1", "28C1", "26E1", "35N1", "85H1", "62D1", "27U1", "19E1", "99E1", "14Y1", "49L1", "66M1", "73F1", "70K1", "36F5", "97H1", "93E1", "68P1", "43F1", "48G1", "75K1", "62U1", "86B9", "65F1", "27L1", "70L1", "63B8", "78L1", "11Z1", "68C1", "18D1", "15L1", "99C1", "49E1", "84E1", "69E1", "38A1", "48D1", "68S1", "81E1", "84K1", "63B6", "24T1", "95A1", "86B4", "34M1", "84L1", "24V1", "14M1", "36H1", "15B1", "69F1", "47E1", "38H1", "88D1", "28E1", "60C2", "63B9", "75Y1", "21D1", "35H1", "68F1", "86B5", "15H1", "36B5", "83X1", "17B7", "12V1", "86B8", "95E1", "63B2", "74F1", "86C1", "48K1", "89M1", "85D1", "71C4", "34E1", "97C1", "88E1", "81F1", "60B5", "84M1", "92H1", "28L1", "34H1", "38X1", "82L1", "61E1", "82F1", "62P1", "93F1", "65B1", "93L1", "95B1", "15P1", "77G1", "28M1", "35B1", "68G1", "36C2", "68D1", "69K1", "14L1", "36M3", "24X1", "24Z1", "86A1", "88C1", "15E1", "77E1", "83E1", "47L1", "25T1", "89C1", "71C3", "49D1", "36L6", "48F1", "36B6", "34P1", "84D1", "15C1", "38M1", "85F1", "77K1", "86B3", "74B1", "78H1", "89G1", "64A2", "15K1", "85B1", "49K1", "21H1", "73C1", "47U1", "65E1", "18C1", "69D1", "63B1", "95G1", "19L1", "20G1", "76D1", "29A1", "68T1", "75L1", "12L1", "89L1", "37C1", "27B1", "19C1", "11H1", "81X1", "70B1", "11V1", "43G1", "22A1", "83C1", "75C1", "79C1", "22F1", "92F1", "81G1", "81T1", "28H1", "66N1", "71B1", "18H1", "76P1", "26F1", "81U1", "34N1", "64F1", "76N1", "24S1", "26P1", "63B4", "35T1", "36N1", "47F1", "81L1", "61G1", "77M1", "34G1", "26G1", "97F1", "62H1", "28F1", "62T1", "93G1", "73D1", "65A1", "47P1", "74P1", "82N1", "20E1", "36D1", "60B1", "49M1", "37H1", "37M1", "38D1", "84F1", "88F1", "36B2", "65C1", "92M1", "86B6", "75H1", "38L1", "20C1", "97E1", "85E1", "38N1", "26K1", "89B1", "99F1", "28B1", "34L1", "86B2", "66F1", "77L1", "27Y1", "68H1", "37D1", "92L1", "82K1", "99A1", "69L1", "76M1", "90B4", "48B1", "95D1", "20H1", "64H1", "79Z1", 
"92G1", "23G1", "21G1", "37G1", "35K1", "81H1", "83Z1", "76T1", "36F1", "36B4", "14B9", "47K1", "20K1", "62M1", "84H1", "62F1", "74A1", "18A1", "73H1", "37N1", "79N1", "61D1", "11P1", "15G1", "47N1", "19K1", "71C2", "81S1", "11M1", "60B7", "60B8", "62G1", "71A1", "24P1", "69A1", "38C1", "49N1", "21C1", "84G1", "37B1", "72A1", "88K1", "88G1", "83V1", "78C1", "73K1", "78K1", "73E189D1", "67A1", "27X1", "62A1", "18K1", "70F1", "36K5", "19B1", "49H1", "66S1", "12P1"};
|
||||||
ALPRChecker alprChecker;
|
ALPRChecker alprChecker;
|
||||||
std::vector<std::string> ValidVNCarList = { "94H", "49F", "93A", "20F", "81H", "95R", "38R", "29F", "81F", "28G", "19A", "85B", "2", "43H", "51L", "28C", "21A", "51D", "50F", "24H", "93R", "92H", "71G", "75H", "86G", "30L", "79A", "82B", "79H", "78C", "61E", "70A", "90C", "72G", "34B", "17E", "18E", "78A", "37F", "51E", "71A", "28F", "47E", "83D", "81B", "84C", "71H", "76G", "92E", "36A", "69R", "30M", "27R", "71D", "19B", "34E", "38K", "88G", "68G", "30E", "68E", "25F", "74D", "98K", "89H", "36R", "84D", "61F", "49G", "25H", "17F", "14R", "36H", "47G", "90A", "68A", "83C", "26B", "15B", "61C", "15K", "47H", "78E", "75D", "15C", "63E", "34C", "36F", "38G", "15E", "93F", "22G", "60B", "94D", "62R", "24D", "11R", "12A", "76A", "94C", "97R", "24E", "26A", "15F", "72A", "49H", "62D", "98C", "71B", "61A", "12C", "27A", "78R", "51M", "69E", "76D", "78F", "49R", "81A", "64F", "29D", "18A", "19F", "21E", "92A", "65G", "86E", "62G", "61K", "47A", "23R", "14F", "95D", "36B", "74R", "11H", "24C", "11G", "66D", "63A", "43R", "70F", "86B", "61G", "47M", "67C", "37D", "43G", "14H", "90F", "51G", "86A", "11E", "29K", "85C", "83F", "24B", "98R", "19E", "61B", "90D", "82G", "14K", "74G", "72D", "85A", "19C", "37G", "98E", "74F", "28H", "90E", "89D", "35R", "97H", "83H", "95A", "20C", "65E", "15R", "73C", "37A", "38E", "77G", "94B", "17A", "75R", "98F", "65R", "76R", "20B", "24G", "25B", "73G", "62F", "29G", "77C", "22H", "14D", "23F", "93C", "19R", "15D", "47R", "79D", "60G", "77A", "82C", "63G", "21H", "81E", "25D", "12D", "37R", "36K", "84F", "98G", "28B", "51N", "18F", "50R", "74C", "35C", "30G", "64A", "95F", "18C", "99G", "99B", "37C", "76H", "60K", "67R", "75A", "83R", "28E", "65F", "17D", "92G", "23C", "60R", "90R", "38A", "43D", "50H", "43C", "77H", "47B", "89F", "82F", "65H", "89E", "62C", "24R", "26G", "84E", "17C", "65B", "34A", "12B", "64R", "29H", "71C", "88D", "79F", "76C", "98A", "69H", "22B", "29A", "72R", "67H", "48C", "22D", "60C", "35H", "38H", "63P", "70D", 
"49D", "18H", "89A", "72E", "92D", "26H", "73R", "85G", "20E", "98H", "69C", "18B", "73B", "22E", "34G", "30K", "20D", "50A", "34D", "15H", "34H", "71E", "62E", "64C", "51R", "82D", "99E", "70R", "18D", "92F", "94R", "24A", "85H", "11C", "73E", "95E", "86C", "94F", "86R", "37K", "23B", "20H", "73D", "95H", "35A", "89B", "82H", "67F", "70H", "97F", "29E", "97A", "51K", "68D", "37B", "82E", "18R", "86H", "35B", "43E", "35F", "95B", "70E", "21D", "27F", "36E", "63D", "68C", "50E", "36G", "75F", "21G", "29B", "93B", "22A", "18G", "43F", "93G", "62A", "83B", "28D", "75C", "22C", "21R", "25E", "23G", "97C", "75E", "79E", "19H", "47K", "65C", "35E", "20R", "68B", "89R", "67A", "75G", "81R", "78B", "77D", "78G", "20K", "36D", "66C", "38F", "27G", "19D", "67B", "84G", "22F", "61D", "20G", "48A", "76F", "48H", "92B", "85R", "26C", "65A", "70B", "38D", "14C", "66A", "73A", "49C", "74E", "68R", "66B", "74A", "49E", "17B", "69D", "51C", "85F", "21F", "99C", "17G", "72H", "94E", "51F", "92R", "60H", "21B", "93D", "19G", "86F", "51A", "66R", "72B", "26D", "64E", "93H", "12H", "97E", "60E", "82A", "60A", "83E", "27D", "64B", "11B", "11D", "76B", "95G", "14A", "61R", "21C", "30F", "23H", "89C", "97G", "62B", "63R", "88B", "98B", "90B", "67G", "69F", "73H", "20A", "72C", "65D", "68H", "51H", "79G", "70C", "90G", "66G", "83A", "77F", "63B", "64G", "25A", "88E", "68F", "99D", "26E", "94A", "48F", "34R", "61H", "90H", "74B", "14G", "12F", "15A", "27E", "69A", "35D", "12E", "85E", "25C", "29M", "89G", "17R", "78D", "84R", "95C", "15G", "28R", "99A", "69G", "48D", "97D", "27C", "78H", "14E", "79R", "73F", "88A", "48E", "48B", "64H", "99R", "14B", "77R", "75B", "88F", "84B", "11A", "67E", "12R", "50M", "11F", "79C", "49A", "43A", "88R", "77E", "48G", "51B", "81D", "74H", "93E", "37H", "88C", "71F", "94G", "38C", "29C", "43B", "30H", "81G", "28A", "26R", "66H", "66E", "17H", "79B", "49B", "63C", "98D", "81C", "69B", "63H", "85D", "26F", "22R", "83G", "37E", "12G", "77B", "35G", "62H", 
"60D", "60F", "99H", "70G", "76E", "84A", "72F", "25R", "27B", "30A", "47F", "34F", "97B", "23E", "36C", "66F", "48R", "92C", "71R", "23A", "50G", "47C", "82R", "63F", "84H", "38B", "47D", "67D", "25G", "86D", "88H", "64D", "24F", "23D", "99F" };
|
std::vector<std::string> ValidVNCarList = { "94H", "49F", "93A", "20F", "81H", "95R", "38R", "29F", "81F", "28G", "19A", "85B", "2", "43H", "51L", "28C", "21A", "51D", "50F", "24H", "93R", "92H", "71G", "75H", "86G", "30L", "79A", "82B", "79H", "78C", "61E", "70A", "90C", "72G", "34B", "17E", "18E", "78A", "37F", "51E", "71A", "28F", "47E", "83D", "81B", "84C", "71H", "76G", "92E", "36A", "69R", "30M", "27R", "71D", "19B", "34E", "38K", "88G", "68G", "30E", "68E", "25F", "74D", "98K", "89H", "36R", "84D", "61F", "49G", "25H", "17F", "14R", "36H", "47G", "90A", "68A", "83C", "26B", "15B", "61C", "15K", "47H", "78E", "75D", "15C", "63E", "34C", "36F", "38G", "15E", "93F", "22G", "60B", "94D", "62R", "24D", "11R", "12A", "76A", "94C", "97R", "24E", "26A", "15F", "72A", "49H", "62D", "98C", "71B", "61A", "12C", "27A", "78R", "51M", "69E", "76D", "78F", "49R", "81A", "64F", "29D", "18A", "19F", "21E", "92A", "65G", "86E", "62G", "61K", "47A", "23R", "14F", "95D", "36B", "74R", "11H", "24C", "11G", "66D", "63A", "43R", "70F", "86B", "61G", "47M", "67C", "37D", "43G", "14H", "90F", "51G", "86A", "11E", "29K", "85C", "83F", "24B", "98R", "19E", "61B", "90D", "82G", "14K", "74G", "72D", "85A", "19C", "37G", "98E", "74F", "28H", "90E", "89D", "35R", "97H", "83H", "95A", "20C", "65E", "15R", "73C", "37A", "38E", "77G", "94B", "17A", "75R", "98F", "65R", "76R", "20B", "24G", "25B", "73G", "62F", "29G", "77C", "22H", "14D", "23F", "93C", "19R", "15D", "47R", "79D", "60G", "77A", "82C", "63G", "21H", "81E", "25D", "12D", "37R", "36K", "84F", "98G", "28B", "51N", "18F", "50R", "74C", "35C", "30G", "64A", "95F", "18C", "99G", "99B", "37C", "76H", "60K", "67R", "75A", "83R", "28E", "65F", "17D", "92G", "23C", "60R", "90R", "38A", "43D", "50H", "43C", "77H", "47B", "89F", "82F", "65H", "89E", "62C", "24R", "26G", "84E", "17C", "65B", "34A", "12B", "64R", "29H", "71C", "88D", "79F", "76C", "98A", "69H", "22B", "29A", "72R", "67H", "48C", "22D", "60C", "35H", "38H", "63P", "70D", 
"49D", "18H", "89A", "72E", "92D", "26H", "73R", "85G", "20E", "98H", "69C", "18B", "73B", "22E", "34G", "30K", "20D", "50A", "34D", "15H", "34H", "71E", "62E", "64C", "51R", "82D", "99E", "70R", "18D", "92F", "94R", "24A", "85H", "11C", "73E", "95E", "86C", "94F", "86R", "37K", "23B", "20H", "73D", "95H", "35A", "89B", "82H", "67F", "70H", "97F", "29E", "97A", "51K", "68D", "37B", "82E", "18R", "86H", "35B", "43E", "35F", "95B", "70E", "21D", "27F", "36E", "63D", "68C", "50E", "36G", "75F", "21G", "29B", "93B", "22A", "18G", "43F", "93G", "62A", "83B", "28D", "75C", "22C", "21R", "25E", "23G", "97C", "75E", "79E", "19H", "47K", "65C", "35E", "20R", "68B", "89R", "67A", "75G", "81R", "78B", "77D", "78G", "20K", "36D", "66C", "38F", "27G", "19D", "67B", "84G", "22F", "61D", "20G", "48A", "76F", "48H", "92B", "85R", "26C", "65A", "70B", "38D", "14C", "66A", "73A", "49C", "74E", "68R", "66B", "74A", "49E", "17B", "69D", "51C", "85F", "21F", "99C", "17G", "72H", "94E", "51F", "92R", "60H", "21B", "93D", "19G", "86F", "51A", "66R", "72B", "26D", "64E", "93H", "12H", "97E", "60E", "82A", "60A", "83E", "27D", "64B", "11B", "11D", "76B", "95G", "14A", "61R", "21C", "30F", "23H", "89C", "97G", "62B", "63R", "88B", "98B", "90B", "67G", "69F", "73H", "20A", "72C", "65D", "68H", "51H", "79G", "70C", "90G", "66G", "83A", "77F", "63B", "64G", "25A", "88E", "68F", "99D", "26E", "94A", "48F", "34R", "61H", "90H", "74B", "14G", "12F", "15A", "27E", "69A", "35D", "12E", "85E", "25C", "29M", "89G", "17R", "78D", "84R", "95C", "15G", "28R", "99A", "69G", "48D", "97D", "27C", "78H", "14E", "79R", "73F", "88A", "48E", "48B", "64H", "99R", "14B", "77R", "75B", "88F", "84B", "11A", "67E", "12R", "50M", "11F", "79C", "49A", "43A", "88R", "77E", "48G", "51B", "81D", "74H", "93E", "37H", "88C", "71F", "94G", "38C", "29C", "43B", "30H", "81G", "28A", "26R", "66H", "66E", "17H", "79B", "49B", "63C", "98D", "81C", "69B", "63H", "85D", "26F", "22R", "83G", "37E", "12G", "77B", "35G", "62H", 
"60D", "60F", "99H", "70G", "76E", "84A", "72F", "25R", "27B", "30A", "47F", "34F", "97B", "23E", "36C", "66F", "48R", "92C", "71R", "23A", "50G", "47C", "82R", "63F", "84H", "38B", "47D", "67D", "25G", "86D", "88H", "64D", "24F", "23D", "99F" };
|
||||||
|
std::mutex _ocrMutex; // Fine-grained lock for PaddleOCR (not thread-safe)
|
||||||
std::unique_ptr<PaddleOCR::PPOCR> ppocr = std::make_unique<PaddleOCR::PPOCR>();
|
std::unique_ptr<PaddleOCR::PPOCR> ppocr = std::make_unique<PaddleOCR::PPOCR>();
|
||||||
[[nodiscard]] std::string AnalyseLicensePlateText(const std::string& ocrText);
|
[[nodiscard]] std::string AnalyseLicensePlateText(const std::string& ocrText);
|
||||||
[[nodiscard]] char convertDigitToLetter(char c);
|
[[nodiscard]] char convertDigitToLetter(char c);
|
||||||
|
|||||||
@@ -863,7 +863,8 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> ANSALPR_OD::RunInferenceSingleFrame(const cv::Mat& input, const std::string& cameraId) {
|
std::vector<Object> ANSALPR_OD::RunInferenceSingleFrame(const cv::Mat& input, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex here — sub-components (detectors, alprChecker) have their own locks.
|
||||||
|
// LabVIEW semaphore controls concurrency at the caller level.
|
||||||
|
|
||||||
// Early validation
|
// Early validation
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -916,18 +917,19 @@ namespace ANSCENTER {
|
|||||||
cv::Mat draw = input.clone();
|
cv::Mat draw = input.clone();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_detectedArea = cv::Rect(0, 0, frameWidth, frameHeight);
|
// Use local variable instead of shared _detectedArea for thread safety
|
||||||
|
cv::Rect detectedArea(0, 0, frameWidth, frameHeight);
|
||||||
|
|
||||||
if (_detectedArea.width <= 50 || _detectedArea.height <= 50) {
|
if (detectedArea.width <= 50 || detectedArea.height <= 50) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef FNS_DEBUG
|
#ifdef FNS_DEBUG
|
||||||
cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2);
|
cv::rectangle(draw, detectedArea, cv::Scalar(0, 0, 255), 2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Run license plate detection
|
// Run license plate detection
|
||||||
cv::Mat activeFrame = frame(_detectedArea);
|
cv::Mat activeFrame = frame(detectedArea);
|
||||||
std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
|
std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
|
||||||
|
|
||||||
if (lprOutput.empty()) {
|
if (lprOutput.empty()) {
|
||||||
@@ -1010,7 +1012,7 @@ namespace ANSCENTER {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
std::string ANSALPR_OD::DetectLicensePlateString(const cv::Mat& lprROI, const std::string& cameraId) {
|
std::string ANSALPR_OD::DetectLicensePlateString(const cv::Mat& lprROI, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _ocrDetector has its own m_inferenceMutex
|
||||||
try {
|
try {
|
||||||
// convert lprROI to greyscale if it is not already
|
// convert lprROI to greyscale if it is not already
|
||||||
if (lprROI.empty()) {
|
if (lprROI.empty()) {
|
||||||
@@ -1277,8 +1279,7 @@ namespace ANSCENTER {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _lpColourDetector has its own m_inferenceMutex
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::vector<Object> colourOutputs = _lpColourDetector->RunInference(lprROI, cameraId);
|
std::vector<Object> colourOutputs = _lpColourDetector->RunInference(lprROI, cameraId);
|
||||||
|
|
||||||
@@ -1310,8 +1311,9 @@ namespace ANSCENTER {
|
|||||||
return DetectLPColourDetector(lprROI, cameraId);
|
return DetectLPColourDetector(lprROI, cameraId);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check cache first (no GPU work needed)
|
// Check cache first (fine-grained lock, no GPU work)
|
||||||
{
|
{
|
||||||
|
std::lock_guard<std::mutex> cacheLock(_colourCacheMutex);
|
||||||
auto it = _colourCache.find(plateText);
|
auto it = _colourCache.find(plateText);
|
||||||
if (it != _colourCache.end()) {
|
if (it != _colourCache.end()) {
|
||||||
it->second.hitCount++;
|
it->second.hitCount++;
|
||||||
@@ -1319,11 +1321,12 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache miss — run the actual classifier
|
// Cache miss — run the actual classifier (no lock held during GPU inference)
|
||||||
std::string colour = DetectLPColourDetector(lprROI, cameraId);
|
std::string colour = DetectLPColourDetector(lprROI, cameraId);
|
||||||
|
|
||||||
// Store in cache
|
// Store in cache (fine-grained lock)
|
||||||
if (!colour.empty()) {
|
if (!colour.empty()) {
|
||||||
|
std::lock_guard<std::mutex> cacheLock(_colourCacheMutex);
|
||||||
if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
|
if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
|
||||||
_colourCache.clear();
|
_colourCache.clear();
|
||||||
}
|
}
|
||||||
@@ -1334,13 +1337,14 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult) {
|
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — delegates to Inference(input, lprResult, cameraId) which is also lock-free
|
||||||
if (input.empty()) return false;
|
if (input.empty()) return false;
|
||||||
if ((input.cols < 5) || (input.rows < 5)) return false;
|
if ((input.cols < 5) || (input.rows < 5)) return false;
|
||||||
return Inference(input, lprResult, "CustomCam");
|
return Inference(input, lprResult, "CustomCam");
|
||||||
}
|
}
|
||||||
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
|
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components have their own fine-grained locks.
|
||||||
|
// LabVIEW semaphore controls concurrency at the caller level.
|
||||||
|
|
||||||
// Early validation
|
// Early validation
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -1518,14 +1522,14 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect> & Bbox, std::string& lprResult) {
|
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect> & Bbox, std::string& lprResult) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — delegates to Inference(input, Bbox, lprResult, cameraId)
|
||||||
if (input.empty()) return false;
|
if (input.empty()) return false;
|
||||||
if ((input.cols < 5) || (input.rows < 5)) return false;
|
if ((input.cols < 5) || (input.rows < 5)) return false;
|
||||||
return Inference(input, Bbox, lprResult, "CustomCam");
|
return Inference(input, Bbox, lprResult, "CustomCam");
|
||||||
}
|
}
|
||||||
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,std::string& lprResult, const std::string& cameraId)
|
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,std::string& lprResult, const std::string& cameraId)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components have their own fine-grained locks.
|
||||||
|
|
||||||
// Early validation
|
// Early validation
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -2177,12 +2181,10 @@ namespace ANSCENTER {
|
|||||||
cv::Mat unsharp;
|
cv::Mat unsharp;
|
||||||
cv::addWeighted(denoised, 1.8, blurred, -0.8, 0, unsharp);
|
cv::addWeighted(denoised, 1.8, blurred, -0.8, 0, unsharp);
|
||||||
|
|
||||||
// Step 5: CLAHE contrast enhancement
|
// Step 5: CLAHE contrast enhancement (thread-local for thread safety)
|
||||||
if (!_clahe) {
|
thread_local cv::Ptr<cv::CLAHE> tl_clahe = cv::createCLAHE(4.0, cv::Size(8, 8));
|
||||||
_clahe = cv::createCLAHE(4.0, cv::Size(8, 8));
|
|
||||||
}
|
|
||||||
cv::Mat contrastEnhanced;
|
cv::Mat contrastEnhanced;
|
||||||
_clahe->apply(unsharp, contrastEnhanced);
|
tl_clahe->apply(unsharp, contrastEnhanced);
|
||||||
|
|
||||||
// Step 6: Laplacian edge sharpening
|
// Step 6: Laplacian edge sharpening
|
||||||
cv::Mat lap;
|
cv::Mat lap;
|
||||||
@@ -2718,6 +2720,7 @@ namespace ANSCENTER {
|
|||||||
|
|
||||||
void ANSALPR_OD::ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId)
|
void ANSALPR_OD::ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId)
|
||||||
{
|
{
|
||||||
|
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
|
||||||
auto& identities = _plateIdentities[cameraId];
|
auto& identities = _plateIdentities[cameraId];
|
||||||
|
|
||||||
// Option B: Auto-detect mode by counting detections.
|
// Option B: Auto-detect mode by counting detections.
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ namespace ANSCENTER
|
|||||||
ANSCENTER::ModelConfig _lpdmodelConfig;
|
ANSCENTER::ModelConfig _lpdmodelConfig;
|
||||||
ANSCENTER::ModelConfig _ocrModelConfig;
|
ANSCENTER::ModelConfig _ocrModelConfig;
|
||||||
ANSCENTER::ModelConfig _lpColourModelConfig;
|
ANSCENTER::ModelConfig _lpColourModelConfig;
|
||||||
cv::Ptr<cv::CLAHE> _clahe; // Reusable CLAHE instance
|
// _clahe moved to thread-local in enhanceForOCR() for thread safety
|
||||||
ANSCENTER::NV12PreprocessHelper _nv12Helper; // NV12 crop for high-res plate OCR
|
ANSCENTER::NV12PreprocessHelper _nv12Helper; // NV12 crop for high-res plate OCR
|
||||||
|
|
||||||
std::string _lpdLabels;
|
std::string _lpdLabels;
|
||||||
@@ -147,6 +147,7 @@ namespace ANSCENTER
|
|||||||
int framesSinceLastSeen = 0;
|
int framesSinceLastSeen = 0;
|
||||||
};
|
};
|
||||||
// cameraId → list of tracked plate identities
|
// cameraId → list of tracked plate identities
|
||||||
|
std::mutex _plateIdentitiesMutex; // Fine-grained lock for plate identity tracking
|
||||||
std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
|
std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
|
||||||
static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold for same plate
|
static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold for same plate
|
||||||
void ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId);
|
void ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId);
|
||||||
@@ -176,6 +177,7 @@ namespace ANSCENTER
|
|||||||
std::string colour;
|
std::string colour;
|
||||||
int hitCount = 0;
|
int hitCount = 0;
|
||||||
};
|
};
|
||||||
|
std::mutex _colourCacheMutex; // Fine-grained lock for colour cache only
|
||||||
std::unordered_map<std::string, ColourCacheEntry> _colourCache;
|
std::unordered_map<std::string, ColourCacheEntry> _colourCache;
|
||||||
static constexpr size_t COLOUR_CACHE_MAX_SIZE = 200;
|
static constexpr size_t COLOUR_CACHE_MAX_SIZE = 200;
|
||||||
|
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — ppOCR->Predict() / engine has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
OCRObjects.clear();
|
OCRObjects.clear();
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -177,7 +177,7 @@ namespace ANSCENTER {
|
|||||||
|
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — ppOCR->Predict() / engine has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
OCRObjects.clear();
|
OCRObjects.clear();
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -271,7 +271,7 @@ namespace ANSCENTER {
|
|||||||
|
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — ppOCR->Predict() / engine has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
OCRObjects.clear();
|
OCRObjects.clear();
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -164,7 +164,7 @@ std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -268,7 +268,7 @@ std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -385,7 +385,7 @@ bool ANSONNXOCR::Destroy() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::string, float> ANSONNXOCR::RecognizeText(const cv::Mat& croppedImage) {
|
std::pair<std::string, float> ANSONNXOCR::RecognizeText(const cv::Mat& croppedImage) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->recognizeOnly() has its own internal lock
|
||||||
if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f};
|
if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f};
|
||||||
auto result = _engine->recognizeOnly(croppedImage);
|
auto result = _engine->recognizeOnly(croppedImage);
|
||||||
return {result.text, result.score};
|
return {result.text, result.score};
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -178,7 +178,7 @@ std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -282,7 +282,7 @@ std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->ocr() has its own internal lock
|
||||||
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
std::vector<ANSCENTER::OCRObject> OCRObjects;
|
||||||
|
|
||||||
if (!_licenseValid) {
|
if (!_licenseValid) {
|
||||||
@@ -379,7 +379,7 @@ std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::string, float> ANSRTOCR::RecognizeText(const cv::Mat& croppedImage) {
|
std::pair<std::string, float> ANSRTOCR::RecognizeText(const cv::Mat& croppedImage) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — _engine->recognizeOnly() has its own internal lock
|
||||||
if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f};
|
if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f};
|
||||||
auto result = _engine->recognizeOnly(croppedImage);
|
auto result = _engine->recognizeOnly(croppedImage);
|
||||||
return {result.text, result.score};
|
return {result.text, result.score};
|
||||||
|
|||||||
@@ -1455,7 +1455,7 @@ namespace ANSCENTER
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> ANSODBase::RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id) {
|
std::vector<Object> ANSODBase::RunStaticInference(const cv::Mat& input, cv::Rect Bbox, const std::string& camera_id) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — only uses local variables and virtual RunInference() which has its own engine lock
|
||||||
std::vector<Object> output;
|
std::vector<Object> output;
|
||||||
output.clear();
|
output.clear();
|
||||||
try {
|
try {
|
||||||
@@ -2100,7 +2100,8 @@ namespace ANSCENTER
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> ANSODBase::RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode) {
|
std::vector<Object> ANSODBase::RunInferenceWithOption(const cv::Mat& input, const std::string& camera_id, const std::string activeROIMode) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
// No coarse _mutex — sub-components (engines, trackers) have their own locks.
|
||||||
|
// LabVIEW semaphore controls concurrency at the caller level.
|
||||||
try {
|
try {
|
||||||
int mode = 0;
|
int mode = 0;
|
||||||
double confidenceThreshold = 0.35;
|
double confidenceThreshold = 0.35;
|
||||||
@@ -2116,8 +2117,11 @@ namespace ANSCENTER
|
|||||||
if (confidenceThreshold <= 0) confidenceThreshold = 0;
|
if (confidenceThreshold <= 0) confidenceThreshold = 0;
|
||||||
if (confidenceThreshold > 1) confidenceThreshold = 1;
|
if (confidenceThreshold > 1) confidenceThreshold = 1;
|
||||||
|
|
||||||
// Update model configuration with the new parameters
|
// Update model configuration with the new parameters (brief lock for config)
|
||||||
if(confidenceThreshold>0)_modelConfig.detectionScoreThreshold = confidenceThreshold;
|
if (confidenceThreshold > 0) {
|
||||||
|
std::lock_guard<std::recursive_mutex> cfgLock(_mutex);
|
||||||
|
_modelConfig.detectionScoreThreshold = confidenceThreshold;
|
||||||
|
}
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case 0: // Normal mode
|
case 0: // Normal mode
|
||||||
return RunInference(input, camera_id); //RunInference
|
return RunInference(input, camera_id); //RunInference
|
||||||
|
|||||||
@@ -275,6 +275,26 @@ namespace ANSCENTER {
|
|||||||
gpuData->gpuIndex == inferenceGpu;
|
gpuData->gpuIndex == inferenceGpu;
|
||||||
const bool useZeroCopy = isCudaDevice && gpuMatch;
|
const bool useZeroCopy = isCudaDevice && gpuMatch;
|
||||||
|
|
||||||
|
// --- Debug: log pointer state before reading ---
|
||||||
|
{
|
||||||
|
char _nv12_dbg[512];
|
||||||
|
snprintf(_nv12_dbg, sizeof(_nv12_dbg),
|
||||||
|
"[NV12Helper] tryNV12: gpuData=%p yPlane=%p uvPlane=%p isCuda=%d "
|
||||||
|
"gpuIdx=%d infGpu=%d gpuMatch=%d zeroCopy=%d "
|
||||||
|
"gpuCacheY=%p gpuCacheUV=%p gpuCacheValid=%d refcount=%d %dx%d\n",
|
||||||
|
(void*)gpuData, (void*)gpuData->yPlane, (void*)gpuData->uvPlane,
|
||||||
|
(int)isCudaDevice, gpuData->gpuIndex, inferenceGpu,
|
||||||
|
(int)gpuMatch, (int)useZeroCopy,
|
||||||
|
gpuData->gpuCacheY, gpuData->gpuCacheUV,
|
||||||
|
(int)gpuData->gpuCacheValid,
|
||||||
|
gpuData->refcount.load(),
|
||||||
|
frameW, frameH);
|
||||||
|
#ifdef _WIN32
|
||||||
|
OutputDebugStringA(_nv12_dbg);
|
||||||
|
#endif
|
||||||
|
fprintf(stderr, "%s", _nv12_dbg);
|
||||||
|
}
|
||||||
|
|
||||||
// Effective plane pointers — for zero-copy, use CUDA device ptrs;
|
// Effective plane pointers — for zero-copy, use CUDA device ptrs;
|
||||||
// for CPU upload, use the CPU snapshot buffers.
|
// for CPU upload, use the CPU snapshot buffers.
|
||||||
uint8_t* effYPlane;
|
uint8_t* effYPlane;
|
||||||
@@ -283,7 +303,7 @@ namespace ANSCENTER {
|
|||||||
int effUvLinesize;
|
int effUvLinesize;
|
||||||
|
|
||||||
if (useZeroCopy) {
|
if (useZeroCopy) {
|
||||||
// Same GPU: wrap NVDEC device pointers directly
|
// Same GPU: wrap owned CUDA device pointers directly
|
||||||
effYPlane = gpuData->yPlane;
|
effYPlane = gpuData->yPlane;
|
||||||
effUvPlane = gpuData->uvPlane;
|
effUvPlane = gpuData->uvPlane;
|
||||||
effYLinesize = gpuData->yLinesize;
|
effYLinesize = gpuData->yLinesize;
|
||||||
@@ -435,6 +455,18 @@ namespace ANSCENTER {
|
|||||||
gpuResized.create(inputH, inputW, CV_8UC3);
|
gpuResized.create(inputH, inputW, CV_8UC3);
|
||||||
|
|
||||||
cudaStream_t rawStream = cv::cuda::StreamAccessor::getStream(stream);
|
cudaStream_t rawStream = cv::cuda::StreamAccessor::getStream(stream);
|
||||||
|
{
|
||||||
|
char _nv12_dbg2[256];
|
||||||
|
snprintf(_nv12_dbg2, sizeof(_nv12_dbg2),
|
||||||
|
"[NV12Helper] KERNEL LAUNCH: gpuY=%p(%dx%d) gpuUV=%p(%dx%d) -> %dx%d zeroCopy=%d\n",
|
||||||
|
(void*)gpuY.data, gpuY.cols, gpuY.rows,
|
||||||
|
(void*)gpuUV.data, gpuUV.cols, gpuUV.rows,
|
||||||
|
inputW, inputH, (int)useZeroCopy);
|
||||||
|
#ifdef _WIN32
|
||||||
|
OutputDebugStringA(_nv12_dbg2);
|
||||||
|
#endif
|
||||||
|
fprintf(stderr, "%s", _nv12_dbg2);
|
||||||
|
}
|
||||||
launcher(gpuY, gpuUV, gpuResized, frameW, frameH, inputW, inputH, rawStream);
|
launcher(gpuY, gpuUV, gpuResized, frameW, frameH, inputW, inputH, rawStream);
|
||||||
|
|
||||||
stream.waitForCompletion();
|
stream.waitForCompletion();
|
||||||
@@ -945,7 +977,15 @@ namespace ANSCENTER {
|
|||||||
inputW, inputH, frameW, frameH, stream);
|
inputW, inputH, frameW, frameH, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaStreamSynchronize(stream);
|
// Use polling sync instead of cudaStreamSynchronize to avoid
|
||||||
|
// holding nvcuda64 SRW lock continuously (WDDM deadlock prevention).
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaStreamQuery(stream);
|
||||||
|
while (err == cudaErrorNotReady) {
|
||||||
|
Sleep(0);
|
||||||
|
err = cudaStreamQuery(stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// (No registry lock to release — data kept alive by refcount)
|
// (No registry lock to release — data kept alive by refcount)
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,9 @@
|
|||||||
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h> // Sleep()
|
||||||
|
#endif
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
// ── Shared YUV→RGB computation ───────────────────────────────────────────
|
// ── Shared YUV→RGB computation ───────────────────────────────────────────
|
||||||
@@ -651,7 +654,24 @@ int ANSGpuNV12ToBGR(
|
|||||||
width * 3, height,
|
width * 3, height,
|
||||||
cudaMemcpyDeviceToHost, t_bufs.stream);
|
cudaMemcpyDeviceToHost, t_bufs.stream);
|
||||||
|
|
||||||
cudaStreamSynchronize(t_bufs.stream);
|
// Use polling sync instead of cudaStreamSynchronize to avoid
|
||||||
|
// holding nvcuda64 SRW lock continuously (WDDM deadlock prevention).
|
||||||
|
// Short Sleep(0) fast path for sub-ms kernels, then Sleep(1) to give
|
||||||
|
// cleanup operations (cuArrayDestroy, cuMemFree) a window to acquire
|
||||||
|
// the exclusive SRW lock.
|
||||||
|
{
|
||||||
|
cudaError_t qerr = cudaStreamQuery(t_bufs.stream);
|
||||||
|
if (qerr == cudaErrorNotReady) {
|
||||||
|
for (int i = 0; i < 10 && qerr == cudaErrorNotReady; ++i) {
|
||||||
|
Sleep(0);
|
||||||
|
qerr = cudaStreamQuery(t_bufs.stream);
|
||||||
|
}
|
||||||
|
while (qerr == cudaErrorNotReady) {
|
||||||
|
Sleep(1);
|
||||||
|
qerr = cudaStreamQuery(t_bufs.stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check for errors
|
// Check for errors
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <random>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <set>
|
#include <set>
|
||||||
@@ -751,8 +752,11 @@ static void LogGpuInfo() {
|
|||||||
// Worker thread: reads RTSP frames and runs ALPR inference
|
// Worker thread: reads RTSP frames and runs ALPR inference
|
||||||
// RTSP client and ALPR engine are pre-created on the main thread to avoid
|
// RTSP client and ALPR engine are pre-created on the main thread to avoid
|
||||||
// race conditions in CreateANSRTSPHandle / CreateANSALPRHandle.
|
// race conditions in CreateANSRTSPHandle / CreateANSALPRHandle.
|
||||||
|
// Takes rtspClientPtr (pointer to array slot) + streamGuard mutex so the
|
||||||
|
// CHAOS thread can safely destroy+recreate the stream without use-after-free.
|
||||||
static void ALPRWorkerThread(int taskId,
|
static void ALPRWorkerThread(int taskId,
|
||||||
ANSCENTER::ANSRTSPClient* rtspClient,
|
ANSCENTER::ANSRTSPClient** rtspClientPtr,
|
||||||
|
std::mutex* streamGuard,
|
||||||
ANSCENTER::ANSALPR* alprHandle,
|
ANSCENTER::ANSALPR* alprHandle,
|
||||||
TaskState& state) {
|
TaskState& state) {
|
||||||
char tag[32];
|
char tag[32];
|
||||||
@@ -780,6 +784,23 @@ static void ALPRWorkerThread(int taskId,
|
|||||||
bool hwDecodeLogged = false;
|
bool hwDecodeLogged = false;
|
||||||
|
|
||||||
while (g_running.load()) {
|
while (g_running.load()) {
|
||||||
|
// Lock the stream guard to prevent CHAOS from destroying the client
|
||||||
|
// while we're mid-frame-grab or mid-inference.
|
||||||
|
std::unique_lock<std::mutex> streamLock(*streamGuard);
|
||||||
|
|
||||||
|
// Re-read the client pointer each iteration — CHAOS may have
|
||||||
|
// destroyed+recreated it, so our old pointer could be dangling.
|
||||||
|
ANSCENTER::ANSRTSPClient* rtspClient = *rtspClientPtr;
|
||||||
|
if (rtspClient == nullptr) {
|
||||||
|
streamLock.unlock();
|
||||||
|
emptyFrames++;
|
||||||
|
if (emptyFrames % 100 == 1) {
|
||||||
|
g_log.add(prefix + " Stream destroyed by CHAOS, waiting... (count=" + std::to_string(emptyFrames) + ")");
|
||||||
|
}
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(50));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Read frame from RTSP via ANSCV
|
// Read frame from RTSP via ANSCV
|
||||||
auto grabStart = std::chrono::steady_clock::now();
|
auto grabStart = std::chrono::steady_clock::now();
|
||||||
cv::Mat* framePtr = nullptr;
|
cv::Mat* framePtr = nullptr;
|
||||||
@@ -797,6 +818,7 @@ static void ALPRWorkerThread(int taskId,
|
|||||||
ReconnectRTSP(&rtspClient);
|
ReconnectRTSP(&rtspClient);
|
||||||
emptyFrames = 0;
|
emptyFrames = 0;
|
||||||
}
|
}
|
||||||
|
streamLock.unlock();
|
||||||
if (framePtr) delete framePtr;
|
if (framePtr) delete framePtr;
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||||
continue;
|
continue;
|
||||||
@@ -829,6 +851,9 @@ static void ALPRWorkerThread(int taskId,
|
|||||||
// matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create
|
// matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create
|
||||||
// a different pointer the registry doesn't know, breaking NV12 zero-copy.
|
// a different pointer the registry doesn't know, breaking NV12 zero-copy.
|
||||||
ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage);
|
ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage);
|
||||||
|
|
||||||
|
// Release stream lock — inference is done, CHAOS can now safely destroy.
|
||||||
|
streamLock.unlock();
|
||||||
auto infEnd = std::chrono::steady_clock::now();
|
auto infEnd = std::chrono::steady_clock::now();
|
||||||
double infMs = std::chrono::duration<double, std::milli>(infEnd - infStart).count();
|
double infMs = std::chrono::duration<double, std::milli>(infEnd - infStart).count();
|
||||||
totalInfMs += infMs;
|
totalInfMs += infMs;
|
||||||
@@ -933,19 +958,20 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("============================================================\n");
|
printf("============================================================\n");
|
||||||
printf(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks\n");
|
printf(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks\n");
|
||||||
|
printf(" (4 cameras, 5 AI tasks — Task 4 shares Stream 2)\n");
|
||||||
printf(" Press ESC to stop\n");
|
printf(" Press ESC to stop\n");
|
||||||
printf(" Log file: %s\n", LOG_FILE_PATH);
|
printf(" Log file: %s\n", LOG_FILE_PATH);
|
||||||
printf("============================================================\n\n");
|
printf("============================================================\n\n");
|
||||||
|
|
||||||
g_log.add("============================================================");
|
g_log.add("============================================================");
|
||||||
g_log.add(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks");
|
g_log.add(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks");
|
||||||
g_log.add("============================================================");
|
g_log.add("============================================================");
|
||||||
|
|
||||||
// --- Log GPU info for diagnostics ---
|
// --- Log GPU info for diagnostics ---
|
||||||
LogGpuInfo();
|
LogGpuInfo();
|
||||||
|
|
||||||
// --- RTSP URLs (4 independent streams, one per task) ---
|
// --- RTSP URLs (4 independent camera streams) ---
|
||||||
const std::string rtspUrl0 = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0";
|
const std::string rtspUrl0 = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0";
|
||||||
const std::string rtspUrl1 = "rtsp://cafe2471.ddns.net:600/rtsp/streaming?channel=01&subtype=0";
|
const std::string rtspUrl1 = "rtsp://cafe2471.ddns.net:600/rtsp/streaming?channel=01&subtype=0";
|
||||||
const std::string rtspUrl2 = "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0";
|
const std::string rtspUrl2 = "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0";
|
||||||
@@ -956,18 +982,39 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
g_log.add("Stream 2: " + rtspUrl2);
|
g_log.add("Stream 2: " + rtspUrl2);
|
||||||
g_log.add("Stream 3: " + rtspUrl3);
|
g_log.add("Stream 3: " + rtspUrl3);
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Architecture: Camera Process + AI Task Process (mimics LabVIEW)
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Camera Process: 4 independent RTSP streams acquire frames from cameras.
|
||||||
|
// AI Task Process: 5 AI tasks subscribe to camera streams and run inference
|
||||||
|
// in parallel. Multiple tasks can share one camera stream.
|
||||||
|
// Task 4 subscribes to Stream 2 (nhathuocngoclinh) to demonstrate the
|
||||||
|
// shared-camera subscription model used in LabVIEW.
|
||||||
|
// =========================================================================
|
||||||
|
|
||||||
|
const int NUM_STREAMS = 4;
|
||||||
|
const int NUM_TASKS = 5;
|
||||||
|
|
||||||
// --- Task states ---
|
// --- Task states ---
|
||||||
TaskState taskStates[4];
|
TaskState taskStates[NUM_TASKS];
|
||||||
|
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
// Create 4 INDEPENDENT RTSP readers — one per task, each with its own
|
// CAMERA PROCESS: Create 4 independent RTSP readers (one per camera).
|
||||||
// camera stream. Each task gets a dedicated RTSP connection.
|
// These form the camera acquisition layer that AI tasks subscribe to.
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
const int NUM_STREAMS = 4;
|
|
||||||
ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {};
|
ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {};
|
||||||
const std::string streamUrls[NUM_STREAMS] = { rtspUrl0, rtspUrl1, rtspUrl2, rtspUrl3 };
|
const std::string streamUrls[NUM_STREAMS] = { rtspUrl0, rtspUrl1, rtspUrl2, rtspUrl3 };
|
||||||
// Map: task index -> stream index (1:1 mapping)
|
// Map: task index -> stream index
|
||||||
const int taskStreamMap[4] = { 0, 1, 2, 3 };
|
// Tasks 0-3 map 1:1 to streams 0-3.
|
||||||
|
// Task 4 subscribes to Stream 2 (nhathuocngoclinh) — shared camera.
|
||||||
|
const int taskStreamMap[NUM_TASKS] = { 0, 1, 2, 3, 2 };
|
||||||
|
|
||||||
|
// Log task-to-stream subscription mapping
|
||||||
|
g_log.add("--- AI Task -> Camera Stream subscription ---");
|
||||||
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
|
g_log.add(" Task " + std::to_string(i) + " -> Stream " + std::to_string(taskStreamMap[i])
|
||||||
|
+ " (" + streamUrls[taskStreamMap[i]] + ")");
|
||||||
|
}
|
||||||
|
|
||||||
for (int s = 0; s < NUM_STREAMS; s++) {
|
for (int s = 0; s < NUM_STREAMS; s++) {
|
||||||
printf("[Stream%d] Creating RTSP handle for %s...\n", s, streamUrls[s].c_str());
|
printf("[Stream%d] Creating RTSP handle for %s...\n", s, streamUrls[s].c_str());
|
||||||
@@ -986,14 +1033,17 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
// Create 4 ALPR engines sequentially
|
// AI TASK PROCESS: Create 5 ALPR engines sequentially.
|
||||||
|
// Each AI task gets its own engine and subscribes to a camera stream.
|
||||||
|
// Task 4 shares Stream 2 (nhathuocngoclinh) with Task 2 — demonstrating
|
||||||
|
// the LabVIEW pattern where multiple AI tasks subscribe to one camera.
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
ANSCENTER::ANSALPR* alprHandles[4] = {};
|
ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {};
|
||||||
std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
|
std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
|
||||||
int engineType = 1; // NVIDIA_GPU
|
int engineType = 1; // NVIDIA_GPU
|
||||||
double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
|
double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
char tag[32];
|
char tag[32];
|
||||||
snprintf(tag, sizeof(tag), "[Task%d]", i);
|
snprintf(tag, sizeof(tag), "[Task%d]", i);
|
||||||
|
|
||||||
@@ -1109,7 +1159,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
|
|
||||||
// Count votes: how many tasks on this stream use each GPU
|
// Count votes: how many tasks on this stream use each GPU
|
||||||
std::map<int, int> gpuVotes;
|
std::map<int, int> gpuVotes;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
if (taskStreamMap[i] == s && alprHandles[i]) {
|
if (taskStreamMap[i] == s && alprHandles[i]) {
|
||||||
gpuVotes[taskStates[i].gpuDeviceId]++;
|
gpuVotes[taskStates[i].gpuDeviceId]++;
|
||||||
}
|
}
|
||||||
@@ -1194,30 +1244,132 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --- Enable deep pipeline benchmarking on all ALPR handles ---
|
// --- Enable deep pipeline benchmarking on all ALPR handles ---
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
if (alprHandles[i]) {
|
if (alprHandles[i]) {
|
||||||
alprHandles[i]->ActivateDebugger(true);
|
alprHandles[i]->ActivateDebugger(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
g_log.add("Debug benchmarking ENABLED on all ALPR handles");
|
g_log.add("Debug benchmarking ENABLED on all ALPR handles");
|
||||||
|
|
||||||
// --- Launch worker threads — tasks sharing a stream get the same RTSP client ---
|
// --- Per-stream mutex: prevents CHAOS from destroying a stream while a
|
||||||
g_log.add("Launching worker threads...");
|
// worker is mid-frame-grab or mid-inference (use-after-free fix). ---
|
||||||
std::thread workers[4];
|
std::mutex streamGuards[NUM_STREAMS];
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
|
// --- Launch worker threads ---
|
||||||
|
// Each AI task subscribes to its camera stream via taskStreamMap.
|
||||||
|
// Tasks sharing a stream (e.g. Task 2 & Task 4 on Stream 2) both get
|
||||||
|
// the same RTSP client pointer and share the stream's mutex guard.
|
||||||
|
g_log.add("Launching " + std::to_string(NUM_TASKS) + " worker threads...");
|
||||||
|
std::thread workers[NUM_TASKS];
|
||||||
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
int streamIdx = taskStreamMap[i];
|
int streamIdx = taskStreamMap[i];
|
||||||
if (rtspClients[streamIdx] && alprHandles[i]) {
|
if (rtspClients[streamIdx] && alprHandles[i]) {
|
||||||
workers[i] = std::thread(ALPRWorkerThread, i,
|
workers[i] = std::thread(ALPRWorkerThread, i,
|
||||||
rtspClients[streamIdx], alprHandles[i],
|
&rtspClients[streamIdx],
|
||||||
|
&streamGuards[streamIdx],
|
||||||
|
alprHandles[i],
|
||||||
std::ref(taskStates[i]));
|
std::ref(taskStates[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Camera Chaos Thread — simulates camera errors / reconnects
|
||||||
|
// Mimics LabVIEW behavior: cameras randomly go into Error/Recovering
|
||||||
|
// state, triggering Stop/Reconnect/Destroy+Recreate cycles that cause
|
||||||
|
// CUDA cleanup (cuArrayDestroy, cuMemFree) while inference is running.
|
||||||
|
// This is the exact scenario that triggers the nvcuda64 SRW lock deadlock.
|
||||||
|
// =========================================================================
|
||||||
|
std::atomic<bool> chaosEnabled{true};
|
||||||
|
std::thread chaosThread([&]() {
|
||||||
|
std::mt19937 rng(std::random_device{}());
|
||||||
|
|
||||||
|
// Wait 10 seconds for system to stabilize before starting chaos
|
||||||
|
for (int i = 0; i < 100 && g_running.load(); i++) {
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
|
}
|
||||||
|
|
||||||
|
g_log.add("[CHAOS] Camera chaos thread started — every 10s, stop/destroy/recreate one camera (round-robin)");
|
||||||
|
printf("[CHAOS] Camera chaos thread started — 10s interval, round-robin across %d streams\n", NUM_STREAMS);
|
||||||
|
|
||||||
|
int chaosCount = 0;
|
||||||
|
int nextStream = 0; // Round-robin: cycle through streams 0,1,2,3,0,1,...
|
||||||
|
while (g_running.load() && chaosEnabled.load()) {
|
||||||
|
// Fixed 10-second interval between chaos events
|
||||||
|
for (int s = 0; s < 100 && g_running.load(); s++) {
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
|
}
|
||||||
|
if (!g_running.load()) break;
|
||||||
|
|
||||||
|
int streamIdx = nextStream;
|
||||||
|
nextStream = (nextStream + 1) % NUM_STREAMS;
|
||||||
|
chaosCount++;
|
||||||
|
|
||||||
|
char buf[512];
|
||||||
|
auto chaosStart = std::chrono::steady_clock::now();
|
||||||
|
|
||||||
|
// Lock stream guard: wait for any in-flight inference to finish
|
||||||
|
// before touching the RTSP client. This prevents use-after-free
|
||||||
|
// when CHAOS destroys a stream while a worker is mid-inference.
|
||||||
|
std::unique_lock<std::mutex> chaosLock(streamGuards[streamIdx]);
|
||||||
|
|
||||||
|
// Always use full DESTROY + RECREATE cycle.
|
||||||
|
// Reconnect() reuses internal player state which can leave stale
|
||||||
|
// CUDA resources and cause freezes. A clean destroy + recreate
|
||||||
|
// guarantees a fresh decoder/player with no leftover state.
|
||||||
|
{
|
||||||
|
bool wasAlive = (rtspClients[streamIdx] != nullptr);
|
||||||
|
|
||||||
|
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: DESTROY + RECREATE (%s)",
|
||||||
|
chaosCount, streamIdx,
|
||||||
|
wasAlive ? "camera was running" : "camera was already offline");
|
||||||
|
g_log.add(buf);
|
||||||
|
printf("%s\n", buf);
|
||||||
|
|
||||||
|
// Stop and release old handle if it exists
|
||||||
|
if (rtspClients[streamIdx]) {
|
||||||
|
StopRTSP(&rtspClients[streamIdx]);
|
||||||
|
ReleaseANSRTSPHandle(&rtspClients[streamIdx]);
|
||||||
|
rtspClients[streamIdx] = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release lock during offline sleep — worker sees nullptr and skips
|
||||||
|
int offlineMs = 500 + (rng() % 2500); // 0.5 - 3 seconds offline
|
||||||
|
chaosLock.unlock();
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(offlineMs));
|
||||||
|
chaosLock.lock();
|
||||||
|
|
||||||
|
// Recreate the RTSP handle (under lock again)
|
||||||
|
int result = CreateANSRTSPHandle(&rtspClients[streamIdx], "", "", "",
|
||||||
|
streamUrls[streamIdx].c_str());
|
||||||
|
if (result == 1 && rtspClients[streamIdx]) {
|
||||||
|
SetRTSPImageQuality(&rtspClients[streamIdx], 0);
|
||||||
|
SetRTSPHWDecoding(&rtspClients[streamIdx], 7);
|
||||||
|
StartRTSP(&rtspClients[streamIdx]);
|
||||||
|
|
||||||
|
auto chaosEnd = std::chrono::steady_clock::now();
|
||||||
|
double chaosMs = std::chrono::duration<double, std::milli>(chaosEnd - chaosStart).count();
|
||||||
|
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATED in %.0f ms (offline %d ms)",
|
||||||
|
chaosCount, streamIdx, chaosMs, offlineMs);
|
||||||
|
} else {
|
||||||
|
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATE FAILED (result=%d)",
|
||||||
|
chaosCount, streamIdx, result);
|
||||||
|
}
|
||||||
|
g_log.add(buf);
|
||||||
|
printf("%s\n", buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_log.add("[CHAOS] Camera chaos thread stopped (total events: " + std::to_string(chaosCount) + ")");
|
||||||
|
printf("[CHAOS] Camera chaos thread stopped (total events: %d)\n", chaosCount);
|
||||||
|
});
|
||||||
|
|
||||||
// --- Display loop (main thread) ---
|
// --- Display loop (main thread) ---
|
||||||
const int cellW = 640, cellH = 480;
|
// 3x2 grid layout: 5 tasks displayed in 3 columns x 2 rows
|
||||||
const int logPanelH = 200;
|
const int cellW = 480, cellH = 360; // Smaller cells for 3-column layout
|
||||||
|
const int logPanelH = 220;
|
||||||
|
const int gridCols = 3, gridRows = 2;
|
||||||
cv::namedWindow("ANSLPR Multi-GPU Stress Test", cv::WINDOW_NORMAL);
|
cv::namedWindow("ANSLPR Multi-GPU Stress Test", cv::WINDOW_NORMAL);
|
||||||
cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * 2, cellH * 2 + logPanelH);
|
cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * gridCols, cellH * gridRows + logPanelH);
|
||||||
|
|
||||||
auto testStart = std::chrono::steady_clock::now();
|
auto testStart = std::chrono::steady_clock::now();
|
||||||
auto lastGpuSnapshot = std::chrono::steady_clock::now();
|
auto lastGpuSnapshot = std::chrono::steady_clock::now();
|
||||||
@@ -1244,12 +1396,12 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
// Per-task stats
|
// Per-task stats
|
||||||
double totalFpsSnap = 0;
|
double totalFpsSnap = 0;
|
||||||
for (int t = 0; t < 4; t++) {
|
for (int t = 0; t < NUM_TASKS; t++) {
|
||||||
std::lock_guard<std::mutex> lk(taskStates[t].mtx);
|
std::lock_guard<std::mutex> lk(taskStates[t].mtx);
|
||||||
char buf[256];
|
char buf[256];
|
||||||
snprintf(buf, sizeof(buf),
|
snprintf(buf, sizeof(buf),
|
||||||
" T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d",
|
" T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d",
|
||||||
t, taskStates[t].gpuDeviceId,
|
t, taskStreamMap[t], taskStates[t].gpuDeviceId,
|
||||||
taskStates[t].vramUsedBytes / (1024 * 1024),
|
taskStates[t].vramUsedBytes / (1024 * 1024),
|
||||||
taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs,
|
taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs,
|
||||||
taskStates[t].frameCount, taskStates[t].detectionCount);
|
taskStates[t].frameCount, taskStates[t].detectionCount);
|
||||||
@@ -1261,7 +1413,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
g_log.add(buf);
|
g_log.add(buf);
|
||||||
// Multi-GPU check
|
// Multi-GPU check
|
||||||
std::set<int> gpusUsed;
|
std::set<int> gpusUsed;
|
||||||
for (int t = 0; t < 4; t++) {
|
for (int t = 0; t < NUM_TASKS; t++) {
|
||||||
if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
|
if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
|
||||||
}
|
}
|
||||||
if (gpusUsed.size() > 1) {
|
if (gpusUsed.size() > 1) {
|
||||||
@@ -1271,12 +1423,12 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
g_log.add("---- END SNAPSHOT ----");
|
g_log.add("---- END SNAPSHOT ----");
|
||||||
}
|
}
|
||||||
// Build 2x2 grid + log panel
|
// Build 3x2 grid + log panel (5 tasks: 3 cols x 2 rows, cell [1][2] empty)
|
||||||
cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30));
|
cv::Mat canvas(cellH * gridRows + logPanelH, cellW * gridCols, CV_8UC3, cv::Scalar(30, 30, 30));
|
||||||
|
|
||||||
// Place each task's frame in its quadrant
|
// Place each task's frame in its cell
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
int row = i / 2, col = i % 2;
|
int row = i / gridCols, col = i % gridCols;
|
||||||
cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
|
cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
|
||||||
|
|
||||||
cv::Mat cell;
|
cv::Mat cell;
|
||||||
@@ -1313,8 +1465,8 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
// Draw status bar at bottom of each cell (2 lines)
|
// Draw status bar at bottom of each cell (2 lines)
|
||||||
cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
|
cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
|
||||||
char bar1[256], bar2[256];
|
char bar1[256], bar2[256];
|
||||||
snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s",
|
snprintf(bar1, sizeof(bar1), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s",
|
||||||
i, fps, infMs, fCount, dCount,
|
i, taskStreamMap[i], fps, infMs, fCount, dCount,
|
||||||
lastPlate.empty() ? "-" : lastPlate.c_str());
|
lastPlate.empty() ? "-" : lastPlate.c_str());
|
||||||
if (gpuId >= 0) {
|
if (gpuId >= 0) {
|
||||||
snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
|
snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
|
||||||
@@ -1323,45 +1475,53 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
||||||
cv::putText(cell, bar1, cv::Point(5, cellH - 28),
|
cv::putText(cell, bar1, cv::Point(5, cellH - 28),
|
||||||
cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
|
cv::FONT_HERSHEY_SIMPLEX, 0.4, barColor, 1);
|
||||||
cv::putText(cell, bar2, cv::Point(5, cellH - 8),
|
cv::putText(cell, bar2, cv::Point(5, cellH - 8),
|
||||||
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
|
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 200, 255), 1);
|
||||||
|
|
||||||
cell.copyTo(canvas(roi));
|
cell.copyTo(canvas(roi));
|
||||||
|
|
||||||
// Draw grid lines
|
|
||||||
cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2),
|
|
||||||
cv::Scalar(100, 100, 100), 1);
|
|
||||||
cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH),
|
|
||||||
cv::Scalar(100, 100, 100), 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Draw grid lines
|
||||||
|
for (int c = 1; c < gridCols; c++)
|
||||||
|
cv::line(canvas, cv::Point(c * cellW, 0), cv::Point(c * cellW, cellH * gridRows),
|
||||||
|
cv::Scalar(100, 100, 100), 1);
|
||||||
|
for (int r = 1; r < gridRows; r++)
|
||||||
|
cv::line(canvas, cv::Point(0, r * cellH), cv::Point(cellW * gridCols, r * cellH),
|
||||||
|
cv::Scalar(100, 100, 100), 1);
|
||||||
|
|
||||||
// --- Log panel at bottom ---
|
// --- Log panel at bottom ---
|
||||||
cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH);
|
cv::Rect logRoi(0, cellH * gridRows, cellW * gridCols, logPanelH);
|
||||||
cv::Mat logPanel = canvas(logRoi);
|
cv::Mat logPanel = canvas(logRoi);
|
||||||
logPanel.setTo(cv::Scalar(20, 20, 20));
|
logPanel.setTo(cv::Scalar(20, 20, 20));
|
||||||
|
|
||||||
// Elapsed time header
|
// Elapsed time header
|
||||||
auto elapsed = std::chrono::duration<double>(std::chrono::steady_clock::now() - testStart).count();
|
auto elapsed = std::chrono::duration<double>(std::chrono::steady_clock::now() - testStart).count();
|
||||||
char header[128];
|
char header[256];
|
||||||
snprintf(header, sizeof(header),
|
snprintf(header, sizeof(header),
|
||||||
"Elapsed: %.0fs | Press ESC to stop | Resize window freely", elapsed);
|
"Elapsed: %.0fs | %d cameras, %d AI tasks | Press ESC to stop",
|
||||||
|
elapsed, NUM_STREAMS, NUM_TASKS);
|
||||||
cv::putText(logPanel, header, cv::Point(10, 18),
|
cv::putText(logPanel, header, cv::Point(10, 18),
|
||||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1);
|
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1);
|
||||||
|
|
||||||
// Aggregate stats + per-task GPU summary
|
// Aggregate stats + per-task GPU summary
|
||||||
double totalFps = 0;
|
double totalFps = 0;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||||
totalFps += taskStates[i].fps;
|
totalFps += taskStates[i].fps;
|
||||||
}
|
}
|
||||||
char aggLine[256];
|
// Build dynamic task-GPU summary string
|
||||||
snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d",
|
std::string taskGpuStr;
|
||||||
totalFps,
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId,
|
if (i > 0) taskGpuStr += " ";
|
||||||
taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId);
|
taskGpuStr += "T" + std::to_string(i) + "(S" + std::to_string(taskStreamMap[i])
|
||||||
|
+ "):GPU" + std::to_string(taskStates[i].gpuDeviceId);
|
||||||
|
}
|
||||||
|
char aggLine[512];
|
||||||
|
snprintf(aggLine, sizeof(aggLine), "Total: %.1f FPS | %s",
|
||||||
|
totalFps, taskGpuStr.c_str());
|
||||||
cv::putText(logPanel, aggLine, cv::Point(10, 38),
|
cv::putText(logPanel, aggLine, cv::Point(10, 38),
|
||||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1);
|
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 255, 255), 1);
|
||||||
|
|
||||||
// Real-time GPU VRAM monitor (query every frame — cheap call)
|
// Real-time GPU VRAM monitor (query every frame — cheap call)
|
||||||
auto gpuSnaps = QueryGpuVram();
|
auto gpuSnaps = QueryGpuVram();
|
||||||
@@ -1370,7 +1530,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
// Count tasks on this GPU and their total VRAM
|
// Count tasks on this GPU and their total VRAM
|
||||||
int tasksOnGpu = 0;
|
int tasksOnGpu = 0;
|
||||||
size_t taskVramMiB = 0;
|
size_t taskVramMiB = 0;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||||
if (taskStates[i].gpuDeviceId == gs.deviceId) {
|
if (taskStates[i].gpuDeviceId == gs.deviceId) {
|
||||||
tasksOnGpu++;
|
tasksOnGpu++;
|
||||||
@@ -1387,13 +1547,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
gpuLineY += 18;
|
gpuLineY += 18;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per-task resource line
|
// Per-task resource line (shows which stream each task subscribes to)
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||||
char tLine[256];
|
char tLine[256];
|
||||||
snprintf(tLine, sizeof(tLine),
|
snprintf(tLine, sizeof(tLine),
|
||||||
"T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d",
|
"T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d",
|
||||||
i, taskStates[i].gpuDeviceId,
|
i, taskStreamMap[i], taskStates[i].gpuDeviceId,
|
||||||
taskStates[i].vramUsedBytes / (1024 * 1024),
|
taskStates[i].vramUsedBytes / (1024 * 1024),
|
||||||
taskStates[i].fps, taskStates[i].inferenceMs,
|
taskStates[i].fps, taskStates[i].inferenceMs,
|
||||||
taskStates[i].frameCount, taskStates[i].detectionCount);
|
taskStates[i].frameCount, taskStates[i].detectionCount);
|
||||||
@@ -1421,9 +1581,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Stop chaos thread ---
|
||||||
|
chaosEnabled.store(false);
|
||||||
|
if (chaosThread.joinable()) chaosThread.join();
|
||||||
|
|
||||||
// --- Wait for all workers ---
|
// --- Wait for all workers ---
|
||||||
printf("Waiting for worker threads to finish...\n");
|
printf("Waiting for %d worker threads to finish...\n", NUM_TASKS);
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
if (workers[i].joinable()) workers[i].join();
|
if (workers[i].joinable()) workers[i].join();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1433,19 +1597,21 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
|
|
||||||
g_log.add("================================================================");
|
g_log.add("================================================================");
|
||||||
g_log.add(" FINAL PERFORMANCE SUMMARY");
|
g_log.add(" FINAL PERFORMANCE SUMMARY");
|
||||||
|
g_log.add(" " + std::to_string(NUM_STREAMS) + " cameras, " + std::to_string(NUM_TASKS) + " AI tasks");
|
||||||
g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
|
g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
|
||||||
g_log.add("================================================================");
|
g_log.add("================================================================");
|
||||||
|
|
||||||
printf("\n============================================================\n");
|
printf("\n============================================================\n");
|
||||||
printf(" FINAL PERFORMANCE SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
printf(" FINAL PERFORMANCE SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
||||||
|
printf(" %d cameras, %d AI tasks\n", NUM_STREAMS, NUM_TASKS);
|
||||||
printf("============================================================\n");
|
printf("============================================================\n");
|
||||||
|
|
||||||
double totalFpsFinal = 0;
|
double totalFpsFinal = 0;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
char buf[512];
|
char buf[512];
|
||||||
snprintf(buf, sizeof(buf),
|
snprintf(buf, sizeof(buf),
|
||||||
" Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f",
|
" Task %d (Stream %d): GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f",
|
||||||
i, taskStates[i].gpuDeviceId,
|
i, taskStreamMap[i], taskStates[i].gpuDeviceId,
|
||||||
taskStates[i].vramUsedBytes / (1024 * 1024),
|
taskStates[i].vramUsedBytes / (1024 * 1024),
|
||||||
taskStates[i].frameCount, taskStates[i].detectionCount,
|
taskStates[i].frameCount, taskStates[i].detectionCount,
|
||||||
taskStates[i].fps, taskStates[i].inferenceMs);
|
taskStates[i].fps, taskStates[i].inferenceMs);
|
||||||
@@ -1466,12 +1632,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
|
|
||||||
// Multi-GPU verdict
|
// Multi-GPU verdict
|
||||||
std::set<int> finalGpusUsed;
|
std::set<int> finalGpusUsed;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId);
|
if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
char buf[256];
|
char buf[256];
|
||||||
snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal);
|
snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across %d tasks (%d cameras)",
|
||||||
|
totalFpsFinal, NUM_TASKS, NUM_STREAMS);
|
||||||
printf("%s\n", buf);
|
printf("%s\n", buf);
|
||||||
g_log.add(buf);
|
g_log.add(buf);
|
||||||
}
|
}
|
||||||
@@ -1491,13 +1658,16 @@ int ANSLPR_MultiGPU_StressTest() {
|
|||||||
g_log.add(" 3. No CUDA_VISIBLE_DEVICES env var restricting GPU access");
|
g_log.add(" 3. No CUDA_VISIBLE_DEVICES env var restricting GPU access");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Log shared-camera subscription info
|
||||||
|
g_log.add(" Camera subscription: Task 2 and Task 4 both subscribe to Stream 2 (nhathuocngoclinh)");
|
||||||
|
|
||||||
printf("============================================================\n");
|
printf("============================================================\n");
|
||||||
g_log.add("================================================================");
|
g_log.add("================================================================");
|
||||||
g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH));
|
g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH));
|
||||||
g_log.add("================================================================");
|
g_log.add("================================================================");
|
||||||
|
|
||||||
// --- Release all handles (sequentially on main thread) ---
|
// --- Release all handles (sequentially on main thread) ---
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < NUM_TASKS; i++) {
|
||||||
if (alprHandles[i]) {
|
if (alprHandles[i]) {
|
||||||
ReleaseANSALPRHandle(&alprHandles[i]);
|
ReleaseANSALPRHandle(&alprHandles[i]);
|
||||||
}
|
}
|
||||||
@@ -2770,9 +2940,9 @@ int main()
|
|||||||
//for (int i = 0; i < 100; i++) {
|
//for (int i = 0; i < 100; i++) {
|
||||||
// ANSLPR_CPU_Inferences_FileTest();
|
// ANSLPR_CPU_Inferences_FileTest();
|
||||||
//}
|
//}
|
||||||
//ANSLPR_MultiGPU_StressTest();
|
ANSLPR_MultiGPU_StressTest();
|
||||||
//ANSLPR_MultiGPU_StressTest_SimulatedCam();
|
//ANSLPR_MultiGPU_StressTest_SimulatedCam();
|
||||||
ANSLPR_MultiGPU_StressTest_FilePlayer();
|
// ANSLPR_MultiGPU_StressTest_FilePlayer();
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user