Fix NV12 crash issue when recreating the camera object
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <random>
|
||||
#include <chrono>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
@@ -751,8 +752,11 @@ static void LogGpuInfo() {
|
||||
// Worker thread: reads RTSP frames and runs ALPR inference
|
||||
// RTSP client and ALPR engine are pre-created on the main thread to avoid
|
||||
// race conditions in CreateANSRTSPHandle / CreateANSALPRHandle.
|
||||
// Takes rtspClientPtr (pointer to array slot) + streamGuard mutex so the
|
||||
// CHAOS thread can safely destroy+recreate the stream without use-after-free.
|
||||
static void ALPRWorkerThread(int taskId,
|
||||
ANSCENTER::ANSRTSPClient* rtspClient,
|
||||
ANSCENTER::ANSRTSPClient** rtspClientPtr,
|
||||
std::mutex* streamGuard,
|
||||
ANSCENTER::ANSALPR* alprHandle,
|
||||
TaskState& state) {
|
||||
char tag[32];
|
||||
@@ -780,6 +784,23 @@ static void ALPRWorkerThread(int taskId,
|
||||
bool hwDecodeLogged = false;
|
||||
|
||||
while (g_running.load()) {
|
||||
// Lock the stream guard to prevent CHAOS from destroying the client
|
||||
// while we're mid-frame-grab or mid-inference.
|
||||
std::unique_lock<std::mutex> streamLock(*streamGuard);
|
||||
|
||||
// Re-read the client pointer each iteration — CHAOS may have
|
||||
// destroyed+recreated it, so our old pointer could be dangling.
|
||||
ANSCENTER::ANSRTSPClient* rtspClient = *rtspClientPtr;
|
||||
if (rtspClient == nullptr) {
|
||||
streamLock.unlock();
|
||||
emptyFrames++;
|
||||
if (emptyFrames % 100 == 1) {
|
||||
g_log.add(prefix + " Stream destroyed by CHAOS, waiting... (count=" + std::to_string(emptyFrames) + ")");
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(50));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Read frame from RTSP via ANSCV
|
||||
auto grabStart = std::chrono::steady_clock::now();
|
||||
cv::Mat* framePtr = nullptr;
|
||||
@@ -797,6 +818,7 @@ static void ALPRWorkerThread(int taskId,
|
||||
ReconnectRTSP(&rtspClient);
|
||||
emptyFrames = 0;
|
||||
}
|
||||
streamLock.unlock();
|
||||
if (framePtr) delete framePtr;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
continue;
|
||||
@@ -829,6 +851,9 @@ static void ALPRWorkerThread(int taskId,
|
||||
// matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create
|
||||
// a different pointer the registry doesn't know, breaking NV12 zero-copy.
|
||||
ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage);
|
||||
|
||||
// Release stream lock — inference is done, CHAOS can now safely destroy.
|
||||
streamLock.unlock();
|
||||
auto infEnd = std::chrono::steady_clock::now();
|
||||
double infMs = std::chrono::duration<double, std::milli>(infEnd - infStart).count();
|
||||
totalInfMs += infMs;
|
||||
@@ -933,19 +958,20 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
|
||||
printf("\n");
|
||||
printf("============================================================\n");
|
||||
printf(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks\n");
|
||||
printf(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks\n");
|
||||
printf(" (4 cameras, 5 AI tasks — Task 4 shares Stream 2)\n");
|
||||
printf(" Press ESC to stop\n");
|
||||
printf(" Log file: %s\n", LOG_FILE_PATH);
|
||||
printf("============================================================\n\n");
|
||||
|
||||
g_log.add("============================================================");
|
||||
g_log.add(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks");
|
||||
g_log.add(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks");
|
||||
g_log.add("============================================================");
|
||||
|
||||
// --- Log GPU info for diagnostics ---
|
||||
LogGpuInfo();
|
||||
|
||||
// --- RTSP URLs (4 independent streams, one per task) ---
|
||||
// --- RTSP URLs (4 independent camera streams) ---
|
||||
const std::string rtspUrl0 = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0";
|
||||
const std::string rtspUrl1 = "rtsp://cafe2471.ddns.net:600/rtsp/streaming?channel=01&subtype=0";
|
||||
const std::string rtspUrl2 = "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0";
|
||||
@@ -956,18 +982,39 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
g_log.add("Stream 2: " + rtspUrl2);
|
||||
g_log.add("Stream 3: " + rtspUrl3);
|
||||
|
||||
// =========================================================================
|
||||
// Architecture: Camera Process + AI Task Process (mimics LabVIEW)
|
||||
// -----------------------------------------------------------------------
|
||||
// Camera Process: 4 independent RTSP streams acquire frames from cameras.
|
||||
// AI Task Process: 5 AI tasks subscribe to camera streams and run inference
|
||||
// in parallel. Multiple tasks can share one camera stream.
|
||||
// Task 4 subscribes to Stream 2 (nhathuocngoclinh) to demonstrate the
|
||||
// shared-camera subscription model used in LabVIEW.
|
||||
// =========================================================================
|
||||
|
||||
const int NUM_STREAMS = 4;
|
||||
const int NUM_TASKS = 5;
|
||||
|
||||
// --- Task states ---
|
||||
TaskState taskStates[4];
|
||||
TaskState taskStates[NUM_TASKS];
|
||||
|
||||
// =========================================================================
|
||||
// Create 4 INDEPENDENT RTSP readers — one per task, each with its own
|
||||
// camera stream. Each task gets a dedicated RTSP connection.
|
||||
// CAMERA PROCESS: Create 4 independent RTSP readers (one per camera).
|
||||
// These form the camera acquisition layer that AI tasks subscribe to.
|
||||
// =========================================================================
|
||||
const int NUM_STREAMS = 4;
|
||||
ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {};
|
||||
const std::string streamUrls[NUM_STREAMS] = { rtspUrl0, rtspUrl1, rtspUrl2, rtspUrl3 };
|
||||
// Map: task index -> stream index (1:1 mapping)
|
||||
const int taskStreamMap[4] = { 0, 1, 2, 3 };
|
||||
// Map: task index -> stream index
|
||||
// Tasks 0-3 map 1:1 to streams 0-3.
|
||||
// Task 4 subscribes to Stream 2 (nhathuocngoclinh) — shared camera.
|
||||
const int taskStreamMap[NUM_TASKS] = { 0, 1, 2, 3, 2 };
|
||||
|
||||
// Log task-to-stream subscription mapping
|
||||
g_log.add("--- AI Task -> Camera Stream subscription ---");
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
g_log.add(" Task " + std::to_string(i) + " -> Stream " + std::to_string(taskStreamMap[i])
|
||||
+ " (" + streamUrls[taskStreamMap[i]] + ")");
|
||||
}
|
||||
|
||||
for (int s = 0; s < NUM_STREAMS; s++) {
|
||||
printf("[Stream%d] Creating RTSP handle for %s...\n", s, streamUrls[s].c_str());
|
||||
@@ -986,14 +1033,17 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Create 4 ALPR engines sequentially
|
||||
// AI TASK PROCESS: Create 5 ALPR engines sequentially.
|
||||
// Each AI task gets its own engine and subscribes to a camera stream.
|
||||
// Task 4 shares Stream 2 (nhathuocngoclinh) with Task 2 — demonstrating
|
||||
// the LabVIEW pattern where multiple AI tasks subscribe to one camera.
|
||||
// =========================================================================
|
||||
ANSCENTER::ANSALPR* alprHandles[4] = {};
|
||||
ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {};
|
||||
std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
|
||||
int engineType = 1; // NVIDIA_GPU
|
||||
double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
char tag[32];
|
||||
snprintf(tag, sizeof(tag), "[Task%d]", i);
|
||||
|
||||
@@ -1109,7 +1159,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
|
||||
// Count votes: how many tasks on this stream use each GPU
|
||||
std::map<int, int> gpuVotes;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (taskStreamMap[i] == s && alprHandles[i]) {
|
||||
gpuVotes[taskStates[i].gpuDeviceId]++;
|
||||
}
|
||||
@@ -1194,30 +1244,132 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
|
||||
// --- Enable deep pipeline benchmarking on all ALPR handles ---
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (alprHandles[i]) {
|
||||
alprHandles[i]->ActivateDebugger(true);
|
||||
}
|
||||
}
|
||||
g_log.add("Debug benchmarking ENABLED on all ALPR handles");
|
||||
|
||||
// --- Launch worker threads — tasks sharing a stream get the same RTSP client ---
|
||||
g_log.add("Launching worker threads...");
|
||||
std::thread workers[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
// --- Per-stream mutex: prevents CHAOS from destroying a stream while a
|
||||
// worker is mid-frame-grab or mid-inference (use-after-free fix). ---
|
||||
std::mutex streamGuards[NUM_STREAMS];
|
||||
|
||||
// --- Launch worker threads ---
|
||||
// Each AI task subscribes to its camera stream via taskStreamMap.
|
||||
// Tasks sharing a stream (e.g. Task 2 & Task 4 on Stream 2) both get
|
||||
// the same RTSP client pointer and share the stream's mutex guard.
|
||||
g_log.add("Launching " + std::to_string(NUM_TASKS) + " worker threads...");
|
||||
std::thread workers[NUM_TASKS];
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
int streamIdx = taskStreamMap[i];
|
||||
if (rtspClients[streamIdx] && alprHandles[i]) {
|
||||
workers[i] = std::thread(ALPRWorkerThread, i,
|
||||
rtspClients[streamIdx], alprHandles[i],
|
||||
&rtspClients[streamIdx],
|
||||
&streamGuards[streamIdx],
|
||||
alprHandles[i],
|
||||
std::ref(taskStates[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Camera Chaos Thread — simulates camera errors / reconnects
|
||||
// Mimics LabVIEW behavior: cameras randomly go into Error/Recovering
|
||||
// state, triggering Stop/Reconnect/Destroy+Recreate cycles that cause
|
||||
// CUDA cleanup (cuArrayDestroy, cuMemFree) while inference is running.
|
||||
// This is the exact scenario that triggers the nvcuda64 SRW lock deadlock.
|
||||
// =========================================================================
|
||||
std::atomic<bool> chaosEnabled{true};
|
||||
std::thread chaosThread([&]() {
|
||||
std::mt19937 rng(std::random_device{}());
|
||||
|
||||
// Wait 10 seconds for system to stabilize before starting chaos
|
||||
for (int i = 0; i < 100 && g_running.load(); i++) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
g_log.add("[CHAOS] Camera chaos thread started — every 10s, stop/destroy/recreate one camera (round-robin)");
|
||||
printf("[CHAOS] Camera chaos thread started — 10s interval, round-robin across %d streams\n", NUM_STREAMS);
|
||||
|
||||
int chaosCount = 0;
|
||||
int nextStream = 0; // Round-robin: cycle through streams 0,1,2,3,0,1,...
|
||||
while (g_running.load() && chaosEnabled.load()) {
|
||||
// Fixed 10-second interval between chaos events
|
||||
for (int s = 0; s < 100 && g_running.load(); s++) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
if (!g_running.load()) break;
|
||||
|
||||
int streamIdx = nextStream;
|
||||
nextStream = (nextStream + 1) % NUM_STREAMS;
|
||||
chaosCount++;
|
||||
|
||||
char buf[512];
|
||||
auto chaosStart = std::chrono::steady_clock::now();
|
||||
|
||||
// Lock stream guard: wait for any in-flight inference to finish
|
||||
// before touching the RTSP client. This prevents use-after-free
|
||||
// when CHAOS destroys a stream while a worker is mid-inference.
|
||||
std::unique_lock<std::mutex> chaosLock(streamGuards[streamIdx]);
|
||||
|
||||
// Always use full DESTROY + RECREATE cycle.
|
||||
// Reconnect() reuses internal player state which can leave stale
|
||||
// CUDA resources and cause freezes. A clean destroy + recreate
|
||||
// guarantees a fresh decoder/player with no leftover state.
|
||||
{
|
||||
bool wasAlive = (rtspClients[streamIdx] != nullptr);
|
||||
|
||||
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: DESTROY + RECREATE (%s)",
|
||||
chaosCount, streamIdx,
|
||||
wasAlive ? "camera was running" : "camera was already offline");
|
||||
g_log.add(buf);
|
||||
printf("%s\n", buf);
|
||||
|
||||
// Stop and release old handle if it exists
|
||||
if (rtspClients[streamIdx]) {
|
||||
StopRTSP(&rtspClients[streamIdx]);
|
||||
ReleaseANSRTSPHandle(&rtspClients[streamIdx]);
|
||||
rtspClients[streamIdx] = nullptr;
|
||||
}
|
||||
|
||||
// Release lock during offline sleep — worker sees nullptr and skips
|
||||
int offlineMs = 500 + (rng() % 2500); // 0.5 - 3 seconds offline
|
||||
chaosLock.unlock();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(offlineMs));
|
||||
chaosLock.lock();
|
||||
|
||||
// Recreate the RTSP handle (under lock again)
|
||||
int result = CreateANSRTSPHandle(&rtspClients[streamIdx], "", "", "",
|
||||
streamUrls[streamIdx].c_str());
|
||||
if (result == 1 && rtspClients[streamIdx]) {
|
||||
SetRTSPImageQuality(&rtspClients[streamIdx], 0);
|
||||
SetRTSPHWDecoding(&rtspClients[streamIdx], 7);
|
||||
StartRTSP(&rtspClients[streamIdx]);
|
||||
|
||||
auto chaosEnd = std::chrono::steady_clock::now();
|
||||
double chaosMs = std::chrono::duration<double, std::milli>(chaosEnd - chaosStart).count();
|
||||
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATED in %.0f ms (offline %d ms)",
|
||||
chaosCount, streamIdx, chaosMs, offlineMs);
|
||||
} else {
|
||||
snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATE FAILED (result=%d)",
|
||||
chaosCount, streamIdx, result);
|
||||
}
|
||||
g_log.add(buf);
|
||||
printf("%s\n", buf);
|
||||
}
|
||||
}
|
||||
|
||||
g_log.add("[CHAOS] Camera chaos thread stopped (total events: " + std::to_string(chaosCount) + ")");
|
||||
printf("[CHAOS] Camera chaos thread stopped (total events: %d)\n", chaosCount);
|
||||
});
|
||||
|
||||
// --- Display loop (main thread) ---
|
||||
const int cellW = 640, cellH = 480;
|
||||
const int logPanelH = 200;
|
||||
// 3x2 grid layout: 5 tasks displayed in 3 columns x 2 rows
|
||||
const int cellW = 480, cellH = 360; // Smaller cells for 3-column layout
|
||||
const int logPanelH = 220;
|
||||
const int gridCols = 3, gridRows = 2;
|
||||
cv::namedWindow("ANSLPR Multi-GPU Stress Test", cv::WINDOW_NORMAL);
|
||||
cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * 2, cellH * 2 + logPanelH);
|
||||
cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * gridCols, cellH * gridRows + logPanelH);
|
||||
|
||||
auto testStart = std::chrono::steady_clock::now();
|
||||
auto lastGpuSnapshot = std::chrono::steady_clock::now();
|
||||
@@ -1244,12 +1396,12 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
// Per-task stats
|
||||
double totalFpsSnap = 0;
|
||||
for (int t = 0; t < 4; t++) {
|
||||
for (int t = 0; t < NUM_TASKS; t++) {
|
||||
std::lock_guard<std::mutex> lk(taskStates[t].mtx);
|
||||
char buf[256];
|
||||
snprintf(buf, sizeof(buf),
|
||||
" T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d",
|
||||
t, taskStates[t].gpuDeviceId,
|
||||
" T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d",
|
||||
t, taskStreamMap[t], taskStates[t].gpuDeviceId,
|
||||
taskStates[t].vramUsedBytes / (1024 * 1024),
|
||||
taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs,
|
||||
taskStates[t].frameCount, taskStates[t].detectionCount);
|
||||
@@ -1261,7 +1413,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
g_log.add(buf);
|
||||
// Multi-GPU check
|
||||
std::set<int> gpusUsed;
|
||||
for (int t = 0; t < 4; t++) {
|
||||
for (int t = 0; t < NUM_TASKS; t++) {
|
||||
if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
|
||||
}
|
||||
if (gpusUsed.size() > 1) {
|
||||
@@ -1271,12 +1423,12 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
g_log.add("---- END SNAPSHOT ----");
|
||||
}
|
||||
// Build 2x2 grid + log panel
|
||||
cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30));
|
||||
// Build 3x2 grid + log panel (5 tasks: 3 cols x 2 rows, cell [1][2] empty)
|
||||
cv::Mat canvas(cellH * gridRows + logPanelH, cellW * gridCols, CV_8UC3, cv::Scalar(30, 30, 30));
|
||||
|
||||
// Place each task's frame in its quadrant
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int row = i / 2, col = i % 2;
|
||||
// Place each task's frame in its cell
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
int row = i / gridCols, col = i % gridCols;
|
||||
cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
|
||||
|
||||
cv::Mat cell;
|
||||
@@ -1313,8 +1465,8 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
// Draw status bar at bottom of each cell (2 lines)
|
||||
cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
|
||||
char bar1[256], bar2[256];
|
||||
snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s",
|
||||
i, fps, infMs, fCount, dCount,
|
||||
snprintf(bar1, sizeof(bar1), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s",
|
||||
i, taskStreamMap[i], fps, infMs, fCount, dCount,
|
||||
lastPlate.empty() ? "-" : lastPlate.c_str());
|
||||
if (gpuId >= 0) {
|
||||
snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
|
||||
@@ -1323,45 +1475,53 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
||||
cv::putText(cell, bar1, cv::Point(5, cellH - 28),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.4, barColor, 1);
|
||||
cv::putText(cell, bar2, cv::Point(5, cellH - 8),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 200, 255), 1);
|
||||
|
||||
cell.copyTo(canvas(roi));
|
||||
|
||||
// Draw grid lines
|
||||
cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2),
|
||||
cv::Scalar(100, 100, 100), 1);
|
||||
cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH),
|
||||
cv::Scalar(100, 100, 100), 1);
|
||||
}
|
||||
|
||||
// Draw grid lines
|
||||
for (int c = 1; c < gridCols; c++)
|
||||
cv::line(canvas, cv::Point(c * cellW, 0), cv::Point(c * cellW, cellH * gridRows),
|
||||
cv::Scalar(100, 100, 100), 1);
|
||||
for (int r = 1; r < gridRows; r++)
|
||||
cv::line(canvas, cv::Point(0, r * cellH), cv::Point(cellW * gridCols, r * cellH),
|
||||
cv::Scalar(100, 100, 100), 1);
|
||||
|
||||
// --- Log panel at bottom ---
|
||||
cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH);
|
||||
cv::Rect logRoi(0, cellH * gridRows, cellW * gridCols, logPanelH);
|
||||
cv::Mat logPanel = canvas(logRoi);
|
||||
logPanel.setTo(cv::Scalar(20, 20, 20));
|
||||
|
||||
// Elapsed time header
|
||||
auto elapsed = std::chrono::duration<double>(std::chrono::steady_clock::now() - testStart).count();
|
||||
char header[128];
|
||||
char header[256];
|
||||
snprintf(header, sizeof(header),
|
||||
"Elapsed: %.0fs | Press ESC to stop | Resize window freely", elapsed);
|
||||
"Elapsed: %.0fs | %d cameras, %d AI tasks | Press ESC to stop",
|
||||
elapsed, NUM_STREAMS, NUM_TASKS);
|
||||
cv::putText(logPanel, header, cv::Point(10, 18),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1);
|
||||
|
||||
// Aggregate stats + per-task GPU summary
|
||||
double totalFps = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||
totalFps += taskStates[i].fps;
|
||||
}
|
||||
char aggLine[256];
|
||||
snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d",
|
||||
totalFps,
|
||||
taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId,
|
||||
taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId);
|
||||
// Build dynamic task-GPU summary string
|
||||
std::string taskGpuStr;
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (i > 0) taskGpuStr += " ";
|
||||
taskGpuStr += "T" + std::to_string(i) + "(S" + std::to_string(taskStreamMap[i])
|
||||
+ "):GPU" + std::to_string(taskStates[i].gpuDeviceId);
|
||||
}
|
||||
char aggLine[512];
|
||||
snprintf(aggLine, sizeof(aggLine), "Total: %.1f FPS | %s",
|
||||
totalFps, taskGpuStr.c_str());
|
||||
cv::putText(logPanel, aggLine, cv::Point(10, 38),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1);
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 255, 255), 1);
|
||||
|
||||
// Real-time GPU VRAM monitor (query every frame — cheap call)
|
||||
auto gpuSnaps = QueryGpuVram();
|
||||
@@ -1370,7 +1530,7 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
// Count tasks on this GPU and their total VRAM
|
||||
int tasksOnGpu = 0;
|
||||
size_t taskVramMiB = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||
if (taskStates[i].gpuDeviceId == gs.deviceId) {
|
||||
tasksOnGpu++;
|
||||
@@ -1387,13 +1547,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
gpuLineY += 18;
|
||||
}
|
||||
|
||||
// Per-task resource line
|
||||
for (int i = 0; i < 4; i++) {
|
||||
// Per-task resource line (shows which stream each task subscribes to)
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
||||
char tLine[256];
|
||||
snprintf(tLine, sizeof(tLine),
|
||||
"T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d",
|
||||
i, taskStates[i].gpuDeviceId,
|
||||
"T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d",
|
||||
i, taskStreamMap[i], taskStates[i].gpuDeviceId,
|
||||
taskStates[i].vramUsedBytes / (1024 * 1024),
|
||||
taskStates[i].fps, taskStates[i].inferenceMs,
|
||||
taskStates[i].frameCount, taskStates[i].detectionCount);
|
||||
@@ -1421,9 +1581,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Stop chaos thread ---
|
||||
chaosEnabled.store(false);
|
||||
if (chaosThread.joinable()) chaosThread.join();
|
||||
|
||||
// --- Wait for all workers ---
|
||||
printf("Waiting for worker threads to finish...\n");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
printf("Waiting for %d worker threads to finish...\n", NUM_TASKS);
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (workers[i].joinable()) workers[i].join();
|
||||
}
|
||||
|
||||
@@ -1433,19 +1597,21 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
|
||||
g_log.add("================================================================");
|
||||
g_log.add(" FINAL PERFORMANCE SUMMARY");
|
||||
g_log.add(" " + std::to_string(NUM_STREAMS) + " cameras, " + std::to_string(NUM_TASKS) + " AI tasks");
|
||||
g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
|
||||
g_log.add("================================================================");
|
||||
|
||||
printf("\n============================================================\n");
|
||||
printf(" FINAL PERFORMANCE SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
||||
printf(" %d cameras, %d AI tasks\n", NUM_STREAMS, NUM_TASKS);
|
||||
printf("============================================================\n");
|
||||
|
||||
double totalFpsFinal = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
char buf[512];
|
||||
snprintf(buf, sizeof(buf),
|
||||
" Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f",
|
||||
i, taskStates[i].gpuDeviceId,
|
||||
" Task %d (Stream %d): GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f",
|
||||
i, taskStreamMap[i], taskStates[i].gpuDeviceId,
|
||||
taskStates[i].vramUsedBytes / (1024 * 1024),
|
||||
taskStates[i].frameCount, taskStates[i].detectionCount,
|
||||
taskStates[i].fps, taskStates[i].inferenceMs);
|
||||
@@ -1466,12 +1632,13 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
|
||||
// Multi-GPU verdict
|
||||
std::set<int> finalGpusUsed;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId);
|
||||
}
|
||||
{
|
||||
char buf[256];
|
||||
snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal);
|
||||
snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across %d tasks (%d cameras)",
|
||||
totalFpsFinal, NUM_TASKS, NUM_STREAMS);
|
||||
printf("%s\n", buf);
|
||||
g_log.add(buf);
|
||||
}
|
||||
@@ -1491,13 +1658,16 @@ int ANSLPR_MultiGPU_StressTest() {
|
||||
g_log.add(" 3. No CUDA_VISIBLE_DEVICES env var restricting GPU access");
|
||||
}
|
||||
|
||||
// Log shared-camera subscription info
|
||||
g_log.add(" Camera subscription: Task 2 and Task 4 both subscribe to Stream 2 (nhathuocngoclinh)");
|
||||
|
||||
printf("============================================================\n");
|
||||
g_log.add("================================================================");
|
||||
g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH));
|
||||
g_log.add("================================================================");
|
||||
|
||||
// --- Release all handles (sequentially on main thread) ---
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < NUM_TASKS; i++) {
|
||||
if (alprHandles[i]) {
|
||||
ReleaseANSALPRHandle(&alprHandles[i]);
|
||||
}
|
||||
@@ -2770,9 +2940,9 @@ int main()
|
||||
//for (int i = 0; i < 100; i++) {
|
||||
// ANSLPR_CPU_Inferences_FileTest();
|
||||
//}
|
||||
//ANSLPR_MultiGPU_StressTest();
|
||||
ANSLPR_MultiGPU_StressTest();
|
||||
//ANSLPR_MultiGPU_StressTest_SimulatedCam();
|
||||
ANSLPR_MultiGPU_StressTest_FilePlayer();
|
||||
// ANSLPR_MultiGPU_StressTest_FilePlayer();
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user