// ============================================================================= // CustomModel-StressTest.cpp // // Multi-task stress test using ANSODEngine's extern "C" API functions // (same path as LabVIEW). Uses FilePlayer + CloneImage + RunInferenceComplete_CPP // to reproduce the full LabVIEW production flow for custom model DLLs. // // This test loads ANSCV.dll at runtime via LoadLibrary/GetProcAddress // so it does NOT require linking ANSCV.lib. // ============================================================================= // windows.h MUST come before ANSODTest.h to avoid ACCESS_MASK conflict with TensorRT #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #include "ANSODTest.h" #include #include #include #include #include #include // Note: NOT linking cudart.lib — use GetProcAddress for CUDA if needed // --- Forward declarations of ANSODEngine extern "C" functions --- // These are linked via ANSODEngine.lib extern "C" __declspec(dllimport) std::string CreateANSODHandle( ANSCENTER::ANSODBase** Handle, const char* licenseKey, const char* modelFilePath, const char* modelFileZipPassword, float detectionScoreThreshold, float modelConfThreshold, float modelMNSThreshold, int autoDetectEngine, int modelType, int detectionType, int loadEngineOnCreation); extern "C" __declspec(dllimport) int RunInferenceComplete_CPP( ANSCENTER::ANSODBase** Handle, cv::Mat** cvImage, const char* cameraId, const char* activeROIMode, std::vector& detectionResult); extern "C" __declspec(dllimport) int ReleaseANSODHandle(ANSCENTER::ANSODBase** Handle); // --- ANSCV function pointer types (loaded at runtime) --- typedef int (*FnCreateFilePlayer)(void** Handle, const char* licenseKey, const char* url); typedef int (*FnStartFilePlayer)(void** Handle); typedef int (*FnStopFilePlayer)(void** Handle); typedef int (*FnGetFilePlayerCVImage)(void** Handle, int& w, int& h, int64_t& pts, cv::Mat** image); typedef void(*FnSetFilePlayerDisplayRes)(void** Handle, int w, int h); typedef int (*FnReleaseFilePlayer)(void** Handle); typedef int (*FnCloneImage)(cv::Mat** imageIn, cv::Mat** imageOut); typedef int (*FnReleaseImage)(cv::Mat** imageIn); typedef void(*FnInitCameraNetwork)(); typedef void(*FnDeinitCameraNetwork)(); // --- ANSCV function pointers --- static FnCreateFilePlayer pCreateFilePlayer = nullptr; static FnStartFilePlayer pStartFilePlayer = nullptr; static FnStopFilePlayer pStopFilePlayer = nullptr; static FnGetFilePlayerCVImage pGetFilePlayerCVImage = nullptr; static FnSetFilePlayerDisplayRes pSetFilePlayerDisplayRes = nullptr; static FnReleaseFilePlayer pReleaseFilePlayer = nullptr; static FnCloneImage pCloneImage = nullptr; static FnReleaseImage pReleaseImage = nullptr; static FnInitCameraNetwork pInitCameraNetwork = nullptr; static FnDeinitCameraNetwork pDeinitCameraNetwork = nullptr; static HMODULE g_hANSCV = nullptr; static bool LoadANSCV() { g_hANSCV = LoadLibraryA("ANSCV.dll"); if (!g_hANSCV) { printf("ERROR: Failed to load ANSCV.dll (error %lu)\n", GetLastError()); return false; } pCreateFilePlayer = (FnCreateFilePlayer)GetProcAddress(g_hANSCV, "CreateANSFilePlayerHandle"); pStartFilePlayer = (FnStartFilePlayer)GetProcAddress(g_hANSCV, "StartFilePlayer"); pStopFilePlayer = (FnStopFilePlayer)GetProcAddress(g_hANSCV, "StopFilePlayer"); pGetFilePlayerCVImage = (FnGetFilePlayerCVImage)GetProcAddress(g_hANSCV, "GetFilePlayerCVImage"); pSetFilePlayerDisplayRes = (FnSetFilePlayerDisplayRes)GetProcAddress(g_hANSCV, "SetFilePlayerDisplayResolution"); pReleaseFilePlayer = (FnReleaseFilePlayer)GetProcAddress(g_hANSCV, "ReleaseANSFilePlayerHandle"); pCloneImage = (FnCloneImage)GetProcAddress(g_hANSCV, "ANSCV_CloneImage_S"); pReleaseImage = (FnReleaseImage)GetProcAddress(g_hANSCV, "ANSCV_ReleaseImage_S"); pInitCameraNetwork = (FnInitCameraNetwork)GetProcAddress(g_hANSCV, "InitCameraNetwork"); pDeinitCameraNetwork = (FnDeinitCameraNetwork)GetProcAddress(g_hANSCV, "DeinitCameraNetwork"); if (!pCreateFilePlayer || !pStartFilePlayer || !pStopFilePlayer || !pGetFilePlayerCVImage || !pReleaseFilePlayer || !pCloneImage || !pReleaseImage) { printf("ERROR: Failed to resolve one or more ANSCV functions\n"); FreeLibrary(g_hANSCV); g_hANSCV = nullptr; return false; } printf("ANSCV.dll loaded successfully\n"); return true; } static void UnloadANSCV() { if (g_hANSCV) { FreeLibrary(g_hANSCV); g_hANSCV = nullptr; } } // --- Shared state --- static std::atomic g_stressRunning{true}; struct StressTaskState { std::mutex mtx; cv::Mat displayFrame; double fps = 0; double inferenceMs = 0; double grabMs = 0; int frameCount = 0; int detectionCount = 0; int gpuDeviceId = -1; size_t vramUsedMiB = 0; std::string statusMsg = "Initializing"; std::string lastDetection; bool engineLoaded = false; }; // --- GPU VRAM helpers (via cudart.dll at runtime) --- typedef int (*FnCudaGetDeviceCount)(int*); typedef int (*FnCudaSetDevice)(int); typedef int (*FnCudaMemGetInfo)(size_t*, size_t*); static std::vector GetPerGpuFreeMiB() { static HMODULE hCudart = LoadLibraryA("cudart64_12.dll"); if (!hCudart) hCudart = LoadLibraryA("cudart64_110.dll"); if (!hCudart) return {}; auto fnGetCount = (FnCudaGetDeviceCount)GetProcAddress(hCudart, "cudaGetDeviceCount"); auto fnSetDev = (FnCudaSetDevice)GetProcAddress(hCudart, "cudaSetDevice"); auto fnMemInfo = (FnCudaMemGetInfo)GetProcAddress(hCudart, "cudaMemGetInfo"); if (!fnGetCount || !fnSetDev || !fnMemInfo) return {}; int count = 0; fnGetCount(&count); std::vector result(count, 0); for (int i = 0; i < count; i++) { fnSetDev(i); size_t freeMem = 0, totalMem = 0; fnMemInfo(&freeMem, &totalMem); result[i] = freeMem / (1024 * 1024); } return result; } // --- Worker thread --- // Mimics LabVIEW flow: GetImage → CloneImage → RunInferenceComplete_CPP → ReleaseImage static void ODWorkerThread(int taskId, void* fpClient, ANSCENTER::ANSODBase* odHandle, StressTaskState& state) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", taskId); printf("%s Worker thread started\n", tag); int width = 0, height = 0; int64_t pts = 0; int emptyFrames = 0; std::string cameraId = "StressCam" + std::to_string(taskId); std::deque fpsTimestamps; while (g_stressRunning.load()) { // --- Step 1: Get image from FilePlayer (like camera process) --- auto grabStart = std::chrono::steady_clock::now(); cv::Mat* framePtr = nullptr; pGetFilePlayerCVImage(&fpClient, width, height, pts, &framePtr); auto grabEnd = std::chrono::steady_clock::now(); double grabMs = std::chrono::duration(grabEnd - grabStart).count(); if (!framePtr || framePtr->empty()) { emptyFrames++; if (emptyFrames > 500) { printf("%s Too many empty frames (%d), stopping\n", tag, emptyFrames); break; } if (framePtr) { pReleaseImage(&framePtr); } std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } emptyFrames = 0; // --- Step 2: Clone image (like LabVIEW consumer) --- cv::Mat* clonedImage = nullptr; int cloneResult = pCloneImage(&framePtr, &clonedImage); if (cloneResult != 1 || !clonedImage) { printf("%s CloneImage failed (result=%d)\n", tag, cloneResult); pReleaseImage(&framePtr); std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } // Release original frame (camera process would do this) pReleaseImage(&framePtr); // --- Step 3: Run inference on clone (like AI task) --- auto infStart = std::chrono::steady_clock::now(); std::vector detections; int infResult = RunInferenceComplete_CPP(&odHandle, &clonedImage, cameraId.c_str(), "", detections); auto infEnd = std::chrono::steady_clock::now(); double infMs = std::chrono::duration(infEnd - infStart).count(); // --- Step 4: Draw results on clone for display --- cv::Mat display; if (clonedImage && !clonedImage->empty()) { display = clonedImage->clone(); } std::string lastDet; int detCount = 0; if (infResult > 0) { for (const auto& obj : detections) { if (!display.empty()) { cv::rectangle(display, obj.box, cv::Scalar(0, 255, 0), 2); std::string label = obj.className + " " + std::to_string((int)(obj.confidence * 100)) + "%"; cv::putText(display, label, cv::Point(obj.box.x, obj.box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1); } lastDet = obj.className; detCount++; } } // --- Step 5: Release clone (like LabVIEW task cleanup) --- pReleaseImage(&clonedImage); // --- FPS calculation --- auto now = std::chrono::steady_clock::now(); fpsTimestamps.push_back(now); while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) { fpsTimestamps.pop_front(); } double fps = fpsTimestamps.size() / 2.0; // --- Update state --- { std::lock_guard lk(state.mtx); if (!display.empty()) state.displayFrame = display; state.fps = fps; state.inferenceMs = infMs; state.grabMs = grabMs; state.frameCount++; state.detectionCount += detCount; if (!lastDet.empty()) state.lastDetection = lastDet; } // Periodic log if ((state.frameCount % 200) == 0) { printf("%s %d frames | %.1f FPS | Inf: %.0f ms | Det: %d\n", tag, state.frameCount, fps, infMs, state.detectionCount); } } printf("%s Worker thread finished (%d frames)\n", tag, state.frameCount); } // ============================================================================= // Main test function // // Config (edit these): // NUM_STREAMS — number of FilePlayer instances (cameras) // NUM_TASKS — number of AI tasks // videoFiles[] — paths to video files // modelFolder — path to custom model folder // modelType — engine type (31=RTYOLO, 30=ONNXYOLO, 10=CustomDetector, etc.) // ============================================================================= int CustomModel_StressTest_FilePlayer() { printf("\n"); printf("============================================================\n"); printf(" Custom Model Multi-Task Stress Test (FilePlayer)\n"); printf(" Uses RunInferenceComplete_CPP (same path as LabVIEW)\n"); printf(" Press ESC to stop\n"); printf("============================================================\n\n"); // --- Load ANSCV.dll at runtime --- if (!LoadANSCV()) return -1; if (pInitCameraNetwork) pInitCameraNetwork(); // ===================================================================== // CONFIGURATION — EDIT THESE FOR YOUR TEST // ===================================================================== const int NUM_STREAMS = 2; const int NUM_TASKS = 4; // 2 tasks per camera // Video files (one per stream) const std::string videoFiles[NUM_STREAMS] = { "E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM1.mp4", "E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM2.mp4", }; // Which stream each task uses const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 }; // Model config — EDIT for your custom model const std::string modelFolder = "C:\\Projects\\ANSVIS\\Models\\ANS_Helmet_v2.0.zip"; //const char* modelName = "detector"; //const char* className = "detector.names"; const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO const float scoreThresh = 0.5f; const float confThresh = 0.5f; const float nmsThresh = 0.45f; // ===================================================================== int detectorType = 1; // Detection std::cout << "\n--- Test 1: Handle creation (elastic mode) ---\n" << std::endl; std::cout << "Optimizing model, please wait..." << std::endl; std::string optimizedFolder = OptimizeModelStr( modelFolder.c_str(), "", modelType, detectorType, 1); std::cout << "Optimized model folder: " << optimizedFolder << std::endl; StressTaskState taskStates[NUM_TASKS]; // --- Create FilePlayer instances --- void* fpClients[NUM_STREAMS] = {}; for (int s = 0; s < NUM_STREAMS; s++) { printf("[Stream%d] Creating FilePlayer: %s\n", s, videoFiles[s].c_str()); int result = pCreateFilePlayer(&fpClients[s], "", videoFiles[s].c_str()); if (result != 1 || !fpClients[s]) { printf("[Stream%d] FAILED to create FilePlayer (result=%d)\n", s, result); fpClients[s] = nullptr; continue; } if (pSetFilePlayerDisplayRes) { pSetFilePlayerDisplayRes(&fpClients[s], 1920, 1080); } printf("[Stream%d] FilePlayer created (display: 1920x1080)\n", s); } // --- Create OD engine handles sequentially --- ANSCENTER::ANSODBase* odHandles[NUM_TASKS] = {}; for (int i = 0; i < NUM_TASKS; i++) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", i); int streamIdx = taskStreamMap[i]; if (!fpClients[streamIdx]) { printf("%s Skipped — Stream%d not available\n", tag, streamIdx); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Stream not available"; continue; } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Loading model..."; } printf("%s Creating OD handle (modelType=%d)...\n", tag, modelType); auto loadStart = std::chrono::steady_clock::now(); auto vramBefore = GetPerGpuFreeMiB(); // Use CreateANSODHandle — same API as VideoDetectorEngine and LabVIEW std::string labelMap = CreateANSODHandle( &odHandles[i], "", // licenseKey modelFolder.c_str(),// modelFilePath (zip or folder) "", // modelZipFilePassword scoreThresh, // detectionScoreThreshold confThresh, // modelConfThreshold nmsThresh, // modelMNSThreshold 1, // autoDetectEngine modelType, // modelType (16=custom, 31=RTYOLO, etc.) 1, // detectionType (1=Detection) 1); // loadEngineOnCreation auto loadEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(loadEnd - loadStart).count(); if (!odHandles[i]) { printf("%s FAILED to create OD handle\n", tag); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Model load failed"; continue; } auto vramAfter = GetPerGpuFreeMiB(); int bestGpu = 0; size_t maxDelta = 0; for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) { size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0; if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; } } printf("%s Model loaded in %.0f ms | GPU[%d] | VRAM: %zu MiB | Labels: %s\n", tag, loadMs, bestGpu, maxDelta, labelMap.empty() ? "(none)" : labelMap.substr(0, 80).c_str()); { std::lock_guard lk(taskStates[i].mtx); taskStates[i].engineLoaded = true; taskStates[i].statusMsg = "Running"; taskStates[i].gpuDeviceId = bestGpu; taskStates[i].vramUsedMiB = maxDelta; } } // --- Start video playback --- for (int s = 0; s < NUM_STREAMS; s++) { if (fpClients[s]) { pStartFilePlayer(&fpClients[s]); printf("[Stream%d] Playback started\n", s); } } // Give FilePlayer time to decode first frames std::this_thread::sleep_for(std::chrono::milliseconds(500)); // --- Launch worker threads --- std::thread workers[NUM_TASKS]; for (int i = 0; i < NUM_TASKS; i++) { int streamIdx = taskStreamMap[i]; if (fpClients[streamIdx] && odHandles[i]) { workers[i] = std::thread(ODWorkerThread, i, fpClients[streamIdx], odHandles[i], std::ref(taskStates[i])); } } // --- Display loop --- const int cols = (NUM_TASKS <= 2) ? NUM_TASKS : 2; const int rows = (NUM_TASKS + cols - 1) / cols; const int cellW = 640, cellH = 480; const char* windowName = "Custom Model Stress Test"; cv::namedWindow(windowName, cv::WINDOW_NORMAL); cv::resizeWindow(windowName, cellW * cols, cellH * rows + 40); auto testStart = std::chrono::steady_clock::now(); while (g_stressRunning.load()) { cv::Mat canvas(cellH * rows + 40, cellW * cols, CV_8UC3, cv::Scalar(30, 30, 30)); for (int i = 0; i < NUM_TASKS; i++) { int row = i / cols, col = i % cols; cv::Rect roi(col * cellW, row * cellH, cellW, cellH); cv::Mat cell; double fps = 0, infMs = 0; int fCount = 0, dCount = 0; int gpuId = -1; std::string statusMsg, lastDet; bool engineLoaded = false; { std::lock_guard lk(taskStates[i].mtx); if (!taskStates[i].displayFrame.empty()) { cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); } fps = taskStates[i].fps; infMs = taskStates[i].inferenceMs; fCount = taskStates[i].frameCount; dCount = taskStates[i].detectionCount; gpuId = taskStates[i].gpuDeviceId; statusMsg = taskStates[i].statusMsg; lastDet = taskStates[i].lastDetection; engineLoaded = taskStates[i].engineLoaded; } if (cell.empty()) { cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); } // Status bar cv::rectangle(cell, cv::Rect(0, cellH - 45, cellW, 45), cv::Scalar(0, 0, 0), cv::FILLED); char bar1[256], bar2[128]; snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d", i, fps, infMs, fCount, dCount); snprintf(bar2, sizeof(bar2), "GPU[%d] | %s", gpuId, lastDet.empty() ? "-" : lastDet.c_str()); cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); cv::putText(cell, bar1, cv::Point(5, cellH - 25), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1); cv::putText(cell, bar2, cv::Point(5, cellH - 5), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1); cell.copyTo(canvas(roi)); } // Bottom status bar double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); double totalFps = 0; for (int i = 0; i < NUM_TASKS; i++) totalFps += taskStates[i].fps; char bottomBar[256]; snprintf(bottomBar, sizeof(bottomBar), "Elapsed: %.0fs | Total: %.1f FPS | %d streams, %d tasks | Press ESC to stop", elapsed, totalFps, NUM_STREAMS, NUM_TASKS); cv::putText(canvas, bottomBar, cv::Point(10, cellH * rows + 25), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 200), 1); cv::imshow(windowName, canvas); int key = cv::waitKey(30); if (key == 27) { printf("\nESC pressed — stopping...\n"); g_stressRunning.store(false); } } // --- Wait for workers --- printf("Waiting for worker threads...\n"); for (int i = 0; i < NUM_TASKS; i++) { if (workers[i].joinable()) workers[i].join(); } // --- Final summary --- double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); printf("\n============================================================\n"); printf(" FINAL SUMMARY (runtime: %.0fs)\n", totalElapsed); printf("============================================================\n"); double totalFps = 0; for (int i = 0; i < NUM_TASKS; i++) { printf(" Task %d: GPU[%d] | %d frames | %d detections | %.1f FPS | Inf: %.0fms\n", i, taskStates[i].gpuDeviceId, taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs); totalFps += taskStates[i].fps; } printf(" Total throughput: %.1f FPS across %d tasks\n", totalFps, NUM_TASKS); printf("============================================================\n"); // --- Release --- for (int i = 0; i < NUM_TASKS; i++) { if (odHandles[i]) ReleaseANSODHandle(&odHandles[i]); } for (int s = 0; s < NUM_STREAMS; s++) { if (fpClients[s]) { pStopFilePlayer(&fpClients[s]); pReleaseFilePlayer(&fpClients[s]); } } cv::destroyAllWindows(); if (pDeinitCameraNetwork) pDeinitCameraNetwork(); UnloadANSCV(); return 0; }