// ANSCORE/tests/ANSODEngine-UnitTest/CustomModel-StressTest.cpp
// =============================================================================
// CustomModel-StressTest.cpp
//
// Multi-task stress test using ANSODEngine's extern "C" API functions
// (same path as LabVIEW). Uses FilePlayer + CloneImage + RunInferenceComplete_CPP
// to reproduce the full LabVIEW production flow for custom model DLLs.
//
// This test loads ANSCV.dll at runtime via LoadLibrary/GetProcAddress
// so it does NOT require linking ANSCV.lib.
// =============================================================================
// windows.h MUST come before ANSODTest.h to avoid ACCESS_MASK conflict with TensorRT
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include "ANSODTest.h"
#include <thread>
#include <atomic>
#include <chrono>
#include <mutex>
#include <deque>
#include <set>
// Note: NOT linking cudart.lib — use GetProcAddress for CUDA if needed
// --- Forward declarations of ANSODEngine extern "C" functions ---
// These are linked via ANSODEngine.lib
// Creates an OD engine handle and returns the model's label map as a string.
// NOTE(review): returning std::string across an extern "C" DLL boundary only
// works when caller and DLL share the same CRT/STL build — confirm this holds
// for the shipped ANSODEngine.dll.
extern "C" __declspec(dllimport) std::string CreateANSODHandle(
ANSCENTER::ANSODBase** Handle,
const char* licenseKey, const char* modelFilePath,
const char* modelFileZipPassword,
float detectionScoreThreshold, float modelConfThreshold, float modelMNSThreshold,
int autoDetectEngine, int modelType, int detectionType, int loadEngineOnCreation);
// Runs one complete inference pass on *cvImage and appends results to
// detectionResult. Return value is used as a success/count indicator by the
// worker thread below (treated as "> 0 means detections present").
extern "C" __declspec(dllimport) int RunInferenceComplete_CPP(
ANSCENTER::ANSODBase** Handle, cv::Mat** cvImage, const char* cameraId,
const char* activeROIMode, std::vector<ANSCENTER::Object>& detectionResult);
// Destroys a handle created by CreateANSODHandle and nulls *Handle.
extern "C" __declspec(dllimport) int ReleaseANSODHandle(ANSCENTER::ANSODBase** Handle);
// --- ANSCV function pointer types (loaded at runtime) ---
// Function-pointer types for the ANSCV.dll entry points resolved at runtime.
// Int-returning functions use 1 as the success code (see call sites below).
typedef int (*FnCreateFilePlayer)(void** Handle, const char* licenseKey, const char* url);
typedef int (*FnStartFilePlayer)(void** Handle);
typedef int (*FnStopFilePlayer)(void** Handle);
// Returns the latest decoded frame; caller must release it via FnReleaseImage.
typedef int (*FnGetFilePlayerCVImage)(void** Handle, int& w, int& h, int64_t& pts, cv::Mat** image);
typedef void(*FnSetFilePlayerDisplayRes)(void** Handle, int w, int h);
typedef int (*FnReleaseFilePlayer)(void** Handle);
// Deep-copies *imageIn into a new heap cv::Mat owned by the caller.
typedef int (*FnCloneImage)(cv::Mat** imageIn, cv::Mat** imageOut);
typedef int (*FnReleaseImage)(cv::Mat** imageIn);
typedef void(*FnInitCameraNetwork)();
typedef void(*FnDeinitCameraNetwork)();
// --- ANSCV function pointers ---
// Populated by LoadANSCV(); valid only while g_hANSCV is non-null.
// pSetFilePlayerDisplayRes / pInitCameraNetwork / pDeinitCameraNetwork are
// optional — call sites null-check them.
static FnCreateFilePlayer pCreateFilePlayer = nullptr;
static FnStartFilePlayer pStartFilePlayer = nullptr;
static FnStopFilePlayer pStopFilePlayer = nullptr;
static FnGetFilePlayerCVImage pGetFilePlayerCVImage = nullptr;
static FnSetFilePlayerDisplayRes pSetFilePlayerDisplayRes = nullptr;
static FnReleaseFilePlayer pReleaseFilePlayer = nullptr;
static FnCloneImage pCloneImage = nullptr;
static FnReleaseImage pReleaseImage = nullptr;
static FnInitCameraNetwork pInitCameraNetwork = nullptr;
static FnDeinitCameraNetwork pDeinitCameraNetwork = nullptr;
// Module handle for ANSCV.dll; owned by LoadANSCV()/UnloadANSCV().
static HMODULE g_hANSCV = nullptr;
// Loads ANSCV.dll at runtime and resolves every entry point this test uses.
// Returns true on success. On any failure the module is unloaded and ALL
// cached function pointers are cleared.
// BUGFIX: the original freed the library on a partial resolution failure but
// left the already-resolved pointers dangling into the unloaded module.
static bool LoadANSCV() {
    g_hANSCV = LoadLibraryA("ANSCV.dll");
    if (!g_hANSCV) {
        printf("ERROR: Failed to load ANSCV.dll (error %lu)\n", GetLastError());
        return false;
    }
    pCreateFilePlayer = (FnCreateFilePlayer)GetProcAddress(g_hANSCV, "CreateANSFilePlayerHandle");
    pStartFilePlayer = (FnStartFilePlayer)GetProcAddress(g_hANSCV, "StartFilePlayer");
    pStopFilePlayer = (FnStopFilePlayer)GetProcAddress(g_hANSCV, "StopFilePlayer");
    pGetFilePlayerCVImage = (FnGetFilePlayerCVImage)GetProcAddress(g_hANSCV, "GetFilePlayerCVImage");
    pSetFilePlayerDisplayRes = (FnSetFilePlayerDisplayRes)GetProcAddress(g_hANSCV, "SetFilePlayerDisplayResolution");
    pReleaseFilePlayer = (FnReleaseFilePlayer)GetProcAddress(g_hANSCV, "ReleaseANSFilePlayerHandle");
    pCloneImage = (FnCloneImage)GetProcAddress(g_hANSCV, "ANSCV_CloneImage_S");
    pReleaseImage = (FnReleaseImage)GetProcAddress(g_hANSCV, "ANSCV_ReleaseImage_S");
    pInitCameraNetwork = (FnInitCameraNetwork)GetProcAddress(g_hANSCV, "InitCameraNetwork");
    pDeinitCameraNetwork = (FnDeinitCameraNetwork)GetProcAddress(g_hANSCV, "DeinitCameraNetwork");
    // Display-resolution and camera-network entries are optional (null-checked
    // at call sites), so they are deliberately absent from the required set.
    if (!pCreateFilePlayer || !pStartFilePlayer || !pStopFilePlayer ||
        !pGetFilePlayerCVImage || !pReleaseFilePlayer ||
        !pCloneImage || !pReleaseImage) {
        printf("ERROR: Failed to resolve one or more ANSCV functions\n");
        FreeLibrary(g_hANSCV);
        g_hANSCV = nullptr;
        // Clear everything — no pointer may outlive the module it points into.
        pCreateFilePlayer = nullptr;
        pStartFilePlayer = nullptr;
        pStopFilePlayer = nullptr;
        pGetFilePlayerCVImage = nullptr;
        pSetFilePlayerDisplayRes = nullptr;
        pReleaseFilePlayer = nullptr;
        pCloneImage = nullptr;
        pReleaseImage = nullptr;
        pInitCameraNetwork = nullptr;
        pDeinitCameraNetwork = nullptr;
        return false;
    }
    printf("ANSCV.dll loaded successfully\n");
    return true;
}
static void UnloadANSCV() {
if (g_hANSCV) {
FreeLibrary(g_hANSCV);
g_hANSCV = nullptr;
}
}
// --- Shared state ---
// Global stop flag: set false by the display loop (ESC) to stop all workers.
static std::atomic<bool> g_stressRunning{true};
// Per-task statistics and display frame shared between one worker thread
// (writer) and the display loop (reader). Every field except mtx itself must
// be accessed only while holding mtx.
struct StressTaskState {
std::mutex mtx;
cv::Mat displayFrame;        // latest annotated frame for the mosaic view
double fps = 0;              // sliding-window FPS computed by the worker
double inferenceMs = 0;      // last inference duration
double grabMs = 0;           // last frame-grab duration
int frameCount = 0;          // total frames processed
int detectionCount = 0;      // cumulative detections across all frames
int gpuDeviceId = -1;        // GPU inferred from the VRAM delta at load time
size_t vramUsedMiB = 0;      // VRAM delta observed while loading the model
std::string statusMsg = "Initializing";
std::string lastDetection;   // class name of the most recent detection
bool engineLoaded = false;   // true once CreateANSODHandle succeeded
};
// --- GPU VRAM helpers (via cudart.dll at runtime) ---
// CUDA runtime signatures; the int return is a cudaError_t (0 == cudaSuccess).
typedef int (*FnCudaGetDeviceCount)(int*);
typedef int (*FnCudaSetDevice)(int);
typedef int (*FnCudaMemGetInfo)(size_t*, size_t*);
static std::vector<size_t> GetPerGpuFreeMiB() {
static HMODULE hCudart = LoadLibraryA("cudart64_12.dll");
if (!hCudart) hCudart = LoadLibraryA("cudart64_110.dll");
if (!hCudart) return {};
auto fnGetCount = (FnCudaGetDeviceCount)GetProcAddress(hCudart, "cudaGetDeviceCount");
auto fnSetDev = (FnCudaSetDevice)GetProcAddress(hCudart, "cudaSetDevice");
auto fnMemInfo = (FnCudaMemGetInfo)GetProcAddress(hCudart, "cudaMemGetInfo");
if (!fnGetCount || !fnSetDev || !fnMemInfo) return {};
int count = 0;
fnGetCount(&count);
std::vector<size_t> result(count, 0);
for (int i = 0; i < count; i++) {
fnSetDev(i);
size_t freeMem = 0, totalMem = 0;
fnMemInfo(&freeMem, &totalMem);
result[i] = freeMem / (1024 * 1024);
}
return result;
}
// --- Worker thread ---
// Mimics LabVIEW flow: GetImage → CloneImage → RunInferenceComplete_CPP → ReleaseImage
static void ODWorkerThread(int taskId,
void* fpClient,
ANSCENTER::ANSODBase* odHandle,
StressTaskState& state) {
char tag[32];
snprintf(tag, sizeof(tag), "[Task%d]", taskId);
printf("%s Worker thread started\n", tag);
int width = 0, height = 0;
int64_t pts = 0;
int emptyFrames = 0;
std::string cameraId = "StressCam" + std::to_string(taskId);
std::deque<std::chrono::steady_clock::time_point> fpsTimestamps;
while (g_stressRunning.load()) {
// --- Step 1: Get image from FilePlayer (like camera process) ---
auto grabStart = std::chrono::steady_clock::now();
cv::Mat* framePtr = nullptr;
pGetFilePlayerCVImage(&fpClient, width, height, pts, &framePtr);
auto grabEnd = std::chrono::steady_clock::now();
double grabMs = std::chrono::duration<double, std::milli>(grabEnd - grabStart).count();
if (!framePtr || framePtr->empty()) {
emptyFrames++;
if (emptyFrames > 500) {
printf("%s Too many empty frames (%d), stopping\n", tag, emptyFrames);
break;
}
if (framePtr) { pReleaseImage(&framePtr); }
std::this_thread::sleep_for(std::chrono::milliseconds(10));
continue;
}
emptyFrames = 0;
// --- Step 2: Clone image (like LabVIEW consumer) ---
cv::Mat* clonedImage = nullptr;
int cloneResult = pCloneImage(&framePtr, &clonedImage);
if (cloneResult != 1 || !clonedImage) {
printf("%s CloneImage failed (result=%d)\n", tag, cloneResult);
pReleaseImage(&framePtr);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
continue;
}
// Release original frame (camera process would do this)
pReleaseImage(&framePtr);
// --- Step 3: Run inference on clone (like AI task) ---
auto infStart = std::chrono::steady_clock::now();
std::vector<ANSCENTER::Object> detections;
int infResult = RunInferenceComplete_CPP(&odHandle, &clonedImage, cameraId.c_str(), "", detections);
auto infEnd = std::chrono::steady_clock::now();
double infMs = std::chrono::duration<double, std::milli>(infEnd - infStart).count();
// --- Step 4: Draw results on clone for display ---
cv::Mat display;
if (clonedImage && !clonedImage->empty()) {
display = clonedImage->clone();
}
std::string lastDet;
int detCount = 0;
if (infResult > 0) {
for (const auto& obj : detections) {
if (!display.empty()) {
cv::rectangle(display, obj.box, cv::Scalar(0, 255, 0), 2);
std::string label = obj.className + " " +
std::to_string((int)(obj.confidence * 100)) + "%";
cv::putText(display, label,
cv::Point(obj.box.x, obj.box.y - 5),
cv::FONT_HERSHEY_SIMPLEX, 0.5,
cv::Scalar(0, 255, 0), 1);
}
lastDet = obj.className;
detCount++;
}
}
// --- Step 5: Release clone (like LabVIEW task cleanup) ---
pReleaseImage(&clonedImage);
// --- FPS calculation ---
auto now = std::chrono::steady_clock::now();
fpsTimestamps.push_back(now);
while (!fpsTimestamps.empty() &&
std::chrono::duration<double>(now - fpsTimestamps.front()).count() > 2.0) {
fpsTimestamps.pop_front();
}
double fps = fpsTimestamps.size() / 2.0;
// --- Update state ---
{
std::lock_guard<std::mutex> lk(state.mtx);
if (!display.empty()) state.displayFrame = display;
state.fps = fps;
state.inferenceMs = infMs;
state.grabMs = grabMs;
state.frameCount++;
state.detectionCount += detCount;
if (!lastDet.empty()) state.lastDetection = lastDet;
}
// Periodic log
if ((state.frameCount % 200) == 0) {
printf("%s %d frames | %.1f FPS | Inf: %.0f ms | Det: %d\n",
tag, state.frameCount, fps, infMs, state.detectionCount);
}
}
printf("%s Worker thread finished (%d frames)\n", tag, state.frameCount);
}
// =============================================================================
// Main test function
//
// Config (edit these):
// NUM_STREAMS — number of FilePlayer instances (cameras)
// NUM_TASKS — number of AI tasks
// videoFiles[] — paths to video files
// modelFolder — path to custom model folder
// modelType — engine type (31=RTYOLO, 30=ONNXYOLO, 10=CustomDetector, etc.)
// =============================================================================
int CustomModel_StressTest_FilePlayer() {
printf("\n");
printf("============================================================\n");
printf(" Custom Model Multi-Task Stress Test (FilePlayer)\n");
printf(" Uses RunInferenceComplete_CPP (same path as LabVIEW)\n");
printf(" Press ESC to stop\n");
printf("============================================================\n\n");
// --- Load ANSCV.dll at runtime ---
if (!LoadANSCV()) return -1;
if (pInitCameraNetwork) pInitCameraNetwork();
// =====================================================================
// CONFIGURATION — EDIT THESE FOR YOUR TEST
// =====================================================================
const int NUM_STREAMS = 2;
const int NUM_TASKS = 4; // 2 tasks per camera
// Video files (one per stream)
const std::string videoFiles[NUM_STREAMS] = {
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM1.mp4",
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM2.mp4",
};
// Which stream each task uses
const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 };
// Model config — EDIT for your custom model
const std::string modelFolder = "C:\\Projects\\ANSVIS\\Models\\ANS_Helmet_v2.0.zip";
//const char* modelName = "detector";
//const char* className = "detector.names";
const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO
const float scoreThresh = 0.5f;
const float confThresh = 0.5f;
const float nmsThresh = 0.45f;
// =====================================================================
int detectorType = 1; // Detection
std::cout << "\n--- Test 1: Handle creation (elastic mode) ---\n" << std::endl;
std::cout << "Optimizing model, please wait..." << std::endl;
2026-04-09 08:09:02 +10:00
std::string optimizedFolder;
OptimizeModelStr(
2026-03-29 08:45:38 +11:00
modelFolder.c_str(), "",
2026-04-09 08:09:02 +10:00
modelType, detectorType, 1, optimizedFolder);
2026-03-29 08:45:38 +11:00
std::cout << "Optimized model folder: " << optimizedFolder << std::endl;
StressTaskState taskStates[NUM_TASKS];
// --- Create FilePlayer instances ---
void* fpClients[NUM_STREAMS] = {};
for (int s = 0; s < NUM_STREAMS; s++) {
printf("[Stream%d] Creating FilePlayer: %s\n", s, videoFiles[s].c_str());
int result = pCreateFilePlayer(&fpClients[s], "", videoFiles[s].c_str());
if (result != 1 || !fpClients[s]) {
printf("[Stream%d] FAILED to create FilePlayer (result=%d)\n", s, result);
fpClients[s] = nullptr;
continue;
}
if (pSetFilePlayerDisplayRes) {
pSetFilePlayerDisplayRes(&fpClients[s], 1920, 1080);
}
printf("[Stream%d] FilePlayer created (display: 1920x1080)\n", s);
}
// --- Create OD engine handles sequentially ---
ANSCENTER::ANSODBase* odHandles[NUM_TASKS] = {};
for (int i = 0; i < NUM_TASKS; i++) {
char tag[32];
snprintf(tag, sizeof(tag), "[Task%d]", i);
int streamIdx = taskStreamMap[i];
if (!fpClients[streamIdx]) {
printf("%s Skipped — Stream%d not available\n", tag, streamIdx);
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
taskStates[i].statusMsg = "Stream not available";
continue;
}
{
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
taskStates[i].statusMsg = "Loading model...";
}
printf("%s Creating OD handle (modelType=%d)...\n", tag, modelType);
auto loadStart = std::chrono::steady_clock::now();
auto vramBefore = GetPerGpuFreeMiB();
// Use CreateANSODHandle — same API as VideoDetectorEngine and LabVIEW
std::string labelMap = CreateANSODHandle(
&odHandles[i],
"", // licenseKey
modelFolder.c_str(),// modelFilePath (zip or folder)
"", // modelZipFilePassword
scoreThresh, // detectionScoreThreshold
confThresh, // modelConfThreshold
nmsThresh, // modelMNSThreshold
1, // autoDetectEngine
modelType, // modelType (16=custom, 31=RTYOLO, etc.)
1, // detectionType (1=Detection)
1); // loadEngineOnCreation
auto loadEnd = std::chrono::steady_clock::now();
double loadMs = std::chrono::duration<double, std::milli>(loadEnd - loadStart).count();
if (!odHandles[i]) {
printf("%s FAILED to create OD handle\n", tag);
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
taskStates[i].statusMsg = "Model load failed";
continue;
}
auto vramAfter = GetPerGpuFreeMiB();
int bestGpu = 0;
size_t maxDelta = 0;
for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) {
size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0;
if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
}
printf("%s Model loaded in %.0f ms | GPU[%d] | VRAM: %zu MiB | Labels: %s\n",
tag, loadMs, bestGpu, maxDelta,
labelMap.empty() ? "(none)" : labelMap.substr(0, 80).c_str());
{
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
taskStates[i].engineLoaded = true;
taskStates[i].statusMsg = "Running";
taskStates[i].gpuDeviceId = bestGpu;
taskStates[i].vramUsedMiB = maxDelta;
}
}
// --- Start video playback ---
for (int s = 0; s < NUM_STREAMS; s++) {
if (fpClients[s]) {
pStartFilePlayer(&fpClients[s]);
printf("[Stream%d] Playback started\n", s);
}
}
// Give FilePlayer time to decode first frames
std::this_thread::sleep_for(std::chrono::milliseconds(500));
// --- Launch worker threads ---
std::thread workers[NUM_TASKS];
for (int i = 0; i < NUM_TASKS; i++) {
int streamIdx = taskStreamMap[i];
if (fpClients[streamIdx] && odHandles[i]) {
workers[i] = std::thread(ODWorkerThread, i,
fpClients[streamIdx], odHandles[i],
std::ref(taskStates[i]));
}
}
// --- Display loop ---
const int cols = (NUM_TASKS <= 2) ? NUM_TASKS : 2;
const int rows = (NUM_TASKS + cols - 1) / cols;
const int cellW = 640, cellH = 480;
const char* windowName = "Custom Model Stress Test";
cv::namedWindow(windowName, cv::WINDOW_NORMAL);
cv::resizeWindow(windowName, cellW * cols, cellH * rows + 40);
auto testStart = std::chrono::steady_clock::now();
while (g_stressRunning.load()) {
cv::Mat canvas(cellH * rows + 40, cellW * cols, CV_8UC3, cv::Scalar(30, 30, 30));
for (int i = 0; i < NUM_TASKS; i++) {
int row = i / cols, col = i % cols;
cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
cv::Mat cell;
double fps = 0, infMs = 0;
int fCount = 0, dCount = 0;
int gpuId = -1;
std::string statusMsg, lastDet;
bool engineLoaded = false;
{
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
if (!taskStates[i].displayFrame.empty()) {
cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH));
}
fps = taskStates[i].fps;
infMs = taskStates[i].inferenceMs;
fCount = taskStates[i].frameCount;
dCount = taskStates[i].detectionCount;
gpuId = taskStates[i].gpuDeviceId;
statusMsg = taskStates[i].statusMsg;
lastDet = taskStates[i].lastDetection;
engineLoaded = taskStates[i].engineLoaded;
}
if (cell.empty()) {
cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg,
cv::Point(20, cellH / 2),
cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
}
// Status bar
cv::rectangle(cell, cv::Rect(0, cellH - 45, cellW, 45), cv::Scalar(0, 0, 0), cv::FILLED);
char bar1[256], bar2[128];
snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d",
i, fps, infMs, fCount, dCount);
snprintf(bar2, sizeof(bar2), "GPU[%d] | %s",
gpuId, lastDet.empty() ? "-" : lastDet.c_str());
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
cv::putText(cell, bar1, cv::Point(5, cellH - 25),
cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
cv::putText(cell, bar2, cv::Point(5, cellH - 5),
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
cell.copyTo(canvas(roi));
}
// Bottom status bar
double elapsed = std::chrono::duration<double>(
std::chrono::steady_clock::now() - testStart).count();
double totalFps = 0;
for (int i = 0; i < NUM_TASKS; i++) totalFps += taskStates[i].fps;
char bottomBar[256];
snprintf(bottomBar, sizeof(bottomBar),
"Elapsed: %.0fs | Total: %.1f FPS | %d streams, %d tasks | Press ESC to stop",
elapsed, totalFps, NUM_STREAMS, NUM_TASKS);
cv::putText(canvas, bottomBar, cv::Point(10, cellH * rows + 25),
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 200), 1);
cv::imshow(windowName, canvas);
int key = cv::waitKey(30);
if (key == 27) {
printf("\nESC pressed — stopping...\n");
g_stressRunning.store(false);
}
}
// --- Wait for workers ---
printf("Waiting for worker threads...\n");
for (int i = 0; i < NUM_TASKS; i++) {
if (workers[i].joinable()) workers[i].join();
}
// --- Final summary ---
double totalElapsed = std::chrono::duration<double>(
std::chrono::steady_clock::now() - testStart).count();
printf("\n============================================================\n");
printf(" FINAL SUMMARY (runtime: %.0fs)\n", totalElapsed);
printf("============================================================\n");
double totalFps = 0;
for (int i = 0; i < NUM_TASKS; i++) {
printf(" Task %d: GPU[%d] | %d frames | %d detections | %.1f FPS | Inf: %.0fms\n",
i, taskStates[i].gpuDeviceId,
taskStates[i].frameCount, taskStates[i].detectionCount,
taskStates[i].fps, taskStates[i].inferenceMs);
totalFps += taskStates[i].fps;
}
printf(" Total throughput: %.1f FPS across %d tasks\n", totalFps, NUM_TASKS);
printf("============================================================\n");
// --- Release ---
for (int i = 0; i < NUM_TASKS; i++) {
if (odHandles[i]) ReleaseANSODHandle(&odHandles[i]);
}
for (int s = 0; s < NUM_STREAMS; s++) {
if (fpClients[s]) {
pStopFilePlayer(&fpClients[s]);
pReleaseFilePlayer(&fpClients[s]);
}
}
cv::destroyAllWindows();
if (pDeinitCameraNetwork) pDeinitCameraNetwork();
UnloadANSCV();
return 0;
}