2026-03-29 08:45:38 +11:00
|
|
|
// =============================================================================
|
|
|
|
|
// CustomModel-StressTest.cpp
|
|
|
|
|
//
|
|
|
|
|
// Multi-task stress test using ANSODEngine's extern "C" API functions
|
|
|
|
|
// (same path as LabVIEW). Uses FilePlayer + CloneImage + RunInferenceComplete_CPP
|
|
|
|
|
// to reproduce the full LabVIEW production flow for custom model DLLs.
|
|
|
|
|
//
|
|
|
|
|
// This test loads ANSCV.dll at runtime via LoadLibrary/GetProcAddress
|
|
|
|
|
// so it does NOT require linking ANSCV.lib.
|
|
|
|
|
// =============================================================================
|
|
|
|
|
|
|
|
|
|
// windows.h MUST come before ANSODTest.h to avoid ACCESS_MASK conflict with TensorRT
|
|
|
|
|
#ifndef WIN32_LEAN_AND_MEAN
|
|
|
|
|
#define WIN32_LEAN_AND_MEAN
|
|
|
|
|
#endif
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
|
|
|
|
|
#include "ANSODTest.h"
|
|
|
|
|
#include <thread>
|
|
|
|
|
#include <atomic>
|
|
|
|
|
#include <chrono>
|
|
|
|
|
#include <mutex>
|
|
|
|
|
#include <deque>
|
|
|
|
|
#include <set>
|
|
|
|
|
// Note: NOT linking cudart.lib — use GetProcAddress for CUDA if needed
|
|
|
|
|
|
|
|
|
|
// --- Forward declarations of ANSODEngine extern "C" functions ---
|
|
|
|
|
// These are linked via ANSODEngine.lib
|
|
|
|
|
extern "C" __declspec(dllimport) std::string CreateANSODHandle(
|
|
|
|
|
ANSCENTER::ANSODBase** Handle,
|
|
|
|
|
const char* licenseKey, const char* modelFilePath,
|
|
|
|
|
const char* modelFileZipPassword,
|
|
|
|
|
float detectionScoreThreshold, float modelConfThreshold, float modelMNSThreshold,
|
|
|
|
|
int autoDetectEngine, int modelType, int detectionType, int loadEngineOnCreation);
|
|
|
|
|
|
|
|
|
|
extern "C" __declspec(dllimport) int RunInferenceComplete_CPP(
|
|
|
|
|
ANSCENTER::ANSODBase** Handle, cv::Mat** cvImage, const char* cameraId,
|
|
|
|
|
const char* activeROIMode, std::vector<ANSCENTER::Object>& detectionResult);
|
|
|
|
|
|
|
|
|
|
extern "C" __declspec(dllimport) int ReleaseANSODHandle(ANSCENTER::ANSODBase** Handle);
|
|
|
|
|
|
|
|
|
|
// --- ANSCV function pointer types (loaded at runtime) ---
|
|
|
|
|
typedef int (*FnCreateFilePlayer)(void** Handle, const char* licenseKey, const char* url);
|
|
|
|
|
typedef int (*FnStartFilePlayer)(void** Handle);
|
|
|
|
|
typedef int (*FnStopFilePlayer)(void** Handle);
|
|
|
|
|
typedef int (*FnGetFilePlayerCVImage)(void** Handle, int& w, int& h, int64_t& pts, cv::Mat** image);
|
|
|
|
|
typedef void(*FnSetFilePlayerDisplayRes)(void** Handle, int w, int h);
|
|
|
|
|
typedef int (*FnReleaseFilePlayer)(void** Handle);
|
|
|
|
|
typedef int (*FnCloneImage)(cv::Mat** imageIn, cv::Mat** imageOut);
|
|
|
|
|
typedef int (*FnReleaseImage)(cv::Mat** imageIn);
|
|
|
|
|
typedef void(*FnInitCameraNetwork)();
|
|
|
|
|
typedef void(*FnDeinitCameraNetwork)();
|
|
|
|
|
|
|
|
|
|
// --- ANSCV function pointers ---
|
|
|
|
|
static FnCreateFilePlayer pCreateFilePlayer = nullptr;
|
|
|
|
|
static FnStartFilePlayer pStartFilePlayer = nullptr;
|
|
|
|
|
static FnStopFilePlayer pStopFilePlayer = nullptr;
|
|
|
|
|
static FnGetFilePlayerCVImage pGetFilePlayerCVImage = nullptr;
|
|
|
|
|
static FnSetFilePlayerDisplayRes pSetFilePlayerDisplayRes = nullptr;
|
|
|
|
|
static FnReleaseFilePlayer pReleaseFilePlayer = nullptr;
|
|
|
|
|
static FnCloneImage pCloneImage = nullptr;
|
|
|
|
|
static FnReleaseImage pReleaseImage = nullptr;
|
|
|
|
|
static FnInitCameraNetwork pInitCameraNetwork = nullptr;
|
|
|
|
|
static FnDeinitCameraNetwork pDeinitCameraNetwork = nullptr;
|
|
|
|
|
|
|
|
|
|
static HMODULE g_hANSCV = nullptr;
|
|
|
|
|
|
|
|
|
|
static bool LoadANSCV() {
|
|
|
|
|
g_hANSCV = LoadLibraryA("ANSCV.dll");
|
|
|
|
|
if (!g_hANSCV) {
|
|
|
|
|
printf("ERROR: Failed to load ANSCV.dll (error %lu)\n", GetLastError());
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
pCreateFilePlayer = (FnCreateFilePlayer)GetProcAddress(g_hANSCV, "CreateANSFilePlayerHandle");
|
|
|
|
|
pStartFilePlayer = (FnStartFilePlayer)GetProcAddress(g_hANSCV, "StartFilePlayer");
|
|
|
|
|
pStopFilePlayer = (FnStopFilePlayer)GetProcAddress(g_hANSCV, "StopFilePlayer");
|
|
|
|
|
pGetFilePlayerCVImage = (FnGetFilePlayerCVImage)GetProcAddress(g_hANSCV, "GetFilePlayerCVImage");
|
|
|
|
|
pSetFilePlayerDisplayRes = (FnSetFilePlayerDisplayRes)GetProcAddress(g_hANSCV, "SetFilePlayerDisplayResolution");
|
|
|
|
|
pReleaseFilePlayer = (FnReleaseFilePlayer)GetProcAddress(g_hANSCV, "ReleaseANSFilePlayerHandle");
|
|
|
|
|
pCloneImage = (FnCloneImage)GetProcAddress(g_hANSCV, "ANSCV_CloneImage_S");
|
|
|
|
|
pReleaseImage = (FnReleaseImage)GetProcAddress(g_hANSCV, "ANSCV_ReleaseImage_S");
|
|
|
|
|
pInitCameraNetwork = (FnInitCameraNetwork)GetProcAddress(g_hANSCV, "InitCameraNetwork");
|
|
|
|
|
pDeinitCameraNetwork = (FnDeinitCameraNetwork)GetProcAddress(g_hANSCV, "DeinitCameraNetwork");
|
|
|
|
|
|
|
|
|
|
if (!pCreateFilePlayer || !pStartFilePlayer || !pStopFilePlayer ||
|
|
|
|
|
!pGetFilePlayerCVImage || !pReleaseFilePlayer ||
|
|
|
|
|
!pCloneImage || !pReleaseImage) {
|
|
|
|
|
printf("ERROR: Failed to resolve one or more ANSCV functions\n");
|
|
|
|
|
FreeLibrary(g_hANSCV);
|
|
|
|
|
g_hANSCV = nullptr;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
printf("ANSCV.dll loaded successfully\n");
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void UnloadANSCV() {
|
|
|
|
|
if (g_hANSCV) {
|
|
|
|
|
FreeLibrary(g_hANSCV);
|
|
|
|
|
g_hANSCV = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Shared state ---
|
|
|
|
|
static std::atomic<bool> g_stressRunning{true};
|
|
|
|
|
|
|
|
|
|
struct StressTaskState {
|
|
|
|
|
std::mutex mtx;
|
|
|
|
|
cv::Mat displayFrame;
|
|
|
|
|
double fps = 0;
|
|
|
|
|
double inferenceMs = 0;
|
|
|
|
|
double grabMs = 0;
|
|
|
|
|
int frameCount = 0;
|
|
|
|
|
int detectionCount = 0;
|
|
|
|
|
int gpuDeviceId = -1;
|
|
|
|
|
size_t vramUsedMiB = 0;
|
|
|
|
|
std::string statusMsg = "Initializing";
|
|
|
|
|
std::string lastDetection;
|
|
|
|
|
bool engineLoaded = false;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// --- GPU VRAM helpers (via cudart.dll at runtime) ---
|
|
|
|
|
typedef int (*FnCudaGetDeviceCount)(int*);
|
|
|
|
|
typedef int (*FnCudaSetDevice)(int);
|
|
|
|
|
typedef int (*FnCudaMemGetInfo)(size_t*, size_t*);
|
|
|
|
|
|
|
|
|
|
static std::vector<size_t> GetPerGpuFreeMiB() {
|
|
|
|
|
static HMODULE hCudart = LoadLibraryA("cudart64_12.dll");
|
|
|
|
|
if (!hCudart) hCudart = LoadLibraryA("cudart64_110.dll");
|
|
|
|
|
if (!hCudart) return {};
|
|
|
|
|
|
|
|
|
|
auto fnGetCount = (FnCudaGetDeviceCount)GetProcAddress(hCudart, "cudaGetDeviceCount");
|
|
|
|
|
auto fnSetDev = (FnCudaSetDevice)GetProcAddress(hCudart, "cudaSetDevice");
|
|
|
|
|
auto fnMemInfo = (FnCudaMemGetInfo)GetProcAddress(hCudart, "cudaMemGetInfo");
|
|
|
|
|
if (!fnGetCount || !fnSetDev || !fnMemInfo) return {};
|
|
|
|
|
|
|
|
|
|
int count = 0;
|
|
|
|
|
fnGetCount(&count);
|
|
|
|
|
std::vector<size_t> result(count, 0);
|
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
|
|
|
fnSetDev(i);
|
|
|
|
|
size_t freeMem = 0, totalMem = 0;
|
|
|
|
|
fnMemInfo(&freeMem, &totalMem);
|
|
|
|
|
result[i] = freeMem / (1024 * 1024);
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Worker thread ---
|
|
|
|
|
// Mimics LabVIEW flow: GetImage → CloneImage → RunInferenceComplete_CPP → ReleaseImage
|
|
|
|
|
static void ODWorkerThread(int taskId,
|
|
|
|
|
void* fpClient,
|
|
|
|
|
ANSCENTER::ANSODBase* odHandle,
|
|
|
|
|
StressTaskState& state) {
|
|
|
|
|
char tag[32];
|
|
|
|
|
snprintf(tag, sizeof(tag), "[Task%d]", taskId);
|
|
|
|
|
printf("%s Worker thread started\n", tag);
|
|
|
|
|
|
|
|
|
|
int width = 0, height = 0;
|
|
|
|
|
int64_t pts = 0;
|
|
|
|
|
int emptyFrames = 0;
|
|
|
|
|
std::string cameraId = "StressCam" + std::to_string(taskId);
|
|
|
|
|
|
|
|
|
|
std::deque<std::chrono::steady_clock::time_point> fpsTimestamps;
|
|
|
|
|
|
|
|
|
|
while (g_stressRunning.load()) {
|
|
|
|
|
// --- Step 1: Get image from FilePlayer (like camera process) ---
|
|
|
|
|
auto grabStart = std::chrono::steady_clock::now();
|
|
|
|
|
cv::Mat* framePtr = nullptr;
|
|
|
|
|
pGetFilePlayerCVImage(&fpClient, width, height, pts, &framePtr);
|
|
|
|
|
auto grabEnd = std::chrono::steady_clock::now();
|
|
|
|
|
double grabMs = std::chrono::duration<double, std::milli>(grabEnd - grabStart).count();
|
|
|
|
|
|
|
|
|
|
if (!framePtr || framePtr->empty()) {
|
|
|
|
|
emptyFrames++;
|
|
|
|
|
if (emptyFrames > 500) {
|
|
|
|
|
printf("%s Too many empty frames (%d), stopping\n", tag, emptyFrames);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (framePtr) { pReleaseImage(&framePtr); }
|
|
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
emptyFrames = 0;
|
|
|
|
|
|
|
|
|
|
// --- Step 2: Clone image (like LabVIEW consumer) ---
|
|
|
|
|
cv::Mat* clonedImage = nullptr;
|
|
|
|
|
int cloneResult = pCloneImage(&framePtr, &clonedImage);
|
|
|
|
|
if (cloneResult != 1 || !clonedImage) {
|
|
|
|
|
printf("%s CloneImage failed (result=%d)\n", tag, cloneResult);
|
|
|
|
|
pReleaseImage(&framePtr);
|
|
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Release original frame (camera process would do this)
|
|
|
|
|
pReleaseImage(&framePtr);
|
|
|
|
|
|
|
|
|
|
// --- Step 3: Run inference on clone (like AI task) ---
|
|
|
|
|
auto infStart = std::chrono::steady_clock::now();
|
|
|
|
|
std::vector<ANSCENTER::Object> detections;
|
|
|
|
|
int infResult = RunInferenceComplete_CPP(&odHandle, &clonedImage, cameraId.c_str(), "", detections);
|
|
|
|
|
auto infEnd = std::chrono::steady_clock::now();
|
|
|
|
|
double infMs = std::chrono::duration<double, std::milli>(infEnd - infStart).count();
|
|
|
|
|
|
|
|
|
|
// --- Step 4: Draw results on clone for display ---
|
|
|
|
|
cv::Mat display;
|
|
|
|
|
if (clonedImage && !clonedImage->empty()) {
|
|
|
|
|
display = clonedImage->clone();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string lastDet;
|
|
|
|
|
int detCount = 0;
|
|
|
|
|
if (infResult > 0) {
|
|
|
|
|
for (const auto& obj : detections) {
|
|
|
|
|
if (!display.empty()) {
|
|
|
|
|
cv::rectangle(display, obj.box, cv::Scalar(0, 255, 0), 2);
|
|
|
|
|
std::string label = obj.className + " " +
|
|
|
|
|
std::to_string((int)(obj.confidence * 100)) + "%";
|
|
|
|
|
cv::putText(display, label,
|
|
|
|
|
cv::Point(obj.box.x, obj.box.y - 5),
|
|
|
|
|
cv::FONT_HERSHEY_SIMPLEX, 0.5,
|
|
|
|
|
cv::Scalar(0, 255, 0), 1);
|
|
|
|
|
}
|
|
|
|
|
lastDet = obj.className;
|
|
|
|
|
detCount++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Step 5: Release clone (like LabVIEW task cleanup) ---
|
|
|
|
|
pReleaseImage(&clonedImage);
|
|
|
|
|
|
|
|
|
|
// --- FPS calculation ---
|
|
|
|
|
auto now = std::chrono::steady_clock::now();
|
|
|
|
|
fpsTimestamps.push_back(now);
|
|
|
|
|
while (!fpsTimestamps.empty() &&
|
|
|
|
|
std::chrono::duration<double>(now - fpsTimestamps.front()).count() > 2.0) {
|
|
|
|
|
fpsTimestamps.pop_front();
|
|
|
|
|
}
|
|
|
|
|
double fps = fpsTimestamps.size() / 2.0;
|
|
|
|
|
|
|
|
|
|
// --- Update state ---
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard<std::mutex> lk(state.mtx);
|
|
|
|
|
if (!display.empty()) state.displayFrame = display;
|
|
|
|
|
state.fps = fps;
|
|
|
|
|
state.inferenceMs = infMs;
|
|
|
|
|
state.grabMs = grabMs;
|
|
|
|
|
state.frameCount++;
|
|
|
|
|
state.detectionCount += detCount;
|
|
|
|
|
if (!lastDet.empty()) state.lastDetection = lastDet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Periodic log
|
|
|
|
|
if ((state.frameCount % 200) == 0) {
|
|
|
|
|
printf("%s %d frames | %.1f FPS | Inf: %.0f ms | Det: %d\n",
|
|
|
|
|
tag, state.frameCount, fps, infMs, state.detectionCount);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
printf("%s Worker thread finished (%d frames)\n", tag, state.frameCount);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// =============================================================================
|
|
|
|
|
// Main test function
|
|
|
|
|
//
|
|
|
|
|
// Config (edit these):
|
|
|
|
|
// NUM_STREAMS — number of FilePlayer instances (cameras)
|
|
|
|
|
// NUM_TASKS — number of AI tasks
|
|
|
|
|
// videoFiles[] — paths to video files
|
|
|
|
|
// modelFolder — path to custom model folder
|
|
|
|
|
// modelType — engine type (31=RTYOLO, 30=ONNXYOLO, 10=CustomDetector, etc.)
|
|
|
|
|
// =============================================================================
|
|
|
|
|
int CustomModel_StressTest_FilePlayer() {
|
|
|
|
|
printf("\n");
|
|
|
|
|
printf("============================================================\n");
|
|
|
|
|
printf(" Custom Model Multi-Task Stress Test (FilePlayer)\n");
|
|
|
|
|
printf(" Uses RunInferenceComplete_CPP (same path as LabVIEW)\n");
|
|
|
|
|
printf(" Press ESC to stop\n");
|
|
|
|
|
printf("============================================================\n\n");
|
|
|
|
|
|
|
|
|
|
// --- Load ANSCV.dll at runtime ---
|
|
|
|
|
if (!LoadANSCV()) return -1;
|
|
|
|
|
if (pInitCameraNetwork) pInitCameraNetwork();
|
|
|
|
|
|
|
|
|
|
// =====================================================================
|
|
|
|
|
// CONFIGURATION — EDIT THESE FOR YOUR TEST
|
|
|
|
|
// =====================================================================
|
|
|
|
|
const int NUM_STREAMS = 2;
|
|
|
|
|
const int NUM_TASKS = 4; // 2 tasks per camera
|
|
|
|
|
|
|
|
|
|
// Video files (one per stream)
|
|
|
|
|
const std::string videoFiles[NUM_STREAMS] = {
|
|
|
|
|
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM1.mp4",
|
|
|
|
|
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM2.mp4",
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Which stream each task uses
|
|
|
|
|
const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 };
|
|
|
|
|
|
|
|
|
|
// Model config — EDIT for your custom model
|
|
|
|
|
const std::string modelFolder = "C:\\Projects\\ANSVIS\\Models\\ANS_Helmet_v2.0.zip";
|
|
|
|
|
//const char* modelName = "detector";
|
|
|
|
|
//const char* className = "detector.names";
|
|
|
|
|
const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO
|
|
|
|
|
const float scoreThresh = 0.5f;
|
|
|
|
|
const float confThresh = 0.5f;
|
|
|
|
|
const float nmsThresh = 0.45f;
|
|
|
|
|
// =====================================================================
|
|
|
|
|
int detectorType = 1; // Detection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::cout << "\n--- Test 1: Handle creation (elastic mode) ---\n" << std::endl;
|
|
|
|
|
|
|
|
|
|
std::cout << "Optimizing model, please wait..." << std::endl;
|
2026-04-09 08:09:02 +10:00
|
|
|
std::string optimizedFolder;
|
|
|
|
|
OptimizeModelStr(
|
2026-03-29 08:45:38 +11:00
|
|
|
modelFolder.c_str(), "",
|
2026-04-09 08:09:02 +10:00
|
|
|
modelType, detectorType, 1, optimizedFolder);
|
2026-03-29 08:45:38 +11:00
|
|
|
std::cout << "Optimized model folder: " << optimizedFolder << std::endl;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StressTaskState taskStates[NUM_TASKS];
|
|
|
|
|
|
|
|
|
|
// --- Create FilePlayer instances ---
|
|
|
|
|
void* fpClients[NUM_STREAMS] = {};
|
|
|
|
|
for (int s = 0; s < NUM_STREAMS; s++) {
|
|
|
|
|
printf("[Stream%d] Creating FilePlayer: %s\n", s, videoFiles[s].c_str());
|
|
|
|
|
int result = pCreateFilePlayer(&fpClients[s], "", videoFiles[s].c_str());
|
|
|
|
|
if (result != 1 || !fpClients[s]) {
|
|
|
|
|
printf("[Stream%d] FAILED to create FilePlayer (result=%d)\n", s, result);
|
|
|
|
|
fpClients[s] = nullptr;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (pSetFilePlayerDisplayRes) {
|
|
|
|
|
pSetFilePlayerDisplayRes(&fpClients[s], 1920, 1080);
|
|
|
|
|
}
|
|
|
|
|
printf("[Stream%d] FilePlayer created (display: 1920x1080)\n", s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Create OD engine handles sequentially ---
|
|
|
|
|
ANSCENTER::ANSODBase* odHandles[NUM_TASKS] = {};
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
char tag[32];
|
|
|
|
|
snprintf(tag, sizeof(tag), "[Task%d]", i);
|
|
|
|
|
|
|
|
|
|
int streamIdx = taskStreamMap[i];
|
|
|
|
|
if (!fpClients[streamIdx]) {
|
|
|
|
|
printf("%s Skipped — Stream%d not available\n", tag, streamIdx);
|
|
|
|
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
|
|
|
|
taskStates[i].statusMsg = "Stream not available";
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
|
|
|
|
taskStates[i].statusMsg = "Loading model...";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
printf("%s Creating OD handle (modelType=%d)...\n", tag, modelType);
|
|
|
|
|
auto loadStart = std::chrono::steady_clock::now();
|
|
|
|
|
auto vramBefore = GetPerGpuFreeMiB();
|
|
|
|
|
|
|
|
|
|
// Use CreateANSODHandle — same API as VideoDetectorEngine and LabVIEW
|
|
|
|
|
std::string labelMap = CreateANSODHandle(
|
|
|
|
|
&odHandles[i],
|
|
|
|
|
"", // licenseKey
|
|
|
|
|
modelFolder.c_str(),// modelFilePath (zip or folder)
|
|
|
|
|
"", // modelZipFilePassword
|
|
|
|
|
scoreThresh, // detectionScoreThreshold
|
|
|
|
|
confThresh, // modelConfThreshold
|
|
|
|
|
nmsThresh, // modelMNSThreshold
|
|
|
|
|
1, // autoDetectEngine
|
|
|
|
|
modelType, // modelType (16=custom, 31=RTYOLO, etc.)
|
|
|
|
|
1, // detectionType (1=Detection)
|
|
|
|
|
1); // loadEngineOnCreation
|
|
|
|
|
|
|
|
|
|
auto loadEnd = std::chrono::steady_clock::now();
|
|
|
|
|
double loadMs = std::chrono::duration<double, std::milli>(loadEnd - loadStart).count();
|
|
|
|
|
|
|
|
|
|
if (!odHandles[i]) {
|
|
|
|
|
printf("%s FAILED to create OD handle\n", tag);
|
|
|
|
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
|
|
|
|
taskStates[i].statusMsg = "Model load failed";
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto vramAfter = GetPerGpuFreeMiB();
|
|
|
|
|
int bestGpu = 0;
|
|
|
|
|
size_t maxDelta = 0;
|
|
|
|
|
for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) {
|
|
|
|
|
size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0;
|
|
|
|
|
if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
printf("%s Model loaded in %.0f ms | GPU[%d] | VRAM: %zu MiB | Labels: %s\n",
|
|
|
|
|
tag, loadMs, bestGpu, maxDelta,
|
|
|
|
|
labelMap.empty() ? "(none)" : labelMap.substr(0, 80).c_str());
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
|
|
|
|
taskStates[i].engineLoaded = true;
|
|
|
|
|
taskStates[i].statusMsg = "Running";
|
|
|
|
|
taskStates[i].gpuDeviceId = bestGpu;
|
|
|
|
|
taskStates[i].vramUsedMiB = maxDelta;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Start video playback ---
|
|
|
|
|
for (int s = 0; s < NUM_STREAMS; s++) {
|
|
|
|
|
if (fpClients[s]) {
|
|
|
|
|
pStartFilePlayer(&fpClients[s]);
|
|
|
|
|
printf("[Stream%d] Playback started\n", s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Give FilePlayer time to decode first frames
|
|
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
|
|
|
|
|
|
|
|
|
// --- Launch worker threads ---
|
|
|
|
|
std::thread workers[NUM_TASKS];
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
int streamIdx = taskStreamMap[i];
|
|
|
|
|
if (fpClients[streamIdx] && odHandles[i]) {
|
|
|
|
|
workers[i] = std::thread(ODWorkerThread, i,
|
|
|
|
|
fpClients[streamIdx], odHandles[i],
|
|
|
|
|
std::ref(taskStates[i]));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Display loop ---
|
|
|
|
|
const int cols = (NUM_TASKS <= 2) ? NUM_TASKS : 2;
|
|
|
|
|
const int rows = (NUM_TASKS + cols - 1) / cols;
|
|
|
|
|
const int cellW = 640, cellH = 480;
|
|
|
|
|
const char* windowName = "Custom Model Stress Test";
|
|
|
|
|
cv::namedWindow(windowName, cv::WINDOW_NORMAL);
|
|
|
|
|
cv::resizeWindow(windowName, cellW * cols, cellH * rows + 40);
|
|
|
|
|
|
|
|
|
|
auto testStart = std::chrono::steady_clock::now();
|
|
|
|
|
|
|
|
|
|
while (g_stressRunning.load()) {
|
|
|
|
|
cv::Mat canvas(cellH * rows + 40, cellW * cols, CV_8UC3, cv::Scalar(30, 30, 30));
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
int row = i / cols, col = i % cols;
|
|
|
|
|
cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
|
|
|
|
|
|
|
|
|
|
cv::Mat cell;
|
|
|
|
|
double fps = 0, infMs = 0;
|
|
|
|
|
int fCount = 0, dCount = 0;
|
|
|
|
|
int gpuId = -1;
|
|
|
|
|
std::string statusMsg, lastDet;
|
|
|
|
|
bool engineLoaded = false;
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard<std::mutex> lk(taskStates[i].mtx);
|
|
|
|
|
if (!taskStates[i].displayFrame.empty()) {
|
|
|
|
|
cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH));
|
|
|
|
|
}
|
|
|
|
|
fps = taskStates[i].fps;
|
|
|
|
|
infMs = taskStates[i].inferenceMs;
|
|
|
|
|
fCount = taskStates[i].frameCount;
|
|
|
|
|
dCount = taskStates[i].detectionCount;
|
|
|
|
|
gpuId = taskStates[i].gpuDeviceId;
|
|
|
|
|
statusMsg = taskStates[i].statusMsg;
|
|
|
|
|
lastDet = taskStates[i].lastDetection;
|
|
|
|
|
engineLoaded = taskStates[i].engineLoaded;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (cell.empty()) {
|
|
|
|
|
cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
|
|
|
|
|
cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg,
|
|
|
|
|
cv::Point(20, cellH / 2),
|
|
|
|
|
cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Status bar
|
|
|
|
|
cv::rectangle(cell, cv::Rect(0, cellH - 45, cellW, 45), cv::Scalar(0, 0, 0), cv::FILLED);
|
|
|
|
|
char bar1[256], bar2[128];
|
|
|
|
|
snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d",
|
|
|
|
|
i, fps, infMs, fCount, dCount);
|
|
|
|
|
snprintf(bar2, sizeof(bar2), "GPU[%d] | %s",
|
|
|
|
|
gpuId, lastDet.empty() ? "-" : lastDet.c_str());
|
|
|
|
|
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
|
|
|
|
cv::putText(cell, bar1, cv::Point(5, cellH - 25),
|
|
|
|
|
cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
|
|
|
|
|
cv::putText(cell, bar2, cv::Point(5, cellH - 5),
|
|
|
|
|
cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
|
|
|
|
|
|
|
|
|
|
cell.copyTo(canvas(roi));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Bottom status bar
|
|
|
|
|
double elapsed = std::chrono::duration<double>(
|
|
|
|
|
std::chrono::steady_clock::now() - testStart).count();
|
|
|
|
|
double totalFps = 0;
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) totalFps += taskStates[i].fps;
|
|
|
|
|
char bottomBar[256];
|
|
|
|
|
snprintf(bottomBar, sizeof(bottomBar),
|
|
|
|
|
"Elapsed: %.0fs | Total: %.1f FPS | %d streams, %d tasks | Press ESC to stop",
|
|
|
|
|
elapsed, totalFps, NUM_STREAMS, NUM_TASKS);
|
|
|
|
|
cv::putText(canvas, bottomBar, cv::Point(10, cellH * rows + 25),
|
|
|
|
|
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 200), 1);
|
|
|
|
|
|
|
|
|
|
cv::imshow(windowName, canvas);
|
|
|
|
|
int key = cv::waitKey(30);
|
|
|
|
|
if (key == 27) {
|
|
|
|
|
printf("\nESC pressed — stopping...\n");
|
|
|
|
|
g_stressRunning.store(false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Wait for workers ---
|
|
|
|
|
printf("Waiting for worker threads...\n");
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
if (workers[i].joinable()) workers[i].join();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Final summary ---
|
|
|
|
|
double totalElapsed = std::chrono::duration<double>(
|
|
|
|
|
std::chrono::steady_clock::now() - testStart).count();
|
|
|
|
|
printf("\n============================================================\n");
|
|
|
|
|
printf(" FINAL SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
|
|
|
|
printf("============================================================\n");
|
|
|
|
|
double totalFps = 0;
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
printf(" Task %d: GPU[%d] | %d frames | %d detections | %.1f FPS | Inf: %.0fms\n",
|
|
|
|
|
i, taskStates[i].gpuDeviceId,
|
|
|
|
|
taskStates[i].frameCount, taskStates[i].detectionCount,
|
|
|
|
|
taskStates[i].fps, taskStates[i].inferenceMs);
|
|
|
|
|
totalFps += taskStates[i].fps;
|
|
|
|
|
}
|
|
|
|
|
printf(" Total throughput: %.1f FPS across %d tasks\n", totalFps, NUM_TASKS);
|
|
|
|
|
printf("============================================================\n");
|
|
|
|
|
|
|
|
|
|
// --- Release ---
|
|
|
|
|
for (int i = 0; i < NUM_TASKS; i++) {
|
|
|
|
|
if (odHandles[i]) ReleaseANSODHandle(&odHandles[i]);
|
|
|
|
|
}
|
|
|
|
|
for (int s = 0; s < NUM_STREAMS; s++) {
|
|
|
|
|
if (fpClients[s]) {
|
|
|
|
|
pStopFilePlayer(&fpClients[s]);
|
|
|
|
|
pReleaseFilePlayer(&fpClients[s]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cv::destroyAllWindows();
|
|
|
|
|
if (pDeinitCameraNetwork) pDeinitCameraNetwork();
|
|
|
|
|
UnloadANSCV();
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|