Fix AMD by changing from GetTensorData<T>() to GetTensorMutableData<T>()
This commit is contained in:
@@ -1867,12 +1867,13 @@ int main()
|
||||
//YOLO26POSEYolo11Test();
|
||||
//YOLO26CLYolo11Test();
|
||||
//YOLO26ODYolo12Test();
|
||||
YOLO26ODYolo11Test();
|
||||
//YOLO26ODYolo11Test();
|
||||
//YOLO26ODYolo10Test();
|
||||
//YOLO26OBBYolo11Test();
|
||||
//SAM3ONNX_ImageTest(); // ORT reference — runs first, prints decoder input stats
|
||||
//SAM3TRT_ImageTest(); // TRT under test — compare decoder input stats with above
|
||||
//CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow)
|
||||
//CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow)
|
||||
CustomModel_SingleStream_FilePlayer(); // 1 cam + 1 task — isolates concurrency from per-instance bugs
|
||||
//SAM3TRT_UnitTest(); // TensorRT SAM3 test (in ANSSAM3-UnitTest.cpp)
|
||||
//TensorRT10Test();
|
||||
//FireNSmokeCustomDetection();
|
||||
|
||||
@@ -116,4 +116,5 @@ int FaceYoloTest();
|
||||
int TestYOLOV12();
|
||||
int PPETest();
|
||||
int RVATest();
|
||||
int CustomModel_StressTest_FilePlayer();
|
||||
int CustomModel_StressTest_FilePlayer();
|
||||
int CustomModel_SingleStream_FilePlayer(); // 1 camera + 1 task — isolates concurrency from per-instance bugs
|
||||
@@ -292,15 +292,15 @@ int CustomModel_StressTest_FilePlayer() {
|
||||
|
||||
// Video files (one per stream)
|
||||
const std::string videoFiles[NUM_STREAMS] = {
|
||||
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM1.mp4",
|
||||
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM2.mp4",
|
||||
"C:\\ProgramData\\ANSCENTER\\Shared\\HM1.mp4",
|
||||
"C:\\ProgramData\\ANSCENTER\\Shared\\HM2.mp4",
|
||||
};
|
||||
|
||||
// Which stream each task uses
|
||||
const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 };
|
||||
|
||||
// Model config — EDIT for your custom model
|
||||
const std::string modelFolder = "C:\\Projects\\ANSVIS\\Models\\ANS_Helmet_v2.0.zip";
|
||||
const std::string modelFolder = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\Models\\B-IN_ANS_Helmet_v2.0_102728911.zip";
|
||||
//const char* modelName = "detector";
|
||||
//const char* className = "detector.names";
|
||||
const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO
|
||||
@@ -550,3 +550,237 @@ int CustomModel_StressTest_FilePlayer() {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// =============================================================================
|
||||
// CustomModel_SingleStream_FilePlayer
|
||||
//
|
||||
// ISOLATION TEST — 1 camera, 1 OD handle, 1 worker thread. No concurrent
|
||||
// inference whatsoever. Same flow as CustomModel_StressTest_FilePlayer
|
||||
// (FilePlayer → CloneImage → RunInferenceComplete_CPP → ReleaseImage), just
|
||||
// without the multi-stream / multi-task fan-out.
|
||||
//
|
||||
// Use this to determine whether a hang is per-instance (will still hang here)
|
||||
// or only triggered by cross-session DML contention (will NOT hang here).
|
||||
// If THIS test runs cleanly for an extended period but the multi-stream
|
||||
// stress test hangs after a few inferences, the issue is concurrent DML
|
||||
// submissions on the AMD iGPU — not a bug in the engine code itself.
|
||||
//
|
||||
// Reuses helpers from CustomModel_StressTest_FilePlayer:
|
||||
// LoadANSCV / UnloadANSCV, ODWorkerThread, GetPerGpuFreeMiB.
|
||||
// =============================================================================
|
||||
int CustomModel_SingleStream_FilePlayer() {
|
||||
printf("\n");
|
||||
printf("============================================================\n");
|
||||
printf(" Custom Model SINGLE-STREAM Isolation Test (FilePlayer)\n");
|
||||
printf(" 1 camera + 1 model + 1 worker thread\n");
|
||||
printf(" Press ESC to stop\n");
|
||||
printf("============================================================\n\n");
|
||||
|
||||
// --- Load ANSCV.dll at runtime (same helper as stress test) ---
|
||||
if (!LoadANSCV()) return -1;
|
||||
if (pInitCameraNetwork) pInitCameraNetwork();
|
||||
|
||||
// =====================================================================
|
||||
// CONFIGURATION — EDIT THESE FOR YOUR TEST
|
||||
// =====================================================================
|
||||
const std::string videoFile =
|
||||
"C:\\ProgramData\\ANSCENTER\\Shared\\HM1.mp4";
|
||||
|
||||
const std::string modelFolder =
|
||||
"C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\Models\\B-IN_ANS_Helmet_v2.0_102728911.zip";
|
||||
const int modelType = 16; // 16 = CustomDetector (same as stress test)
|
||||
const int detectorType = 1; // Detection
|
||||
const float scoreThresh = 0.5f;
|
||||
const float confThresh = 0.5f;
|
||||
const float nmsThresh = 0.45f;
|
||||
// =====================================================================
|
||||
|
||||
// Reset shared run flag (it's a static at file scope shared with stress test)
|
||||
g_stressRunning.store(true);
|
||||
|
||||
std::cout << "\n--- Single-stream isolation test (no concurrency) ---\n" << std::endl;
|
||||
// NOTE: deliberately NOT calling OptimizeModelStr here. OptimizeModelStr
|
||||
// creates a separate "warmup" ANSCUSTOM instance whose detector and
|
||||
// classifier sessions stay loaded for the lifetime of the process — even
|
||||
// though that instance never runs inference, its 2 DML sessions hold AMD
|
||||
// GPU resources and were suspected of contributing to a hang in the
|
||||
// active session's GetTensorData<float>. Skipping it here leaves exactly
|
||||
// 1 ANSCUSTOM = 2 DML sessions (detector + classifier) in the process,
|
||||
// for the cleanest possible single-session isolation.
|
||||
(void)detectorType; // unused without the OptimizeModelStr call
|
||||
|
||||
// --- Per-task state (just one) ---
|
||||
StressTaskState taskState;
|
||||
|
||||
// --- Create FilePlayer (single stream) ---
|
||||
void* fpClient = nullptr;
|
||||
{
|
||||
printf("[Stream0] Creating FilePlayer: %s\n", videoFile.c_str());
|
||||
int result = pCreateFilePlayer(&fpClient, "", videoFile.c_str());
|
||||
if (result != 1 || !fpClient) {
|
||||
printf("[Stream0] FAILED to create FilePlayer (result=%d)\n", result);
|
||||
UnloadANSCV();
|
||||
return -2;
|
||||
}
|
||||
if (pSetFilePlayerDisplayRes) {
|
||||
pSetFilePlayerDisplayRes(&fpClient, 1920, 1080);
|
||||
}
|
||||
printf("[Stream0] FilePlayer created (display: 1920x1080)\n");
|
||||
}
|
||||
|
||||
// --- Create OD handle (single instance) ---
|
||||
ANSCENTER::ANSODBase* odHandle = nullptr;
|
||||
{
|
||||
printf("[Task0] Creating OD handle (modelType=%d)...\n", modelType);
|
||||
auto loadStart = std::chrono::steady_clock::now();
|
||||
auto vramBefore = GetPerGpuFreeMiB();
|
||||
|
||||
std::string labelMap = CreateANSODHandle(
|
||||
&odHandle,
|
||||
"", // licenseKey
|
||||
modelFolder.c_str(), // modelFilePath (zip or folder)
|
||||
"", // modelZipFilePassword
|
||||
scoreThresh,
|
||||
confThresh,
|
||||
nmsThresh,
|
||||
1, // autoDetectEngine
|
||||
modelType,
|
||||
1, // detectionType (1 = Detection)
|
||||
1); // loadEngineOnCreation
|
||||
|
||||
auto loadEnd = std::chrono::steady_clock::now();
|
||||
double loadMs = std::chrono::duration<double, std::milli>(loadEnd - loadStart).count();
|
||||
|
||||
if (!odHandle) {
|
||||
printf("[Task0] FAILED to create OD handle\n");
|
||||
pStopFilePlayer(&fpClient);
|
||||
pReleaseFilePlayer(&fpClient);
|
||||
UnloadANSCV();
|
||||
return -3;
|
||||
}
|
||||
|
||||
auto vramAfter = GetPerGpuFreeMiB();
|
||||
int bestGpu = 0;
|
||||
size_t maxDelta = 0;
|
||||
for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) {
|
||||
size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0;
|
||||
if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
|
||||
}
|
||||
|
||||
printf("[Task0] Model loaded in %.0f ms | GPU[%d] | VRAM: %zu MiB | Labels: %s\n",
|
||||
loadMs, bestGpu, maxDelta,
|
||||
labelMap.empty() ? "(none)" : labelMap.substr(0, 80).c_str());
|
||||
|
||||
std::lock_guard<std::mutex> lk(taskState.mtx);
|
||||
taskState.engineLoaded = true;
|
||||
taskState.statusMsg = "Running";
|
||||
taskState.gpuDeviceId = bestGpu;
|
||||
taskState.vramUsedMiB = maxDelta;
|
||||
}
|
||||
|
||||
// --- Start playback ---
|
||||
pStartFilePlayer(&fpClient);
|
||||
printf("[Stream0] Playback started\n");
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||
|
||||
// --- Single worker thread (reuse ODWorkerThread from stress test) ---
|
||||
std::thread worker(ODWorkerThread, /*taskId=*/0, fpClient, odHandle, std::ref(taskState));
|
||||
|
||||
// --- Display loop (single cell) ---
|
||||
const int cellW = 1280, cellH = 720;
|
||||
const char* windowName = "Custom Model — Single Stream Isolation";
|
||||
cv::namedWindow(windowName, cv::WINDOW_NORMAL);
|
||||
cv::resizeWindow(windowName, cellW, cellH + 40);
|
||||
|
||||
auto testStart = std::chrono::steady_clock::now();
|
||||
|
||||
while (g_stressRunning.load()) {
|
||||
cv::Mat canvas(cellH + 40, cellW, CV_8UC3, cv::Scalar(30, 30, 30));
|
||||
|
||||
cv::Mat cell;
|
||||
double fps = 0, infMs = 0, grabMs = 0;
|
||||
int fCount = 0, dCount = 0, gpuId = -1;
|
||||
std::string statusMsg, lastDet;
|
||||
bool engineLoaded = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(taskState.mtx);
|
||||
if (!taskState.displayFrame.empty()) {
|
||||
cv::resize(taskState.displayFrame, cell, cv::Size(cellW, cellH));
|
||||
}
|
||||
fps = taskState.fps;
|
||||
infMs = taskState.inferenceMs;
|
||||
grabMs = taskState.grabMs;
|
||||
fCount = taskState.frameCount;
|
||||
dCount = taskState.detectionCount;
|
||||
gpuId = taskState.gpuDeviceId;
|
||||
statusMsg = taskState.statusMsg;
|
||||
lastDet = taskState.lastDetection;
|
||||
engineLoaded = taskState.engineLoaded;
|
||||
}
|
||||
|
||||
if (cell.empty()) {
|
||||
cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
|
||||
cv::putText(cell, "Task 0: " + statusMsg,
|
||||
cv::Point(20, cellH / 2),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
|
||||
}
|
||||
|
||||
cv::rectangle(cell, cv::Rect(0, cellH - 45, cellW, 45),
|
||||
cv::Scalar(0, 0, 0), cv::FILLED);
|
||||
char bar1[256], bar2[128];
|
||||
snprintf(bar1, sizeof(bar1),
|
||||
"%.1f FPS | inf:%.0fms grab:%.0fms | Frames:%d | Det:%d",
|
||||
fps, infMs, grabMs, fCount, dCount);
|
||||
snprintf(bar2, sizeof(bar2), "GPU[%d] | last:%s",
|
||||
gpuId, lastDet.empty() ? "-" : lastDet.c_str());
|
||||
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
||||
cv::putText(cell, bar1, cv::Point(5, cellH - 25),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, barColor, 1);
|
||||
cv::putText(cell, bar2, cv::Point(5, cellH - 5),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 200, 255), 1);
|
||||
|
||||
cell.copyTo(canvas(cv::Rect(0, 0, cellW, cellH)));
|
||||
|
||||
double elapsed = std::chrono::duration<double>(
|
||||
std::chrono::steady_clock::now() - testStart).count();
|
||||
char bottomBar[256];
|
||||
snprintf(bottomBar, sizeof(bottomBar),
|
||||
"Single-stream | Elapsed: %.0fs | %.1f FPS | Press ESC to stop",
|
||||
elapsed, fps);
|
||||
cv::putText(canvas, bottomBar, cv::Point(10, cellH + 25),
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 200), 1);
|
||||
|
||||
cv::imshow(windowName, canvas);
|
||||
int key = cv::waitKey(30);
|
||||
if (key == 27) {
|
||||
printf("\nESC pressed - stopping...\n");
|
||||
g_stressRunning.store(false);
|
||||
}
|
||||
}
|
||||
|
||||
printf("Waiting for worker thread...\n");
|
||||
if (worker.joinable()) worker.join();
|
||||
|
||||
double totalElapsed = std::chrono::duration<double>(
|
||||
std::chrono::steady_clock::now() - testStart).count();
|
||||
printf("\n============================================================\n");
|
||||
printf(" SINGLE-STREAM SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
||||
printf("============================================================\n");
|
||||
printf(" GPU[%d] | %d frames | %d detections | %.1f FPS | Inf: %.0fms\n",
|
||||
taskState.gpuDeviceId, taskState.frameCount, taskState.detectionCount,
|
||||
taskState.fps, taskState.inferenceMs);
|
||||
printf("============================================================\n");
|
||||
|
||||
if (odHandle) ReleaseANSODHandle(&odHandle);
|
||||
if (fpClient) {
|
||||
pStopFilePlayer(&fpClient);
|
||||
pReleaseFilePlayer(&fpClient);
|
||||
}
|
||||
|
||||
cv::destroyAllWindows();
|
||||
if (pDeinitCameraNetwork) pDeinitCameraNetwork();
|
||||
UnloadANSCV();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user