#include #include #include #include #include "boost/property_tree/ptree.hpp" #include "boost/property_tree/json_parser.hpp" #include "boost/foreach.hpp" #include "boost/optional.hpp" #include #include #include #include #include #include #include #include "ANSLPR.h" #include "ANSLPR_CPU.h" #include "ANSOpenCV.h" #include "ANSRTSP.h" #include "ANSVideoPlayer.h" #include "ANSFilePlayer.h" #include #include #include #include #include #include #include #include #include template T GetOptionalValue(const boost::property_tree::ptree& pt, std::string attribute, T defaultValue) { if (pt.count(attribute)) { return pt.get(attribute); } return defaultValue; } template T GetData(const boost::property_tree::ptree& pt, const std::string& key) { T ret; if (boost::optional data = pt.get_optional(key)) { ret = data.get(); } return ret; } int ANSLPR_CPU_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle = new ANSCENTER::ANSALPR_CPU(); std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericALPR_v1.0.zip"; std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\ALPR1.mp4"; std::string lpnResult; bool result = infHandle->Initialize(licenseKey, modelZipFile, "",0.5, 0.5); std::cout << "Loading ANSLRP:" << result << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. 
please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); infHandle->Inference(frame, lpnResult); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); } int ANSLPR_BigSize_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle = new ANSCENTER::ANSALPR_CPU(); std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericALPR_v1.0.zip"; std::string videoFilePath = 
"C:\\Programs\\DemoAssets\\Videos\\ALRP\\3725.mp4"; std::string lpnResult; bool result = infHandle->Initialize(licenseKey, modelZipFile, "",0.5,0.5); std::cout << "Loading ANSLRP:" << result << std::endl; infHandle->LoadEngine(); cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); infHandle->Inference(frame, lpnResult,"MyCam"); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); } std::string readJsonFile(const std::string& 
filePath) { boost::property_tree::ptree pt; boost::property_tree::read_json(filePath, pt); std::ostringstream oss; boost::property_tree::write_json(oss, pt, false); return oss.str(); } unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh) { int size = int(image.total() * image.elemSize()); std::cout << "size:" << size << std::endl; unsigned char* bytes = new unsigned char[size]; // you will have to delete[] that later std::memcpy(bytes, image.data, size * sizeof(unsigned char)); bufferLengh = size * sizeof(unsigned char); return bytes; } int ANSLPR_CPU_Inferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_GenericALPR_v1.0.zip"; std::string imageFilePath = "C:\\Projects\\ANSVIS\\Documentation\\TestImages\\ALPR\\LP1.jpg"; std::string StrBox = readJsonFile("C:\\Projects\\ANLS\\Documents\\bboxStr.json"); int result = CreateANSALPRHandle(& infHandle, "", modelZipFile.c_str(), "",0,0.5,0.5,0); std::cout << "Init Result:" << result << std::endl; unsigned int bufferLength = 0; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat frame = input; unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength); int height = frame.rows; int width = frame.cols; auto start = std::chrono::system_clock::now(); std::string detectionResult = ANSALPR_RunInferenceBinaryInCroppedImages(&infHandle, jpeg_string, width, height, StrBox.c_str()); std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, 
pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s:%d", class_name, class_id), cv::Point(x, y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); //cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); //cv::imshow("ANSLPR", frame); //cv::waitKey(0); //cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_CV_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\Programs\\DemoAssets\\ModelsForANSVIS\\ANS_GenericALPR_v1.1.zip"; std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\3725.mp4"; std::string lpnResult; int result = CreateANSALPRHandle(& infHandle, licenseKey.c_str(), modelZipFile.c_str(), "",0,0.5,0.5,0); std::cout << "Loading ANSLRP:" << result << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, 
break loop { std::cout << "\n Cannot read the video file. please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); std::string jpegImage; cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(frame); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(& infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::resize(frame, frame, cv::Size(1920,1080)); // to half size or even smaller delete image; cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_OD_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; 
boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile ="C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";// "C:\\Projects\\ANSVIS\\Models\\ANS_ALPR_v1.2.zip";// "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ServerOptimised\\ANS_ALPR_v1.1_NVIDIAGeForceRTX4070LaptopGPU.zip"; std::string videoFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\day.mp4";//"E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4";// std::string lpnResult; int engineType = 1; double detectionThreshold = 0.5; double ocrThreshold = 0.5; double detectionColourThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, detectionColourThreshold); std::cout << "Loading ANSLRP:" << result << std::endl; int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Loading ANSLRP:" << loadEngine << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. 
please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); std::string jpegImage; cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(frame); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); std::string detectionResult = lpnResult; //std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } cv::resize(frame, frame, cv::Size(1920, 1080)); // to half size or even smaller delete image; cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_OD_Inferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree 
detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ServerOptimised\\ANS_ALPR_v1.2_NVIDIAGeForceRTX4070LaptopGPU.zip"; std::string imageFilePath = "E:\\Programs\\DemoAssets\\Images\\ALPRTest\\WrongOrder\\1109.jpg";//20250912_213850.717.jpg; 20250912_213850.511.jpg;//20250912_213850.411.jpg;//20250912_213850.261.jpg(65H115912:0.73) cororect (20250912_213850.071.jpg: 65H115833) std::string lpnResult; int engineType = 1; double detectionThreshold = 0.3; double ocrThreshold = 0.5; double colourThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, colourThreshold); std::cout << "Loading ANSLRP:" << result << std::endl; auto start = std::chrono::system_clock::now(); int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Init Result:" << result << std::endl; auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time to load engine = %lld ms\n", static_cast(elapsed.count())); unsigned int bufferLength = 0; std::string jpegImage; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(input); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, 
"class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(input, cv::Rect(x, y, width, height), 123, 2); cv::putText(input, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } delete image; cv::resize(input, input, cv::Size(1920, 1080)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", input); cv::waitKey(0); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); return 0; } int ANSLPR_OD_INDOInferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "E:\\Programs\\DemoAssets\\ModelsForANSVIS\\ANS_ALPR_IND_v1.1.zip"; std::string imageFilePath = "E:\\Programs\\TrainingWorkingStation\\IndoALPR\\Indonesian License Plate Dataset\\data\\train075.jpg";//20250912_213850.717.jpg; 20250912_213850.511.jpg;//20250912_213850.411.jpg;//20250912_213850.261.jpg(65H115912:0.73) cororect (20250912_213850.071.jpg: 65H115833) std::string lpnResult; int engineType = 1; double detectionThreshold = 0.3; double ocrThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, 0.5); std::cout << "Loading ANSLRP:" << result << std::endl; int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Init Result:" << result << std::endl; unsigned int bufferLength = 0; std::string jpegImage; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat* image = nullptr; // ✅ 
Use a pointer to hold the allocated image image = new cv::Mat(input); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(input, cv::Rect(x, y, width, height), 123, 2); cv::putText(input, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); delete image; cv::resize(input, input, cv::Size(1920, 1080)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", input); cv::waitKey(0); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); return 0; } // ============================================================================ // Multi-GPU ALPR Stress Test — 4 parallel RTSP→ALPR tasks // // Purpose: Diagnose why dual RTX 5080 performs worse than single RTX 3050. // Each task has its own RTSP reader + ALPR engine. Tasks 0-1 read stream A, // tasks 2-3 read stream B. All 4 run in parallel threads. // // The display composites all 4 views into a single resizable window with a // log panel at the bottom showing per-task stats and GPU diagnostics. 
// ============================================================================ // Thread-safe logger: collects timestamped messages for on-screen log + file static const char* LOG_FILE_PATH = "C:\\Temp\\ALPRdebug.txt"; class ThreadSafeLog { public: void init() { std::lock_guard lk(m_mtx); m_file.open(LOG_FILE_PATH, std::ios::out | std::ios::trunc); if (m_file.is_open()) { auto now = std::chrono::system_clock::now(); auto t = std::chrono::system_clock::to_time_t(now); char timeBuf[64]; struct tm lt; localtime_s(<, &t); strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%d %H:%M:%S", <); m_file << "================================================================\n"; m_file << " ANSLPR Multi-GPU Stress Test Debug Log\n"; m_file << " Started: " << timeBuf << "\n"; m_file << " Log file: " << LOG_FILE_PATH << "\n"; m_file << "================================================================\n\n"; m_file.flush(); } } void add(const std::string& msg) { std::lock_guard lk(m_mtx); // Full timestamp for file: HH:MM:SS.mmm auto now = std::chrono::system_clock::now(); auto t = std::chrono::system_clock::to_time_t(now); auto ms = std::chrono::duration_cast( now.time_since_epoch()).count() % 1000; struct tm lt; localtime_s(<, &t); char ts[32]; snprintf(ts, sizeof(ts), "[%02d:%02d:%02d.%03lld] ", lt.tm_hour, lt.tm_min, lt.tm_sec, static_cast(ms)); std::string line = std::string(ts) + msg; m_lines.push_back(line); if (m_lines.size() > 200) m_lines.pop_front(); // Write to file immediately (flush so user can read mid-run) if (m_file.is_open()) { m_file << line << "\n"; m_file.flush(); } } std::deque getRecent(size_t n) { std::lock_guard lk(m_mtx); size_t start = (m_lines.size() > n) ? 
m_lines.size() - n : 0; return std::deque(m_lines.begin() + start, m_lines.end()); } void close() { std::lock_guard lk(m_mtx); if (m_file.is_open()) m_file.close(); } private: std::mutex m_mtx; std::deque m_lines; std::ofstream m_file; }; // Per-task shared state (written by worker thread, read by display thread) struct TaskState { std::mutex mtx; cv::Mat displayFrame; // latest frame with detections drawn double fps = 0.0; double inferenceMs = 0.0; int frameCount = 0; int detectionCount= 0; std::string lastPlate; bool engineLoaded = false; bool streamOk = false; std::string statusMsg = "Initializing..."; // GPU resource tracking (set during engine load) int gpuDeviceId = -1; // which GPU this task's engine landed on size_t vramUsedBytes = 0; // VRAM consumed by this task's engine // Grab/Inference timing (updated by worker thread) double lastGrabMs = 0.0; double lastInfMs = 0.0; }; // Snapshot of GPU state for real-time monitoring struct GpuSnapshot { int deviceId = 0; std::string name; size_t totalMiB = 0; size_t freeMiB = 0; size_t usedMiB = 0; }; // Query current GPU VRAM usage for all devices static std::vector QueryGpuVram() { std::vector snapshots; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return snapshots; for (int i = 0; i < deviceCount; i++) { cudaDeviceProp prop; cudaGetDeviceProperties(&prop, i); int prevDevice; cudaGetDevice(&prevDevice); cudaSetDevice(i); size_t freeMem = 0, totalMem = 0; cudaMemGetInfo(&freeMem, &totalMem); cudaSetDevice(prevDevice); GpuSnapshot s; s.deviceId = i; s.name = prop.name; s.totalMiB = totalMem / (1024 * 1024); s.freeMiB = freeMem / (1024 * 1024); s.usedMiB = s.totalMiB - s.freeMiB; snapshots.push_back(s); } return snapshots; } // Measure per-GPU free VRAM (returns array indexed by device) static std::vector GetPerGpuFreeMiB() { std::vector result; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return result; int prevDevice; cudaGetDevice(&prevDevice); for (int i = 
0; i < deviceCount; i++) { cudaSetDevice(i); size_t freeMem = 0, totalMem = 0; cudaMemGetInfo(&freeMem, &totalMem); result.push_back(freeMem / (1024 * 1024)); } cudaSetDevice(prevDevice); return result; } static std::atomic g_running{true}; static ThreadSafeLog g_log; // Log GPU info using CUDA runtime static void LogGpuInfo() { int deviceCount = 0; cudaError_t err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess) { g_log.add("CUDA ERROR: cudaGetDeviceCount failed: " + std::string(cudaGetErrorString(err))); printf("[GPU] CUDA ERROR: %s\n", cudaGetErrorString(err)); return; } printf("============================================================\n"); printf(" GPU DEVICE REPORT — %d device(s) detected\n", deviceCount); printf("============================================================\n"); g_log.add("GPU DEVICE REPORT: " + std::to_string(deviceCount) + " device(s)"); for (int i = 0; i < deviceCount; i++) { cudaDeviceProp prop; cudaGetDeviceProperties(&prop, i); size_t freeMem = 0, totalMem = 0; cudaSetDevice(i); cudaMemGetInfo(&freeMem, &totalMem); char buf[512]; snprintf(buf, sizeof(buf), " GPU[%d] %s | SM %d.%d | VRAM: %.0f MiB total, %.0f MiB free", i, prop.name, prop.major, prop.minor, totalMem / 1048576.0, freeMem / 1048576.0); printf("%s\n", buf); g_log.add(buf); snprintf(buf, sizeof(buf), " GPU[%d] PCIe Bus %d, Device %d | Async Engines: %d | Concurrent Kernels: %d", i, prop.pciBusID, prop.pciDeviceID, prop.asyncEngineCount, prop.concurrentKernels); printf("%s\n", buf); g_log.add(buf); } printf("============================================================\n"); } // Worker thread: reads RTSP frames and runs ALPR inference // RTSP client and ALPR engine are pre-created on the main thread to avoid // race conditions in CreateANSRTSPHandle / CreateANSALPRHandle. 
static void ALPRWorkerThread(int taskId, ANSCENTER::ANSRTSPClient* rtspClient, ANSCENTER::ANSALPR* alprHandle, TaskState& state) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", taskId); std::string prefix(tag); g_log.add(prefix + " Worker thread started"); printf("%s Worker thread started\n", tag); // --- Main inference loop --- int width = 0, height = 0; int64_t pts = 0; int emptyFrames = 0; std::string cameraId = "Cam" + std::to_string(taskId); // FPS tracking with sliding window std::deque fpsTimestamps; // Timing accumulators for periodic benchmarking double totalGrabMs = 0, totalInfMs = 0; int grabCount = 0, infCount = 0; double maxGrabMs = 0, maxInfMs = 0; auto benchStart = std::chrono::steady_clock::now(); bool hwDecodeLogged = false; while (g_running.load()) { // Read frame from RTSP via ANSCV auto grabStart = std::chrono::steady_clock::now(); cv::Mat* framePtr = nullptr; GetRTSPCVImage(&rtspClient, width, height, pts, &framePtr); auto grabEnd = std::chrono::steady_clock::now(); double grabMs = std::chrono::duration(grabEnd - grabStart).count(); if (framePtr == nullptr || framePtr->empty()) { emptyFrames++; if (emptyFrames % 100 == 1) { g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")"); } if (emptyFrames > 300) { g_log.add(prefix + " Too many empty frames, attempting reconnect..."); ReconnectRTSP(&rtspClient); emptyFrames = 0; } if (framePtr) delete framePtr; std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } emptyFrames = 0; // Log HW decode status once after first successful frame if (!hwDecodeLogged) { hwDecodeLogged = true; int hwActive = rtspClient->IsHWDecodingActive() ? 1 : 0; bool isCuda = rtspClient->IsCudaHWAccel(); int hwGpu = rtspClient->GetHWDecodingGpuIndex(); char hwBuf[256]; const char* hwType = !hwActive ? "INACTIVE (software/CPU)" : isCuda ? 
"ACTIVE (CUDA/NVDEC zero-copy)" : "ACTIVE (D3D11VA/NVDEC cpu-nv12)"; snprintf(hwBuf, sizeof(hwBuf), "%s HW Decode: %s (GPU index: %d)", tag, hwType, hwGpu); g_log.add(hwBuf); printf("%s\n", hwBuf); } totalGrabMs += grabMs; grabCount++; if (grabMs > maxGrabMs) maxGrabMs = grabMs; // Run ALPR inference auto infStart = std::chrono::steady_clock::now(); std::string lpnResult, jpegImage; // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup() // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create // a different pointer the registry doesn't know, breaking NV12 zero-copy. ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); auto infEnd = std::chrono::steady_clock::now(); double infMs = std::chrono::duration(infEnd - infStart).count(); totalInfMs += infMs; infCount++; if (infMs > maxInfMs) maxInfMs = infMs; // Parse detections and draw on frame cv::Mat display = framePtr->clone(); int detCount = 0; std::string lastPlateText; if (!lpnResult.empty()) { try { boost::property_tree::ptree pt; std::stringstream ss(lpnResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& det = child.second; const auto class_name = GetData(det, "class_name"); const auto x = GetData(det, "x"); const auto y = GetData(det, "y"); const auto w = GetData(det, "width"); const auto h = GetData(det, "height"); cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h), cv::Scalar(0, 255, 0), 2); cv::putText(display, class_name, cv::Point((int)x, (int)y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2); lastPlateText = class_name; detCount++; } } catch (...) 
{} } // Update FPS (sliding window over last 2 seconds) auto now = std::chrono::steady_clock::now(); fpsTimestamps.push_back(now); while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) { fpsTimestamps.pop_front(); } double fps = fpsTimestamps.size() / 2.0; // Draw OSD on frame char osd[128]; snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d", taskId, fps, infMs, state.frameCount + 1); cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 255), 2); // Update shared state { std::lock_guard lk(state.mtx); state.displayFrame = display; state.fps = fps; state.inferenceMs = infMs; state.lastGrabMs = grabMs; state.lastInfMs = infMs; state.frameCount++; state.detectionCount += detCount; if (!lastPlateText.empty()) state.lastPlate = lastPlateText; } // Periodic logging (every 100 frames) if ((state.frameCount % 100) == 0) { double avgGrab = grabCount > 0 ? totalGrabMs / grabCount : 0; double avgInf = infCount > 0 ? 
totalInfMs / infCount : 0;
            // Wall-clock seconds since the last accumulator reset; used below to
            // express grab/inference time as a percentage of elapsed time.
            double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count();
            char buf[512];
            snprintf(buf, sizeof(buf),
                "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | "
                "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d",
                tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs,
                (totalGrabMs / (elapsed * 1000.0)) * 100.0,
                (totalInfMs / (elapsed * 1000.0)) * 100.0,
                state.detectionCount);
            g_log.add(buf);
            printf("%s\n", buf);
            // Reset accumulators
            totalGrabMs = totalInfMs = 0;
            maxGrabMs = maxInfMs = 0;
            grabCount = infCount = 0;
            benchStart = std::chrono::steady_clock::now();
        }
        // Frame pointer is owned by this worker once handed over by the grab call;
        // released here at the end of each loop iteration.
        delete framePtr;
    }
    g_log.add(prefix + " Worker loop exited");
}

// -----------------------------------------------------------------------------
// ANSLPR_MultiGPU_StressTest
// Runs 4 parallel ALPR tasks, each fed by its own live RTSP stream, to stress
// multi-GPU engine placement. Flow: init camera network + log -> create 4 RTSP
// readers (CUDA/NVDEC decode) -> load 4 TensorRT ALPR engines sequentially
// (inferring each engine's GPU from the per-GPU VRAM delta) -> realign NVDEC
// decode GPUs with inference GPUs for NV12 zero-copy -> launch 4 worker
// threads -> run a 2x2 display/monitoring loop on the main thread until ESC ->
// join workers, print a final summary, and release all handles.
// Returns 0 on normal termination.
//
// NOTE(review): template argument lists (e.g. std::lock_guard<std::mutex>,
// std::map<int,int>, std::set<int>, std::chrono::duration<double>) appear to
// have been stripped from this source by an extraction step — they must be
// restored before this file can compile. Code below is otherwise unchanged.
// -----------------------------------------------------------------------------
int ANSLPR_MultiGPU_StressTest() {
    ANSCENTER::ANSOPENCV::InitCameraNetwork();
    // --- Initialize log file ---
    g_log.init();
    printf("\n");
    printf("============================================================\n");
    printf(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks\n");
    printf(" Press ESC to stop\n");
    printf(" Log file: %s\n", LOG_FILE_PATH);
    printf("============================================================\n\n");
    g_log.add("============================================================");
    g_log.add(" ANSLPR Multi-GPU Stress Test — 4 Parallel ALPR Tasks");
    g_log.add("============================================================");
    // --- Log GPU info for diagnostics ---
    LogGpuInfo();
    // --- RTSP URLs (4 independent streams, one per task) ---
    const std::string rtspUrl0 = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0";
    const std::string rtspUrl1 = "rtsp://cafe2471.ddns.net:600/rtsp/streaming?channel=01&subtype=0";
    const std::string rtspUrl2 = "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0";
    const std::string rtspUrl3 = "rtsp://bnunitttd.ddns.net:554/rtsp/streaming?channel=01&subtype=0";
    g_log.add("Stream 0: " + rtspUrl0);
    g_log.add("Stream 1: " + rtspUrl1);
    g_log.add("Stream 2: " + rtspUrl2);
    g_log.add("Stream 3: " + rtspUrl3);
    // --- Task states ---
    TaskState taskStates[4];
    // =========================================================================
    // Create 4 INDEPENDENT RTSP readers — one per task, each with its own
    // camera stream. Each task gets a dedicated RTSP connection.
    // =========================================================================
    const int NUM_STREAMS = 4;
    ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {};
    const std::string streamUrls[NUM_STREAMS] = { rtspUrl0, rtspUrl1, rtspUrl2, rtspUrl3 };
    // Map: task index -> stream index (1:1 mapping)
    const int taskStreamMap[4] = { 0, 1, 2, 3 };
    for (int s = 0; s < NUM_STREAMS; s++) {
        printf("[Stream%d] Creating RTSP handle for %s...\n", s, streamUrls[s].c_str());
        g_log.add("[Stream" + std::to_string(s) + "] Creating RTSP handle for " + streamUrls[s]);
        int rtspResult = CreateANSRTSPHandle(&rtspClients[s], "", "", "", streamUrls[s].c_str());
        if (rtspResult != 1 || rtspClients[s] == nullptr) {
            // Failed streams are left null; the matching task is skipped later.
            printf("[Stream%d] FAILED to create RTSP handle (result=%d)\n", s, rtspResult);
            g_log.add("[Stream" + std::to_string(s) + "] RTSP create FAILED");
            rtspClients[s] = nullptr;
            continue;
        }
        SetRTSPImageQuality(&rtspClients[s], 0);
        SetRTSPHWDecoding(&rtspClients[s], 7); // HW_DECODING_CUDA: force CUDA/NVDEC zero-copy path
        StartRTSP(&rtspClients[s]);
        g_log.add("[Stream" + std::to_string(s) + "] RTSP started");
    }
    // =========================================================================
    // Create 4 ALPR engines sequentially
    // =========================================================================
    ANSCENTER::ANSALPR* alprHandles[4] = {};
    std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
    int engineType = 1; // NVIDIA_GPU
    double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
    for (int i = 0; i < 4; i++) {
        char tag[32];
        snprintf(tag, sizeof(tag), "[Task%d]", i);
        int streamIdx = taskStreamMap[i];
        if (rtspClients[streamIdx] == nullptr) {
            printf("%s Skipped — Stream%d not available\n", tag, streamIdx);
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Stream not available";
            continue;
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].streamOk = true;
            taskStates[i].statusMsg = "Loading ALPR engine...";
        }
        printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType);
        g_log.add(std::string(tag) + " Creating ALPR handle...");
        auto engineStart = std::chrono::steady_clock::now();
        int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "",
            engineType, detThresh, ocrThresh, colThresh);
        if (createResult != 1 || alprHandles[i] == nullptr) {
            printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult);
            g_log.add(std::string(tag) + " ALPR create FAILED");
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "ALPR create failed";
            continue;
        }
        printf("%s Loading ALPR engine (TensorRT)...\n", tag);
        g_log.add(std::string(tag) + " Loading ALPR engine...");
        // Snapshot VRAM before engine load to measure consumption
        auto vramBefore = GetPerGpuFreeMiB();
        int loadResult = LoadANSALPREngineHandle(&alprHandles[i]);
        auto engineEnd = std::chrono::steady_clock::now();
        double loadMs = std::chrono::duration(engineEnd - engineStart).count();
        if (loadResult != 1) {
            printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult);
            g_log.add(std::string(tag) + " Engine load FAILED");
            ReleaseANSALPRHandle(&alprHandles[i]);
            alprHandles[i] = nullptr;
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Engine load failed";
            continue;
        }
        // Snapshot VRAM after engine load — find which GPU lost the most VRAM.
        // The GPU with the largest free-VRAM drop is assumed to host this engine.
        auto vramAfter = GetPerGpuFreeMiB();
        int bestGpu = 0;
        size_t maxDelta = 0;
        size_t gpuCount = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size();
        for (size_t g = 0; g < gpuCount; g++) {
            size_t delta = (vramBefore[g] > vramAfter[g]) ? (vramBefore[g] - vramAfter[g]) : 0;
            if (delta > maxDelta) {
                maxDelta = delta;
                bestGpu = (int)g;
            }
        }
        char buf[512];
        snprintf(buf, sizeof(buf),
            "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Stream%d)",
            tag, loadMs, bestGpu, maxDelta, streamIdx);
        printf("%s\n", buf);
        g_log.add(buf);
        // Log per-GPU VRAM state after this engine load
        for (size_t g = 0; g < vramAfter.size(); g++) {
            size_t total = 0;
            if (g < vramBefore.size()) {
                // Compute total from free + used
                auto gpus = QueryGpuVram();
                if (g < gpus.size()) total = gpus[g].totalMiB;
            }
            char vbuf[256];
            snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total);
            printf("%s\n", vbuf);
            g_log.add(vbuf);
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].engineLoaded = true;
            taskStates[i].statusMsg = "Running";
            taskStates[i].gpuDeviceId = bestGpu;
            taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024;
        }
    }
    // --- Align NVDEC decode GPU with inference GPU for NV12 zero-copy ---
    // Each stream should decode on the same GPU as its inference engine to enable
    // direct NVDEC→TensorRT zero-copy (0.5ms vs 17ms preprocess per frame).
    //
    // Strategy: For each stream, count how many tasks run on each GPU (vote).
    // Pick the GPU with the most tasks → maximises the number of NV12 zero-copy hits.
    // If tied, prefer to keep the current decode GPU to avoid a reconnect.
    // Additional tie-breaker: distribute streams across GPUs for decode load balance.
    {
        int streamPreferredGpu[NUM_STREAMS];
        for (int s = 0; s < NUM_STREAMS; s++) streamPreferredGpu[s] = -1;
        // Track how many streams have already been assigned to each GPU (for tie-breaking)
        std::map gpuStreamCount;
        for (int s = 0; s < NUM_STREAMS; s++) {
            if (!rtspClients[s]) continue;
            // Count votes: how many tasks on this stream use each GPU
            std::map gpuVotes;
            for (int i = 0; i < 4; i++) {
                if (taskStreamMap[i] == s && alprHandles[i]) {
                    gpuVotes[taskStates[i].gpuDeviceId]++;
                }
            }
            if (gpuVotes.empty()) continue;
            // Find the GPU with the most votes
            int currentGpu = rtspClients[s]->GetHWDecodingGpuIndex();
            int bestGpu = -1;
            int bestVotes = 0;
            for (auto& [gpu, votes] : gpuVotes) {
                if (votes > bestVotes) {
                    bestVotes = votes;
                    bestGpu = gpu;
                }
                else if (votes == bestVotes) {
                    // Tie-break 1: prefer current decode GPU (avoids reconnect)
                    if (gpu == currentGpu && bestGpu != currentGpu) {
                        bestGpu = gpu;
                    }
                    // Tie-break 2: prefer GPU with fewer streams assigned (load balance)
                    else if (bestGpu != currentGpu && gpu != currentGpu) {
                        if (gpuStreamCount[gpu] < gpuStreamCount[bestGpu]) {
                            bestGpu = gpu;
                        }
                    }
                }
            }
            streamPreferredGpu[s] = bestGpu;
            gpuStreamCount[bestGpu]++;
            char buf[512];
            std::string voteStr;
            for (auto& [gpu, votes] : gpuVotes) {
                if (!voteStr.empty()) voteStr += ", ";
                voteStr += "GPU[" + std::to_string(gpu) + "]=" + std::to_string(votes);
            }
            snprintf(buf, sizeof(buf), "[Stream%d] GPU vote: {%s} -> preferred GPU[%d] (current: GPU[%d])",
                s, voteStr.c_str(), bestGpu, currentGpu);
            g_log.add(buf);
            printf("%s\n", buf);
        }
        // Apply alignment: reconnect streams whose NVDEC is on the wrong GPU.
        // IMPORTANT: If currentGpu == -1, the decoder hasn't initialized yet.
        // Do NOT reconnect — it disrupts the initial RTSP handshake and causes
        // 80+ seconds of empty frames. Just set preferredGpu; the decoder will
        // use it when it naturally initializes.
        for (int s = 0; s < NUM_STREAMS; s++) {
            if (rtspClients[s] && streamPreferredGpu[s] >= 0) {
                int currentGpu = rtspClients[s]->GetHWDecodingGpuIndex();
                if (currentGpu < 0) {
                    // Decoder not yet initialized — set preferred GPU without reconnect
                    SetRTSPHWDecoding(&rtspClients[s], 7, streamPreferredGpu[s]);
                    char buf[256];
                    snprintf(buf, sizeof(buf),
                        "[Stream%d] NVDEC not yet initialized (GPU[-1]) -- set preferred GPU[%d] (no reconnect)",
                        s, streamPreferredGpu[s]);
                    g_log.add(buf);
                    printf("%s\n", buf);
                }
                else if (currentGpu != streamPreferredGpu[s]) {
                    // Decoder is active on wrong GPU — reconnect to move it
                    SetRTSPHWDecoding(&rtspClients[s], 7, streamPreferredGpu[s]);
                    ReconnectRTSP(&rtspClients[s]);
                    char buf[256];
                    snprintf(buf, sizeof(buf),
                        "[Stream%d] NVDEC GPU realigned: GPU[%d] -> GPU[%d] (reconnected for zero-copy)",
                        s, currentGpu, streamPreferredGpu[s]);
                    g_log.add(buf);
                    printf("%s\n", buf);
                }
                else {
                    char buf[256];
                    snprintf(buf, sizeof(buf), "[Stream%d] NVDEC GPU already on GPU[%d] (zero-copy OK)",
                        s, currentGpu);
                    g_log.add(buf);
                    printf("%s\n", buf);
                }
            }
        }
    }
    // --- Enable deep pipeline benchmarking on all ALPR handles ---
    for (int i = 0; i < 4; i++) {
        if (alprHandles[i]) {
            alprHandles[i]->ActivateDebugger(true);
        }
    }
    g_log.add("Debug benchmarking ENABLED on all ALPR handles");
    // --- Launch worker threads — tasks sharing a stream get the same RTSP client ---
    g_log.add("Launching worker threads...");
    std::thread workers[4];
    for (int i = 0; i < 4; i++) {
        int streamIdx = taskStreamMap[i];
        if (rtspClients[streamIdx] && alprHandles[i]) {
            workers[i] = std::thread(ALPRWorkerThread, i, rtspClients[streamIdx], alprHandles[i],
                std::ref(taskStates[i]));
        }
    }
    // --- Display loop (main thread) ---
    const int cellW = 640, cellH = 480;
    const int logPanelH = 200;
    cv::namedWindow("ANSLPR Multi-GPU Stress Test", cv::WINDOW_NORMAL);
    cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * 2, cellH * 2 + logPanelH);
    auto testStart = std::chrono::steady_clock::now();
    auto lastGpuSnapshot = std::chrono::steady_clock::now();
    int snapshotCount = 0;
    while (g_running.load()) {
        // --- Periodic GPU/perf snapshot every 10 seconds (written to log file) ---
        auto now2 = std::chrono::steady_clock::now();
        if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) {
            lastGpuSnapshot = now2;
            snapshotCount++;
            double elapsedSec = std::chrono::duration(now2 - testStart).count();
            g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) +
                " (elapsed " + std::to_string((int)elapsedSec) + "s) ----");
            // GPU VRAM
            auto gpuSnap = QueryGpuVram();
            for (const auto& gs : gpuSnap) {
                char buf[256];
                snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)",
                    gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB,
                    gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0);
                g_log.add(buf);
            }
            // Per-task stats
            double totalFpsSnap = 0;
            for (int t = 0; t < 4; t++) {
                std::lock_guard lk(taskStates[t].mtx);
                char buf[256];
                snprintf(buf, sizeof(buf),
                    " T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d",
                    t, taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024),
                    taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs,
                    taskStates[t].frameCount, taskStates[t].detectionCount);
                g_log.add(buf);
                totalFpsSnap += taskStates[t].fps;
            }
            char buf[128];
            snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap);
            g_log.add(buf);
            // Multi-GPU check
            std::set gpusUsed;
            for (int t = 0; t < 4; t++) {
                if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
            }
            if (gpusUsed.size() > 1) {
                g_log.add(" MULTI-GPU: YES — tasks distributed across " +
                    std::to_string(gpusUsed.size()) + " GPUs");
            }
            else if (!gpusUsed.empty()) {
                g_log.add(" MULTI-GPU: NO — all tasks on GPU[" +
                    std::to_string(*gpusUsed.begin()) + "]");
            }
            g_log.add("---- END SNAPSHOT ----");
        }
        // Build 2x2 grid + log panel
        cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30));
        // Place each task's frame in its quadrant
        for (int i = 0; i < 4; i++) {
            int row = i / 2, col = i % 2;
            cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
            cv::Mat cell;
            double fps = 0, infMs = 0;
            int fCount = 0, dCount = 0;
            int gpuId = -1;
            size_t vramMiB = 0;
            std::string statusMsg, lastPlate;
            bool engineLoaded = false, streamOk = false;
            {
                // Copy everything needed for rendering out under the lock,
                // then draw without holding it.
                std::lock_guard lk(taskStates[i].mtx);
                if (!taskStates[i].displayFrame.empty()) {
                    cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH));
                }
                fps = taskStates[i].fps;
                infMs = taskStates[i].inferenceMs;
                fCount = taskStates[i].frameCount;
                dCount = taskStates[i].detectionCount;
                statusMsg = taskStates[i].statusMsg;
                lastPlate = taskStates[i].lastPlate;
                engineLoaded = taskStates[i].engineLoaded;
                streamOk = taskStates[i].streamOk;
                gpuId = taskStates[i].gpuDeviceId;
                vramMiB = taskStates[i].vramUsedBytes / (1024 * 1024);
            }
            if (cell.empty()) {
                cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
                cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg,
                    cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8,
                    cv::Scalar(100, 100, 255), 2);
            }
            // Draw status bar at bottom of each cell (2 lines)
            cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
            char bar1[256], bar2[256];
            snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s",
                i, fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str());
            if (gpuId >= 0) {
                snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
            }
            else {
                snprintf(bar2, sizeof(bar2), "GPU: N/A");
            }
            cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
            cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
            cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.45,
                cv::Scalar(0, 200, 255), 1);
            cell.copyTo(canvas(roi));
            // Draw grid lines
            cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2), cv::Scalar(100, 100, 100), 1);
            cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH), cv::Scalar(100, 100, 100), 1);
        }
        // --- Log panel at bottom ---
        cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH);
        cv::Mat logPanel = canvas(logRoi);
        logPanel.setTo(cv::Scalar(20, 20, 20));
        // Elapsed time header
        auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count();
        char header[128];
        snprintf(header, sizeof(header), "Elapsed: %.0fs | Press ESC to stop | Resize window freely", elapsed);
        cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5,
            cv::Scalar(200, 200, 0), 1);
        // Aggregate stats + per-task GPU summary
        double totalFps = 0;
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            totalFps += taskStates[i].fps;
        }
        char aggLine[256];
        snprintf(aggLine, sizeof(aggLine),
            "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d",
            totalFps, taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId,
            taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId);
        cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.5,
            cv::Scalar(0, 255, 255), 1);
        // Real-time GPU VRAM monitor (query every frame — cheap call)
        auto gpuSnaps = QueryGpuVram();
        int gpuLineY = 58;
        for (const auto& gs : gpuSnaps) {
            // Count tasks on this GPU and their total VRAM
            int tasksOnGpu = 0;
            size_t taskVramMiB = 0;
            for (int i = 0; i < 4; i++) {
                std::lock_guard lk(taskStates[i].mtx);
                if (taskStates[i].gpuDeviceId == gs.deviceId) {
                    tasksOnGpu++;
                    taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024);
                }
            }
            char gpuLine[256];
            snprintf(gpuLine, sizeof(gpuLine),
                "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)",
                gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB);
            cv::putText(logPanel, gpuLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45,
                cv::Scalar(100, 255, 100), 1);
            gpuLineY += 18;
        }
        // Per-task resource line
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            char tLine[256];
            snprintf(tLine, sizeof(tLine),
                "T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d",
                i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024),
                taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount,
                taskStates[i].detectionCount);
            cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4,
                cv::Scalar(200, 200, 200), 1);
            gpuLineY += 16;
        }
        // Recent log lines (remaining space)
        auto recentLogs = g_log.getRecent(4);
        for (const auto& line : recentLogs) {
            if (gpuLineY > logPanelH - 5) break;
            std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." : line;
            cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0,
                cv::Scalar(140, 140, 140), 1);
            gpuLineY += 15;
        }
        cv::imshow("ANSLPR Multi-GPU Stress Test", canvas);
        int key = cv::waitKey(30);
        if (key == 27) { // ESC
            g_log.add("ESC pressed — stopping all tasks...");
            printf("\nESC pressed — stopping...\n");
            g_running.store(false);
        }
    }
    // --- Wait for all workers ---
    printf("Waiting for worker threads to finish...\n");
    for (int i = 0; i < 4; i++) {
        if (workers[i].joinable()) workers[i].join();
    }
    // --- Print final summary (console + log file) ---
    double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count();
    g_log.add("================================================================");
    g_log.add(" FINAL PERFORMANCE SUMMARY");
    g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
    g_log.add("================================================================");
    printf("\n============================================================\n");
    printf(" FINAL PERFORMANCE SUMMARY (runtime: %.0fs)\n", totalElapsed);
    printf("============================================================\n");
    double totalFpsFinal = 0;
    for (int i = 0; i < 4; i++) {
        char buf[512];
        snprintf(buf, sizeof(buf),
            " Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f",
            i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024),
            taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps,
            taskStates[i].inferenceMs);
        printf("%s\n", buf);
        g_log.add(buf);
        totalFpsFinal += taskStates[i].fps;
    }
    auto finalGpu = QueryGpuVram();
    for (const auto& gs : finalGpu) {
        char buf[256];
        snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)",
            gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB,
            gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0);
        printf("%s\n", buf);
        g_log.add(buf);
    }
    // Multi-GPU verdict
    std::set finalGpusUsed;
    for (int i = 0; i < 4; i++) {
        if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId);
    }
    {
        char buf[256];
        snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal);
        printf("%s\n", buf);
        g_log.add(buf);
    }
    if (finalGpusUsed.size() > 1) {
        char buf[128];
        snprintf(buf, sizeof(buf), " MULTI-GPU: YES — tasks on %zu different GPUs", finalGpusUsed.size());
        printf("%s\n", buf);
        g_log.add(buf);
    }
    else if (!finalGpusUsed.empty()) {
        char buf[128];
        snprintf(buf, sizeof(buf), " MULTI-GPU: NO — all tasks on GPU[%d] only", *finalGpusUsed.begin());
        printf("%s\n", buf);
        g_log.add(buf);
        g_log.add(" DIAGNOSIS: Engine pool sees only 1 GPU. On dual-GPU systems, check:");
        g_log.add(" 1. Both GPUs visible to CUDA (nvidia-smi shows 2 devices)");
        g_log.add(" 2. TRT engine files are compatible with both GPU architectures");
        g_log.add(" 3. 
No CUDA_VISIBLE_DEVICES env var restricting GPU access");
    }
    printf("============================================================\n");
    g_log.add("================================================================");
    g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH));
    g_log.add("================================================================");
    // --- Release all handles (sequentially on main thread) ---
    for (int i = 0; i < 4; i++) {
        if (alprHandles[i]) {
            ReleaseANSALPRHandle(&alprHandles[i]);
        }
    }
    for (int s = 0; s < NUM_STREAMS; s++) {
        if (rtspClients[s]) {
            StopRTSP(&rtspClients[s]);
            ReleaseANSRTSPHandle(&rtspClients[s]);
        }
    }
    g_log.close();
    cv::destroyAllWindows();
    ANSCENTER::ANSOPENCV::DeinitCameraNetwork();
    return 0;
}
// =============================================================================
// VideoPlayer-based worker thread for SimulatedCam stress test
// Same structure as ALPRWorkerThread but uses ANSVideoPlayer instead of ANSRTSP
//
// Per-iteration flow: grab a frame from the player, run ALPR inference on it,
// parse the JSON result and draw detections onto a cloned display frame, update
// the shared TaskState under its mutex, and log accumulated grab/inference
// timing every 100 frames. Loops until the global g_running flag clears.
//
// NOTE(review): template argument lists (e.g. std::deque<...>,
// std::lock_guard<std::mutex>, std::chrono::duration<...>, GetData<...>)
// appear to have been stripped from this source by an extraction step — they
// must be restored before this file can compile. Code below is otherwise
// unchanged.
// =============================================================================
static void ALPRWorkerThread_VideoPlayer(int taskId, ANSCENTER::ANSVIDEOPLAYER* vpClient,
    ANSCENTER::ANSALPR* alprHandle, TaskState& state) {
    char tag[32];
    snprintf(tag, sizeof(tag), "[Task%d]", taskId);
    std::string prefix(tag);
    g_log.add(prefix + " Worker thread started");
    printf("%s Worker thread started\n", tag);
    int width = 0, height = 0;
    int64_t pts = 0;
    int emptyFrames = 0;
    std::string cameraId = "Cam" + std::to_string(taskId);
    // FPS tracking with sliding window
    std::deque fpsTimestamps;
    // Timing accumulators for periodic benchmarking
    double totalGrabMs = 0, totalInfMs = 0;
    int grabCount = 0, infCount = 0;
    double maxGrabMs = 0, maxInfMs = 0;
    auto benchStart = std::chrono::steady_clock::now();
    while (g_running.load()) {
        // Read frame from VideoPlayer
        auto grabStart = std::chrono::steady_clock::now();
        cv::Mat* framePtr = nullptr;
        GetVideoPlayerCVImage(&vpClient, width, height, pts, &framePtr);
        auto grabEnd = std::chrono::steady_clock::now();
        double grabMs = std::chrono::duration(grabEnd - grabStart).count();
        if (framePtr == nullptr || framePtr->empty()) {
            // No frame available: throttle, periodically log, and reconnect the
            // player after 300 consecutive empty reads.
            emptyFrames++;
            if (emptyFrames % 100 == 1) {
                g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")");
            }
            if (emptyFrames > 300) {
                g_log.add(prefix + " Too many empty frames, attempting reconnect...");
                ReconnectVideoPlayer(&vpClient);
                emptyFrames = 0;
            }
            if (framePtr) delete framePtr;
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            continue;
        }
        emptyFrames = 0;
        totalGrabMs += grabMs;
        grabCount++;
        if (grabMs > maxGrabMs) maxGrabMs = grabMs;
        // Run ALPR inference
        auto infStart = std::chrono::steady_clock::now();
        std::string lpnResult, jpegImage;
        // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup()
        // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create
        // a different pointer the registry doesn't know, breaking NV12 zero-copy.
        ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0,
            lpnResult, jpegImage);
        auto infEnd = std::chrono::steady_clock::now();
        double infMs = std::chrono::duration(infEnd - infStart).count();
        totalInfMs += infMs;
        infCount++;
        if (infMs > maxInfMs) maxInfMs = infMs;
        // Parse detections and draw on frame
        cv::Mat display = framePtr->clone();
        int detCount = 0;
        std::string lastPlateText;
        if (!lpnResult.empty()) {
            try {
                // lpnResult is a JSON document with a "results" array of
                // {class_name, x, y, width, height} detection entries.
                boost::property_tree::ptree pt;
                std::stringstream ss(lpnResult);
                boost::property_tree::read_json(ss, pt);
                BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) {
                    const boost::property_tree::ptree& det = child.second;
                    const auto class_name = GetData(det, "class_name");
                    const auto x = GetData(det, "x");
                    const auto y = GetData(det, "y");
                    const auto w = GetData(det, "width");
                    const auto h = GetData(det, "height");
                    cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h),
                        cv::Scalar(0, 255, 0), 2);
                    cv::putText(display, class_name, cv::Point((int)x, (int)y - 5),
                        cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2);
                    lastPlateText = class_name;
                    detCount++;
                }
            }
            catch (...) {} // malformed JSON result: skip drawing, keep running
        }
        // Update FPS (sliding window over last 2 seconds)
        auto now = std::chrono::steady_clock::now();
        fpsTimestamps.push_back(now);
        while (!fpsTimestamps.empty() &&
            std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) {
            fpsTimestamps.pop_front();
        }
        // Frames seen in the 2-second window, divided by the window length.
        double fps = fpsTimestamps.size() / 2.0;
        // Draw OSD on frame
        char osd[128];
        snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d",
            taskId, fps, infMs, state.frameCount + 1);
        cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7,
            cv::Scalar(0, 255, 255), 2);
        // Update shared state
        {
            std::lock_guard lk(state.mtx);
            state.displayFrame = display;
            state.fps = fps;
            state.inferenceMs = infMs;
            state.lastGrabMs = grabMs;
            state.lastInfMs = infMs;
            state.frameCount++;
            state.detectionCount += detCount;
            if (!lastPlateText.empty()) state.lastPlate = lastPlateText;
        }
        // Periodic logging (every 100 frames)
        if ((state.frameCount % 100) == 0) {
            double avgGrab = grabCount > 0 ? totalGrabMs / grabCount : 0;
            double avgInf = infCount > 0 ? totalInfMs / infCount : 0;
            double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count();
            char buf[512];
            snprintf(buf, sizeof(buf),
                "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | "
                "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d",
                tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs,
                (totalGrabMs / (elapsed * 1000.0)) * 100.0,
                (totalInfMs / (elapsed * 1000.0)) * 100.0,
                state.detectionCount);
            g_log.add(buf);
            printf("%s\n", buf);
            // Reset accumulators
            totalGrabMs = totalInfMs = 0;
            maxGrabMs = maxInfMs = 0;
            grabCount = infCount = 0;
            benchStart = std::chrono::steady_clock::now();
        }
        // This worker owns the grabbed frame; release it each iteration.
        delete framePtr;
    }
    g_log.add(prefix + " Worker loop exited");
}
// =============================================================================
// ANSLPR_MultiGPU_StressTest_SimulatedCam
// Same structure as ANSLPR_MultiGPU_StressTest but uses local video files
// via ANSVideoPlayer instead of live RTSP streams.
// =============================================================================
int ANSLPR_MultiGPU_StressTest_SimulatedCam() {
    ANSCENTER::ANSOPENCV::InitCameraNetwork();
    // --- Initialize log file ---
    g_log.init();
    printf("\n");
    printf("============================================================\n");
    printf(" ANSLPR Multi-GPU Stress Test (Simulated Cam)\n");
    printf(" Using local video files via ANSVideoPlayer\n");
    printf(" Press ESC to stop\n");
    printf(" Log file: %s\n", LOG_FILE_PATH);
    printf("============================================================\n\n");
    g_log.add("============================================================");
    g_log.add(" ANSLPR Multi-GPU Stress Test (Simulated Cam)");
    g_log.add(" Using ANSVideoPlayer with local video files");
    g_log.add("============================================================");
    // --- Log GPU info for diagnostics ---
    LogGpuInfo();
    // --- Video file paths (4 files, one per task) ---
    const std::string videoFile0 = 
"E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4"; const std::string videoFile1 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_1.mp4"; const std::string videoFile2 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_2.mp4"; const std::string videoFile3 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_3.mp4"; g_log.add("Video 0: " + videoFile0); g_log.add("Video 1: " + videoFile1); g_log.add("Video 2: " + videoFile2); g_log.add("Video 3: " + videoFile3); // --- Task states --- TaskState taskStates[4]; // ========================================================================= // Create 4 VideoPlayer readers — one per task // ========================================================================= const int NUM_STREAMS = 4; ANSCENTER::ANSVIDEOPLAYER* vpClients[NUM_STREAMS] = {}; const std::string videoFiles[NUM_STREAMS] = { videoFile0, videoFile1, videoFile2, videoFile3 }; const int taskStreamMap[4] = { 0, 1, 2, 3 }; for (int s = 0; s < NUM_STREAMS; s++) { printf("[Stream%d] Creating VideoPlayer for %s\n", s, videoFiles[s].c_str()); g_log.add("[Stream" + std::to_string(s) + "] Creating VideoPlayer for " + videoFiles[s]); int result = CreateANSVideoPlayerHandle(&vpClients[s], "", videoFiles[s].c_str()); if (result != 1 || vpClients[s] == nullptr) { printf("[Stream%d] FAILED to create VideoPlayer (result=%d)\n", s, result); g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer create FAILED"); vpClients[s] = nullptr; continue; } // Don't call StartVideoPlayer here — play() will be called just before worker threads // launch, so the video doesn't play to completion during the ~16s engine loading phase. 
SetVideoPlayerDisplayResolution(&vpClients[s], 1920, 1080); g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer created (display: 1920x1080)"); } // ========================================================================= // Create 4 ALPR engines sequentially // ========================================================================= ANSCENTER::ANSALPR* alprHandles[4] = {}; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; int engineType = 1; // NVIDIA_GPU double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; for (int i = 0; i < 4; i++) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", i); int streamIdx = taskStreamMap[i]; if (vpClients[streamIdx] == nullptr) { printf("%s Skipped — Stream%d not available\n", tag, streamIdx); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Stream not available"; continue; } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].streamOk = true; taskStates[i].statusMsg = "Loading ALPR engine..."; } printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType); g_log.add(std::string(tag) + " Creating ALPR handle..."); auto engineStart = std::chrono::steady_clock::now(); int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh); if (createResult != 1 || alprHandles[i] == nullptr) { printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult); g_log.add(std::string(tag) + " ALPR create FAILED"); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "ALPR create failed"; continue; } printf("%s Loading ALPR engine (TensorRT)...\n", tag); g_log.add(std::string(tag) + " Loading ALPR engine..."); // Snapshot VRAM before engine load to measure consumption auto vramBefore = GetPerGpuFreeMiB(); int loadResult = LoadANSALPREngineHandle(&alprHandles[i]); auto engineEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(engineEnd - 
engineStart).count(); if (loadResult != 1) { printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult); g_log.add(std::string(tag) + " Engine load FAILED"); ReleaseANSALPRHandle(&alprHandles[i]); alprHandles[i] = nullptr; std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Engine load failed"; continue; } // Snapshot VRAM after engine load — find which GPU lost the most VRAM auto vramAfter = GetPerGpuFreeMiB(); int bestGpu = 0; size_t maxDelta = 0; size_t gpuCount = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size(); for (size_t g = 0; g < gpuCount; g++) { size_t delta = (vramBefore[g] > vramAfter[g]) ? (vramBefore[g] - vramAfter[g]) : 0; if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; } } char buf[512]; snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Video%d)", tag, loadMs, bestGpu, maxDelta, streamIdx); printf("%s\n", buf); g_log.add(buf); // Log per-GPU VRAM state after this engine load for (size_t g = 0; g < vramAfter.size(); g++) { size_t total = 0; if (g < vramBefore.size()) { auto gpus = QueryGpuVram(); if (g < gpus.size()) total = gpus[g].totalMiB; } char vbuf[256]; snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total); printf("%s\n", vbuf); g_log.add(vbuf); } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].engineLoaded = true; taskStates[i].statusMsg = "Running"; taskStates[i].gpuDeviceId = bestGpu; taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024; } } // --- No NVDEC realignment needed — ANSVideoPlayer uses cv::VideoCapture (CPU decode) --- // --- Enable deep pipeline benchmarking on all ALPR handles --- for (int i = 0; i < 4; i++) { if (alprHandles[i]) { alprHandles[i]->ActivateDebugger(true); } } g_log.add("Debug benchmarking ENABLED on all ALPR handles"); // --- Start video playback NOW (just before workers need frames) --- // This avoids the video playing to completion during the ~16s engine 
loading phase. for (int s = 0; s < NUM_STREAMS; s++) { if (vpClients[s]) { StartVideoPlayer(&vpClients[s]); g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer play() started"); } } // --- Launch worker threads --- g_log.add("Launching worker threads..."); std::thread workers[4]; for (int i = 0; i < 4; i++) { int streamIdx = taskStreamMap[i]; if (vpClients[streamIdx] && alprHandles[i]) { workers[i] = std::thread(ALPRWorkerThread_VideoPlayer, i, vpClients[streamIdx], alprHandles[i], std::ref(taskStates[i])); } } // --- Display loop (main thread) --- const int cellW = 640, cellH = 480; const int logPanelH = 200; const char* windowName = "ANSLPR Multi-GPU Stress Test (Simulated Cam)"; cv::namedWindow(windowName, cv::WINDOW_NORMAL); cv::resizeWindow(windowName, cellW * 2, cellH * 2 + logPanelH); auto testStart = std::chrono::steady_clock::now(); auto lastGpuSnapshot = std::chrono::steady_clock::now(); int snapshotCount = 0; while (g_running.load()) { // --- Periodic GPU/perf snapshot every 10 seconds --- auto now2 = std::chrono::steady_clock::now(); if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) { lastGpuSnapshot = now2; snapshotCount++; double elapsedSec = std::chrono::duration(now2 - testStart).count(); g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) + " (elapsed " + std::to_string((int)elapsedSec) + "s) ----"); auto gpuSnap = QueryGpuVram(); for (const auto& gs : gpuSnap) { char buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 
100.0 * gs.usedMiB / gs.totalMiB : 0.0); g_log.add(buf); } double totalFpsSnap = 0; for (int t = 0; t < 4; t++) { std::lock_guard lk(taskStates[t].mtx); char buf[256]; snprintf(buf, sizeof(buf), " T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d", t, taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024), taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs, taskStates[t].frameCount, taskStates[t].detectionCount); g_log.add(buf); totalFpsSnap += taskStates[t].fps; } char buf[128]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap); g_log.add(buf); std::set gpusUsed; for (int t = 0; t < 4; t++) { if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId); } if (gpusUsed.size() > 1) { g_log.add(" MULTI-GPU: YES — tasks distributed across " + std::to_string(gpusUsed.size()) + " GPUs"); } else if (!gpusUsed.empty()) { g_log.add(" MULTI-GPU: NO — all tasks on GPU[" + std::to_string(*gpusUsed.begin()) + "]"); } g_log.add("---- END SNAPSHOT ----"); } // Build 2x2 grid + log panel cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30)); for (int i = 0; i < 4; i++) { int row = i / 2, col = i % 2; cv::Rect roi(col * cellW, row * cellH, cellW, cellH); cv::Mat cell; double fps = 0, infMs = 0; int fCount = 0, dCount = 0; int gpuId = -1; size_t vramMiB = 0; std::string statusMsg, lastPlate; bool engineLoaded = false, streamOk = false; { std::lock_guard lk(taskStates[i].mtx); if (!taskStates[i].displayFrame.empty()) { cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); } fps = taskStates[i].fps; infMs = taskStates[i].inferenceMs; fCount = taskStates[i].frameCount; dCount = taskStates[i].detectionCount; statusMsg = taskStates[i].statusMsg; lastPlate = taskStates[i].lastPlate; engineLoaded = taskStates[i].engineLoaded; streamOk = taskStates[i].streamOk; gpuId = taskStates[i].gpuDeviceId; vramMiB = taskStates[i].vramUsedBytes / (1024 * 
1024); } if (cell.empty()) { cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); } cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED); char bar1[256], bar2[256]; snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s", i, fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); if (gpuId >= 0) { snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB); } else { snprintf(bar2, sizeof(bar2), "GPU: N/A"); } cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1); cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1); cell.copyTo(canvas(roi)); cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2), cv::Scalar(100, 100, 100), 1); cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH), cv::Scalar(100, 100, 100), 1); } // --- Log panel at bottom --- cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH); cv::Mat logPanel = canvas(logRoi); logPanel.setTo(cv::Scalar(20, 20, 20)); auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); char header[128]; snprintf(header, sizeof(header), "Elapsed: %.0fs | Simulated Cam (VideoPlayer) | Press ESC to stop", elapsed); cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); double totalFps = 0; for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); totalFps += taskStates[i].fps; } char aggLine[256]; snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d", totalFps, taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId, 
taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId); cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1); auto gpuSnaps = QueryGpuVram(); int gpuLineY = 58; for (const auto& gs : gpuSnaps) { int tasksOnGpu = 0; size_t taskVramMiB = 0; for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); if (taskStates[i].gpuDeviceId == gs.deviceId) { tasksOnGpu++; taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024); } } char gpuLine[256]; snprintf(gpuLine, sizeof(gpuLine), "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB); cv::putText(logPanel, gpuLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(100, 255, 100), 1); gpuLineY += 18; } for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); char tLine[256]; snprintf(tLine, sizeof(tLine), "T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount); cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1); gpuLineY += 16; } auto recentLogs = g_log.getRecent(4); for (const auto& line : recentLogs) { if (gpuLineY > logPanelH - 5) break; std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." 
: line; cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1); gpuLineY += 15; } cv::imshow(windowName, canvas); int key = cv::waitKey(30); if (key == 27) { // ESC g_log.add("ESC pressed — stopping all tasks..."); printf("\nESC pressed — stopping...\n"); g_running.store(false); } } // --- Wait for all workers --- printf("Waiting for worker threads to finish...\n"); for (int i = 0; i < 4; i++) { if (workers[i].joinable()) workers[i].join(); } // --- Print final summary --- double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); g_log.add("================================================================"); g_log.add(" FINAL PERFORMANCE SUMMARY (Simulated Cam)"); g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds"); g_log.add("================================================================"); printf("\n============================================================\n"); printf(" FINAL PERFORMANCE SUMMARY — Simulated Cam (runtime: %.0fs)\n", totalElapsed); printf("============================================================\n"); double totalFpsFinal = 0; for (int i = 0; i < 4; i++) { char buf[512]; snprintf(buf, sizeof(buf), " Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs); printf("%s\n", buf); g_log.add(buf); totalFpsFinal += taskStates[i].fps; } auto finalGpu = QueryGpuVram(); for (const auto& gs : finalGpu) { char buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 
100.0 * gs.usedMiB / gs.totalMiB : 0.0); printf("%s\n", buf); g_log.add(buf);
}
// NOTE(review): template argument lists appear to have been stripped from this
// view of the file (e.g. `std::set`, `std::lock_guard`, `std::chrono::duration`
// below have no <...>); confirm against the original source before building.
// Report whether the four tasks actually spread across more than one GPU.
std::set finalGpusUsed;
for (int i = 0; i < 4; i++) {
    if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId);
}
{
    char buf[256];
    snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal);
    printf("%s\n", buf);
    g_log.add(buf);
}
if (finalGpusUsed.size() > 1) {
    char buf[128];
    snprintf(buf, sizeof(buf), " MULTI-GPU: YES — tasks on %zu different GPUs", finalGpusUsed.size());
    printf("%s\n", buf);
    g_log.add(buf);
} else if (!finalGpusUsed.empty()) {
    char buf[128];
    snprintf(buf, sizeof(buf), " MULTI-GPU: NO — all tasks on GPU[%d] only", *finalGpusUsed.begin());
    printf("%s\n", buf);
    g_log.add(buf);
}
printf("============================================================\n");
g_log.add("================================================================");
g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH));
g_log.add("================================================================");
// --- Release all handles ---
for (int i = 0; i < 4; i++) {
    if (alprHandles[i]) {
        ReleaseANSALPRHandle(&alprHandles[i]);
    }
}
for (int s = 0; s < NUM_STREAMS; s++) {
    if (vpClients[s]) {
        StopVideoPlayer(&vpClients[s]);
        ReleaseANSVideoPlayerHandle(&vpClients[s]);
    }
}
g_log.close();
cv::destroyAllWindows();
ANSCENTER::ANSOPENCV::DeinitCameraNetwork();
return 0;
}

// =============================================================================
// Worker thread for FilePlayer-based stress test (uses ANSFILEPLAYER)
// Key difference from VideoPlayer worker: uses GetFilePlayerCVImage/ReconnectFilePlayer
// =============================================================================
//
// Pulls frames from `fpClient`, runs ALPR inference on each, draws detections
// onto a cloned display frame, and publishes stats + the display frame into
// `state` under state.mtx. Loops until the global g_running flag is cleared.
//
//   taskId     - index used for the log tag "[TaskN]" and the camera id "CamN"
//   fpClient   - file-player handle; reconnected after >300 consecutive empty grabs
//   alprHandle - ALPR engine this worker feeds
//   state      - shared per-task state, also read by the main display loop
static void ALPRWorkerThread_FilePlayer(int taskId, ANSCENTER::ANSFILEPLAYER* fpClient,
    ANSCENTER::ANSALPR* alprHandle, TaskState& state)
{
    char tag[32];
    snprintf(tag, sizeof(tag), "[Task%d]", taskId);
    std::string prefix(tag);
    g_log.add(prefix + " Worker thread started");
    printf("%s Worker thread started\n", tag);
    int width = 0, height = 0;
    int64_t pts = 0;
    int emptyFrames = 0;                       // consecutive empty grabs (reset on success)
    std::string cameraId = "Cam" + std::to_string(taskId);
    std::deque fpsTimestamps;                  // sliding 2-second window of frame times
    double totalGrabMs = 0, totalInfMs = 0;    // accumulators for the per-100-frame report
    int grabCount = 0, infCount = 0;
    double maxGrabMs = 0, maxInfMs = 0;
    auto benchStart = std::chrono::steady_clock::now();
    while (g_running.load()) {
        // --- Grab one frame (timed) ---
        auto grabStart = std::chrono::steady_clock::now();
        cv::Mat* framePtr = nullptr;
        GetFilePlayerCVImage(&fpClient, width, height, pts, &framePtr);
        auto grabEnd = std::chrono::steady_clock::now();
        double grabMs = std::chrono::duration(grabEnd - grabStart).count();
        if (framePtr == nullptr || framePtr->empty()) {
            // Log every 100th empty grab; reconnect after 300 in a row.
            emptyFrames++;
            if (emptyFrames % 100 == 1) {
                g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")");
            }
            if (emptyFrames > 300) {
                g_log.add(prefix + " Too many empty frames, attempting reconnect...");
                ReconnectFilePlayer(&fpClient);
                emptyFrames = 0;
            }
            if (framePtr) delete framePtr;     // non-null but empty frames are still owned here
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            continue;
        }
        emptyFrames = 0;
        totalGrabMs += grabMs;
        grabCount++;
        if (grabMs > maxGrabMs) maxGrabMs = grabMs;
        // --- Inference (timed) ---
        auto infStart = std::chrono::steady_clock::now();
        std::string lpnResult, jpegImage;
        // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup()
        // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create
        // a different pointer the registry doesn't know, breaking NV12 zero-copy.
        ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage);
        auto infEnd = std::chrono::steady_clock::now();
        double infMs = std::chrono::duration(infEnd - infStart).count();
        totalInfMs += infMs;
        infCount++;
        if (infMs > maxInfMs) maxInfMs = infMs;
        // Draw on a clone so the registry-tracked frame stays untouched.
        cv::Mat display = framePtr->clone();
        int detCount = 0;
        std::string lastPlateText;
        if (!lpnResult.empty()) {
            // Inference result is a JSON document with a "results" array of boxes.
            try {
                boost::property_tree::ptree pt;
                std::stringstream ss(lpnResult);
                boost::property_tree::read_json(ss, pt);
                BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) {
                    const boost::property_tree::ptree& det = child.second;
                    const auto class_name = GetData(det, "class_name");
                    const auto x = GetData(det, "x");
                    const auto y = GetData(det, "y");
                    const auto w = GetData(det, "width");
                    const auto h = GetData(det, "height");
                    cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h), cv::Scalar(0, 255, 0), 2);
                    cv::putText(display, class_name, cv::Point((int)x, (int)y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2);
                    lastPlateText = class_name;
                    detCount++;
                }
            } catch (...) {}   // malformed JSON => treated as no detections
        }
        // --- FPS over a 2-second sliding window: count of frames / 2.0 ---
        auto now = std::chrono::steady_clock::now();
        fpsTimestamps.push_back(now);
        while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) {
            fpsTimestamps.pop_front();
        }
        double fps = fpsTimestamps.size() / 2.0;
        char osd[128];
        snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d", taskId, fps, infMs, state.frameCount + 1);
        cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 255), 2);
        // --- Publish stats + display frame under the task lock ---
        {
            std::lock_guard lk(state.mtx);
            state.displayFrame = display;
            state.fps = fps;
            state.inferenceMs = infMs;
            state.lastGrabMs = grabMs;
            state.lastInfMs = infMs;
            state.frameCount++;
            state.detectionCount += detCount;
            if (!lastPlateText.empty()) state.lastPlate = lastPlateText;
        }
        // Every 100 frames: dump grab/inference averages then reset the window.
        // (frameCount is only written by this thread, so the unlocked read is a
        // read of this thread's own last write.)
        if ((state.frameCount % 100) == 0) {
            double avgGrab = grabCount > 0 ? totalGrabMs / grabCount : 0;
            double avgInf = infCount > 0 ? totalInfMs / infCount : 0;
            double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count();
            char buf[512];
            snprintf(buf, sizeof(buf),
                "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | "
                "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d",
                tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs,
                (totalGrabMs / (elapsed * 1000.0)) * 100.0,
                (totalInfMs / (elapsed * 1000.0)) * 100.0,
                state.detectionCount);
            g_log.add(buf);
            printf("%s\n", buf);
            totalGrabMs = totalInfMs = 0;
            maxGrabMs = maxInfMs = 0;
            grabCount = infCount = 0;
            benchStart = std::chrono::steady_clock::now();
        }
        delete framePtr;   // worker owns the grabbed frame
    }
    g_log.add(prefix + " Worker loop exited");
}

// =============================================================================
// ANSLPR_MultiGPU_StressTest_FilePlayer
// Same as SimulatedCam but uses ANSFILEPLAYER (loops video continuously).
// =============================================================================
int ANSLPR_MultiGPU_StressTest_FilePlayer()
{
    ANSCENTER::ANSOPENCV::InitCameraNetwork();
    g_log.init();
    printf("\n");
    printf("============================================================\n");
    printf(" ANSLPR Multi-GPU Stress Test (FilePlayer — looping)\n");
    printf(" Using local video files via ANSFilePlayer (HW decode)\n");
    printf(" Press ESC to stop\n");
    printf(" Log file: %s\n", LOG_FILE_PATH);
    printf("============================================================\n\n");
    g_log.add("============================================================");
    g_log.add(" ANSLPR Multi-GPU Stress Test (FilePlayer — looping)");
    g_log.add(" Using ANSFilePlayer with HW decode + NV12 zero-copy");
    g_log.add("============================================================");
    LogGpuInfo();
    // Four local source videos, one per simulated stream (hard-coded paths).
    const std::string videoFile0 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4";
    const std::string videoFile1 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_1.mp4";
    const std::string videoFile2 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_2.mp4";
    const std::string videoFile3 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_3.mp4";
    g_log.add("Video 0: " + videoFile0);
    g_log.add("Video 1: " + videoFile1);
    g_log.add("Video 2: " + videoFile2);
    g_log.add("Video 3: " + videoFile3);
    // NOTE(review): template argument lists appear to have been stripped from
    // this view (e.g. `std::lock_guard`, `std::chrono::duration`, `std::set`
    // below); confirm against the original source before building.
    TaskState taskStates[4];
    // =========================================================================
    // Create 4 FilePlayer readers
    // =========================================================================
    const int NUM_STREAMS = 4;
    ANSCENTER::ANSFILEPLAYER* fpClients[NUM_STREAMS] = {};
    const std::string videoFiles[NUM_STREAMS] = { videoFile0, videoFile1, videoFile2, videoFile3 };
    const int taskStreamMap[4] = { 0, 1, 2, 3 };   // task i reads stream taskStreamMap[i]
    for (int s = 0; s < NUM_STREAMS; s++) {
        printf("[Stream%d] Creating FilePlayer for %s\n", s, videoFiles[s].c_str());
        g_log.add("[Stream" + std::to_string(s) + "] Creating FilePlayer for " + videoFiles[s]);
        int result = CreateANSFilePlayerHandle(&fpClients[s], "", videoFiles[s].c_str());
        if (result != 1 || fpClients[s] == nullptr) {
            printf("[Stream%d] FAILED to create FilePlayer (result=%d)\n", s, result);
            g_log.add("[Stream" + std::to_string(s) + "] FilePlayer create FAILED");
            fpClients[s] = nullptr;
            continue;
        }
        // Don't start yet — start after engines are loaded
        SetFilePlayerDisplayResolution(&fpClients[s], 1920, 1080);
        g_log.add("[Stream" + std::to_string(s) + "] FilePlayer created (display: 1920x1080)");
    }
    // =========================================================================
    // Create 4 ALPR engines sequentially
    // =========================================================================
    ANSCENTER::ANSALPR* alprHandles[4] = {};
    std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
    int engineType = 1;
    double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
    for (int i = 0; i < 4; i++) {
        char tag[32];
        snprintf(tag, sizeof(tag), "[Task%d]", i);
        int streamIdx = taskStreamMap[i];
        if (fpClients[streamIdx] == nullptr) {
            printf("%s Skipped — Stream%d not available\n", tag, streamIdx);
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Stream not available";
            continue;
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].streamOk = true;
            taskStates[i].statusMsg = "Loading ALPR engine...";
        }
        printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType);
        g_log.add(std::string(tag) + " Creating ALPR handle...");
        auto engineStart = std::chrono::steady_clock::now();
        int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh);
        if (createResult != 1 || alprHandles[i] == nullptr) {
            printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult);
            g_log.add(std::string(tag) + " ALPR create FAILED");
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "ALPR create failed";
            continue;
        }
        printf("%s Loading ALPR engine (TensorRT)...\n", tag);
        g_log.add(std::string(tag) + " Loading ALPR engine...");
        // Snapshot free VRAM per GPU before/after the load; the GPU with the
        // largest free-memory drop is assumed to be the one the engine landed on.
        auto vramBefore = GetPerGpuFreeMiB();
        int loadResult = LoadANSALPREngineHandle(&alprHandles[i]);
        auto engineEnd = std::chrono::steady_clock::now();
        double loadMs = std::chrono::duration(engineEnd - engineStart).count();
        if (loadResult != 1) {
            printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult);
            g_log.add(std::string(tag) + " Engine load FAILED");
            ReleaseANSALPRHandle(&alprHandles[i]);
            alprHandles[i] = nullptr;
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Engine load failed";
            continue;
        }
        auto vramAfter = GetPerGpuFreeMiB();
        int bestGpu = 0;
        size_t maxDelta = 0;
        for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) {
            size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0;
            if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
        }
        char ebuf[256];
        snprintf(ebuf, sizeof(ebuf), "%s Engine loaded in %d ms | GPU[%d] | VRAM used: %zu MiB (Video%d)", tag, (int)loadMs, bestGpu, maxDelta, i);
        printf("%s\n", ebuf);
        g_log.add(ebuf);
        // Log per-GPU free VRAM after this engine load.
        for (size_t g = 0; g < vramAfter.size(); g++) {
            size_t total = 0;
            cudaDeviceProp prop;
            if (cudaGetDeviceProperties(&prop, (int)g) == cudaSuccess) {
                total = prop.totalGlobalMem / (1024 * 1024);
            }
            char vbuf[128];
            snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total);
            printf("%s\n", vbuf);
            g_log.add(vbuf);
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].engineLoaded = true;
            taskStates[i].statusMsg = "Running";
            taskStates[i].gpuDeviceId = bestGpu;
            taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024;
        }
    }
    // --- Enable debug benchmarking ---
    for (int i = 0; i < 4; i++) {
        if (alprHandles[i]) {
            alprHandles[i]->ActivateDebugger(true);
        }
    }
    g_log.add("Debug benchmarking ENABLED on all ALPR handles");
    // --- Start video playback NOW (just before workers need frames) ---
    for (int s = 0; s < NUM_STREAMS; s++) {
        if (fpClients[s]) {
            StartFilePlayer(&fpClients[s]);
            g_log.add("[Stream" + std::to_string(s) + "] FilePlayer play() started");
        }
    }
    // --- Launch worker threads ---
    g_log.add("Launching worker threads...");
    std::thread workers[4];
    for (int i = 0; i < 4; i++) {
        int streamIdx = taskStreamMap[i];
        if (fpClients[streamIdx] && alprHandles[i]) {
            workers[i] = std::thread(ALPRWorkerThread_FilePlayer, i, fpClients[streamIdx], alprHandles[i], std::ref(taskStates[i]));
        }
    }
    // --- Display loop (main thread): 2x2 grid of task cells + bottom log panel ---
    const int cellW = 640, cellH = 480;
    const int logPanelH = 200;
    const char* windowName = "ANSLPR Stress Test (FilePlayer — looping)";
    cv::namedWindow(windowName, cv::WINDOW_NORMAL);
    cv::resizeWindow(windowName, cellW * 2, cellH * 2 + logPanelH);
    auto testStart = std::chrono::steady_clock::now();
    auto lastGpuSnapshot = std::chrono::steady_clock::now();
    int snapshotCount = 0;
    while (g_running.load()) {
        // --- Periodic GPU/perf snapshot every 10 seconds ---
        auto now2 = std::chrono::steady_clock::now();
        if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) {
            lastGpuSnapshot = now2;
            snapshotCount++;
            double elapsedSec = std::chrono::duration(now2 - testStart).count();
            g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) + " (elapsed " + std::to_string((int)elapsedSec) + "s) ----");
            auto gpuSnap = QueryGpuVram();
            for (const auto& gs : gpuSnap) {
                char buf[256];
                snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0);
                g_log.add(buf);
            }
            double totalFpsSnap = 0;
            for (int t = 0; t < 4; t++) {
                std::lock_guard lk(taskStates[t].mtx);
                char buf[256];
                snprintf(buf, sizeof(buf), " T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d", t, taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024), taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs, taskStates[t].frameCount, taskStates[t].detectionCount);
                g_log.add(buf);
                totalFpsSnap += taskStates[t].fps;
            }
            char buf[128];
            snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap);
            g_log.add(buf);
            std::set gpusUsed;
            for (int t = 0; t < 4; t++) {
                if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
            }
            if (gpusUsed.size() > 1) {
                g_log.add(" MULTI-GPU: YES — tasks distributed across " + std::to_string(gpusUsed.size()) + " GPUs");
            } else if (!gpusUsed.empty()) {
                g_log.add(" MULTI-GPU: NO — all tasks on GPU[" + std::to_string(*gpusUsed.begin()) + "]");
            }
            g_log.add("---- END SNAPSHOT ----");
        }
        // Build 2x2 grid + log panel
        cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30));
        for (int i = 0; i < 4; i++) {
            int row = i / 2, col = i % 2;
            cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
            cv::Mat cell;
            double fps = 0, infMs = 0;
            int fCount = 0, dCount = 0;
            int gpuId = -1;
            size_t vramMiB = 0;
            std::string statusMsg, lastPlate;
            bool engineLoaded = false, streamOk = false;
            // Copy everything out of the shared state under the lock, draw after.
            {
                std::lock_guard lk(taskStates[i].mtx);
                if (!taskStates[i].displayFrame.empty()) {
                    cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH));
                }
                fps = taskStates[i].fps;
                infMs = taskStates[i].inferenceMs;
                fCount = taskStates[i].frameCount;
                dCount = taskStates[i].detectionCount;
                statusMsg = taskStates[i].statusMsg;
                lastPlate = taskStates[i].lastPlate;
                engineLoaded = taskStates[i].engineLoaded;
                streamOk = taskStates[i].streamOk;
                gpuId = taskStates[i].gpuDeviceId;
                vramMiB = taskStates[i].vramUsedBytes / (1024 * 1024);
            }
            if (cell.empty()) {
                // No frame yet: placeholder cell showing the task status message.
                cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
                cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
            }
            cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
            char bar1[256], bar2[256];
            snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s", i, fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str());
            if (gpuId >= 0) {
                snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
            } else {
                snprintf(bar2, sizeof(bar2), "GPU: N/A");
            }
            cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
            cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
            cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
            cell.copyTo(canvas(roi));
            cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2), cv::Scalar(100, 100, 100), 1);
            cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH), cv::Scalar(100, 100, 100), 1);
        }
        // Log panel
        cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH);
        cv::Mat logPanel = canvas(logRoi);
        logPanel.setTo(cv::Scalar(20, 20, 20));
        auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count();
        char header[128];
        snprintf(header, sizeof(header), "Elapsed: %.0fs | FilePlayer (looping, HW decode) | Press ESC to stop", elapsed);
        cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1);
        double totalFps = 0;
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            totalFps += taskStates[i].fps;
        }
        char aggLine[256];
        snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d", totalFps, taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId, taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId);
        cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1);
        auto gpuSnaps = QueryGpuVram();
        int gpuLineY = 58;
        for (const auto& gs : gpuSnaps) {
            int tasksOnGpu = 0;
            size_t taskVramMiB = 0;
            for (int i = 0; i < 4; i++) {
                std::lock_guard lk(taskStates[i].mtx);
                if (taskStates[i].gpuDeviceId == gs.deviceId) {
                    tasksOnGpu++;
                    taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024);
                }
            }
            char gpuLine[256];
            snprintf(gpuLine, sizeof(gpuLine), "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB);
            cv::putText(logPanel, gpuLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(100, 255, 100), 1);
            gpuLineY += 18;
        }
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            char tLine[256];
            snprintf(tLine, sizeof(tLine), "T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount);
            cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1);
            gpuLineY += 16;
        }
        // Tail of the rolling log, truncated to fit the panel width.
        auto recentLogs = g_log.getRecent(4);
        for (const auto& line : recentLogs) {
            if (gpuLineY > logPanelH - 5) break;
            std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." : line;
            cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1);
            gpuLineY += 15;
        }
        cv::imshow(windowName, canvas);
        int key = cv::waitKey(30);
        if (key == 27) {
            g_log.add("ESC pressed — stopping all tasks...");
            printf("\nESC pressed — stopping...\n");
            g_running.store(false);
        }
    }
    // --- Wait for all workers ---
    printf("Waiting for worker threads to finish...\n");
    for (int i = 0; i < 4; i++) {
        if (workers[i].joinable()) workers[i].join();
    }
    // --- Final summary ---
    double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count();
    g_log.add("================================================================");
    g_log.add(" FINAL PERFORMANCE SUMMARY (FilePlayer — looping)");
    g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
    g_log.add("================================================================");
    printf("\n============================================================\n");
    printf(" FINAL PERFORMANCE SUMMARY — FilePlayer (runtime: %.0fs)\n", totalElapsed);
    printf("============================================================\n");
    double totalFpsFinal = 0;
    for (int i = 0; i < 4; i++) {
        char buf[512];
        snprintf(buf, sizeof(buf), " Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs);
        printf("%s\n", buf);
        g_log.add(buf);
        totalFpsFinal += taskStates[i].fps;
    }
    auto finalGpu = QueryGpuVram();
    for (const auto& gs : finalGpu) {
        char buf[256];
        snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0);
        printf("%s\n", buf);
        g_log.add(buf);
    }
    {
        char buf[256];
        snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal);
        printf("%s\n", buf);
        g_log.add(buf);
    }
    printf("============================================================\n");
    g_log.add("================================================================");
    // --- Release all handles ---
    for (int i = 0; i < 4; i++) {
        if (alprHandles[i]) {
            ReleaseANSALPRHandle(&alprHandles[i]);
        }
    }
    for (int s = 0; s < NUM_STREAMS; s++) {
        if (fpClients[s]) {
            StopFilePlayer(&fpClients[s]);
            ReleaseANSFilePlayerHandle(&fpClients[s]);
        }
    }
    g_log.close();
    cv::destroyAllWindows();
    ANSCENTER::ANSOPENCV::DeinitCameraNetwork();
    return 0;
}

// Entry point: the commented-out calls are alternative test scenarios kept
// for quick manual switching; only the multi-GPU stress test runs by default.
int main()
{
    // ANSLPR_OD_INDOInferences_FileTest();
    //ANSLPR_OD_Inferences_FileTest();
    //ANSLPR_OD_VideoTest();
    //ANSLPR_BigSize_VideoTest();
    //ANSLPR_CPU_VideoTest();
    //for (int i = 0; i < 100; i++) {
    // ANSLPR_CPU_Inferences_FileTest();
    //}
    ANSLPR_MultiGPU_StressTest();
    //ANSLPR_MultiGPU_StressTest_SimulatedCam();
    //ANSLPR_MultiGPU_StressTest_FilePlayer();
    return 0;
}