#define NOMINMAX #include #include #include #include #include "boost/property_tree/ptree.hpp" #include "boost/property_tree/json_parser.hpp" #include "boost/foreach.hpp" #include "boost/optional.hpp" #include #include #include #include #include #include #include #include "ANSLPR.h" #include "ANSLPR_CPU.h" #include "ANSOpenCV.h" #include "ANSRTSP.h" #include "ANSVideoPlayer.h" #include "ANSFilePlayer.h" #include #include #include #include #include #include #include #include #include #include #include "EPLoader.h" // Decode \\uXXXX (literal backslash-u-hex) sequences back to UTF-8. // VectorDetectionToJsonString double-escapes Unicode for LabVIEW compatibility, // so JSON strings contain literal "\u54c1" text instead of actual Unicode chars. static std::string DecodeUnicodeEscapes(const std::string& input) { std::string result; result.reserve(input.size()); size_t i = 0; while (i < input.size()) { if (i + 5 < input.size() && input[i] == '\\' && input[i + 1] == 'u') { // Parse 4 hex digits std::string hex = input.substr(i + 2, 4); char* end = nullptr; uint32_t cp = static_cast(strtoul(hex.c_str(), &end, 16)); if (end == hex.c_str() + 4) { // Check for surrogate pair (\\uD800-DBFF followed by \\uDC00-DFFF) if (cp >= 0xD800 && cp <= 0xDBFF && i + 11 < input.size() && input[i + 6] == '\\' && input[i + 7] == 'u') { std::string hex2 = input.substr(i + 8, 4); uint32_t cp2 = static_cast(strtoul(hex2.c_str(), &end, 16)); if (end == hex2.c_str() + 4 && cp2 >= 0xDC00 && cp2 <= 0xDFFF) { cp = 0x10000 + ((cp - 0xD800) << 10) + (cp2 - 0xDC00); i += 12; } else { i += 6; } } else { i += 6; } // Encode codepoint as UTF-8 if (cp < 0x80) { result += static_cast(cp); } else if (cp < 0x800) { result += static_cast(0xC0 | (cp >> 6)); result += static_cast(0x80 | (cp & 0x3F)); } else if (cp < 0x10000) { result += static_cast(0xE0 | (cp >> 12)); result += static_cast(0x80 | ((cp >> 6) & 0x3F)); result += static_cast(0x80 | (cp & 0x3F)); } else { result += static_cast(0xF0 | (cp >> 18)); 
result += static_cast(0x80 | ((cp >> 12) & 0x3F)); result += static_cast(0x80 | ((cp >> 6) & 0x3F)); result += static_cast(0x80 | (cp & 0x3F)); } } else { result += input[i++]; } } else { result += input[i++]; } } return result; } template T GetOptionalValue(const boost::property_tree::ptree& pt, std::string attribute, T defaultValue) { if (pt.count(attribute)) { return pt.get(attribute); } return defaultValue; } template T GetData(const boost::property_tree::ptree& pt, const std::string& key) { T ret; if (boost::optional data = pt.get_optional(key)) { ret = data.get(); } return ret; } int ANSLPR_CPU_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle = new ANSCENTER::ANSALPR_CPU(); std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericALPR_v1.0.zip"; std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\ALPR1.mp4"; std::string lpnResult; bool result = infHandle->Initialize(licenseKey, modelZipFile, "",0.5, 0.5); std::cout << "Loading ANSLRP:" << result << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. 
please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); infHandle->Inference(frame, lpnResult); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); } int ANSLPR_BigSize_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle = new ANSCENTER::ANSALPR_CPU(); std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericALPR_v1.0.zip"; std::string videoFilePath = 
"C:\\Programs\\DemoAssets\\Videos\\ALRP\\3725.mp4"; std::string lpnResult; bool result = infHandle->Initialize(licenseKey, modelZipFile, "",0.5,0.5); std::cout << "Loading ANSLRP:" << result << std::endl; infHandle->LoadEngine(); cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); infHandle->Inference(frame, lpnResult,"MyCam"); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); } std::string readJsonFile(const std::string& 
filePath) { boost::property_tree::ptree pt; boost::property_tree::read_json(filePath, pt); std::ostringstream oss; boost::property_tree::write_json(oss, pt, false); return oss.str(); } unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh) { int size = int(image.total() * image.elemSize()); std::cout << "size:" << size << std::endl; unsigned char* bytes = new unsigned char[size]; // you will have to delete[] that later std::memcpy(bytes, image.data, size * sizeof(unsigned char)); bufferLengh = size * sizeof(unsigned char); return bytes; } int ANSLPR_CPU_Inferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_GenericALPR_v1.0.zip"; std::string imageFilePath = "C:\\Projects\\ANSVIS\\Documentation\\TestImages\\ALPR\\LP1.jpg"; std::string StrBox = readJsonFile("C:\\Projects\\ANLS\\Documents\\bboxStr.json"); int result = CreateANSALPRHandle(& infHandle, "", modelZipFile.c_str(), "",0,0.5,0.5,0); std::cout << "Init Result:" << result << std::endl; unsigned int bufferLength = 0; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat frame = input; unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength); int height = frame.rows; int width = frame.cols; auto start = std::chrono::system_clock::now(); std::string detectionResult = ANSALPR_RunInferenceBinaryInCroppedImages(&infHandle, jpeg_string, width, height, StrBox.c_str()); std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, 
pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s:%d", class_name, class_id), cv::Point(x, y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); //cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); //cv::imshow("ANSLPR", frame); //cv::waitKey(0); //cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_CV_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = currentPath.string() + "\\ANS_GenericALPR_v1.0.zip"; modelZipFile = "C:\\Programs\\DemoAssets\\ModelsForANSVIS\\ANS_GenericALPR_v1.1.zip"; std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\3725.mp4"; std::string lpnResult; int result = CreateANSALPRHandle(& infHandle, licenseKey.c_str(), modelZipFile.c_str(), "",0,0.5,0.5,0); std::cout << "Loading ANSLRP:" << result << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, 
break loop { std::cout << "\n Cannot read the video file. please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); std::string jpegImage; cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(frame); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(& infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); cv::resize(frame, frame, cv::Size(1920,1080)); // to half size or even smaller delete image; cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_OD_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; 
boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile ="C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";// "C:\\Projects\\ANSVIS\\Models\\ANS_ALPR_v1.2.zip";// "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ServerOptimised\\ANS_ALPR_v1.1_NVIDIAGeForceRTX4070LaptopGPU.zip"; std::string videoFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\day.mp4";//"E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4";// std::string lpnResult; int engineType = 1; double detectionThreshold = 0.5; double ocrThreshold = 0.5; double detectionColourThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, detectionColourThreshold); std::cout << "Loading ANSLRP:" << result << std::endl; int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Loading ANSLRP:" << loadEngine << std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. 
please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); std::string jpegImage; cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(frame); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); std::string detectionResult = lpnResult; //std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } cv::resize(frame, frame, cv::Size(1920, 1080)); // to half size or even smaller delete image; cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of program faces.\n"; } } capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); } int ANSLPR_OD_Inferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree 
detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ServerOptimised\\ANS_ALPR_v1.2_NVIDIAGeForceRTX4070LaptopGPU.zip"; std::string imageFilePath = "E:\\Programs\\DemoAssets\\Images\\ALPRTest\\WrongOrder\\1109.jpg";//20250912_213850.717.jpg; 20250912_213850.511.jpg;//20250912_213850.411.jpg;//20250912_213850.261.jpg(65H115912:0.73) cororect (20250912_213850.071.jpg: 65H115833) std::string lpnResult; int engineType = 1; double detectionThreshold = 0.3; double ocrThreshold = 0.5; double colourThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, colourThreshold); std::cout << "Loading ANSLRP:" << result << std::endl; auto start = std::chrono::system_clock::now(); int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Init Result:" << result << std::endl; auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time to load engine = %lld ms\n", static_cast(elapsed.count())); unsigned int bufferLength = 0; std::string jpegImage; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat* image = nullptr; // ✅ Use a pointer to hold the allocated image image = new cv::Mat(input); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, 
"class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(input, cv::Rect(x, y, width, height), 123, 2); cv::putText(input, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } delete image; cv::resize(input, input, cv::Size(1920, 1080)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", input); cv::waitKey(0); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); return 0; } int ANSLPR_OD_INDOInferences_FileTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSALPR* infHandle; std::string licenseKey = ""; std::string modelZipFile = "E:\\Programs\\DemoAssets\\ModelsForANSVIS\\ANS_ALPR_IND_v1.1.zip"; std::string imageFilePath = "E:\\Programs\\TrainingWorkingStation\\IndoALPR\\Indonesian License Plate Dataset\\data\\train075.jpg";//20250912_213850.717.jpg; 20250912_213850.511.jpg;//20250912_213850.411.jpg;//20250912_213850.261.jpg(65H115912:0.73) cororect (20250912_213850.071.jpg: 65H115833) std::string lpnResult; int engineType = 1; double detectionThreshold = 0.3; double ocrThreshold = 0.5; int result = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelZipFile.c_str(), "", engineType, detectionThreshold, ocrThreshold, 0.5); std::cout << "Loading ANSLRP:" << result << std::endl; int loadEngine = LoadANSALPREngineHandle(&infHandle); std::cout << "Init Result:" << result << std::endl; unsigned int bufferLength = 0; std::string jpegImage; cv::Mat input = cv::imread(imageFilePath, cv::IMREAD_COLOR); cv::Mat* image = nullptr; // ✅ 
Use a pointer to hold the allocated image image = new cv::Mat(input); // ✅ Allocate the image ANSALPR_RunInferenceComplete_CPP(&infHandle, &image, "MyCam", 0, 0, lpnResult, jpegImage); std::string detectionResult = lpnResult; std::cout << "Result:" << detectionResult; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(input, cv::Rect(x, y, width, height), 123, 2); cv::putText(input, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 1.2, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); delete image; cv::resize(input, input, cv::Size(1920, 1080)); // to half size or even smaller cv::namedWindow("ANSLPR", cv::WINDOW_AUTOSIZE); cv::imshow("ANSLPR", input); cv::waitKey(0); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); return 0; } // ============================================================================ // Multi-GPU ALPR Stress Test — 4 parallel RTSP→ALPR tasks // // Purpose: Diagnose why dual RTX 5080 performs worse than single RTX 3050. // Each task has its own RTSP reader + ALPR engine. Tasks 0-1 read stream A, // tasks 2-3 read stream B. All 4 run in parallel threads. // // The display composites all 4 views into a single resizable window with a // log panel at the bottom showing per-task stats and GPU diagnostics. 
// ============================================================================ // Thread-safe logger: collects timestamped messages for on-screen log + file static const char* LOG_FILE_PATH = "C:\\Temp\\ALPRdebug.txt"; class ThreadSafeLog { public: void init() { std::lock_guard lk(m_mtx); m_file.open(LOG_FILE_PATH, std::ios::out | std::ios::trunc); if (m_file.is_open()) { auto now = std::chrono::system_clock::now(); auto t = std::chrono::system_clock::to_time_t(now); char timeBuf[64]; struct tm lt; localtime_s(<, &t); strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%d %H:%M:%S", <); m_file << "================================================================\n"; m_file << " ANSLPR Multi-GPU Stress Test Debug Log\n"; m_file << " Started: " << timeBuf << "\n"; m_file << " Log file: " << LOG_FILE_PATH << "\n"; m_file << "================================================================\n\n"; m_file.flush(); } } void add(const std::string& msg) { std::lock_guard lk(m_mtx); // Full timestamp for file: HH:MM:SS.mmm auto now = std::chrono::system_clock::now(); auto t = std::chrono::system_clock::to_time_t(now); auto ms = std::chrono::duration_cast( now.time_since_epoch()).count() % 1000; struct tm lt; localtime_s(<, &t); char ts[32]; snprintf(ts, sizeof(ts), "[%02d:%02d:%02d.%03lld] ", lt.tm_hour, lt.tm_min, lt.tm_sec, static_cast(ms)); std::string line = std::string(ts) + msg; m_lines.push_back(line); if (m_lines.size() > 200) m_lines.pop_front(); // Write to file immediately (flush so user can read mid-run) if (m_file.is_open()) { m_file << line << "\n"; m_file.flush(); } } std::deque getRecent(size_t n) { std::lock_guard lk(m_mtx); size_t start = (m_lines.size() > n) ? 
m_lines.size() - n : 0; return std::deque(m_lines.begin() + start, m_lines.end()); } void close() { std::lock_guard lk(m_mtx); if (m_file.is_open()) m_file.close(); } private: std::mutex m_mtx; std::deque m_lines; std::ofstream m_file; }; // Per-task shared state (written by worker thread, read by display thread) struct TaskState { std::mutex mtx; cv::Mat displayFrame; // latest frame with detections drawn double fps = 0.0; double inferenceMs = 0.0; int frameCount = 0; int detectionCount= 0; std::string lastPlate; bool engineLoaded = false; bool streamOk = false; std::string statusMsg = "Initializing..."; // GPU resource tracking (set during engine load) int gpuDeviceId = -1; // which GPU this task's engine landed on size_t vramUsedBytes = 0; // VRAM consumed by this task's engine // Grab/Inference timing (updated by worker thread) double lastGrabMs = 0.0; double lastInfMs = 0.0; }; // Snapshot of GPU state for real-time monitoring struct GpuSnapshot { int deviceId = 0; std::string name; size_t totalMiB = 0; size_t freeMiB = 0; size_t usedMiB = 0; }; // Safe check: is CUDA runtime available? (prevents crash on CPU-only PCs) static bool IsCudaAvailable() { static int cached = -1; if (cached < 0) { HMODULE h = LoadLibraryA("nvcuda.dll"); cached = (h != nullptr) ? 
1 : 0; if (h) FreeLibrary(h); } return cached == 1; } // Query current GPU VRAM usage for all devices static std::vector QueryGpuVram() { std::vector snapshots; if (!IsCudaAvailable()) return snapshots; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return snapshots; for (int i = 0; i < deviceCount; i++) { cudaDeviceProp prop; cudaGetDeviceProperties(&prop, i); int prevDevice; cudaGetDevice(&prevDevice); cudaSetDevice(i); size_t freeMem = 0, totalMem = 0; cudaMemGetInfo(&freeMem, &totalMem); cudaSetDevice(prevDevice); GpuSnapshot s; s.deviceId = i; s.name = prop.name; s.totalMiB = totalMem / (1024 * 1024); s.freeMiB = freeMem / (1024 * 1024); s.usedMiB = s.totalMiB - s.freeMiB; snapshots.push_back(s); } return snapshots; } // Measure per-GPU free VRAM (returns array indexed by device) static std::vector GetPerGpuFreeMiB() { std::vector result; if (!IsCudaAvailable()) return result; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return result; int prevDevice; cudaGetDevice(&prevDevice); for (int i = 0; i < deviceCount; i++) { cudaSetDevice(i); size_t freeMem = 0, totalMem = 0; cudaMemGetInfo(&freeMem, &totalMem); result.push_back(freeMem / (1024 * 1024)); } cudaSetDevice(prevDevice); return result; } static std::atomic g_running{true}; static ThreadSafeLog g_log; // Log GPU info using CUDA runtime static void LogGpuInfo() { if (!IsCudaAvailable()) { g_log.add("No NVIDIA GPU detected — running in CPU mode"); printf("[GPU] No NVIDIA GPU detected — running in CPU mode\n"); return; } int deviceCount = 0; cudaError_t err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess) { g_log.add("CUDA ERROR: cudaGetDeviceCount failed: " + std::string(cudaGetErrorString(err))); printf("[GPU] CUDA ERROR: %s\n", cudaGetErrorString(err)); return; } printf("============================================================\n"); printf(" GPU DEVICE REPORT — %d device(s) detected\n", deviceCount); 
printf("============================================================\n"); g_log.add("GPU DEVICE REPORT: " + std::to_string(deviceCount) + " device(s)"); for (int i = 0; i < deviceCount; i++) { cudaDeviceProp prop; cudaGetDeviceProperties(&prop, i); size_t freeMem = 0, totalMem = 0; cudaSetDevice(i); cudaMemGetInfo(&freeMem, &totalMem); char buf[512]; snprintf(buf, sizeof(buf), " GPU[%d] %s | SM %d.%d | VRAM: %.0f MiB total, %.0f MiB free", i, prop.name, prop.major, prop.minor, totalMem / 1048576.0, freeMem / 1048576.0); printf("%s\n", buf); g_log.add(buf); snprintf(buf, sizeof(buf), " GPU[%d] PCIe Bus %d, Device %d | Async Engines: %d | Concurrent Kernels: %d", i, prop.pciBusID, prop.pciDeviceID, prop.asyncEngineCount, prop.concurrentKernels); printf("%s\n", buf); g_log.add(buf); } printf("============================================================\n"); } // Global inference mutex: serializes inference on non-NVIDIA GPUs (DirectML/OpenVINO). // DirectML is not thread-safe when multiple ORT sessions run concurrently on the // same integrated GPU — causes access violations on 4K frames. // On NVIDIA, each task has its own CUDA context so no serialization needed. static std::mutex g_inferenceMutex; // Worker thread: reads RTSP frames and runs ALPR inference // RTSP client and ALPR engine are pre-created on the main thread to avoid // race conditions in CreateANSRTSPHandle / CreateANSALPRHandle. // Takes rtspClientPtr (pointer to array slot) + streamGuard mutex so the // CHAOS thread can safely destroy+recreate the stream without use-after-free. 
static void ALPRWorkerThread(int taskId, ANSCENTER::ANSRTSPClient** rtspClientPtr, std::mutex* streamGuard, ANSCENTER::ANSALPR* alprHandle, TaskState& state) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", taskId); std::string prefix(tag); g_log.add(prefix + " Worker thread started"); printf("%s Worker thread started\n", tag); // --- Main inference loop --- int width = 0, height = 0; int64_t pts = 0; int emptyFrames = 0; std::string cameraId = "Cam" + std::to_string(taskId); // FPS tracking with sliding window std::deque fpsTimestamps; // Timing accumulators for periodic benchmarking double totalGrabMs = 0, totalInfMs = 0; int grabCount = 0, infCount = 0; double maxGrabMs = 0, maxInfMs = 0; auto benchStart = std::chrono::steady_clock::now(); bool hwDecodeLogged = false; while (g_running.load()) { // Lock the stream guard to prevent CHAOS from destroying the client // while we're mid-frame-grab or mid-inference. std::unique_lock streamLock(*streamGuard); // Re-read the client pointer each iteration — CHAOS may have // destroyed+recreated it, so our old pointer could be dangling. ANSCENTER::ANSRTSPClient* rtspClient = *rtspClientPtr; if (rtspClient == nullptr) { streamLock.unlock(); emptyFrames++; if (emptyFrames % 100 == 1) { g_log.add(prefix + " Stream destroyed by CHAOS, waiting... 
(count=" + std::to_string(emptyFrames) + ")"); } std::this_thread::sleep_for(std::chrono::milliseconds(50)); continue; } // Read frame from RTSP via ANSCV auto grabStart = std::chrono::steady_clock::now(); cv::Mat* framePtr = nullptr; GetRTSPCVImage(&rtspClient, width, height, pts, &framePtr); auto grabEnd = std::chrono::steady_clock::now(); double grabMs = std::chrono::duration(grabEnd - grabStart).count(); if (framePtr == nullptr || framePtr->empty()) { emptyFrames++; if (emptyFrames % 100 == 1) { g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")"); } if (emptyFrames > 300) { g_log.add(prefix + " Too many empty frames (reconnect disabled for long test)"); // ReconnectRTSP(&rtspClient); // Disabled for VRAM stability testing emptyFrames = 0; } streamLock.unlock(); if (framePtr) delete framePtr; std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } emptyFrames = 0; // Log HW decode status once after first successful frame if (!hwDecodeLogged) { hwDecodeLogged = true; int hwActive = rtspClient->IsHWDecodingActive() ? 1 : 0; bool isCuda = rtspClient->IsCudaHWAccel(); int hwGpu = rtspClient->GetHWDecodingGpuIndex(); char hwBuf[256]; const char* hwType = !hwActive ? "INACTIVE (software/CPU)" : isCuda ? 
"ACTIVE (CUDA/NVDEC zero-copy)" : "ACTIVE (D3D11VA/NVDEC cpu-nv12)"; snprintf(hwBuf, sizeof(hwBuf), "%s HW Decode: %s (GPU index: %d)", tag, hwType, hwGpu); g_log.add(hwBuf); printf("%s\n", hwBuf); } totalGrabMs += grabMs; grabCount++; if (grabMs > maxGrabMs) maxGrabMs = grabMs; // Run ALPR inference bool isNvidia = (ANSCENTER::EPLoader::Current().type == ANSCENTER::EngineType::NVIDIA_GPU); fprintf(stderr, "[Worker T%d] frame %d: calling inference %dx%d...\n", taskId, state.frameCount + 1, framePtr->cols, framePtr->rows); auto infStart = std::chrono::steady_clock::now(); std::string lpnResult, jpegImage; { std::unique_lock infLock(g_inferenceMutex, std::defer_lock); if (!isNvidia) infLock.lock(); ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); } fprintf(stderr, "[Worker T%d] frame %d: inference done, result len=%zu\n", taskId, state.frameCount + 1, lpnResult.size()); // Release stream lock — inference is done, CHAOS can now safely destroy. 
streamLock.unlock(); auto infEnd = std::chrono::steady_clock::now(); double infMs = std::chrono::duration(infEnd - infStart).count(); totalInfMs += infMs; infCount++; if (infMs > maxInfMs) maxInfMs = infMs; // Parse detections and draw on frame cv::Mat display = framePtr->clone(); int detCount = 0; std::string lastPlateText; if (!lpnResult.empty()) { try { boost::property_tree::ptree pt; std::stringstream ss(lpnResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& det = child.second; const auto class_name = GetData(det, "class_name"); const auto x = GetData(det, "x"); const auto y = GetData(det, "y"); const auto w = GetData(det, "width"); const auto h = GetData(det, "height"); cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h), cv::Scalar(0, 255, 0), 2); cv::putText(display, class_name, cv::Point((int)x, (int)y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2); lastPlateText = class_name; detCount++; } } catch (...) 
{} } // Update FPS (sliding window over last 2 seconds) auto now = std::chrono::steady_clock::now(); fpsTimestamps.push_back(now); while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) { fpsTimestamps.pop_front(); } double fps = fpsTimestamps.size() / 2.0; // Draw OSD on frame char osd[128]; snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d", taskId, fps, infMs, state.frameCount + 1); cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 255), 2); // Update shared state { std::lock_guard lk(state.mtx); state.displayFrame = display; state.fps = fps; state.inferenceMs = infMs; state.lastGrabMs = grabMs; state.lastInfMs = infMs; state.frameCount++; state.detectionCount += detCount; if (!lastPlateText.empty()) state.lastPlate = lastPlateText; } // Periodic logging (every 100 frames) if ((state.frameCount % 100) == 0) { double avgGrab = grabCount > 0 ? totalGrabMs / grabCount : 0; double avgInf = infCount > 0 ? totalInfMs / infCount : 0; double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count(); char buf[512]; snprintf(buf, sizeof(buf), "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | " "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d", tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs, (totalGrabMs / (elapsed * 1000.0)) * 100.0, (totalInfMs / (elapsed * 1000.0)) * 100.0, state.detectionCount); g_log.add(buf); printf("%s\n", buf); // Reset accumulators totalGrabMs = totalInfMs = 0; maxGrabMs = maxInfMs = 0; grabCount = infCount = 0; benchStart = std::chrono::steady_clock::now(); } delete framePtr; } g_log.add(prefix + " Worker loop exited"); } // ============================================================================= // ANSLPR_SingleTask_Test — 1 stream, 1 AI task. For isolating DirectML/ORT // issues on non-NVIDIA GPUs. If this works but 2-task crashes, it's concurrency. 
// ============================================================================= int ANSLPR_SingleTask_Test() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); g_log.init(); printf("\n"); printf("============================================================\n"); printf(" ANSLPR Single-Task Test — 1 Stream, 1 AI Task\n"); printf(" Press ESC to stop\n"); printf(" Log file: %s\n", LOG_FILE_PATH); printf("============================================================\n\n"); g_log.add("============================================================"); g_log.add(" ANSLPR Single-Task Test — 1 Stream, 1 AI Task"); g_log.add("============================================================"); const std::string streamUrl = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0"; g_log.add("Stream: " + streamUrl); // --- Create RTSP client --- ANSCENTER::ANSRTSPClient* rtspClient = nullptr; printf("[Stream0] Creating RTSP handle...\n"); int rtspResult = CreateANSRTSPHandle(&rtspClient, "", "", "", streamUrl.c_str()); if (rtspResult != 1 || rtspClient == nullptr) { printf("[Stream0] FAILED to create RTSP handle\n"); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return -1; } SetRTSPImageQuality(&rtspClient, 0); SetRTSPHWDecoding(&rtspClient, -1); // Force software decoding StartRTSP(&rtspClient); g_log.add("[Stream0] RTSP started (software decode)"); // --- Create single ALPR engine --- ANSCENTER::ANSALPR* alprHandle = nullptr; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; printf("[Task0] Creating ALPR handle...\n"); auto engineStart = std::chrono::steady_clock::now(); int createResult = CreateANSALPRHandle(&alprHandle, "", modelZipFile.c_str(), "", 1, 0.5, 0.5, 0.5); if (createResult != 1 || alprHandle == nullptr) { printf("[Task0] FAILED to create ALPR handle (result=%d)\n", createResult); StopRTSP(&rtspClient); ReleaseANSRTSPHandle(&rtspClient); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return -1; } 
printf("[Task0] Loading ALPR engine...\n"); int loadResult = LoadANSALPREngineHandle(&alprHandle); auto engineEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(engineEnd - engineStart).count(); if (loadResult != 1) { printf("[Task0] FAILED to load ALPR engine (result=%d)\n", loadResult); ReleaseANSALPRHandle(&alprHandle); StopRTSP(&rtspClient); ReleaseANSRTSPHandle(&rtspClient); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return -1; } printf("[Task0] Engine loaded in %.0f ms\n", loadMs); g_log.add("[Task0] Engine loaded in " + std::to_string((int)loadMs) + " ms"); // --- Single-task worker + display --- TaskState state; state.engineLoaded = true; state.streamOk = true; state.statusMsg = "Running"; std::mutex streamGuard; std::thread worker(ALPRWorkerThread, 0, &rtspClient, &streamGuard, alprHandle, std::ref(state)); const int cellW = 800, cellH = 600; const int logPanelH = 80; std::string windowTitle = "ANSLPR Single-Task Test"; cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); cv::resizeWindow(windowTitle, cellW, cellH + logPanelH); auto testStart = std::chrono::steady_clock::now(); while (g_running.load()) { cv::Mat canvas(cellH + logPanelH, cellW, CV_8UC3, cv::Scalar(30, 30, 30)); cv::Mat cell; double fps = 0, infMs = 0; int fCount = 0, dCount = 0; std::string lastPlate; { std::lock_guard lk(state.mtx); if (!state.displayFrame.empty()) cv::resize(state.displayFrame, cell, cv::Size(cellW, cellH)); fps = state.fps; infMs = state.inferenceMs; fCount = state.frameCount; dCount = state.detectionCount; lastPlate = state.lastPlate; } if (cell.empty()) cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::rectangle(cell, cv::Rect(0, cellH - 40, cellW, 40), cv::Scalar(0, 0, 0), cv::FILLED); char bar[256]; snprintf(bar, sizeof(bar), "T0 | %.1f FPS | %.0fms | F:%d | D:%d | %s", fps, infMs, fCount, dCount, lastPlate.empty() ? 
"-" : lastPlate.c_str()); cv::putText(cell, bar, cv::Point(5, cellH - 12), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1); cell.copyTo(canvas(cv::Rect(0, 0, cellW, cellH))); cv::Mat logPanel = canvas(cv::Rect(0, cellH, cellW, logPanelH)); logPanel.setTo(cv::Scalar(20, 20, 20)); auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); char header[256]; snprintf(header, sizeof(header), "Elapsed: %.0fs | 1 camera, 1 AI task | %.1f FPS | Press ESC to stop", elapsed, fps); cv::putText(logPanel, header, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); cv::imshow(windowTitle, canvas); if (cv::waitKey(30) == 27) { g_log.add("ESC pressed — stopping..."); printf("\nESC pressed — stopping...\n"); g_running.store(false); } } if (worker.joinable()) worker.join(); printf("\n============================================================\n"); printf(" FINAL SUMMARY\n"); printf(" Frames: %d | Detections: %d | FPS: %.1f | InfMs: %.0f\n", state.frameCount, state.detectionCount, state.fps, state.inferenceMs); printf("============================================================\n"); ReleaseANSALPRHandle(&alprHandle); StopRTSP(&rtspClient); ReleaseANSRTSPHandle(&rtspClient); g_log.close(); cv::destroyAllWindows(); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return 0; } // ============================================================================= // ANSLPR_CPU_StressTest — Lightweight 2-task stress test for CPU-only PCs // Uses ANSALPR_OD (engineType=1) which auto-falls-back to ONNX Runtime on CPU. // No VRAM tracking, no NVDEC alignment, no chaos thread. 
// ============================================================================= int ANSLPR_CPU_StressTest() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); g_log.init(); const int NUM_STREAMS = 2; const int NUM_TASKS = 2; printf("\n"); printf("============================================================\n"); printf(" ANSLPR CPU Stress Test — %d Parallel ALPR Tasks\n", NUM_TASKS); printf(" Press ESC to stop\n"); printf(" Log file: %s\n", LOG_FILE_PATH); printf("============================================================\n\n"); g_log.add("============================================================"); g_log.add(" ANSLPR CPU Stress Test — " + std::to_string(NUM_TASKS) + " Tasks"); g_log.add("============================================================"); // --- RTSP URLs (2 camera streams) --- const std::string streamUrls[NUM_STREAMS] = { "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0", "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0" }; const int taskStreamMap[NUM_TASKS] = { 0, 1 }; for (int i = 0; i < NUM_STREAMS; i++) g_log.add("Stream " + std::to_string(i) + ": " + streamUrls[i]); // --- Task states --- TaskState taskStates[NUM_TASKS]; // --- Create RTSP clients (software decoding) --- ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {}; for (int s = 0; s < NUM_STREAMS; s++) { printf("[Stream%d] Creating RTSP handle...\n", s); int result = CreateANSRTSPHandle(&rtspClients[s], "", "", "", streamUrls[s].c_str()); if (result != 1 || rtspClients[s] == nullptr) { printf("[Stream%d] FAILED to create RTSP handle\n", s); g_log.add("[Stream" + std::to_string(s) + "] RTSP create FAILED"); rtspClients[s] = nullptr; continue; } SetRTSPImageQuality(&rtspClients[s], 0); SetRTSPHWDecoding(&rtspClients[s], -1); // HW_DECODING_DISABLE: force software decoding StartRTSP(&rtspClients[s]); g_log.add("[Stream" + std::to_string(s) + "] RTSP started (software decode)"); } // --- Create ALPR engines (engineType=1 → ANSALPR_OD, 
auto CPU/GPU) --- ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {}; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; int engineType = 1; // ANSALPR_OD: auto CPU/GPU double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; for (int i = 0; i < NUM_TASKS; i++) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", i); int streamIdx = taskStreamMap[i]; if (rtspClients[streamIdx] == nullptr) { printf("%s Skipped — Stream%d not available\n", tag, streamIdx); continue; } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].streamOk = true; taskStates[i].statusMsg = "Loading ALPR engine..."; } printf("%s Creating ALPR handle...\n", tag); auto engineStart = std::chrono::steady_clock::now(); int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh); if (createResult != 1 || alprHandles[i] == nullptr) { printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult); g_log.add(std::string(tag) + " ALPR create FAILED"); continue; } printf("%s Loading ALPR engine...\n", tag); int loadResult = LoadANSALPREngineHandle(&alprHandles[i]); auto engineEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(engineEnd - engineStart).count(); if (loadResult != 1) { printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult); g_log.add(std::string(tag) + " Engine load FAILED"); ReleaseANSALPRHandle(&alprHandles[i]); alprHandles[i] = nullptr; continue; } char buf[256]; snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms (Stream%d)", tag, loadMs, streamIdx); printf("%s\n", buf); g_log.add(buf); { std::lock_guard lk(taskStates[i].mtx); taskStates[i].engineLoaded = true; taskStates[i].statusMsg = "Running"; } } // --- Launch worker threads --- std::mutex streamGuards[NUM_STREAMS]; std::thread workers[NUM_TASKS]; for (int i = 0; i < NUM_TASKS; i++) { int streamIdx = taskStreamMap[i]; if (rtspClients[streamIdx] && 
alprHandles[i]) { workers[i] = std::thread(ALPRWorkerThread, i, &rtspClients[streamIdx], &streamGuards[streamIdx], alprHandles[i], std::ref(taskStates[i])); } } // --- Display loop --- const int cellW = 640, cellH = 480; const int logPanelH = 120; const int gridCols = 2, gridRows = 1; std::string windowTitle = "ANSLPR CPU Stress Test"; cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); cv::resizeWindow(windowTitle, cellW * gridCols, cellH * gridRows + logPanelH); auto testStart = std::chrono::steady_clock::now(); while (g_running.load()) { cv::Mat canvas(cellH * gridRows + logPanelH, cellW * gridCols, CV_8UC3, cv::Scalar(30, 30, 30)); for (int i = 0; i < NUM_TASKS; i++) { int col = i % gridCols, row = i / gridCols; cv::Rect roi(col * cellW, row * cellH, cellW, cellH); cv::Mat cell; double fps = 0, infMs = 0; int fCount = 0, dCount = 0; std::string statusMsg, lastPlate; bool engineLoaded = false; { std::lock_guard lk(taskStates[i].mtx); if (!taskStates[i].displayFrame.empty()) cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); fps = taskStates[i].fps; infMs = taskStates[i].inferenceMs; fCount = taskStates[i].frameCount; dCount = taskStates[i].detectionCount; statusMsg = taskStates[i].statusMsg; lastPlate = taskStates[i].lastPlate; engineLoaded = taskStates[i].engineLoaded; } if (cell.empty()) { cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); } // Status bar cv::rectangle(cell, cv::Rect(0, cellH - 40, cellW, 40), cv::Scalar(0, 0, 0), cv::FILLED); char bar[256]; snprintf(bar, sizeof(bar), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s", i, taskStreamMap[i], fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); cv::Scalar barColor = engineLoaded ? 
cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); cv::putText(cell, bar, cv::Point(5, cellH - 12), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1); cell.copyTo(canvas(roi)); } // Grid line if (gridCols > 1) cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * gridRows), cv::Scalar(100, 100, 100), 1); // Log panel cv::Rect logRoi(0, cellH * gridRows, cellW * gridCols, logPanelH); cv::Mat logPanel = canvas(logRoi); logPanel.setTo(cv::Scalar(20, 20, 20)); auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); double totalFps = 0; for (int i = 0; i < NUM_TASKS; i++) { std::lock_guard lk(taskStates[i].mtx); totalFps += taskStates[i].fps; } char header[256]; snprintf(header, sizeof(header), "Elapsed: %.0fs | %d cameras, %d AI tasks | Total: %.1f FPS | Press ESC to stop", elapsed, NUM_STREAMS, NUM_TASKS, totalFps); cv::putText(logPanel, header, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); // Per-task summary for (int i = 0; i < NUM_TASKS; i++) { std::lock_guard lk(taskStates[i].mtx); char tLine[256]; snprintf(tLine, sizeof(tLine), "T%d(S%d): FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStreamMap[i], taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount); cv::putText(logPanel, tLine, cv::Point(10, 42 + i * 18), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1); } // Recent log auto recentLogs = g_log.getRecent(3); int logY = 42 + NUM_TASKS * 18 + 5; for (const auto& line : recentLogs) { if (logY > logPanelH - 5) break; std::string display = (line.size() > 120) ? line.substr(0, 117) + "..." 
: line; cv::putText(logPanel, display, cv::Point(10, logY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1); logY += 15; } cv::imshow(windowTitle, canvas); int key = cv::waitKey(30); if (key == 27) { g_log.add("ESC pressed — stopping..."); printf("\nESC pressed — stopping...\n"); g_running.store(false); } } // --- Wait for workers --- for (int i = 0; i < NUM_TASKS; i++) { if (workers[i].joinable()) workers[i].join(); } // --- Final summary --- double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); printf("\n============================================================\n"); printf(" FINAL SUMMARY (runtime: %.0fs)\n", totalElapsed); printf("============================================================\n"); double totalFpsFinal = 0; for (int i = 0; i < NUM_TASKS; i++) { char buf[256]; snprintf(buf, sizeof(buf), " Task %d (Stream %d): %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStreamMap[i], taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs); printf("%s\n", buf); g_log.add(buf); totalFpsFinal += taskStates[i].fps; } printf(" Total throughput: %.1f FPS\n", totalFpsFinal); printf("============================================================\n"); // --- Cleanup --- for (int i = 0; i < NUM_TASKS; i++) { if (alprHandles[i]) ReleaseANSALPRHandle(&alprHandles[i]); } for (int s = 0; s < NUM_STREAMS; s++) { if (rtspClients[s]) { StopRTSP(&rtspClients[s]); ReleaseANSRTSPHandle(&rtspClients[s]); } } g_log.close(); cv::destroyAllWindows(); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return 0; } int ANSLPR_MultiGPU_StressTest() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); // --- Initialize log file --- g_log.init(); printf("\n"); // --- Auto-detect GPU availability (safe on CPU-only PCs without CUDA runtime) --- int gpuCount = 0; bool hasGpu = false; if (IsCudaAvailable()) { cudaGetDeviceCount(&gpuCount); hasGpu = (gpuCount > 0); } const char* modeStr = hasGpu ? 
"GPU (NVIDIA CUDA)" : "CPU (Software Decoding)"; printf("\n"); printf("============================================================\n"); printf(" ANSLPR Multi-Engine Stress Test — 5 Parallel ALPR Tasks\n"); printf(" Mode: %s\n", modeStr); printf(" (4 cameras, 5 AI tasks — Task 4 shares Stream 2)\n"); printf(" Press ESC to stop\n"); printf(" Log file: %s\n", LOG_FILE_PATH); printf("============================================================\n\n"); g_log.add("============================================================"); g_log.add(" ANSLPR Multi-Engine Stress Test — 5 Parallel ALPR Tasks"); g_log.add(" Mode: " + std::string(modeStr)); g_log.add("============================================================"); // --- Log GPU info for diagnostics (safe on CPU — prints "no GPU found") --- LogGpuInfo(); // --- RTSP URLs (4 independent camera streams) --- const std::string rtspUrl0 = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0"; const std::string rtspUrl1 = "rtsp://cafe2471.ddns.net:600/rtsp/streaming?channel=01&subtype=0"; const std::string rtspUrl2 = "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0"; const std::string rtspUrl3 = "rtsp://bnunitttd.ddns.net:554/rtsp/streaming?channel=01&subtype=0"; g_log.add("Stream 0: " + rtspUrl0); g_log.add("Stream 1: " + rtspUrl1); g_log.add("Stream 2: " + rtspUrl2); g_log.add("Stream 3: " + rtspUrl3); // ========================================================================= // Architecture: Camera Process + AI Task Process (mimics LabVIEW) // ----------------------------------------------------------------------- // Camera Process: 4 independent RTSP streams acquire frames from cameras. // AI Task Process: 5 AI tasks subscribe to camera streams and run inference // in parallel. Multiple tasks can share one camera stream. // Task 4 subscribes to Stream 2 (nhathuocngoclinh) to demonstrate the // shared-camera subscription model used in LabVIEW. 
// ========================================================================= const int NUM_STREAMS = 4; const int NUM_TASKS = 5; // --- Task states --- TaskState taskStates[NUM_TASKS]; // ========================================================================= // CAMERA PROCESS: Create 4 independent RTSP readers (one per camera). // These form the camera acquisition layer that AI tasks subscribe to. // ========================================================================= ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {}; const std::string streamUrls[NUM_STREAMS] = { rtspUrl0, rtspUrl1, rtspUrl2, rtspUrl3 }; // Map: task index -> stream index // Tasks 0-3 map 1:1 to streams 0-3. // Task 4 subscribes to Stream 2 (nhathuocngoclinh) — shared camera. const int taskStreamMap[NUM_TASKS] = { 0, 1, 2, 3, 2 }; // Log task-to-stream subscription mapping g_log.add("--- AI Task -> Camera Stream subscription ---"); for (int i = 0; i < NUM_TASKS; i++) { g_log.add(" Task " + std::to_string(i) + " -> Stream " + std::to_string(taskStreamMap[i]) + " (" + streamUrls[taskStreamMap[i]] + ")"); } for (int s = 0; s < NUM_STREAMS; s++) { printf("[Stream%d] Creating RTSP handle for %s...\n", s, streamUrls[s].c_str()); g_log.add("[Stream" + std::to_string(s) + "] Creating RTSP handle for " + streamUrls[s]); int rtspResult = CreateANSRTSPHandle(&rtspClients[s], "", "", "", streamUrls[s].c_str()); if (rtspResult != 1 || rtspClients[s] == nullptr) { printf("[Stream%d] FAILED to create RTSP handle (result=%d)\n", s, rtspResult); g_log.add("[Stream" + std::to_string(s) + "] RTSP create FAILED"); rtspClients[s] = nullptr; continue; } SetRTSPImageQuality(&rtspClients[s], 0); if (hasGpu) SetRTSPHWDecoding(&rtspClients[s], 7); // CUDA HW decode only with GPU StartRTSP(&rtspClients[s]); g_log.add("[Stream" + std::to_string(s) + "] RTSP started"); } // ========================================================================= // AI TASK PROCESS: Create 5 ALPR engines sequentially. 
// Each AI task gets its own engine and subscribes to a camera stream. // Task 4 shares Stream 2 (nhathuocngoclinh) with Task 2 — demonstrating // the LabVIEW pattern where multiple AI tasks subscribe to one camera. // ========================================================================= ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {}; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; int engineType = 1; // ANSALPR_OD: auto-detects GPU/CPU, uses ONNX Runtime on CPU double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; for (int i = 0; i < NUM_TASKS; i++) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", i); int streamIdx = taskStreamMap[i]; if (rtspClients[streamIdx] == nullptr) { printf("%s Skipped — Stream%d not available\n", tag, streamIdx); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Stream not available"; continue; } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].streamOk = true; taskStates[i].statusMsg = "Loading ALPR engine..."; } printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType); g_log.add(std::string(tag) + " Creating ALPR handle..."); auto engineStart = std::chrono::steady_clock::now(); int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh); if (createResult != 1 || alprHandles[i] == nullptr) { printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult); g_log.add(std::string(tag) + " ALPR create FAILED"); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "ALPR create failed"; continue; } printf("%s Loading ALPR engine (%s)...\n", tag, hasGpu ? 
"TensorRT" : "CPU"); g_log.add(std::string(tag) + " Loading ALPR engine..."); // Snapshot VRAM before engine load to measure consumption (GPU only) std::vector vramBefore; if (hasGpu) vramBefore = GetPerGpuFreeMiB(); int loadResult = LoadANSALPREngineHandle(&alprHandles[i]); auto engineEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(engineEnd - engineStart).count(); if (loadResult != 1) { printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult); g_log.add(std::string(tag) + " Engine load FAILED"); ReleaseANSALPRHandle(&alprHandles[i]); alprHandles[i] = nullptr; std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Engine load failed"; continue; } int bestGpu = -1; size_t maxDelta = 0; if (hasGpu) { // Snapshot VRAM after engine load — find which GPU lost the most VRAM auto vramAfter = GetPerGpuFreeMiB(); size_t gpuCnt = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size(); bestGpu = 0; for (size_t g = 0; g < gpuCnt; g++) { size_t delta = (vramBefore[g] > vramAfter[g]) ? 
(vramBefore[g] - vramAfter[g]) : 0; if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; } } char buf[512]; snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Stream%d)", tag, loadMs, bestGpu, maxDelta, streamIdx); printf("%s\n", buf); g_log.add(buf); // Log per-GPU VRAM state after this engine load for (size_t g = 0; g < vramAfter.size(); g++) { size_t total = 0; auto gpus = QueryGpuVram(); if (g < gpus.size()) total = gpus[g].totalMiB; char vbuf[256]; snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total); printf("%s\n", vbuf); g_log.add(vbuf); } } else { char buf[256]; snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms (CPU mode, Stream%d)", tag, loadMs, streamIdx); printf("%s\n", buf); g_log.add(buf); } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].engineLoaded = true; taskStates[i].statusMsg = "Running"; taskStates[i].gpuDeviceId = bestGpu; taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024; } } // --- Align NVDEC decode GPU with inference GPU for NV12 zero-copy --- // (GPU only — software decoding on CPU doesn't use NVDEC) if (hasGpu) // Each stream should decode on the same GPU as its inference engine to enable // direct NVDEC→TensorRT zero-copy (0.5ms vs 17ms preprocess per frame). // // Strategy: For each stream, count how many tasks run on each GPU (vote). // Pick the GPU with the most tasks → maximises the number of NV12 zero-copy hits. // If tied, prefer to keep the current decode GPU to avoid a reconnect. // Additional tie-breaker: distribute streams across GPUs for decode load balance. 
{ int streamPreferredGpu[NUM_STREAMS]; for (int s = 0; s < NUM_STREAMS; s++) streamPreferredGpu[s] = -1; // Track how many streams have already been assigned to each GPU (for tie-breaking) std::map gpuStreamCount; for (int s = 0; s < NUM_STREAMS; s++) { if (!rtspClients[s]) continue; // Count votes: how many tasks on this stream use each GPU std::map gpuVotes; for (int i = 0; i < NUM_TASKS; i++) { if (taskStreamMap[i] == s && alprHandles[i]) { gpuVotes[taskStates[i].gpuDeviceId]++; } } if (gpuVotes.empty()) continue; // Find the GPU with the most votes int currentGpu = rtspClients[s]->GetHWDecodingGpuIndex(); int bestGpu = -1; int bestVotes = 0; for (auto& [gpu, votes] : gpuVotes) { if (votes > bestVotes) { bestVotes = votes; bestGpu = gpu; } else if (votes == bestVotes) { // Tie-break 1: prefer current decode GPU (avoids reconnect) if (gpu == currentGpu && bestGpu != currentGpu) { bestGpu = gpu; } // Tie-break 2: prefer GPU with fewer streams assigned (load balance) else if (bestGpu != currentGpu && gpu != currentGpu) { if (gpuStreamCount[gpu] < gpuStreamCount[bestGpu]) { bestGpu = gpu; } } } } streamPreferredGpu[s] = bestGpu; gpuStreamCount[bestGpu]++; char buf[512]; std::string voteStr; for (auto& [gpu, votes] : gpuVotes) { if (!voteStr.empty()) voteStr += ", "; voteStr += "GPU[" + std::to_string(gpu) + "]=" + std::to_string(votes); } snprintf(buf, sizeof(buf), "[Stream%d] GPU vote: {%s} -> preferred GPU[%d] (current: GPU[%d])", s, voteStr.c_str(), bestGpu, currentGpu); g_log.add(buf); printf("%s\n", buf); } // Apply alignment: reconnect streams whose NVDEC is on the wrong GPU. // IMPORTANT: If currentGpu == -1, the decoder hasn't initialized yet. // Do NOT reconnect — it disrupts the initial RTSP handshake and causes // 80+ seconds of empty frames. Just set preferredGpu; the decoder will // use it when it naturally initializes. 
for (int s = 0; s < NUM_STREAMS; s++) { if (rtspClients[s] && streamPreferredGpu[s] >= 0) { int currentGpu = rtspClients[s]->GetHWDecodingGpuIndex(); if (currentGpu < 0) { // Decoder not yet initialized — set preferred GPU without reconnect SetRTSPHWDecoding(&rtspClients[s], 7, streamPreferredGpu[s]); char buf[256]; snprintf(buf, sizeof(buf), "[Stream%d] NVDEC not yet initialized (GPU[-1]) -- set preferred GPU[%d] (no reconnect)", s, streamPreferredGpu[s]); g_log.add(buf); printf("%s\n", buf); } else if (currentGpu != streamPreferredGpu[s]) { // Decoder is active on wrong GPU — reconnect disabled for VRAM stability testing SetRTSPHWDecoding(&rtspClients[s], 7, streamPreferredGpu[s]); // ReconnectRTSP(&rtspClients[s]); // Disabled for long test char buf[256]; snprintf(buf, sizeof(buf), "[Stream%d] NVDEC GPU realigned: GPU[%d] -> GPU[%d] (reconnected for zero-copy)", s, currentGpu, streamPreferredGpu[s]); g_log.add(buf); printf("%s\n", buf); } else { char buf[256]; snprintf(buf, sizeof(buf), "[Stream%d] NVDEC GPU already on GPU[%d] (zero-copy OK)", s, currentGpu); g_log.add(buf); printf("%s\n", buf); } } } } // --- Enable deep pipeline benchmarking on all ALPR handles --- for (int i = 0; i < NUM_TASKS; i++) { if (alprHandles[i]) { alprHandles[i]->ActivateDebugger(true); } } g_log.add("Debug benchmarking ENABLED on all ALPR handles"); // --- Per-stream mutex: prevents CHAOS from destroying a stream while a // worker is mid-frame-grab or mid-inference (use-after-free fix). --- std::mutex streamGuards[NUM_STREAMS]; // --- Launch worker threads --- // Each AI task subscribes to its camera stream via taskStreamMap. // Tasks sharing a stream (e.g. Task 2 & Task 4 on Stream 2) both get // the same RTSP client pointer and share the stream's mutex guard. 
g_log.add("Launching " + std::to_string(NUM_TASKS) + " worker threads..."); std::thread workers[NUM_TASKS]; for (int i = 0; i < NUM_TASKS; i++) { int streamIdx = taskStreamMap[i]; if (rtspClients[streamIdx] && alprHandles[i]) { workers[i] = std::thread(ALPRWorkerThread, i, &rtspClients[streamIdx], &streamGuards[streamIdx], alprHandles[i], std::ref(taskStates[i])); } } // ========================================================================= // Camera Chaos Thread — simulates camera errors / reconnects // Mimics LabVIEW behavior: cameras randomly go into Error/Recovering // state, triggering Stop/Reconnect/Destroy+Recreate cycles that cause // CUDA cleanup (cuArrayDestroy, cuMemFree) while inference is running. // This is the exact scenario that triggers the nvcuda64 SRW lock deadlock. // ========================================================================= std::atomic chaosEnabled{false}; // Disabled for VRAM stability long test std::thread chaosThread([&]() { std::mt19937 rng(std::random_device{}()); // Wait 10 seconds for system to stabilize before starting chaos for (int i = 0; i < 100 && g_running.load(); i++) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } g_log.add("[CHAOS] Camera chaos thread started — every 10s, stop/destroy/recreate one camera (round-robin)"); printf("[CHAOS] Camera chaos thread started — 10s interval, round-robin across %d streams\n", NUM_STREAMS); int chaosCount = 0; int nextStream = 0; // Round-robin: cycle through streams 0,1,2,3,0,1,... 
while (g_running.load() && chaosEnabled.load()) { // Fixed 10-second interval between chaos events for (int s = 0; s < 100 && g_running.load(); s++) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } if (!g_running.load()) break; int streamIdx = nextStream; nextStream = (nextStream + 1) % NUM_STREAMS; chaosCount++; char buf[512]; auto chaosStart = std::chrono::steady_clock::now(); // Lock stream guard: wait for any in-flight inference to finish // before touching the RTSP client. This prevents use-after-free // when CHAOS destroys a stream while a worker is mid-inference. std::unique_lock chaosLock(streamGuards[streamIdx]); // Always use full DESTROY + RECREATE cycle. // Reconnect() reuses internal player state which can leave stale // CUDA resources and cause freezes. A clean destroy + recreate // guarantees a fresh decoder/player with no leftover state. { bool wasAlive = (rtspClients[streamIdx] != nullptr); snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: DESTROY + RECREATE (%s)", chaosCount, streamIdx, wasAlive ? 
"camera was running" : "camera was already offline"); g_log.add(buf); printf("%s\n", buf); // Stop and release old handle if it exists if (rtspClients[streamIdx]) { StopRTSP(&rtspClients[streamIdx]); ReleaseANSRTSPHandle(&rtspClients[streamIdx]); rtspClients[streamIdx] = nullptr; } // Release lock during offline sleep — worker sees nullptr and skips int offlineMs = 500 + (rng() % 2500); // 0.5 - 3 seconds offline chaosLock.unlock(); std::this_thread::sleep_for(std::chrono::milliseconds(offlineMs)); chaosLock.lock(); // Recreate the RTSP handle (under lock again) int result = CreateANSRTSPHandle(&rtspClients[streamIdx], "", "", "", streamUrls[streamIdx].c_str()); if (result == 1 && rtspClients[streamIdx]) { SetRTSPImageQuality(&rtspClients[streamIdx], 0); if (hasGpu) SetRTSPHWDecoding(&rtspClients[streamIdx], 7); StartRTSP(&rtspClients[streamIdx]); auto chaosEnd = std::chrono::steady_clock::now(); double chaosMs = std::chrono::duration(chaosEnd - chaosStart).count(); snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATED in %.0f ms (offline %d ms)", chaosCount, streamIdx, chaosMs, offlineMs); } else { snprintf(buf, sizeof(buf), "[CHAOS #%d] Stream%d: RECREATE FAILED (result=%d)", chaosCount, streamIdx, result); } g_log.add(buf); printf("%s\n", buf); } } g_log.add("[CHAOS] Camera chaos thread stopped (total events: " + std::to_string(chaosCount) + ")"); printf("[CHAOS] Camera chaos thread stopped (total events: %d)\n", chaosCount); }); // --- Display loop (main thread) --- // 3x2 grid layout: 5 tasks displayed in 3 columns x 2 rows const int cellW = 480, cellH = 360; // Smaller cells for 3-column layout const int logPanelH = 220; const int gridCols = 3, gridRows = 2; std::string windowTitle = hasGpu ? 
"ANSLPR Multi-GPU Stress Test" : "ANSLPR CPU Stress Test"; cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); cv::resizeWindow(windowTitle, cellW * gridCols, cellH * gridRows + logPanelH); auto testStart = std::chrono::steady_clock::now(); auto lastGpuSnapshot = std::chrono::steady_clock::now(); int snapshotCount = 0; while (g_running.load()) { // --- Periodic GPU/perf snapshot every 10 seconds (written to log file) --- auto now2 = std::chrono::steady_clock::now(); if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) { lastGpuSnapshot = now2; snapshotCount++; double elapsedSec = std::chrono::duration(now2 - testStart).count(); g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) + " (elapsed " + std::to_string((int)elapsedSec) + "s) ----"); // GPU VRAM auto gpuSnap = QueryGpuVram(); for (const auto& gs : gpuSnap) { char buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 
100.0 * gs.usedMiB / gs.totalMiB : 0.0); g_log.add(buf); } // Per-task stats double totalFpsSnap = 0; for (int t = 0; t < NUM_TASKS; t++) { std::lock_guard lk(taskStates[t].mtx); char buf[256]; snprintf(buf, sizeof(buf), " T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d", t, taskStreamMap[t], taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024), taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs, taskStates[t].frameCount, taskStates[t].detectionCount); g_log.add(buf); totalFpsSnap += taskStates[t].fps; } char buf[128]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap); g_log.add(buf); // Multi-GPU check std::set gpusUsed; for (int t = 0; t < NUM_TASKS; t++) { if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId); } if (gpusUsed.size() > 1) { g_log.add(" MULTI-GPU: YES — tasks distributed across " + std::to_string(gpusUsed.size()) + " GPUs"); } else if (!gpusUsed.empty()) { g_log.add(" MULTI-GPU: NO — all tasks on GPU[" + std::to_string(*gpusUsed.begin()) + "]"); } g_log.add("---- END SNAPSHOT ----"); } // Build 3x2 grid + log panel (5 tasks: 3 cols x 2 rows, cell [1][2] empty) cv::Mat canvas(cellH * gridRows + logPanelH, cellW * gridCols, CV_8UC3, cv::Scalar(30, 30, 30)); // Place each task's frame in its cell for (int i = 0; i < NUM_TASKS; i++) { int row = i / gridCols, col = i % gridCols; cv::Rect roi(col * cellW, row * cellH, cellW, cellH); cv::Mat cell; double fps = 0, infMs = 0; int fCount = 0, dCount = 0; int gpuId = -1; size_t vramMiB = 0; std::string statusMsg, lastPlate; bool engineLoaded = false, streamOk = false; { std::lock_guard lk(taskStates[i].mtx); if (!taskStates[i].displayFrame.empty()) { cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); } fps = taskStates[i].fps; infMs = taskStates[i].inferenceMs; fCount = taskStates[i].frameCount; dCount = taskStates[i].detectionCount; statusMsg = taskStates[i].statusMsg; 
lastPlate = taskStates[i].lastPlate; engineLoaded = taskStates[i].engineLoaded; streamOk = taskStates[i].streamOk; gpuId = taskStates[i].gpuDeviceId; vramMiB = taskStates[i].vramUsedBytes / (1024 * 1024); } if (cell.empty()) { cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); } // Draw status bar at bottom of each cell (2 lines) cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED); char bar1[256], bar2[256]; snprintf(bar1, sizeof(bar1), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s", i, taskStreamMap[i], fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); if (!hasGpu) { snprintf(bar2, sizeof(bar2), "CPU mode (software decoding)"); } else if (gpuId >= 0) { snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB); } else { snprintf(bar2, sizeof(bar2), "GPU: N/A"); } cv::Scalar barColor = engineLoaded ? 
cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.4, barColor, 1); cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 200, 255), 1); cell.copyTo(canvas(roi)); } // Draw grid lines for (int c = 1; c < gridCols; c++) cv::line(canvas, cv::Point(c * cellW, 0), cv::Point(c * cellW, cellH * gridRows), cv::Scalar(100, 100, 100), 1); for (int r = 1; r < gridRows; r++) cv::line(canvas, cv::Point(0, r * cellH), cv::Point(cellW * gridCols, r * cellH), cv::Scalar(100, 100, 100), 1); // --- Log panel at bottom --- cv::Rect logRoi(0, cellH * gridRows, cellW * gridCols, logPanelH); cv::Mat logPanel = canvas(logRoi); logPanel.setTo(cv::Scalar(20, 20, 20)); // Elapsed time header auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); char header[256]; snprintf(header, sizeof(header), "Elapsed: %.0fs | %d cameras, %d AI tasks | Press ESC to stop", elapsed, NUM_STREAMS, NUM_TASKS); cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); // Aggregate stats + per-task GPU summary double totalFps = 0; for (int i = 0; i < NUM_TASKS; i++) { std::lock_guard lk(taskStates[i].mtx); totalFps += taskStates[i].fps; } // Build dynamic task-GPU summary string std::string taskGpuStr; for (int i = 0; i < NUM_TASKS; i++) { if (i > 0) taskGpuStr += " "; taskGpuStr += "T" + std::to_string(i) + "(S" + std::to_string(taskStreamMap[i]) + "):GPU" + std::to_string(taskStates[i].gpuDeviceId); } char aggLine[512]; snprintf(aggLine, sizeof(aggLine), "Total: %.1f FPS | %s", totalFps, taskGpuStr.c_str()); cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 255, 255), 1); // Real-time GPU VRAM monitor (query every frame — cheap call) auto gpuSnaps = QueryGpuVram(); int gpuLineY = 58; for (const auto& gs : gpuSnaps) { // Count tasks on this GPU and their total 
VRAM int tasksOnGpu = 0; size_t taskVramMiB = 0; for (int i = 0; i < NUM_TASKS; i++) { std::lock_guard lk(taskStates[i].mtx); if (taskStates[i].gpuDeviceId == gs.deviceId) { tasksOnGpu++; taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024); } } char gpuLine[256]; snprintf(gpuLine, sizeof(gpuLine), "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB); cv::putText(logPanel, gpuLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(100, 255, 100), 1); gpuLineY += 18; } // Per-task resource line (shows which stream each task subscribes to) for (int i = 0; i < NUM_TASKS; i++) { std::lock_guard lk(taskStates[i].mtx); char tLine[256]; snprintf(tLine, sizeof(tLine), "T%d(S%d): GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStreamMap[i], taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount); cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1); gpuLineY += 16; } // Recent log lines (remaining space) auto recentLogs = g_log.getRecent(4); for (const auto& line : recentLogs) { if (gpuLineY > logPanelH - 5) break; std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." 
: line; cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1); gpuLineY += 15; } cv::imshow(windowTitle, canvas); int key = cv::waitKey(30); if (key == 27) { // ESC g_log.add("ESC pressed — stopping all tasks..."); printf("\nESC pressed — stopping...\n"); g_running.store(false); } } // --- Stop chaos thread --- chaosEnabled.store(false); if (chaosThread.joinable()) chaosThread.join(); // --- Wait for all workers --- printf("Waiting for %d worker threads to finish...\n", NUM_TASKS); for (int i = 0; i < NUM_TASKS; i++) { if (workers[i].joinable()) workers[i].join(); } // --- Print final summary (console + log file) --- double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); g_log.add("================================================================"); g_log.add(" FINAL PERFORMANCE SUMMARY"); g_log.add(" " + std::to_string(NUM_STREAMS) + " cameras, " + std::to_string(NUM_TASKS) + " AI tasks"); g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds"); g_log.add("================================================================"); printf("\n============================================================\n"); printf(" FINAL PERFORMANCE SUMMARY (runtime: %.0fs)\n", totalElapsed); printf(" %d cameras, %d AI tasks\n", NUM_STREAMS, NUM_TASKS); printf("============================================================\n"); double totalFpsFinal = 0; for (int i = 0; i < NUM_TASKS; i++) { char buf[512]; snprintf(buf, sizeof(buf), " Task %d (Stream %d): GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStreamMap[i], taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs); printf("%s\n", buf); g_log.add(buf); totalFpsFinal += taskStates[i].fps; } auto finalGpu = QueryGpuVram(); for (const auto& gs : finalGpu) { char 
buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0); printf("%s\n", buf); g_log.add(buf); } // Multi-GPU verdict std::set finalGpusUsed; for (int i = 0; i < NUM_TASKS; i++) { if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId); } { char buf[256]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across %d tasks (%d cameras)", totalFpsFinal, NUM_TASKS, NUM_STREAMS); printf("%s\n", buf); g_log.add(buf); } if (finalGpusUsed.size() > 1) { char buf[128]; snprintf(buf, sizeof(buf), " MULTI-GPU: YES — tasks on %zu different GPUs", finalGpusUsed.size()); printf("%s\n", buf); g_log.add(buf); } else if (!finalGpusUsed.empty()) { char buf[128]; snprintf(buf, sizeof(buf), " MULTI-GPU: NO — all tasks on GPU[%d] only", *finalGpusUsed.begin()); printf("%s\n", buf); g_log.add(buf); g_log.add(" DIAGNOSIS: Engine pool sees only 1 GPU. On dual-GPU systems, check:"); g_log.add(" 1. Both GPUs visible to CUDA (nvidia-smi shows 2 devices)"); g_log.add(" 2. TRT engine files are compatible with both GPU architectures"); g_log.add(" 3. 
No CUDA_VISIBLE_DEVICES env var restricting GPU access"); } // Log shared-camera subscription info g_log.add(" Camera subscription: Task 2 and Task 4 both subscribe to Stream 2 (nhathuocngoclinh)"); printf("============================================================\n"); g_log.add("================================================================"); g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH)); g_log.add("================================================================"); // --- Release all handles (sequentially on main thread) --- for (int i = 0; i < NUM_TASKS; i++) { if (alprHandles[i]) { ReleaseANSALPRHandle(&alprHandles[i]); } } for (int s = 0; s < NUM_STREAMS; s++) { if (rtspClients[s]) { StopRTSP(&rtspClients[s]); ReleaseANSRTSPHandle(&rtspClients[s]); } } g_log.close(); cv::destroyAllWindows(); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return 0; } // ============================================================================= // VideoPlayer-based worker thread for SimulatedCam stress test // Same structure as ALPRWorkerThread but uses ANSVideoPlayer instead of ANSRTSP // ============================================================================= static void ALPRWorkerThread_VideoPlayer(int taskId, ANSCENTER::ANSVIDEOPLAYER* vpClient, ANSCENTER::ANSALPR* alprHandle, TaskState& state) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", taskId); std::string prefix(tag); g_log.add(prefix + " Worker thread started"); printf("%s Worker thread started\n", tag); int width = 0, height = 0; int64_t pts = 0; int emptyFrames = 0; std::string cameraId = "Cam" + std::to_string(taskId); // FPS tracking with sliding window std::deque fpsTimestamps; // Timing accumulators for periodic benchmarking double totalGrabMs = 0, totalInfMs = 0; int grabCount = 0, infCount = 0; double maxGrabMs = 0, maxInfMs = 0; auto benchStart = std::chrono::steady_clock::now(); while (g_running.load()) { // Read frame from VideoPlayer auto grabStart = 
std::chrono::steady_clock::now(); cv::Mat* framePtr = nullptr; GetVideoPlayerCVImage(&vpClient, width, height, pts, &framePtr); auto grabEnd = std::chrono::steady_clock::now(); double grabMs = std::chrono::duration(grabEnd - grabStart).count(); if (framePtr == nullptr || framePtr->empty()) { emptyFrames++; if (emptyFrames % 100 == 1) { g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")"); } if (emptyFrames > 300) { g_log.add(prefix + " Too many empty frames, attempting reconnect..."); ReconnectVideoPlayer(&vpClient); emptyFrames = 0; } if (framePtr) delete framePtr; std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } emptyFrames = 0; totalGrabMs += grabMs; grabCount++; if (grabMs > maxGrabMs) maxGrabMs = grabMs; // Run ALPR inference auto infStart = std::chrono::steady_clock::now(); std::string lpnResult, jpegImage; // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup() // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create // a different pointer the registry doesn't know, breaking NV12 zero-copy. 
ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); auto infEnd = std::chrono::steady_clock::now(); double infMs = std::chrono::duration(infEnd - infStart).count(); totalInfMs += infMs; infCount++; if (infMs > maxInfMs) maxInfMs = infMs; // Parse detections and draw on frame cv::Mat display = framePtr->clone(); int detCount = 0; std::string lastPlateText; if (!lpnResult.empty()) { try { boost::property_tree::ptree pt; std::stringstream ss(lpnResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& det = child.second; const auto class_name = GetData(det, "class_name"); const auto x = GetData(det, "x"); const auto y = GetData(det, "y"); const auto w = GetData(det, "width"); const auto h = GetData(det, "height"); cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h), cv::Scalar(0, 255, 0), 2); cv::putText(display, class_name, cv::Point((int)x, (int)y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2); lastPlateText = class_name; detCount++; } } catch (...) 
{} } // Update FPS (sliding window over last 2 seconds) auto now = std::chrono::steady_clock::now(); fpsTimestamps.push_back(now); while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) { fpsTimestamps.pop_front(); } double fps = fpsTimestamps.size() / 2.0; // Draw OSD on frame char osd[128]; snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d", taskId, fps, infMs, state.frameCount + 1); cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 255), 2); // Update shared state { std::lock_guard lk(state.mtx); state.displayFrame = display; state.fps = fps; state.inferenceMs = infMs; state.lastGrabMs = grabMs; state.lastInfMs = infMs; state.frameCount++; state.detectionCount += detCount; if (!lastPlateText.empty()) state.lastPlate = lastPlateText; } // Periodic logging (every 100 frames) if ((state.frameCount % 100) == 0) { double avgGrab = grabCount > 0 ? totalGrabMs / grabCount : 0; double avgInf = infCount > 0 ? totalInfMs / infCount : 0; double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count(); char buf[512]; snprintf(buf, sizeof(buf), "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | " "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d", tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs, (totalGrabMs / (elapsed * 1000.0)) * 100.0, (totalInfMs / (elapsed * 1000.0)) * 100.0, state.detectionCount); g_log.add(buf); printf("%s\n", buf); // Reset accumulators totalGrabMs = totalInfMs = 0; maxGrabMs = maxInfMs = 0; grabCount = infCount = 0; benchStart = std::chrono::steady_clock::now(); } delete framePtr; } g_log.add(prefix + " Worker loop exited"); } // ============================================================================= // ANSLPR_MultiGPU_StressTest_SimulatedCam // Same structure as ANSLPR_MultiGPU_StressTest but uses local video files // via ANSVideoPlayer instead of live RTSP streams. 
// =============================================================================
// Entry point: stress test mirroring ANSLPR_MultiGPU_StressTest, but the four
// ALPR tasks are fed from local video files (ANSVideoPlayer) instead of RTSP.
// Flow: create 4 players -> load 4 engines sequentially (measuring per-GPU
// VRAM deltas) -> start playback -> launch workers -> display loop until ESC.
int ANSLPR_MultiGPU_StressTest_SimulatedCam() {
    ANSCENTER::ANSOPENCV::InitCameraNetwork();
    // --- Initialize log file ---
    g_log.init();
    printf("\n");
    printf("============================================================\n");
    printf(" ANSLPR Multi-GPU Stress Test (Simulated Cam)\n");
    printf(" Using local video files via ANSVideoPlayer\n");
    printf(" Press ESC to stop\n");
    printf(" Log file: %s\n", LOG_FILE_PATH);
    printf("============================================================\n\n");
    g_log.add("============================================================");
    g_log.add(" ANSLPR Multi-GPU Stress Test (Simulated Cam)");
    g_log.add(" Using ANSVideoPlayer with local video files");
    g_log.add("============================================================");
    // --- Log GPU info for diagnostics ---
    LogGpuInfo();
    // --- Video file paths (4 files, one per task) ---
    const std::string videoFile0 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4";
    const std::string videoFile1 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_1.mp4";
    const std::string videoFile2 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_2.mp4";
    const std::string videoFile3 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_3.mp4";
    g_log.add("Video 0: " + videoFile0);
    g_log.add("Video 1: " + videoFile1);
    g_log.add("Video 2: " + videoFile2);
    g_log.add("Video 3: " + videoFile3);
    // --- Task states ---
    TaskState taskStates[4];
    // =========================================================================
    // Create 4 VideoPlayer readers — one per task
    // =========================================================================
    const int NUM_STREAMS = 4;
    ANSCENTER::ANSVIDEOPLAYER* vpClients[NUM_STREAMS] = {};
    const std::string videoFiles[NUM_STREAMS] = { videoFile0, videoFile1, videoFile2, videoFile3 };
    // 1:1 task -> video mapping (unlike the RTSP test, no shared streams here)
    const int taskStreamMap[4] = { 0, 1, 2, 3 };
    for (int s = 0; s < NUM_STREAMS; s++) {
        printf("[Stream%d] Creating VideoPlayer for %s\n", s, videoFiles[s].c_str());
        g_log.add("[Stream" + std::to_string(s) + "] Creating VideoPlayer for " + videoFiles[s]);
        int result = CreateANSVideoPlayerHandle(&vpClients[s], "", videoFiles[s].c_str());
        if (result != 1 || vpClients[s] == nullptr) {
            printf("[Stream%d] FAILED to create VideoPlayer (result=%d)\n", s, result);
            g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer create FAILED");
            vpClients[s] = nullptr;
            continue;
        }
        // Don't call StartVideoPlayer here — play() will be called just before worker threads
        // launch, so the video doesn't play to completion during the ~16s engine loading phase.
        SetVideoPlayerDisplayResolution(&vpClients[s], 1920, 1080);
        g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer created (display: 1920x1080)");
    }
    // =========================================================================
    // Create 4 ALPR engines sequentially
    // =========================================================================
    ANSCENTER::ANSALPR* alprHandles[4] = {};
    std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip";
    int engineType = 1; // NVIDIA_GPU
    double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5;
    for (int i = 0; i < 4; i++) {
        char tag[32];
        snprintf(tag, sizeof(tag), "[Task%d]", i);
        int streamIdx = taskStreamMap[i];
        if (vpClients[streamIdx] == nullptr) {
            printf("%s Skipped — Stream%d not available\n", tag, streamIdx);
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Stream not available";
            continue;
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].streamOk = true;
            taskStates[i].statusMsg = "Loading ALPR engine...";
        }
        printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType);
        g_log.add(std::string(tag) + " Creating ALPR handle...");
        auto engineStart = std::chrono::steady_clock::now();
        int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh);
        if (createResult != 1 || alprHandles[i] == nullptr) {
            printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult);
            g_log.add(std::string(tag) + " ALPR create FAILED");
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "ALPR create failed";
            continue;
        }
        printf("%s Loading ALPR engine (TensorRT)...\n", tag);
        g_log.add(std::string(tag) + " Loading ALPR engine...");
        // Snapshot VRAM before engine load to measure consumption
        auto vramBefore = GetPerGpuFreeMiB();
        int loadResult = LoadANSALPREngineHandle(&alprHandles[i]);
        auto engineEnd = std::chrono::steady_clock::now();
        double loadMs = std::chrono::duration(engineEnd - engineStart).count();
        if (loadResult != 1) {
            printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult);
            g_log.add(std::string(tag) + " Engine load FAILED");
            ReleaseANSALPRHandle(&alprHandles[i]);
            alprHandles[i] = nullptr;
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].statusMsg = "Engine load failed";
            continue;
        }
        // Snapshot VRAM after engine load — find which GPU lost the most VRAM
        // (heuristic: the engine landed on whichever device's free MiB dropped
        // the most during the load; concurrent VRAM users would skew this)
        auto vramAfter = GetPerGpuFreeMiB();
        int bestGpu = 0;
        size_t maxDelta = 0;
        size_t gpuCount = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size();
        for (size_t g = 0; g < gpuCount; g++) {
            size_t delta = (vramBefore[g] > vramAfter[g]) ? (vramBefore[g] - vramAfter[g]) : 0;
            if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
        }
        char buf[512];
        snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Video%d)", tag, loadMs, bestGpu, maxDelta, streamIdx);
        printf("%s\n", buf);
        g_log.add(buf);
        // Log per-GPU VRAM state after this engine load
        for (size_t g = 0; g < vramAfter.size(); g++) {
            size_t total = 0;
            if (g < vramBefore.size()) {
                auto gpus = QueryGpuVram();
                if (g < gpus.size()) total = gpus[g].totalMiB;
            }
            char vbuf[256];
            snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total);
            printf("%s\n", vbuf);
            g_log.add(vbuf);
        }
        {
            std::lock_guard lk(taskStates[i].mtx);
            taskStates[i].engineLoaded = true;
            taskStates[i].statusMsg = "Running";
            taskStates[i].gpuDeviceId = bestGpu;
            taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024;
        }
    }
    // --- No NVDEC realignment needed — ANSVideoPlayer uses cv::VideoCapture (CPU decode) ---
    // --- Enable deep pipeline benchmarking on all ALPR handles ---
    for (int i = 0; i < 4; i++) {
        if (alprHandles[i]) { alprHandles[i]->ActivateDebugger(true); }
    }
    g_log.add("Debug benchmarking ENABLED on all ALPR handles");
    // --- Start video playback NOW (just before workers need frames) ---
    // This avoids the video playing to completion during the ~16s engine loading phase.
    for (int s = 0; s < NUM_STREAMS; s++) {
        if (vpClients[s]) {
            StartVideoPlayer(&vpClients[s]);
            g_log.add("[Stream" + std::to_string(s) + "] VideoPlayer play() started");
        }
    }
    // --- Launch worker threads ---
    g_log.add("Launching worker threads...");
    std::thread workers[4];
    for (int i = 0; i < 4; i++) {
        int streamIdx = taskStreamMap[i];
        // Only launch when both the player and the engine came up
        if (vpClients[streamIdx] && alprHandles[i]) {
            workers[i] = std::thread(ALPRWorkerThread_VideoPlayer, i, vpClients[streamIdx], alprHandles[i], std::ref(taskStates[i]));
        }
    }
    // --- Display loop (main thread) ---
    const int cellW = 640, cellH = 480;
    const int logPanelH = 200;
    const char* windowName = "ANSLPR Multi-GPU Stress Test (Simulated Cam)";
    cv::namedWindow(windowName, cv::WINDOW_NORMAL);
    cv::resizeWindow(windowName, cellW * 2, cellH * 2 + logPanelH);
    auto testStart = std::chrono::steady_clock::now();
    auto lastGpuSnapshot = std::chrono::steady_clock::now();
    int snapshotCount = 0;
    while (g_running.load()) {
        // --- Periodic GPU/perf snapshot every 10 seconds ---
        auto now2 = std::chrono::steady_clock::now();
        if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) {
            lastGpuSnapshot = now2;
            snapshotCount++;
            double elapsedSec = std::chrono::duration(now2 - testStart).count();
            g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) + " (elapsed " + std::to_string((int)elapsedSec) + "s) ----");
            auto gpuSnap = QueryGpuVram();
            for (const auto& gs : gpuSnap) {
                char buf[256];
                snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0);
                g_log.add(buf);
            }
            double totalFpsSnap = 0;
            for (int t = 0; t < 4; t++) {
                std::lock_guard lk(taskStates[t].mtx);
                char buf[256];
                snprintf(buf, sizeof(buf), " T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d", t, taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024), taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs, taskStates[t].frameCount, taskStates[t].detectionCount);
                g_log.add(buf);
                totalFpsSnap += taskStates[t].fps;
            }
            char buf[128];
            snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap);
            g_log.add(buf);
            // Multi-GPU check: count distinct device ids across tasks
            std::set gpusUsed;
            for (int t = 0; t < 4; t++) {
                if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId);
            }
            if (gpusUsed.size() > 1) {
                g_log.add(" MULTI-GPU: YES — tasks distributed across " + std::to_string(gpusUsed.size()) + " GPUs");
            } else if (!gpusUsed.empty()) {
                g_log.add(" MULTI-GPU: NO — all tasks on GPU[" + std::to_string(*gpusUsed.begin()) + "]");
            }
            g_log.add("---- END SNAPSHOT ----");
        }
        // Build 2x2 grid + log panel
        cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30));
        for (int i = 0; i < 4; i++) {
            int row = i / 2, col = i % 2;
            cv::Rect roi(col * cellW, row * cellH, cellW, cellH);
            cv::Mat cell;
            // Copy the fields needed for rendering while holding the lock,
            // then draw without it
            double fps = 0, infMs = 0;
            int fCount = 0, dCount = 0;
            int gpuId = -1;
            size_t vramMiB = 0;
            std::string statusMsg, lastPlate;
            bool engineLoaded = false, streamOk = false;
            {
                std::lock_guard lk(taskStates[i].mtx);
                if (!taskStates[i].displayFrame.empty()) {
                    cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH));
                }
                fps = taskStates[i].fps;
                infMs = taskStates[i].inferenceMs;
                fCount = taskStates[i].frameCount;
                dCount = taskStates[i].detectionCount;
                statusMsg = taskStates[i].statusMsg;
                lastPlate = taskStates[i].lastPlate;
                engineLoaded = taskStates[i].engineLoaded;
                streamOk = taskStates[i].streamOk;
                gpuId = taskStates[i].gpuDeviceId;
                vramMiB = taskStates[i].vramUsedBytes / (1024 * 1024);
            }
            if (cell.empty()) {
                cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
                cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
            }
            // Status bar at the bottom of each cell (2 lines)
            cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED);
            char bar1[256], bar2[256];
            snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s", i, fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str());
            if (gpuId >= 0) {
                snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB);
            } else {
                snprintf(bar2, sizeof(bar2), "GPU: N/A");
            }
            cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
            cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1);
            cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1);
            cell.copyTo(canvas(roi));
            cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2), cv::Scalar(100, 100, 100), 1);
            cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH), cv::Scalar(100, 100, 100), 1);
        }
        // --- Log panel at bottom ---
        cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH);
        cv::Mat logPanel = canvas(logRoi);
        logPanel.setTo(cv::Scalar(20, 20, 20));
        auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count();
        char header[128];
        snprintf(header, sizeof(header), "Elapsed: %.0fs | Simulated Cam (VideoPlayer) | Press ESC to stop", elapsed);
        cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1);
        double totalFps = 0;
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            totalFps += taskStates[i].fps;
        }
        char aggLine[256];
        snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d", totalFps, taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId, taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId);
        cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1);
        // Real-time GPU VRAM monitor
        auto gpuSnaps = QueryGpuVram();
        int gpuLineY = 58;
        for (const auto& gs : gpuSnaps) {
            int tasksOnGpu = 0;
            size_t taskVramMiB = 0;
            for (int i = 0; i < 4; i++) {
                std::lock_guard lk(taskStates[i].mtx);
                if (taskStates[i].gpuDeviceId == gs.deviceId) {
                    tasksOnGpu++;
                    taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024);
                }
            }
            char gpuLine[256];
            snprintf(gpuLine, sizeof(gpuLine), "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB);
            cv::putText(logPanel, gpuLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(100, 255, 100), 1);
            gpuLineY += 18;
        }
        // Per-task resource line
        for (int i = 0; i < 4; i++) {
            std::lock_guard lk(taskStates[i].mtx);
            char tLine[256];
            snprintf(tLine, sizeof(tLine), "T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount);
            cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1);
            gpuLineY += 16;
        }
        // Recent log lines fill the remaining panel space
        auto recentLogs = g_log.getRecent(4);
        for (const auto& line : recentLogs) {
            if (gpuLineY > logPanelH - 5) break;
            std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." : line;
            cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1);
            gpuLineY += 15;
        }
        cv::imshow(windowName, canvas);
        int key = cv::waitKey(30);
        if (key == 27) { // ESC
            g_log.add("ESC pressed — stopping all tasks...");
            printf("\nESC pressed — stopping...\n");
            g_running.store(false);
        }
    }
    // --- Wait for all workers ---
    printf("Waiting for worker threads to finish...\n");
    for (int i = 0; i < 4; i++) {
        if (workers[i].joinable()) workers[i].join();
    }
    // --- Print final summary ---
    double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count();
    g_log.add("================================================================");
    g_log.add(" FINAL PERFORMANCE SUMMARY (Simulated Cam)");
    g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds");
    g_log.add("================================================================");
    printf("\n============================================================\n");
    printf(" FINAL PERFORMANCE SUMMARY — Simulated Cam (runtime: %.0fs)\n", totalElapsed);
    printf("============================================================\n");
    double totalFpsFinal = 0;
    for (int i = 0; i < 4; i++) {
        char buf[512];
        snprintf(buf, sizeof(buf), " Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs);
        printf("%s\n", buf);
        g_log.add(buf);
        totalFpsFinal += taskStates[i].fps;
    }
    auto finalGpu = QueryGpuVram();
    for (const auto& gs : finalGpu) {
        char buf[256];
        snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ?
100.0 * gs.usedMiB / gs.totalMiB : 0.0); printf("%s\n", buf); g_log.add(buf); } std::set finalGpusUsed; for (int i = 0; i < 4; i++) { if (taskStates[i].gpuDeviceId >= 0) finalGpusUsed.insert(taskStates[i].gpuDeviceId); } { char buf[256]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal); printf("%s\n", buf); g_log.add(buf); } if (finalGpusUsed.size() > 1) { char buf[128]; snprintf(buf, sizeof(buf), " MULTI-GPU: YES — tasks on %zu different GPUs", finalGpusUsed.size()); printf("%s\n", buf); g_log.add(buf); } else if (!finalGpusUsed.empty()) { char buf[128]; snprintf(buf, sizeof(buf), " MULTI-GPU: NO — all tasks on GPU[%d] only", *finalGpusUsed.begin()); printf("%s\n", buf); g_log.add(buf); } printf("============================================================\n"); g_log.add("================================================================"); g_log.add(" Log saved to: " + std::string(LOG_FILE_PATH)); g_log.add("================================================================"); // --- Release all handles --- for (int i = 0; i < 4; i++) { if (alprHandles[i]) { ReleaseANSALPRHandle(&alprHandles[i]); } } for (int s = 0; s < NUM_STREAMS; s++) { if (vpClients[s]) { StopVideoPlayer(&vpClients[s]); ReleaseANSVideoPlayerHandle(&vpClients[s]); } } g_log.close(); cv::destroyAllWindows(); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return 0; } // ============================================================================= // Worker thread for FilePlayer-based stress test (uses ANSFILEPLAYER) // Key difference from VideoPlayer worker: uses GetFilePlayerCVImage/ReconnectFilePlayer // ============================================================================= static void ALPRWorkerThread_FilePlayer(int taskId, ANSCENTER::ANSFILEPLAYER* fpClient, ANSCENTER::ANSALPR* alprHandle, TaskState& state) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", taskId); std::string prefix(tag); g_log.add(prefix + " Worker thread started"); printf("%s 
Worker thread started\n", tag); int width = 0, height = 0; int64_t pts = 0; int emptyFrames = 0; std::string cameraId = "Cam" + std::to_string(taskId); std::deque fpsTimestamps; double totalGrabMs = 0, totalInfMs = 0; int grabCount = 0, infCount = 0; double maxGrabMs = 0, maxInfMs = 0; auto benchStart = std::chrono::steady_clock::now(); while (g_running.load()) { auto grabStart = std::chrono::steady_clock::now(); cv::Mat* framePtr = nullptr; GetFilePlayerCVImage(&fpClient, width, height, pts, &framePtr); auto grabEnd = std::chrono::steady_clock::now(); double grabMs = std::chrono::duration(grabEnd - grabStart).count(); if (framePtr == nullptr || framePtr->empty()) { emptyFrames++; if (emptyFrames % 100 == 1) { g_log.add(prefix + " Empty frame (count=" + std::to_string(emptyFrames) + ")"); } if (emptyFrames > 300) { g_log.add(prefix + " Too many empty frames, attempting reconnect..."); ReconnectFilePlayer(&fpClient); emptyFrames = 0; } if (framePtr) delete framePtr; std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } emptyFrames = 0; totalGrabMs += grabMs; grabCount++; if (grabMs > maxGrabMs) maxGrabMs = grabMs; auto infStart = std::chrono::steady_clock::now(); std::string lpnResult, jpegImage; // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup() // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create // a different pointer the registry doesn't know, breaking NV12 zero-copy. 
ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); auto infEnd = std::chrono::steady_clock::now(); double infMs = std::chrono::duration(infEnd - infStart).count(); totalInfMs += infMs; infCount++; if (infMs > maxInfMs) maxInfMs = infMs; cv::Mat display = framePtr->clone(); int detCount = 0; std::string lastPlateText; if (!lpnResult.empty()) { try { boost::property_tree::ptree pt; std::stringstream ss(lpnResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& det = child.second; const auto class_name = GetData(det, "class_name"); const auto x = GetData(det, "x"); const auto y = GetData(det, "y"); const auto w = GetData(det, "width"); const auto h = GetData(det, "height"); cv::rectangle(display, cv::Rect((int)x, (int)y, (int)w, (int)h), cv::Scalar(0, 255, 0), 2); cv::putText(display, class_name, cv::Point((int)x, (int)y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2); lastPlateText = class_name; detCount++; } } catch (...) {} } auto now = std::chrono::steady_clock::now(); fpsTimestamps.push_back(now); while (!fpsTimestamps.empty() && std::chrono::duration(now - fpsTimestamps.front()).count() > 2.0) { fpsTimestamps.pop_front(); } double fps = fpsTimestamps.size() / 2.0; char osd[128]; snprintf(osd, sizeof(osd), "Task%d | %.1f FPS | Inf: %.0f ms | #%d", taskId, fps, infMs, state.frameCount + 1); cv::putText(display, osd, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 255), 2); { std::lock_guard lk(state.mtx); state.displayFrame = display; state.fps = fps; state.inferenceMs = infMs; state.lastGrabMs = grabMs; state.lastInfMs = infMs; state.frameCount++; state.detectionCount += detCount; if (!lastPlateText.empty()) state.lastPlate = lastPlateText; } if ((state.frameCount % 100) == 0) { double avgGrab = grabCount > 0 ? 
totalGrabMs / grabCount : 0; double avgInf = infCount > 0 ? totalInfMs / infCount : 0; double elapsed = std::chrono::duration( std::chrono::steady_clock::now() - benchStart).count(); char buf[512]; snprintf(buf, sizeof(buf), "%s Frame %d | FPS=%.1f | Grab: avg=%.1fms max=%.0fms | Inf: avg=%.1fms max=%.0fms | " "GrabPct=%.0f%% InfPct=%.0f%% | Det=%d", tag, state.frameCount, fps, avgGrab, maxGrabMs, avgInf, maxInfMs, (totalGrabMs / (elapsed * 1000.0)) * 100.0, (totalInfMs / (elapsed * 1000.0)) * 100.0, state.detectionCount); g_log.add(buf); printf("%s\n", buf); totalGrabMs = totalInfMs = 0; maxGrabMs = maxInfMs = 0; grabCount = infCount = 0; benchStart = std::chrono::steady_clock::now(); } delete framePtr; } g_log.add(prefix + " Worker loop exited"); } // ============================================================================= // ANSLPR_MultiGPU_StressTest_FilePlayer // Same as SimulatedCam but uses ANSFILEPLAYER (loops video continuously). // ============================================================================= int ANSLPR_MultiGPU_StressTest_FilePlayer() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); g_log.init(); printf("\n"); printf("============================================================\n"); printf(" ANSLPR Multi-GPU Stress Test (FilePlayer — looping)\n"); printf(" Using local video files via ANSFilePlayer (HW decode)\n"); printf(" Press ESC to stop\n"); printf(" Log file: %s\n", LOG_FILE_PATH); printf("============================================================\n\n"); g_log.add("============================================================"); g_log.add(" ANSLPR Multi-GPU Stress Test (FilePlayer — looping)"); g_log.add(" Using ANSFilePlayer with HW decode + NV12 zero-copy"); g_log.add("============================================================"); LogGpuInfo(); const std::string videoFile0 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4"; const std::string videoFile1 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_1.mp4"; 
const std::string videoFile2 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_2.mp4"; const std::string videoFile3 = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day_3.mp4"; g_log.add("Video 0: " + videoFile0); g_log.add("Video 1: " + videoFile1); g_log.add("Video 2: " + videoFile2); g_log.add("Video 3: " + videoFile3); TaskState taskStates[4]; // ========================================================================= // Create 4 FilePlayer readers // ========================================================================= const int NUM_STREAMS = 4; ANSCENTER::ANSFILEPLAYER* fpClients[NUM_STREAMS] = {}; const std::string videoFiles[NUM_STREAMS] = { videoFile0, videoFile1, videoFile2, videoFile3 }; const int taskStreamMap[4] = { 0, 1, 2, 3 }; for (int s = 0; s < NUM_STREAMS; s++) { printf("[Stream%d] Creating FilePlayer for %s\n", s, videoFiles[s].c_str()); g_log.add("[Stream" + std::to_string(s) + "] Creating FilePlayer for " + videoFiles[s]); int result = CreateANSFilePlayerHandle(&fpClients[s], "", videoFiles[s].c_str()); if (result != 1 || fpClients[s] == nullptr) { printf("[Stream%d] FAILED to create FilePlayer (result=%d)\n", s, result); g_log.add("[Stream" + std::to_string(s) + "] FilePlayer create FAILED"); fpClients[s] = nullptr; continue; } // Don't start yet — start after engines are loaded SetFilePlayerDisplayResolution(&fpClients[s], 1920, 1080); g_log.add("[Stream" + std::to_string(s) + "] FilePlayer created (display: 1920x1080)"); } // ========================================================================= // Create 4 ALPR engines sequentially // ========================================================================= ANSCENTER::ANSALPR* alprHandles[4] = {}; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; int engineType = 1; double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; for (int i = 0; i < 4; i++) { char tag[32]; snprintf(tag, sizeof(tag), "[Task%d]", i); int streamIdx 
= taskStreamMap[i]; if (fpClients[streamIdx] == nullptr) { printf("%s Skipped — Stream%d not available\n", tag, streamIdx); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Stream not available"; continue; } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].streamOk = true; taskStates[i].statusMsg = "Loading ALPR engine..."; } printf("%s Creating ALPR handle (engineType=%d)...\n", tag, engineType); g_log.add(std::string(tag) + " Creating ALPR handle..."); auto engineStart = std::chrono::steady_clock::now(); int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh); if (createResult != 1 || alprHandles[i] == nullptr) { printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult); g_log.add(std::string(tag) + " ALPR create FAILED"); std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "ALPR create failed"; continue; } printf("%s Loading ALPR engine (TensorRT)...\n", tag); g_log.add(std::string(tag) + " Loading ALPR engine..."); auto vramBefore = GetPerGpuFreeMiB(); int loadResult = LoadANSALPREngineHandle(&alprHandles[i]); auto engineEnd = std::chrono::steady_clock::now(); double loadMs = std::chrono::duration(engineEnd - engineStart).count(); if (loadResult != 1) { printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult); g_log.add(std::string(tag) + " Engine load FAILED"); ReleaseANSALPRHandle(&alprHandles[i]); alprHandles[i] = nullptr; std::lock_guard lk(taskStates[i].mtx); taskStates[i].statusMsg = "Engine load failed"; continue; } auto vramAfter = GetPerGpuFreeMiB(); int bestGpu = 0; size_t maxDelta = 0; for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) { size_t delta = (vramBefore[g] > vramAfter[g]) ? 
vramBefore[g] - vramAfter[g] : 0; if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; } } char ebuf[256]; snprintf(ebuf, sizeof(ebuf), "%s Engine loaded in %d ms | GPU[%d] | VRAM used: %zu MiB (Video%d)", tag, (int)loadMs, bestGpu, maxDelta, i); printf("%s\n", ebuf); g_log.add(ebuf); for (size_t g = 0; g < vramAfter.size(); g++) { size_t total = 0; cudaDeviceProp prop; if (cudaGetDeviceProperties(&prop, (int)g) == cudaSuccess) { total = prop.totalGlobalMem / (1024 * 1024); } char vbuf[128]; snprintf(vbuf, sizeof(vbuf), " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", g, vramAfter[g], total); printf("%s\n", vbuf); g_log.add(vbuf); } { std::lock_guard lk(taskStates[i].mtx); taskStates[i].engineLoaded = true; taskStates[i].statusMsg = "Running"; taskStates[i].gpuDeviceId = bestGpu; taskStates[i].vramUsedBytes = maxDelta * 1024 * 1024; } } // --- Enable debug benchmarking --- for (int i = 0; i < 4; i++) { if (alprHandles[i]) { alprHandles[i]->ActivateDebugger(true); } } g_log.add("Debug benchmarking ENABLED on all ALPR handles"); // --- Start video playback NOW (just before workers need frames) --- for (int s = 0; s < NUM_STREAMS; s++) { if (fpClients[s]) { StartFilePlayer(&fpClients[s]); g_log.add("[Stream" + std::to_string(s) + "] FilePlayer play() started"); } } // --- Launch worker threads --- g_log.add("Launching worker threads..."); std::thread workers[4]; for (int i = 0; i < 4; i++) { int streamIdx = taskStreamMap[i]; if (fpClients[streamIdx] && alprHandles[i]) { workers[i] = std::thread(ALPRWorkerThread_FilePlayer, i, fpClients[streamIdx], alprHandles[i], std::ref(taskStates[i])); } } // --- Display loop (main thread) --- const int cellW = 640, cellH = 480; const int logPanelH = 200; const char* windowName = "ANSLPR Stress Test (FilePlayer — looping)"; cv::namedWindow(windowName, cv::WINDOW_NORMAL); cv::resizeWindow(windowName, cellW * 2, cellH * 2 + logPanelH); auto testStart = std::chrono::steady_clock::now(); auto lastGpuSnapshot = 
std::chrono::steady_clock::now(); int snapshotCount = 0; while (g_running.load()) { auto now2 = std::chrono::steady_clock::now(); if (std::chrono::duration(now2 - lastGpuSnapshot).count() >= 10.0) { lastGpuSnapshot = now2; snapshotCount++; double elapsedSec = std::chrono::duration(now2 - testStart).count(); g_log.add("---- PERIODIC SNAPSHOT #" + std::to_string(snapshotCount) + " (elapsed " + std::to_string((int)elapsedSec) + "s) ----"); auto gpuSnap = QueryGpuVram(); for (const auto& gs : gpuSnap) { char buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s | Used: %zu/%zu MiB (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0); g_log.add(buf); } double totalFpsSnap = 0; for (int t = 0; t < 4; t++) { std::lock_guard lk(taskStates[t].mtx); char buf[256]; snprintf(buf, sizeof(buf), " T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f GrabMs=%.0f InfMs=%.0f Frames=%d Det=%d", t, taskStates[t].gpuDeviceId, taskStates[t].vramUsedBytes / (1024 * 1024), taskStates[t].fps, taskStates[t].lastGrabMs, taskStates[t].inferenceMs, taskStates[t].frameCount, taskStates[t].detectionCount); g_log.add(buf); totalFpsSnap += taskStates[t].fps; } char buf[128]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS", totalFpsSnap); g_log.add(buf); std::set gpusUsed; for (int t = 0; t < 4; t++) { if (taskStates[t].gpuDeviceId >= 0) gpusUsed.insert(taskStates[t].gpuDeviceId); } if (gpusUsed.size() > 1) { g_log.add(" MULTI-GPU: YES — tasks distributed across " + std::to_string(gpusUsed.size()) + " GPUs"); } else if (!gpusUsed.empty()) { g_log.add(" MULTI-GPU: NO — all tasks on GPU[" + std::to_string(*gpusUsed.begin()) + "]"); } g_log.add("---- END SNAPSHOT ----"); } // Build 2x2 grid + log panel cv::Mat canvas(cellH * 2 + logPanelH, cellW * 2, CV_8UC3, cv::Scalar(30, 30, 30)); for (int i = 0; i < 4; i++) { int row = i / 2, col = i % 2; cv::Rect roi(col * cellW, row * cellH, cellW, cellH); cv::Mat cell; double fps = 0, infMs = 0; int 
fCount = 0, dCount = 0; int gpuId = -1; size_t vramMiB = 0; std::string statusMsg, lastPlate; bool engineLoaded = false, streamOk = false; { std::lock_guard lk(taskStates[i].mtx); if (!taskStates[i].displayFrame.empty()) { cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); } fps = taskStates[i].fps; infMs = taskStates[i].inferenceMs; fCount = taskStates[i].frameCount; dCount = taskStates[i].detectionCount; statusMsg = taskStates[i].statusMsg; lastPlate = taskStates[i].lastPlate; engineLoaded = taskStates[i].engineLoaded; streamOk = taskStates[i].streamOk; gpuId = taskStates[i].gpuDeviceId; vramMiB = taskStates[i].vramUsedBytes / (1024 * 1024); } if (cell.empty()) { cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, cv::Point(20, cellH / 2), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); } cv::rectangle(cell, cv::Rect(0, cellH - 50, cellW, 50), cv::Scalar(0, 0, 0), cv::FILLED); char bar1[256], bar2[256]; snprintf(bar1, sizeof(bar1), "T%d | %.1f FPS | %.0fms | Frames:%d | Det:%d | %s", i, fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); if (gpuId >= 0) { snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB); } else { snprintf(bar2, sizeof(bar2), "GPU: N/A"); } cv::Scalar barColor = engineLoaded ? 
cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); cv::putText(cell, bar1, cv::Point(5, cellH - 28), cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1); cv::putText(cell, bar2, cv::Point(5, cellH - 8), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(0, 200, 255), 1); cell.copyTo(canvas(roi)); cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * 2), cv::Scalar(100, 100, 100), 1); cv::line(canvas, cv::Point(0, cellH), cv::Point(cellW * 2, cellH), cv::Scalar(100, 100, 100), 1); } // Log panel cv::Rect logRoi(0, cellH * 2, cellW * 2, logPanelH); cv::Mat logPanel = canvas(logRoi); logPanel.setTo(cv::Scalar(20, 20, 20)); auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); char header[128]; snprintf(header, sizeof(header), "Elapsed: %.0fs | FilePlayer (looping, HW decode) | Press ESC to stop", elapsed); cv::putText(logPanel, header, cv::Point(10, 18), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); double totalFps = 0; for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); totalFps += taskStates[i].fps; } char aggLine[256]; snprintf(aggLine, sizeof(aggLine), "Total throughput: %.1f FPS | T0:GPU%d T1:GPU%d T2:GPU%d T3:GPU%d", totalFps, taskStates[0].gpuDeviceId, taskStates[1].gpuDeviceId, taskStates[2].gpuDeviceId, taskStates[3].gpuDeviceId); cv::putText(logPanel, aggLine, cv::Point(10, 38), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 255), 1); auto gpuSnaps = QueryGpuVram(); int gpuLineY = 58; for (const auto& gs : gpuSnaps) { int tasksOnGpu = 0; size_t taskVramMiB = 0; for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); if (taskStates[i].gpuDeviceId == gs.deviceId) { tasksOnGpu++; taskVramMiB += taskStates[i].vramUsedBytes / (1024 * 1024); } } char gpuLine[256]; snprintf(gpuLine, sizeof(gpuLine), "GPU[%d] %s | Used: %zu/%zu MiB | Tasks: %d (engine VRAM: %zu MiB)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, tasksOnGpu, taskVramMiB); cv::putText(logPanel, gpuLine, 
cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.45, cv::Scalar(100, 255, 100), 1); gpuLineY += 18; } for (int i = 0; i < 4; i++) { std::lock_guard lk(taskStates[i].mtx); char tLine[256]; snprintf(tLine, sizeof(tLine), "T%d: GPU[%d] VRAM=%zuMiB FPS=%.1f Inf=%.0fms Frames=%d Det=%d", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].fps, taskStates[i].inferenceMs, taskStates[i].frameCount, taskStates[i].detectionCount); cv::putText(logPanel, tLine, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1); gpuLineY += 16; } auto recentLogs = g_log.getRecent(4); for (const auto& line : recentLogs) { if (gpuLineY > logPanelH - 5) break; std::string display = (line.size() > 130) ? line.substr(0, 127) + "..." : line; cv::putText(logPanel, display, cv::Point(10, gpuLineY), cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1); gpuLineY += 15; } cv::imshow(windowName, canvas); int key = cv::waitKey(30); if (key == 27) { g_log.add("ESC pressed — stopping all tasks..."); printf("\nESC pressed — stopping...\n"); g_running.store(false); } } // --- Wait for all workers --- printf("Waiting for worker threads to finish...\n"); for (int i = 0; i < 4; i++) { if (workers[i].joinable()) workers[i].join(); } // --- Final summary --- double totalElapsed = std::chrono::duration( std::chrono::steady_clock::now() - testStart).count(); g_log.add("================================================================"); g_log.add(" FINAL PERFORMANCE SUMMARY (FilePlayer — looping)"); g_log.add(" Total runtime: " + std::to_string((int)totalElapsed) + " seconds"); g_log.add("================================================================"); printf("\n============================================================\n"); printf(" FINAL PERFORMANCE SUMMARY — FilePlayer (runtime: %.0fs)\n", totalElapsed); printf("============================================================\n"); double totalFpsFinal = 0; for (int i = 0; i < 4; i++) 
{ char buf[512]; snprintf(buf, sizeof(buf), " Task %d: GPU[%d] | VRAM=%zuMiB | %d frames, %d detections, FPS=%.1f, InfMs=%.0f", i, taskStates[i].gpuDeviceId, taskStates[i].vramUsedBytes / (1024 * 1024), taskStates[i].frameCount, taskStates[i].detectionCount, taskStates[i].fps, taskStates[i].inferenceMs); printf("%s\n", buf); g_log.add(buf); totalFpsFinal += taskStates[i].fps; } auto finalGpu = QueryGpuVram(); for (const auto& gs : finalGpu) { char buf[256]; snprintf(buf, sizeof(buf), " GPU[%d] %s: %zu/%zu MiB used (%.1f%%)", gs.deviceId, gs.name.c_str(), gs.usedMiB, gs.totalMiB, gs.totalMiB > 0 ? 100.0 * gs.usedMiB / gs.totalMiB : 0.0); printf("%s\n", buf); g_log.add(buf); } { char buf[256]; snprintf(buf, sizeof(buf), " Total throughput: %.1f FPS across 4 tasks", totalFpsFinal); printf("%s\n", buf); g_log.add(buf); } printf("============================================================\n"); g_log.add("================================================================"); // --- Release all handles --- for (int i = 0; i < 4; i++) { if (alprHandles[i]) { ReleaseANSALPRHandle(&alprHandles[i]); } } for (int s = 0; s < NUM_STREAMS; s++) { if (fpClients[s]) { StopFilePlayer(&fpClients[s]); ReleaseANSFilePlayerHandle(&fpClients[s]); } } g_log.close(); cv::destroyAllWindows(); ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); return 0; } // ANSLPR_OD_CPU_VideoTest — Uses ANSALPR_OD (engineType=1) on Intel CPU/iGPU. // ANSALPR_OD auto-detects hardware (OpenVINO on Intel, DirectML on AMD, etc.) // No CUDA calls — safe on non-NVIDIA systems. 
int ANSLPR_OD_CPU_VideoTest() { std::cout << "\n============================================================" << std::endl; std::cout << " ANSLPR CPU/iGPU Test (ANSALPR_OD with auto-detect)" << std::endl; std::cout << "============================================================\n" << std::endl; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; std::string videoFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\classroom.mp4"; std::cout << "Model: " << modelZipFile << std::endl; std::cout << "Video: " << videoFilePath << std::endl; ANSCENTER::ANSALPR* infHandle = nullptr; int engineType = 1; // ANSALPR_OD (auto-detects HW internally) double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; // Step 1: Create handle std::cout << "[LPR-CPU] Step 1: Creating handle..." << std::endl; int createResult = CreateANSALPRHandle(&infHandle, "", modelZipFile.c_str(), "", engineType, detThresh, ocrThresh, colThresh); std::cout << "[LPR-CPU] CreateANSALPRHandle result: " << createResult << std::endl; if (createResult != 1 || infHandle == nullptr) { std::cerr << "[LPR-CPU] FAILED: CreateANSALPRHandle returned " << createResult << std::endl; return -1; } // Step 2: Load engine std::cout << "[LPR-CPU] Step 2: Loading engine..." << std::endl; int loadResult = LoadANSALPREngineHandle(&infHandle); std::cout << "[LPR-CPU] LoadANSALPREngineHandle result: " << loadResult << std::endl; if (loadResult != 1) { std::cerr << "[LPR-CPU] FAILED: LoadANSALPREngineHandle returned " << loadResult << std::endl; ReleaseANSALPRHandle(&infHandle); return -2; } // Step 3: Open video std::cout << "[LPR-CPU] Step 3: Opening video..." 
<< std::endl; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { std::cerr << "[LPR-CPU] FAILED: Could not open video: " << videoFilePath << std::endl; ReleaseANSALPRHandle(&infHandle); return -3; } int totalFrames = static_cast(capture.get(cv::CAP_PROP_FRAME_COUNT)); std::cout << "[LPR-CPU] Video opened: " << totalFrames << " frames" << std::endl; // Step 4: Run inference std::cout << "[LPR-CPU] Step 4: Running inference..." << std::endl; boost::property_tree::ptree pt; int frameIndex = 0; int totalDetections = 0; double totalInferenceMs = 0.0; int maxFrames = 200; while (frameIndex < maxFrames) { cv::Mat frame; if (!capture.read(frame)) { std::cout << "[LPR-CPU] End of video at frame " << frameIndex << std::endl; break; } frameIndex++; unsigned int bufferLength = 0; unsigned char* jpeg_bytes = CVMatToBytes(frame, bufferLength); int height = frame.rows; int width = frame.cols; auto start = std::chrono::system_clock::now(); std::string detectionResult = ANSALPR_RunInferenceBinary(&infHandle, jpeg_bytes, width, height); auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); totalInferenceMs += static_cast(elapsed.count()); delete[] jpeg_bytes; if (!detectionResult.empty()) { try { pt.clear(); std::stringstream ss; ss << detectionResult; boost::property_tree::read_json(ss, pt); int detCount = 0; BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& r = child.second; const auto class_name = GetData(r, "class_name"); const auto x = GetData(r, "x"); const auto y = GetData(r, "y"); const auto w = GetData(r, "width"); const auto h = GetData(r, "height"); detCount++; cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), 2); cv::putText(frame, class_name, cv::Point(x, y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA); } totalDetections += detCount; } catch (...) 
{} } if (frameIndex % 10 == 0) { double avgSoFar = totalInferenceMs / frameIndex; std::cout << "[LPR-CPU] Frame " << frameIndex << "/" << maxFrames << " | Time: " << elapsed.count() << "ms" << " | Avg: " << static_cast(avgSoFar) << "ms" << " | Detections: " << totalDetections << std::endl; } cv::imshow("ANSLPR CPU Test", frame); if (cv::waitKey(1) == 27) break; } // Summary double avgMs = (frameIndex > 0) ? (totalInferenceMs / frameIndex) : 0.0; std::cout << "\n=== LPR CPU Test Summary ===" << std::endl; std::cout << "Frames processed: " << frameIndex << std::endl; std::cout << "Total detections: " << totalDetections << std::endl; std::cout << "Avg inference: " << avgMs << " ms/frame" << std::endl; std::cout << "Total time: " << totalInferenceMs << " ms" << std::endl; std::cout << (frameIndex > 0 ? "[LPR-CPU] PASSED" : "[LPR-CPU] FAILED") << std::endl; capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); return (frameIndex > 0) ? 0 : -4; } // ── ANSALPR_OCR test: Japanese license plate detection using ANSONNXOCR ── // Render UTF-8 text onto a cv::Mat using Windows GDI (supports CJK/Unicode). // cv::putText only handles ASCII — Japanese characters render as '?'. #ifdef WIN32 static void putTextUnicode(cv::Mat& img, const std::string& text, cv::Point org, double fontScale, cv::Scalar color, int thickness) { int wlen = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, nullptr, 0); std::wstring wtext(wlen - 1, 0); MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, &wtext[0], wlen); HDC hdc = CreateCompatibleDC(nullptr); int fontHeight = (int)(fontScale * 30); HFONT hFont = CreateFontW(fontHeight, 0, 0, 0, (thickness > 2) ? 
FW_BOLD : FW_NORMAL, FALSE, FALSE, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS, ANTIALIASED_QUALITY, DEFAULT_PITCH | FF_SWISS, L"Yu Gothic UI"); HFONT hOldFont = (HFONT)SelectObject(hdc, hFont); SIZE sz; GetTextExtentPoint32W(hdc, wtext.c_str(), (int)wtext.size(), &sz); BITMAPINFO bmi = {}; bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); bmi.bmiHeader.biWidth = sz.cx; bmi.bmiHeader.biHeight = -sz.cy; bmi.bmiHeader.biPlanes = 1; bmi.bmiHeader.biBitCount = 32; bmi.bmiHeader.biCompression = BI_RGB; void* bits = nullptr; HBITMAP hBmp = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0); HBITMAP hOldBmp = (HBITMAP)SelectObject(hdc, hBmp); SetBkMode(hdc, TRANSPARENT); SetTextColor(hdc, RGB((int)color[2], (int)color[1], (int)color[0])); TextOutW(hdc, 0, 0, wtext.c_str(), (int)wtext.size()); cv::Mat textImg(sz.cy, sz.cx, CV_8UC4, bits); for (int row = 0; row < sz.cy; ++row) { for (int col = 0; col < sz.cx; ++col) { cv::Vec4b px = textImg.at(row, col); if (px[0] != 0 || px[1] != 0 || px[2] != 0) { int dy = org.y + row; int dx = org.x + col; if (dy >= 0 && dy < img.rows && dx >= 0 && dx < img.cols) { img.at(dy, dx) = cv::Vec3b(px[0], px[1], px[2]); } } } } SelectObject(hdc, hOldBmp); SelectObject(hdc, hOldFont); DeleteObject(hBmp); DeleteObject(hFont); DeleteDC(hdc); } #endif int ALPR_OCR_Test() { std::cout << "=== ALPR_OCR_Test: Japanese License Plate (ANSALPR_OCR) ===" << std::endl; std::filesystem::path currentPath = std::filesystem::current_path(); std::cout << "Current working directory: " << currentPath << std::endl; ANSCENTER::ANSALPR* infHandle = nullptr; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericALPR_v2.0.zip"; std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\test6.jpg"; int engineType = 2; // ANSALPR_OCR double detectionThreshold = 0.3; double ocrThreshold = 0.5; double colourThreshold = 0.0; // No colour detection for this test // Step 1: Create handle int 
createResult = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", engineType, detectionThreshold, ocrThreshold, colourThreshold); std::cout << "CreateANSALPRHandle result: " << createResult << std::endl; if (!createResult || !infHandle) { std::cerr << "Failed to create ANSALPR_OCR handle" << std::endl; return -1; } // Step 2: Set country to Japan ANSALPR_SetCountry(&infHandle, 5); // JAPAN = 5 std::cout << "Country set to JAPAN" << std::endl; // Step 3: Load engine auto engineStart = std::chrono::high_resolution_clock::now(); int loadResult = LoadANSALPREngineHandle(&infHandle); auto engineEnd = std::chrono::high_resolution_clock::now(); double engineMs = std::chrono::duration(engineEnd - engineStart).count(); std::cout << "LoadANSALPREngineHandle result: " << loadResult << " (" << engineMs << " ms)" << std::endl; if (!loadResult) { std::cerr << "Failed to load ANSALPR_OCR engine" << std::endl; ReleaseANSALPRHandle(&infHandle); return -2; } // Step 4: Load image cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR); if (input.empty()) { std::cerr << "Failed to load image: " << imagePath << std::endl; ReleaseANSALPRHandle(&infHandle); return -3; } std::cout << "Image loaded: " << input.cols << "x" << input.rows << std::endl; cv::Mat frame = input.clone(); int width = frame.cols; int height = frame.rows; // Convert to raw BGR bytes for RunInferenceBinary unsigned int bufferLength = static_cast(frame.total() * frame.elemSize()); unsigned char* imageBytes = new unsigned char[bufferLength]; std::memcpy(imageBytes, frame.data, bufferLength); // Step 5: Warmup run auto warmupStart = std::chrono::high_resolution_clock::now(); std::string detectionResult = ANSALPR_RunInferenceBinary(&infHandle, imageBytes, width, height); auto warmupEnd = std::chrono::high_resolution_clock::now(); double warmupMs = std::chrono::duration(warmupEnd - warmupStart).count(); std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl; std::cout << "ALPR 
Result: " << detectionResult << std::endl; // Step 6: Benchmark const int benchmarkIterations = 10; std::vector times; times.reserve(benchmarkIterations); for (int i = 0; i < benchmarkIterations; ++i) { auto t0 = std::chrono::high_resolution_clock::now(); std::string result = ANSALPR_RunInferenceBinary(&infHandle, imageBytes, width, height); auto t1 = std::chrono::high_resolution_clock::now(); double ms = std::chrono::duration(t1 - t0).count(); times.push_back(ms); std::cout << " Run " << (i + 1) << "/" << benchmarkIterations << ": " << ms << " ms" << std::endl; } std::sort(times.begin(), times.end()); double sum = std::accumulate(times.begin(), times.end(), 0.0); double avg = sum / benchmarkIterations; double median = (benchmarkIterations % 2 == 0) ? (times[benchmarkIterations / 2 - 1] + times[benchmarkIterations / 2]) / 2.0 : times[benchmarkIterations / 2]; std::cout << "\n=== Benchmark (" << benchmarkIterations << " runs) ===" << std::endl; std::cout << " Avg: " << avg << " ms" << std::endl; std::cout << " Median: " << median << " ms" << std::endl; std::cout << " Min: " << times.front() << " ms" << std::endl; std::cout << " Max: " << times.back() << " ms" << std::endl; std::cout << " FPS: " << (1000.0 / avg) << std::endl; delete[] imageBytes; // Step 7: Draw results on image if (!detectionResult.empty()) { try { boost::property_tree::ptree pt; std::stringstream ss(detectionResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& res = child.second; const auto class_name_raw = GetData(res, "class_name"); const std::string class_name = DecodeUnicodeEscapes(class_name_raw); const auto x = GetData(res, "x"); const auto y = GetData(res, "y"); const auto w = GetData(res, "width"); const auto h = GetData(res, "height"); cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), 2); std::string extraInfo = GetOptionalValue(res, "extra_info", 
""); std::cout << " Plate: " << class_name << std::endl; if (!extraInfo.empty()) { std::cout << " extra_info: " << extraInfo << std::endl; } #ifdef WIN32 { int textH = (int)(1.5 * 30); int ty = y - 5 - textH; if (ty < 0) ty = y + 3; putTextUnicode(frame, class_name, cv::Point(x, ty), 1.5, cv::Scalar(0, 0, 255), 3); } #else cv::putText(frame, class_name, cv::Point(x, y - 5), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); #endif } } catch (const std::exception& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; } } // Step 8: Display result cv::Mat display; double scale = std::min(1920.0 / frame.cols, 1080.0 / frame.rows); if (scale < 1.0) { cv::resize(frame, display, cv::Size(), scale, scale); } else { display = frame; } cv::namedWindow("ALPR_OCR_Test", cv::WINDOW_AUTOSIZE); cv::imshow("ALPR_OCR_Test", display); cv::waitKey(0); // Cleanup ReleaseANSALPRHandle(&infHandle); cv::destroyAllWindows(); frame.release(); input.release(); std::cout << "=== ALPR_OCR_Test complete ===" << std::endl; return 0; } int ALPR_OCR_VideoTest() { std::cout << "=== ALPR_OCR_VideoTest: ANSALPR_OCR engine on video ===" << std::endl; std::filesystem::path currentPath = std::filesystem::current_path(); std::cout << "Current working directory: " << currentPath << std::endl; ANSCENTER::ANSALPR* infHandle = nullptr; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericALPR_v2.0.zip"; std::string videoFilePath = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4"; int engineType = 2; // ANSALPR_OCR double detectionThreshold = 0.3; double ocrThreshold = 0.5; double colourThreshold = 0.0; // Step 1: Create handle int createResult = CreateANSALPRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", engineType, detectionThreshold, ocrThreshold, colourThreshold); std::cout << "CreateANSALPRHandle result: " << createResult << std::endl; if (!createResult || !infHandle) { std::cerr << "Failed to 
create ANSALPR_OCR handle" << std::endl; return -1; } // Step 2: Set country (JAPAN = 5 — adjust to match the dataset if needed) ANSALPR_SetCountry(&infHandle, 1); std::cout << "Country set to JAPAN" << std::endl; // Step 3: Load engine auto engineStart = std::chrono::high_resolution_clock::now(); int loadResult = LoadANSALPREngineHandle(&infHandle); auto engineEnd = std::chrono::high_resolution_clock::now(); double engineMs = std::chrono::duration(engineEnd - engineStart).count(); std::cout << "LoadANSALPREngineHandle result: " << loadResult << " (" << engineMs << " ms)" << std::endl; if (!loadResult) { std::cerr << "Failed to load ANSALPR_OCR engine" << std::endl; ReleaseANSALPRHandle(&infHandle); return -2; } // Step 4: Open video cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { std::cerr << "Could not open video file: " << videoFilePath << std::endl; ReleaseANSALPRHandle(&infHandle); return -3; } boost::property_tree::ptree pt; int frameIdx = 0; while (true) { cv::Mat frame; if (!capture.read(frame)) { std::cout << "\nEnd of video stream.\n"; break; } ++frameIdx; int width = frame.cols; int height = frame.rows; // Convert to raw BGR bytes for ANSALPR_RunInferenceBinary unsigned int bufferLength = static_cast(frame.total() * frame.elemSize()); unsigned char* imageBytes = new unsigned char[bufferLength]; std::memcpy(imageBytes, frame.data, bufferLength); auto t0 = std::chrono::high_resolution_clock::now(); std::string detectionResult = ANSALPR_RunInferenceBinary(&infHandle, imageBytes, width, height); auto t1 = std::chrono::high_resolution_clock::now(); double inferMs = std::chrono::duration(t1 - t0).count(); delete[] imageBytes; printf("Frame %d: %.2f ms (%.1f FPS)\n", frameIdx, inferMs, inferMs > 0.0 ? 
(1000.0 / inferMs) : 0.0); // Draw detections if (!detectionResult.empty()) { try { pt.clear(); std::stringstream ss(detectionResult); boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) { const boost::property_tree::ptree& res = child.second; const auto class_name_raw = GetData(res, "class_name"); const std::string class_name = DecodeUnicodeEscapes(class_name_raw); const auto x = GetData(res, "x"); const auto y = GetData(res, "y"); const auto w = GetData(res, "width"); const auto h = GetData(res, "height"); cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), 2); std::string extraInfo = GetOptionalValue(res, "extra_info", ""); if (!class_name.empty()) { std::cout << " Plate: " << class_name; if (!extraInfo.empty()) std::cout << " (" << extraInfo << ")"; std::cout << std::endl; } #ifdef WIN32 { int textH = (int)(1.5 * 30); int ty = y - 5 - textH; if (ty < 0) ty = y + 3; putTextUnicode(frame, class_name, cv::Point(x, ty), 1.5, cv::Scalar(0, 0, 255), 3); } #else cv::putText(frame, class_name, cv::Point(x, y - 5), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); #endif } } catch (const std::exception& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; } } // Display (fit to 1920x1080) cv::Mat display; double scale = std::min(1920.0 / frame.cols, 1080.0 / frame.rows); if (scale < 1.0) { cv::resize(frame, display, cv::Size(), scale, scale); } else { display = frame; } cv::namedWindow("ALPR_OCR_VideoTest", cv::WINDOW_AUTOSIZE); cv::imshow("ALPR_OCR_VideoTest", display); if (cv::waitKey(1) == 27) { // ESC to exit std::cout << "ESC pressed — stopping.\n"; break; } } capture.release(); cv::destroyAllWindows(); ReleaseANSALPRHandle(&infHandle); std::cout << "=== ALPR_OCR_VideoTest complete ===" << std::endl; return 0; } int main() { #ifdef WIN32 SetConsoleOutputCP(CP_UTF8); SetConsoleCP(CP_UTF8); #endif // ANSLPR_OD_INDOInferences_FileTest(); 
//ANSLPR_OD_Inferences_FileTest(); //ANSLPR_OD_VideoTest(); //ANSLPR_BigSize_VideoTest(); //ANSLPR_CPU_VideoTest(); //for (int i = 0; i < 100; i++) { // ANSLPR_CPU_Inferences_FileTest(); //} //ANSLPR_SingleTask_Test(); //ANSLPR_CPU_StressTest(); //ANSLPR_MultiGPU_StressTest(); //ANSLPR_MultiGPU_StressTest_SimulatedCam(); // ANSLPR_MultiGPU_StressTest_FilePlayer(); //ANSLPR_OD_CPU_VideoTest(); //ALPR_OCR_Test(); ALPR_OCR_VideoTest(); return 0; }