diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 6c8903b..bb13a69 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -81,7 +81,9 @@ "Bash(cmake --build build --target ANSUtilities)", "Bash(ls -d /c/Projects/CLionProjects/ANSCORE/cmake-build-* /c/Projects/CLionProjects/ANSCORE/out/*)", "Bash(cmake --build /c/Projects/CLionProjects/ANSCORE/cmake-build-release --target ANSUtilities)", - "Bash(find /c/Projects/CLionProjects/ANSCORE -name *json* -o -name *Json*)" + "Bash(find /c/Projects/CLionProjects/ANSCORE -name *json* -o -name *Json*)", + "Bash(grep -n \"CreateANSALPRHandle\\\\|LoadANSALPREngineHandle\\\\|CreateANSRTSPHandle\\\\|ReleaseANSALPRHandle\\\\|ANSALPR_RunInference\" C:ProjectsCLionProjectsANSCOREsrc*)", + "Bash(find C:ProjectsCLionProjectsANSCORE -name ANSLibsLoader* -type f)" ] } } diff --git a/core/ANSLibsLoader/EPLoader.cpp b/core/ANSLibsLoader/EPLoader.cpp index 0e6cf3f..35274f6 100644 --- a/core/ANSLibsLoader/EPLoader.cpp +++ b/core/ANSLibsLoader/EPLoader.cpp @@ -179,9 +179,12 @@ namespace ANSCENTER { EngineType EPLoader::AutoDetect() { std::cout << "[EPLoader] Auto-detecting hardware..." << std::endl; + ANS_DBG("EPLoader", "AutoDetect: starting hardware detection"); ANSLicenseHelper helper; EngineType detected = helper.CheckHardwareInformation(); std::cout << "[EPLoader] Detected: " << EngineTypeName(detected) << std::endl; + ANS_DBG("EPLoader", "AutoDetect: result=%d (%s)", + static_cast(detected), EngineTypeName(detected)); return detected; } @@ -217,6 +220,9 @@ namespace ANSCENTER { s_info.fromSubdir = (ep_dir != shared_dir); s_initialized = true; + ANS_DBG("EPLoader", "Initialize: EP=%d (%s) dir=%s fromSubdir=%d", + static_cast(type), EngineTypeName(type), + ep_dir.c_str(), s_info.fromSubdir ? 1 : 0); std::cout << "[EPLoader] Ready. 
EP=" << EngineTypeName(type) << " dir=" << ep_dir << std::endl; return s_info; diff --git a/core/ANSLicensingSystem/ANSLicense.h b/core/ANSLicensingSystem/ANSLicense.h index b55fbf1..7497409 100644 --- a/core/ANSLicensingSystem/ANSLicense.h +++ b/core/ANSLicensingSystem/ANSLicense.h @@ -8,7 +8,7 @@ // Set to 0 for production builds to eliminate all debug output overhead. // ============================================================================ #ifndef ANSCORE_DEBUGVIEW -#define ANSCORE_DEBUGVIEW 1 // 1 = enabled (debug), 0 = disabled (production) +#define ANSCORE_DEBUGVIEW 0 // 1 = enabled (debug), 0 = disabled (production) #endif // ANS_DBG: Debug logging macro for DebugView (OutputDebugStringA on Windows). diff --git a/engines/ONNXEngine/ONNXEngine.cpp b/engines/ONNXEngine/ONNXEngine.cpp index e0b323a..5cca830 100644 --- a/engines/ONNXEngine/ONNXEngine.cpp +++ b/engines/ONNXEngine/ONNXEngine.cpp @@ -122,36 +122,65 @@ namespace ANSCENTER { // Use AppendExecutionProvider_OpenVINO_V2 instead of the generic string API, // matching the pattern used in YOLOOD/YOLO12OD/ANSONNXCL etc. // Try device configs in priority order, falling back gracefully. + // + // NPU availability is probed once per process. If AUTO:NPU,GPU fails on + // the first call, we skip it for all subsequent models to avoid repeated + // "Failed to load shared library" errors cluttering the log. 
+ static bool s_npuProbed = false; + static bool s_npuAvailable = false; + const std::string precision = "FP16"; const std::string numberOfThreads = "4"; const std::string numberOfStreams = "4"; - std::vector> try_configs = { - { {"device_type","AUTO:NPU,GPU"}, {"precision",precision}, - {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, - {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, - { {"device_type","GPU.0"}, {"precision",precision}, - {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, - {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, - { {"device_type","GPU.1"}, {"precision",precision}, - {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, - {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, - { {"device_type","AUTO:GPU,CPU"}, {"precision",precision}, - {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, - {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} } + auto makeConfig = [&](const std::string& device) { + return std::unordered_map{ + {"device_type", device}, {"precision", precision}, + {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams}, + {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "True"} + }; }; + std::vector> try_configs; + + // Only try NPU if it hasn't been probed yet or was previously available + if (!s_npuProbed || s_npuAvailable) { + try_configs.push_back(makeConfig("AUTO:NPU,GPU")); + } + try_configs.push_back(makeConfig("GPU.0")); + try_configs.push_back(makeConfig("GPU.1")); + try_configs.push_back(makeConfig("AUTO:GPU,CPU")); + for (const auto& config : try_configs) { try { session_options.AppendExecutionProvider_OpenVINO_V2(config); + const auto& device = config.at("device_type"); std::cout << "[ORT] OpenVINO EP attached (" - << config.at("device_type") << ", " << precision << ")." << std::endl; + << device << ", " << precision << ")." 
<< std::endl; + ANS_DBG("OrtHandler", "OpenVINO EP attached: %s", device.c_str()); + + // If NPU config succeeded, mark it available + if (device.find("NPU") != std::string::npos) { + s_npuProbed = true; + s_npuAvailable = true; + } return true; } catch (const Ort::Exception& e) { - std::cerr << "[ORT] OpenVINO EP failed for device " - << config.at("device_type") << ": " << e.what() << std::endl; - // try next config + const auto& device = config.at("device_type"); + + // If NPU config failed, remember so we skip it next time + if (device.find("NPU") != std::string::npos) { + if (!s_npuProbed) { + std::cout << "[ORT] NPU not available — skipping NPU configs for subsequent models." << std::endl; + ANS_DBG("OrtHandler", "NPU not available, will skip in future"); + } + s_npuProbed = true; + s_npuAvailable = false; + } else { + std::cerr << "[ORT] OpenVINO EP failed for device " + << device << ": " << e.what() << std::endl; + } } } std::cerr << "[ORT] OpenVINO EP: all device configs failed." << std::endl; @@ -164,7 +193,10 @@ namespace ANSCENTER { void BasicOrtHandler::initialize_handler() { + ANS_DBG("OrtHandler", "initialize_handler: m_engineType=%d", static_cast(m_engineType)); const auto& epInfo = EPLoader::Current(); + ANS_DBG("OrtHandler", "initialize_handler: EPLoader type=%d dir=%s", + static_cast(epInfo.type), epInfo.libraryDir.c_str()); if (Ort::Global::api_ == nullptr) Ort::InitApi(static_cast(EPLoader::GetOrtApiRaw())); @@ -172,6 +204,12 @@ namespace ANSCENTER { EngineType engine = (static_cast(m_engineType) == -1) ? epInfo.type : m_engineType; + // Persist the resolved engine type so subclasses (e.g. ONNXYOLO) + // can branch on the actual EP at inference time (IoBinding for DML). + m_engineType = engine; + ANS_DBG("OrtHandler", "initialize_handler: resolved engine=%d (from %s)", + static_cast(engine), + (static_cast(m_engineType) == -1) ? 
"EPLoader" : "explicit"); ort_env = new Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id); memory_info_handler = new Ort::MemoryInfo( @@ -186,7 +224,17 @@ namespace ANSCENTER { GraphOptimizationLevel::ORT_ENABLE_ALL); session_options.SetLogSeverityLevel(4); - + // DirectML REQUIRES these two settings per ORT documentation: + // - DisableMemPattern: DML manages its own memory; ORT's memory + // pattern optimization conflicts with DML's D3D12 allocator. + // - ORT_SEQUENTIAL: DML uses a single command queue and cannot + // handle parallel execution mode — doing so causes deadlocks + // when synchronizing GPU→CPU data transfers. + if (engine == EngineType::AMD_GPU) { + session_options.DisableMemPattern(); + session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + ANS_DBG("OrtHandler", "DirectML: DisableMemPattern + ORT_SEQUENTIAL set"); + } std::vector available = Ort::GetAvailableProviders(); std::cout << "[ORT] Available providers: "; @@ -206,41 +254,55 @@ namespace ANSCENTER { { // -------------------------------------------------------- case EngineType::NVIDIA_GPU: + ANS_DBG("OrtHandler", "Trying CUDA EP..."); if (hasProvider("CUDAExecutionProvider")) epAttached = TryAppendCUDA(session_options); - if (!epAttached) + if (!epAttached) { std::cerr << "[ORT] CUDA EP unavailable — falling back to CPU." << std::endl; + ANS_DBG("OrtHandler", "CUDA EP FAILED — fallback to CPU"); + } break; // -------------------------------------------------------- case EngineType::AMD_GPU: + ANS_DBG("OrtHandler", "Trying DirectML EP..."); if (hasProvider("DmlExecutionProvider")) epAttached = TryAppendDirectML(session_options); - if (!epAttached) + if (!epAttached) { std::cerr << "[ORT] DirectML EP unavailable — falling back to CPU." 
<< std::endl; + ANS_DBG("OrtHandler", "DirectML EP FAILED — fallback to CPU"); + } break; // -------------------------------------------------------- case EngineType::OPENVINO_GPU: + ANS_DBG("OrtHandler", "Trying OpenVINO EP..."); if (hasProvider("OpenVINOExecutionProvider")) epAttached = TryAppendOpenVINO(session_options); - if (!epAttached) + if (!epAttached) { std::cerr << "[ORT] OpenVINO EP unavailable — falling back to CPU." << std::endl; + ANS_DBG("OrtHandler", "OpenVINO EP FAILED — fallback to CPU"); + } break; // -------------------------------------------------------- case EngineType::CPU: default: std::cout << "[ORT] Using CPU EP." << std::endl; + ANS_DBG("OrtHandler", "Using CPU EP"); epAttached = true; break; } - if (!epAttached) + if (!epAttached) { std::cout << "[ORT] Running on CPU EP (fallback)." << std::endl; + ANS_DBG("OrtHandler", "EP not attached — running on CPU fallback"); + } else { + ANS_DBG("OrtHandler", "EP attached successfully"); + } // ---------------------------------------------------------------- // Create session @@ -367,15 +429,19 @@ namespace ANSCENTER { std::cout << "[ORT] Session created OK (" << label << ")." << std::endl; }; + ANS_DBG("OrtHandler", "Creating session for model: %ls", onnx_path); try { createSession(session_options, "primary EP"); + ANS_DBG("OrtHandler", "Session created OK with primary EP"); } catch (const Ort::Exception& e) { + ANS_DBG("OrtHandler", "Session FAILED with primary EP: %s", e.what()); std::cerr << "[ORT] Session creation FAILED with primary EP: " << e.what() << std::endl; // If we were using a GPU EP, fall back to CPU if (engine != EngineType::CPU && epAttached) { + ANS_DBG("OrtHandler", "Retrying with CPU fallback..."); std::cerr << "[ORT] Retrying with CPU EP (fallback)..." 
<< std::endl; // Build fresh session options — no GPU EP, no graph opt @@ -404,6 +470,7 @@ namespace ANSCENTER { } } catch (const std::exception& e) { + ANS_DBG("OrtHandler", "Session FAILED (std::exception): %s", e.what()); std::cerr << "[ORT] Session creation FAILED (std::exception): " << e.what() << std::endl; throw; diff --git a/modules/ANSFR/dllmain.cpp b/modules/ANSFR/dllmain.cpp index 90eefd1..3a9709f 100644 --- a/modules/ANSFR/dllmain.cpp +++ b/modules/ANSFR/dllmain.cpp @@ -514,6 +514,177 @@ extern "C" ANSFR_API int InsertUser(ANSCENTER::ANSFacialRecognition** H return -1; } } + +// Helper: repair mixed-encoding LabVIEW LStrHandle to clean UTF-16LE. +// LabVIEW text controls may produce a mix of UTF-16LE pairs, embedded UTF-8 +// multi-byte sequences, and lone space bytes (0x20 without 0x00 high byte). +// This normalizes everything to proper UTF-16LE pairs. +// Input: BOM-stripped raw bytes. Output: clean UTF-16LE vector. +static std::vector RepairLabVIEWUTF16LE_Local(const unsigned char* data, int len) { + std::vector repaired; + if (!data || len <= 0) return repaired; + repaired.reserve(len + 32); + + auto emitU16 = [&](uint16_t cp) { + repaired.push_back(static_cast(cp & 0xFF)); + repaired.push_back(static_cast((cp >> 8) & 0xFF)); + }; + + for (int i = 0; i < len; ) { + unsigned char b = data[i]; + + // 1. 
Detect embedded UTF-8 multi-byte sequences + // 2-byte UTF-8: C2-DF followed by 80-BF + if (b >= 0xC2 && b <= 0xDF && i + 1 < len) { + unsigned char b1 = data[i + 1]; + if ((b1 & 0xC0) == 0x80) { + uint32_t cp = ((b & 0x1F) << 6) | (b1 & 0x3F); + emitU16(static_cast(cp)); + i += 2; continue; + } + } + // 3-byte UTF-8: E0-EF followed by 80-BF 80-BF + if (b >= 0xE0 && b <= 0xEF && i + 2 < len) { + unsigned char b1 = data[i + 1], b2 = data[i + 2]; + if ((b1 & 0xC0) == 0x80 && (b2 & 0xC0) == 0x80) { + uint32_t cp = ((b & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F); + if (cp >= 0x0800 && (cp < 0xD800 || cp > 0xDFFF)) { + emitU16(static_cast(cp)); + i += 3; continue; + } + } + } + // 4-byte UTF-8: F0-F4 followed by 80-BF 80-BF 80-BF + if (b >= 0xF0 && b <= 0xF4 && i + 3 < len) { + unsigned char b1 = data[i + 1], b2 = data[i + 2], b3 = data[i + 3]; + if ((b1 & 0xC0) == 0x80 && (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80) { + uint32_t cp = ((b & 0x07) << 18) | ((b1 & 0x3F) << 12) + | ((b2 & 0x3F) << 6) | (b3 & 0x3F); + if (cp >= 0x10000 && cp <= 0x10FFFF) { + cp -= 0x10000; + emitU16(static_cast(0xD800 + (cp >> 10))); + emitU16(static_cast(0xDC00 + (cp & 0x3FF))); + i += 4; continue; + } + } + } + + // 2. Normal UTF-16LE pair (low byte + 0x00 high byte) + if (i + 1 < len && data[i + 1] == 0x00) { + repaired.push_back(data[i]); repaired.push_back(0x00); i += 2; + } + // 3. Lone space byte — LabVIEW dropped the 0x00 high byte + else if (b == 0x20 && (i + 1 >= len || data[i + 1] != 0x00)) { + repaired.push_back(0x20); repaired.push_back(0x00); i += 1; + } + // 4. Non-ASCII UTF-16LE pair + else if (i + 1 < len) { + repaired.push_back(data[i]); repaired.push_back(data[i + 1]); i += 2; + } + // 5. 
Trailing odd byte — skip + else { i++; } + } + return repaired; +} + +// Helper: convert LStrHandle (mixed UTF-8/UTF-16LE or system codepage) to UTF-8 string +static std::string LStrHandleToUTF8(LStrHandle handle) { + if (!handle) return ""; + int byteLen = (*handle)->cnt; + if (byteLen <= 0) return ""; + const unsigned char* data = reinterpret_cast((*handle)->str); + + // Check for BOM or 0x00 bytes → UTF-16LE (possibly mixed with UTF-8) + bool isUtf16le = false; + if (byteLen >= 2 && data[0] == 0xFF && data[1] == 0xFE) isUtf16le = true; + if (!isUtf16le) { + for (int i = 0; i < byteLen; i++) { + if (data[i] == 0x00) { isUtf16le = true; break; } + } + } + + if (isUtf16le) { + const unsigned char* convData = data; + int convLen = byteLen; + if (convLen >= 2 && convData[0] == 0xFF && convData[1] == 0xFE) { convData += 2; convLen -= 2; } + if (convLen <= 0) return ""; + + // Repair mixed encoding (UTF-8 islands, lone spaces) → clean UTF-16LE + auto repaired = RepairLabVIEWUTF16LE_Local(convData, convLen); + +#ifdef _WIN32 + int wideLen = static_cast(repaired.size()) / 2; + const wchar_t* wideStr = reinterpret_cast(repaired.data()); + int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wideStr, wideLen, nullptr, 0, nullptr, nullptr); + if (utf8Len > 0) { + std::string utf8(utf8Len, 0); + WideCharToMultiByte(CP_UTF8, 0, wideStr, wideLen, &utf8[0], utf8Len, nullptr, nullptr); + return utf8; + } +#endif + return std::string(reinterpret_cast(repaired.data()), repaired.size()); + } else { + // No 0x00 bytes — try UTF-8 first, fall back to system codepage. + // IsValidUTF8: check if bytes form valid UTF-8 with at least one multi-byte sequence. 
+ auto IsValidUTF8 = [](const unsigned char* d, int l) -> bool { + bool hasMulti = false; + for (int j = 0; j < l; ) { + unsigned char c = d[j]; + if (c <= 0x7F) { j++; } + else if (c >= 0xC2 && c <= 0xDF) { + if (j + 1 >= l || (d[j + 1] & 0xC0) != 0x80) return false; + hasMulti = true; j += 2; + } else if (c >= 0xE0 && c <= 0xEF) { + if (j + 2 >= l || (d[j + 1] & 0xC0) != 0x80 || (d[j + 2] & 0xC0) != 0x80) return false; + hasMulti = true; j += 3; + } else if (c >= 0xF0 && c <= 0xF4) { + if (j + 3 >= l || (d[j + 1] & 0xC0) != 0x80 || (d[j + 2] & 0xC0) != 0x80 || (d[j + 3] & 0xC0) != 0x80) return false; + hasMulti = true; j += 4; + } else { return false; } + } + return hasMulti; + }; + + if (IsValidUTF8(data, byteLen)) { + return std::string(reinterpret_cast(data), byteLen); + } +#ifdef _WIN32 + int wideLen = MultiByteToWideChar(CP_ACP, 0, reinterpret_cast(data), byteLen, nullptr, 0); + if (wideLen > 0) { + std::wstring wideStr(wideLen, 0); + MultiByteToWideChar(CP_ACP, 0, reinterpret_cast(data), byteLen, &wideStr[0], wideLen); + int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wideStr.c_str(), wideLen, nullptr, 0, nullptr, nullptr); + if (utf8Len > 0) { + std::string utf8(utf8Len, 0); + WideCharToMultiByte(CP_UTF8, 0, wideStr.c_str(), wideLen, &utf8[0], utf8Len, nullptr, nullptr); + return utf8; + } + } +#endif + return std::string(reinterpret_cast(data), byteLen); + } +} + +extern "C" ANSFR_API int InsertUser_LV(ANSCENTER::ANSFacialRecognition** Handle, const char* userCode, LStrHandle userName) { + try { + if (!Handle || !*Handle || !userCode || !userName) return -1; + std::string utf8Name = LStrHandleToUTF8(userName); + if (utf8Name.empty()) return -1; + return (*Handle)->InsertUser(userCode, utf8Name); + } + catch (const std::exception& e) { return -1; } +} + +extern "C" ANSFR_API int UpdateUser_LV(ANSCENTER::ANSFacialRecognition** Handle, int userId, const char* userCode, LStrHandle userName) { + try { + if (!Handle || !*Handle || !userCode || !userName) return 
-1; + std::string utf8Name = LStrHandleToUTF8(userName); + if (utf8Name.empty()) return -1; + return (*Handle)->UpdateUser(userId, userCode, utf8Name); + } + catch (const std::exception& e) { return -1; } +} + extern "C" ANSFR_API int UpdateUser(ANSCENTER::ANSFacialRecognition** Handle, int userId, const char* userCode, const char* userName) { try { if (!Handle || !*Handle || !userCode || !userName) return -1; diff --git a/modules/ANSLPR/ANSLPR_OD.cpp b/modules/ANSLPR/ANSLPR_OD.cpp index 7ac2aa5..56e7685 100644 --- a/modules/ANSLPR/ANSLPR_OD.cpp +++ b/modules/ANSLPR/ANSLPR_OD.cpp @@ -963,7 +963,9 @@ namespace ANSCENTER { // Run license plate detection cv::Mat activeFrame = frame(detectedArea); + fprintf(stderr, "[ALPR] RunInference: calling lpd %dx%d cam=%s\n", activeFrame.cols, activeFrame.rows, cameraId.c_str()); std::vector lprOutput = _lpDetector->RunInference(activeFrame, cameraId); + fprintf(stderr, "[ALPR] RunInference: lpd done, %zu detections cam=%s\n", lprOutput.size(), cameraId.c_str()); for (size_t _di = 0; _di < lprOutput.size(); ++_di) { ANS_DBG("ALPR_Track", "cam=%s det[%zu] tid=%d box=(%d,%d,%d,%d) conf=%.2f", cameraId.c_str(), _di, lprOutput[_di].trackId, @@ -1005,7 +1007,9 @@ namespace ANSCENTER { cv::Mat alignedLPR = frame(lprPos);// .clone(); // OCR inference + fprintf(stderr, "[ALPR] RunInference: calling OCR on plate %dx%d cam=%s\n", alignedLPR.cols, alignedLPR.rows, cameraId.c_str()); std::string ocrText = DetectLicensePlateString(alignedLPR, cameraId); + fprintf(stderr, "[ALPR] RunInference: OCR done, text='%s' cam=%s\n", ocrText.c_str(), cameraId.c_str()); if (ocrText.empty()) { continue; diff --git a/modules/ANSODEngine/ANSONNXYOLO.cpp b/modules/ANSODEngine/ANSONNXYOLO.cpp index 7ea0731..ad739e9 100644 --- a/modules/ANSODEngine/ANSONNXYOLO.cpp +++ b/modules/ANSODEngine/ANSONNXYOLO.cpp @@ -335,7 +335,7 @@ namespace ANSCENTER { // to distinguish OBB (angle values in [-pi, pi]) from detection bool likelyOBB = false; if (extra >= 2) { - const 
float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); int numSamples = std::min(numBoxes, 100); int angleCount = 0; for (int s = 0; s < numSamples; ++s) { @@ -371,13 +371,13 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold) { if (outputTensors.empty()) return {}; - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (outputShape.size() < 3) return {}; @@ -427,13 +427,13 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet) { if (outputTensors.empty()) return {}; - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (outputShape.size() < 3) return {}; @@ -656,12 +656,12 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessOBBEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold) { if (outputTensors.empty()) return {}; - const float* raw = outputTensors[0].GetTensorData(); + const float* raw = outputTensors[0].GetTensorMutableData(); const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (shape.size() < 3) return {}; @@ -721,12 +721,12 @@ namespace ANSCENTER { std::vector 
ONNXYOLO::postprocessOBBLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet) { if (outputTensors.empty()) return {}; - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (outputShape.size() < 3) return {}; @@ -822,13 +822,13 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessSegEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold) { if (outputTensors.size() < 2) return {}; - const float* raw = outputTensors[0].GetTensorData(); + const float* raw = outputTensors[0].GetTensorMutableData(); const auto shape0 = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const auto protoShape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape(); if (shape0.size() < 3 || protoShape.size() < 4) return {}; @@ -884,7 +884,7 @@ namespace ANSCENTER { // Generate masks: coeffs @ protos → sigmoid → crop-in-proto → resize-to-box → threshold if (!objs.empty() && !maskCoeffs.empty()) { - const float* protoData = outputTensors[1].GetTensorData(); + const float* protoData = outputTensors[1].GetTensorMutableData(); cv::Mat protos(nm, protoH * protoW, CV_32F, const_cast(protoData)); cv::Mat matmulRes = (maskCoeffs * protos).t(); @@ -951,13 +951,13 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessSegLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet) { if (outputTensors.size() < 2) return {}; - const float* rawOutput = 
outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const auto shape0 = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const auto protoShape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape(); if (shape0.size() < 3 || protoShape.size() < 4) return {}; @@ -1035,7 +1035,7 @@ namespace ANSCENTER { // Generate masks if (!objs.empty() && !masks.empty()) { - const float* protoData = outputTensors[1].GetTensorData(); + const float* protoData = outputTensors[1].GetTensorMutableData(); cv::Mat protos(nm, protoH * protoW, CV_32F, const_cast(protoData)); cv::Mat matmulRes = (masks * protos).t(); @@ -1106,12 +1106,12 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessPoseEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, int numKPS) { if (outputTensors.empty()) return {}; - const float* raw = outputTensors[0].GetTensorData(); + const float* raw = outputTensors[0].GetTensorMutableData(); const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (shape.size() < 3) return {}; @@ -1172,12 +1172,12 @@ namespace ANSCENTER { std::vector ONNXYOLO::postprocessPoseLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int numKPS, int maxDet) { if (outputTensors.empty()) return {}; - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (outputShape.size() < 3) return {}; @@ -1273,12 +1273,12 @@ namespace ANSCENTER { // ==================================================================== std::vector ONNXYOLO::postprocessClassify( - const 
std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, const cv::Size& imageSize) { if (outputTensors.empty()) return {}; - const float* raw = outputTensors[0].GetTensorData(); + const float* raw = outputTensors[0].GetTensorMutableData(); const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); if (shape.size() < 2) return {}; @@ -1339,7 +1339,7 @@ namespace ANSCENTER { // ==================================================================== /*static*/ Ort::Value ONNXYOLO::sliceBatchOutput( - const Ort::Value& batchTensor, + Ort::Value& batchTensor, int64_t batchIndex, const std::vector& fullShape, Ort::MemoryInfo& memInfo) @@ -1349,8 +1349,8 @@ namespace ANSCENTER { for (size_t d = 1; d < fullShape.size(); ++d) elemsPerImage *= fullShape[d]; - const float* batchData = batchTensor.GetTensorData(); - float* imageData = const_cast(batchData + batchIndex * elemsPerImage); + float* batchData = batchTensor.GetTensorMutableData(); + float* imageData = batchData + batchIndex * elemsPerImage; // Shape for single image: [1, D1, D2, ...] 
std::vector singleShape = fullShape; @@ -1504,7 +1504,7 @@ namespace ANSCENTER { // Class count mismatch — probe last channel for OBB angles bool likelyOBB = false; if (extra >= 2) { - const float* rawOutput = perImageOutputs[0].GetTensorData(); + const float* rawOutput = perImageOutputs[0].GetTensorMutableData(); int numSamp = std::min(numBoxes, 100); int angleCount = 0; for (int s = 0; s < numSamp; ++s) { @@ -1571,6 +1571,22 @@ namespace ANSCENTER { } } + bool ANSONNXYOLO::InitOrtEngine(ANSCENTER::EngineType engineType) { + try { + if (!FileExist(_modelFilePath)) { + _logger.LogError("ANSONNXYOLO::InitOrtEngine", + "Model file does not exist: " + _modelFilePath, __FILE__, __LINE__); + return false; + } + m_ortEngine = std::make_unique(_modelFilePath, engineType); + return true; + } + catch (const std::exception& e) { + _logger.LogFatal("ANSONNXYOLO::InitOrtEngine", e.what(), __FILE__, __LINE__); + return false; + } + } + bool ANSONNXYOLO::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, @@ -1807,9 +1823,12 @@ namespace ANSCENTER { const std::string& camera_id) { try { + ANS_DBG("ONNXYOLO", "DetectObjects: cam=%s acquiring mutex...", camera_id.c_str()); std::lock_guard lock(_mutex); + ANS_DBG("ONNXYOLO", "DetectObjects: mutex acquired, cam=%s", camera_id.c_str()); if (!m_ortEngine) { _logger.LogError("ANSONNXYOLO::DetectObjects", "ORT engine is null", __FILE__, __LINE__); + ANS_DBG("ONNXYOLO", "DetectObjects: ORT engine is null!"); return {}; } @@ -1880,6 +1899,7 @@ namespace ANSCENTER { return results; } catch (const std::exception& e) { + ANS_DBG("ONNXYOLO", "DetectObjects EXCEPTION: %s cam=%s", e.what(), camera_id.c_str()); _logger.LogFatal("ANSONNXYOLO::DetectObjects", e.what(), __FILE__, __LINE__); return {}; } diff --git a/modules/ANSODEngine/ANSONNXYOLO.h b/modules/ANSODEngine/ANSONNXYOLO.h index f6316cf..755fe3e 100644 --- a/modules/ANSODEngine/ANSONNXYOLO.h +++ 
b/modules/ANSODEngine/ANSONNXYOLO.h @@ -83,55 +83,55 @@ namespace ANSCENTER { // ── Detection postprocess ─────────────────────────────────────── std::vector postprocessEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold); std::vector postprocessLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet = 300); // ── OBB postprocess ───────────────────────────────────────────── std::vector postprocessOBBEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold); std::vector postprocessOBBLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet = 300); // ── Segmentation postprocess ──────────────────────────────────── std::vector postprocessSegEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold); std::vector postprocessSegLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int maxDet = 300); // ── Pose postprocess ──────────────────────────────────────────── std::vector postprocessPoseEndToEnd( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& 
classNames, float confThreshold, int numKPS); std::vector postprocessPoseLegacy( const cv::Size& originalImageSize, const cv::Size& resizedImageShape, - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, float confThreshold, float iouThreshold, int numKPS, int maxDet = 300); // ── Classification postprocess ────────────────────────────────── std::vector postprocessClassify( - const std::vector& outputTensors, + std::vector& outputTensors, const std::vector& classNames, const cv::Size& imageSize); @@ -154,7 +154,7 @@ namespace ANSCENTER { // ── Batch output slicing helper ──────────────────────────────── static Ort::Value sliceBatchOutput( - const Ort::Value& batchTensor, + Ort::Value& batchTensor, int64_t batchIndex, const std::vector& fullShape, Ort::MemoryInfo& memInfo); @@ -224,6 +224,9 @@ namespace ANSCENTER { // Initialise ORT engine from the resolved model path bool InitOrtEngine(); + public: + // Initialise ORT engine with explicit engine type override (e.g. 
CPU fallback for AMD iGPUs) + bool InitOrtEngine(ANSCENTER::EngineType engineType); }; } #endif diff --git a/modules/ANSODEngine/ANSYOLOOD.cpp b/modules/ANSODEngine/ANSYOLOOD.cpp index bbd2768..86ba3cd 100644 --- a/modules/ANSODEngine/ANSYOLOOD.cpp +++ b/modules/ANSODEngine/ANSYOLOOD.cpp @@ -218,6 +218,12 @@ namespace ANSCENTER std::min(6, static_cast(std::thread::hardware_concurrency()))); sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); + // DirectML REQUIRES these two settings per ORT documentation + if (ep.type == ANSCENTER::EngineType::AMD_GPU) { + sessionOptions.DisableMemPattern(); + sessionOptions.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + } + // ── Log available providers ───────────────────────────────────────── std::vector availableProviders = Ort::GetAvailableProviders(); std::cout << "Available Execution Providers:" << std::endl; @@ -519,7 +525,7 @@ namespace ANSCENTER { try { // Get raw output pointer (NO COPY!) - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const int numClasses = static_cast(outputShape[2]) - 5; @@ -647,11 +653,11 @@ namespace ANSCENTER } return result; } - std::vector YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,const std::vector& outputTensors,float confThreshold,float iouThreshold) + std::vector YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,std::vector& outputTensors,float confThreshold,float iouThreshold) { try { // Get raw output - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const size_t numFeatures = outputShape[1]; @@ -1448,7 +1454,7 @@ namespace ANSCENTER ); // 
Parse output - const float* rawOutput = outputTensors[0].GetTensorData(); + const float* rawOutput = outputTensors[0].GetTensorMutableData(); const std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const int dimensions = static_cast(outputShape[1]); // 4 + num_classes diff --git a/modules/ANSODEngine/ANSYOLOOD.h b/modules/ANSODEngine/ANSYOLOOD.h index 544de08..8bb3002 100644 --- a/modules/ANSODEngine/ANSYOLOOD.h +++ b/modules/ANSODEngine/ANSYOLOOD.h @@ -44,7 +44,7 @@ namespace ANSCENTER { cv::Mat preprocessv11(const cv::Mat& image, std::vector& blob, std::vector& inputTensorShape); std::vector postprocessing(const cv::Size& resizedImageShape,const cv::Size& originalImageShape,std::vector& outputTensors, const float& confThreshold, const float& iouThreshold); - std::vector postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,const std::vector& outputTensors,float confThreshold,float iouThreshold); + std::vector postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,std::vector& outputTensors,float confThreshold,float iouThreshold); BoundingBox scaleCoordsv11(const cv::Size& imageShape, BoundingBox coords,const cv::Size& imageOriginalShape, bool p_Clip); std::vector inputNodeNames; std::vector outputNodeNames; diff --git a/modules/ANSODEngine/dllmain.cpp b/modules/ANSODEngine/dllmain.cpp index 768be6c..e8a3415 100644 --- a/modules/ANSODEngine/dllmain.cpp +++ b/modules/ANSODEngine/dllmain.cpp @@ -355,6 +355,7 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase** // TEXTSCENSE = 6 //Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX + if ((modelType == 4) || // TensorRT (modelType == 14)|| // TensorRT Yolov10 (modelType == 22)|| // TensorRT Pose @@ -376,7 +377,6 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase** } - switch (detectionType) { case 0: 
modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION; diff --git a/modules/ANSUtilities/dllmain.cpp b/modules/ANSUtilities/dllmain.cpp index b68bd6c..e94454a 100644 --- a/modules/ANSUtilities/dllmain.cpp +++ b/modules/ANSUtilities/dllmain.cpp @@ -804,34 +804,54 @@ extern "C" ANSULT_API int ANSConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandl int len = (int)strlen(utf8Str); if (len == 0) return 0; const char bom[2] = { '\xFF', '\xFE' }; + + // Check if input contains \uXXXX escape sequences bool hasUnicodeEscapes = false; for (int i = 0; i + 1 < len; i++) { if (utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') { hasUnicodeEscapes = true; break; } } + if (hasUnicodeEscapes) { - std::string utf16le; - if (includeBOM) utf16le.assign(bom, 2); - utf16le.reserve(len * 2 + 2); + // Two-pass approach: first decode \uXXXX escapes to UTF-8, then convert to UTF-16LE. + // This correctly handles mixed input (raw UTF-8 + \uXXXX escapes) by producing + // clean UTF-8 first, then using MultiByteToWideChar for proper UTF-16LE conversion. 
+ std::string utf8Decoded; + utf8Decoded.reserve(len); for (int i = 0; i < len; ) { if (i + 5 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') { char hex[5] = { utf8Str[i + 2], utf8Str[i + 3], utf8Str[i + 4], utf8Str[i + 5], 0 }; - uint16_t cp = (uint16_t)strtoul(hex, nullptr, 16); - utf16le += static_cast(cp & 0xFF); - utf16le += static_cast((cp >> 8) & 0xFF); + uint32_t cp = (uint32_t)strtoul(hex, nullptr, 16); + // Encode codepoint as UTF-8 + if (cp <= 0x7F) { + utf8Decoded += static_cast(cp); + } else if (cp <= 0x7FF) { + utf8Decoded += static_cast(0xC0 | (cp >> 6)); + utf8Decoded += static_cast(0x80 | (cp & 0x3F)); + } else { + utf8Decoded += static_cast(0xE0 | (cp >> 12)); + utf8Decoded += static_cast(0x80 | ((cp >> 6) & 0x3F)); + utf8Decoded += static_cast(0x80 | (cp & 0x3F)); + } i += 6; } else { - utf16le += utf8Str[i]; - utf16le += '\0'; + utf8Decoded += utf8Str[i]; i++; } } - int size = (int)utf16le.size(); - MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar)); + // Now convert the clean UTF-8 to UTF-16LE + std::string converted = ANSCENTER::ANSUtilities::ConvertUTF8ToUTF16LE(utf8Decoded); + if (converted.empty()) return 0; + int dataSize = static_cast(converted.size()); + int bomSize = includeBOM ? 
2 : 0; + int totalSize = bomSize + dataSize; + MgErr error = DSSetHandleSize(result, sizeof(int32) + totalSize * sizeof(uChar)); if (error != noErr) return -2; - (*result)->cnt = size; - memcpy((*result)->str, utf16le.data(), size); + (*result)->cnt = totalSize; + if (includeBOM) memcpy((*result)->str, bom, 2); + memcpy((*result)->str + bomSize, converted.data(), dataSize); return 1; } + std::string converted = ANSCENTER::ANSUtilities::ConvertUTF8ToUTF16LE(utf8Str); if (converted.empty()) return 0; int dataSize = static_cast(converted.size()); @@ -850,23 +870,31 @@ extern "C" ANSULT_API int ANSConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandl extern "C" ANSULT_API int ANSConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result) { try { if (!utf16leBytes || byteLen <= 0 || !result) return -1; - bool isUtf16le = (byteLen >= 2 && byteLen % 2 == 0); + const unsigned char* data = utf16leBytes; + int dataLen = byteLen; + // Strip BOM (FF FE) if present + if (dataLen >= 2 && data[0] == 0xFF && data[1] == 0xFE) { + data += 2; + dataLen -= 2; + } + if (dataLen <= 0) return 0; + bool isUtf16le = (dataLen >= 2 && dataLen % 2 == 0); if (isUtf16le) { bool isAscii = true; - for (int i = 1; i < byteLen; i += 2) { - if (utf16leBytes[i] != 0x00) { isAscii = false; break; } + for (int i = 1; i < dataLen; i += 2) { + if (data[i] != 0x00) { isAscii = false; break; } } if (isAscii) { - int asciiLen = byteLen / 2; + int asciiLen = dataLen / 2; MgErr error = DSSetHandleSize(result, sizeof(int32) + asciiLen * sizeof(uChar)); if (error != noErr) return -2; (*result)->cnt = asciiLen; - for (int i = 0; i < asciiLen; i++) (*result)->str[i] = utf16leBytes[i * 2]; + for (int i = 0; i < asciiLen; i++) (*result)->str[i] = data[i * 2]; return 1; } } std::string converted = ANSCENTER::ANSUtilities::ConvertUTF16LEToUTF8( - reinterpret_cast(utf16leBytes), byteLen); + reinterpret_cast(data), dataLen); if (converted.empty()) return 0; int size = 
static_cast(converted.size()); MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar)); @@ -909,6 +937,168 @@ extern "C" ANSULT_API int ANSConvertUTF16LEToUnicodeEscapes(const unsigned char* catch (...) { return -1; } } +// Helper: copy a std::string into a LabVIEW LStrHandle. +static int CopyStringToLStrHandle(LStrHandle handle, const std::string& str) { + if (str.empty()) return 0; + int size = static_cast(str.size()); + MgErr error = DSSetHandleSize(handle, sizeof(int32) + size * sizeof(uChar)); + if (error != noErr) return -2; + (*handle)->cnt = size; + memcpy((*handle)->str, str.data(), size); + return 1; +} + +// Helper: copy raw bytes into a LabVIEW LStrHandle. +static int CopyBytesToLStrHandle(LStrHandle handle, const unsigned char* data, int len) { + if (!data || len <= 0) return 0; + MgErr error = DSSetHandleSize(handle, sizeof(int32) + len * sizeof(uChar)); + if (error != noErr) return -2; + (*handle)->cnt = len; + memcpy((*handle)->str, data, len); + return 1; +} + +// Helper: detect if LabVIEW LStrHandle contains UTF-16LE (BOM or 0x00 bytes). +static bool DetectUTF16LE(const unsigned char* data, int byteLen) { + if (byteLen >= 2 && data[0] == 0xFF && data[1] == 0xFE) return true; + for (int i = 0; i < byteLen; i++) { + if (data[i] == 0x00) return true; + } + return false; +} + +// Helper: strip BOM from UTF-16LE data. Returns pointer and adjusts length. +static const unsigned char* StripBOM(const unsigned char* data, int& len) { + if (len >= 2 && data[0] == 0xFF && data[1] == 0xFE) { data += 2; len -= 2; } + return data; +} + +// LStrHandle-safe version: reads raw bytes from LabVIEW LStrHandle directly. +// Two paths: +// 1. Pure UTF-8 (no BOM, no 0x00 bytes, valid UTF-8) → pass through to output as-is +// 2. 
Contains UTF-16LE (BOM or 0x00 bytes) → RepairLabVIEWUTF16LE (normalizes +// mixed UTF-8/UTF-16LE + lone spaces to clean UTF-16LE) → convert to UTF-8 +extern "C" ANSULT_API int ANSConvertUTF16LEToUTF8_LV(LStrHandle input, LStrHandle result) { + try { + if (!input || !result) return -1; + int byteLen = (*input)->cnt; + if (byteLen <= 0) return 0; + + // Copy input data first — input and result may be the same LStrHandle + std::vector inputCopy(byteLen); + memcpy(inputCopy.data(), (*input)->str, byteLen); + const unsigned char* data = inputCopy.data(); + + if (DetectUTF16LE(data, byteLen)) { + // Path 2: UTF-16LE detected — repair mixed encoding, then convert to UTF-8 + int convLen = byteLen; + const unsigned char* convData = StripBOM(data, convLen); + if (convLen <= 0) return 0; + + auto repaired = ANSCENTER::ANSUtilities::RepairLabVIEWUTF16LE(convData, convLen); + std::string converted = ANSCENTER::ANSUtilities::ConvertUTF16LEToUTF8( + reinterpret_cast(repaired.data()), static_cast(repaired.size())); + return CopyStringToLStrHandle(result, converted); + } + + if (ANSCENTER::ANSUtilities::IsValidUTF8(data, byteLen)) { + // Path 1: Pure UTF-8 — pass through as-is + return CopyBytesToLStrHandle(result, data, byteLen); + } + + // Fallback: not UTF-16LE, not valid UTF-8 — assume system codepage +#ifdef _WIN32 + int wideLen = MultiByteToWideChar(CP_ACP, 0, + reinterpret_cast(data), byteLen, nullptr, 0); + if (wideLen > 0) { + std::wstring wideStr(wideLen, 0); + MultiByteToWideChar(CP_ACP, 0, + reinterpret_cast(data), byteLen, &wideStr[0], wideLen); + int utf8Len = WideCharToMultiByte(CP_UTF8, 0, + wideStr.c_str(), wideLen, nullptr, 0, nullptr, nullptr); + if (utf8Len > 0) { + std::string utf8Str(utf8Len, 0); + WideCharToMultiByte(CP_UTF8, 0, + wideStr.c_str(), wideLen, &utf8Str[0], utf8Len, nullptr, nullptr); + return CopyStringToLStrHandle(result, utf8Str); + } + } +#endif + return CopyBytesToLStrHandle(result, data, byteLen); + } + catch (...) 
{ return -1; } +} + +// LStrHandle-safe version with auto-detection. +// Two paths: +// 1. Pure UTF-8 → convert UTF-8 to Unicode escapes (\uXXXX) +// 2. Contains UTF-16LE → RepairLabVIEWUTF16LE → convert to Unicode escapes +extern "C" ANSULT_API int ANSConvertUTF16LEToUnicodeEscapes_LV(LStrHandle input, LStrHandle result) { + try { + if (!input || !result) return -1; + int byteLen = (*input)->cnt; + if (byteLen <= 0) return 0; + + // Copy input data first — input and result may be the same LStrHandle + std::vector inputCopy(byteLen); + memcpy(inputCopy.data(), (*input)->str, byteLen); + const unsigned char* data = inputCopy.data(); + + std::string escaped; + + if (DetectUTF16LE(data, byteLen)) { + // Path 2: UTF-16LE detected — repair mixed encoding, then convert to escapes + int convLen = byteLen; + const unsigned char* convData = StripBOM(data, convLen); + if (convLen <= 0) return 0; + + auto repaired = ANSCENTER::ANSUtilities::RepairLabVIEWUTF16LE(convData, convLen); + + // Re-add BOM for ConvertUTF16LEToUnicodeEscapes (it expects optional BOM) + std::vector withBom; + withBom.reserve(2 + repaired.size()); + withBom.push_back(0xFF); + withBom.push_back(0xFE); + withBom.insert(withBom.end(), repaired.begin(), repaired.end()); + + escaped = ANSCENTER::ANSUtilities::ConvertUTF16LEToUnicodeEscapes( + reinterpret_cast(withBom.data()), static_cast(withBom.size())); + } + else { + // Path 1: No UTF-16LE — get UTF-8, then convert to Unicode escapes + std::string utf8Str; + if (ANSCENTER::ANSUtilities::IsValidUTF8(data, byteLen)) { + utf8Str.assign(reinterpret_cast(data), byteLen); + } +#ifdef _WIN32 + else { + int wideLen = MultiByteToWideChar(CP_ACP, 0, + reinterpret_cast(data), byteLen, nullptr, 0); + if (wideLen > 0) { + std::wstring wideStr(wideLen, 0); + MultiByteToWideChar(CP_ACP, 0, + reinterpret_cast(data), byteLen, &wideStr[0], wideLen); + int utf8Len = WideCharToMultiByte(CP_UTF8, 0, + wideStr.c_str(), wideLen, nullptr, 0, nullptr, nullptr); + if (utf8Len > 0) 
{ + utf8Str.resize(utf8Len); + WideCharToMultiByte(CP_UTF8, 0, + wideStr.c_str(), wideLen, &utf8Str[0], utf8Len, nullptr, nullptr); + } + } + } +#endif + if (utf8Str.empty()) { + utf8Str.assign(reinterpret_cast(data), byteLen); + } + escaped = ANSCENTER::ANSUtilities::ConvertUTF8ToUnicodeEscapes(utf8Str); + } + + return CopyStringToLStrHandle(result, escaped); + } + catch (...) { return -1; } +} + extern "C" ANSULT_API int ANSConvertUnicodeEscapesToUTF8(const char* escapedStr, LStrHandle result) { try { if (!escapedStr || !result) return -1; diff --git a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp index 2c86de3..b56927b 100644 --- a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp +++ b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp @@ -29,6 +29,7 @@ #include #include #include +#include "EPLoader.h" template T GetOptionalValue(const boost::property_tree::ptree& pt, std::string attribute, T defaultValue) { @@ -664,9 +665,21 @@ struct GpuSnapshot { size_t usedMiB = 0; }; +// Safe check: is CUDA runtime available? (prevents crash on CPU-only PCs) +static bool IsCudaAvailable() { + static int cached = -1; + if (cached < 0) { + HMODULE h = LoadLibraryA("nvcuda.dll"); + cached = (h != nullptr) ? 
1 : 0; + if (h) FreeLibrary(h); + } + return cached == 1; +} + // Query current GPU VRAM usage for all devices static std::vector QueryGpuVram() { std::vector snapshots; + if (!IsCudaAvailable()) return snapshots; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return snapshots; for (int i = 0; i < deviceCount; i++) { @@ -693,6 +706,7 @@ static std::vector QueryGpuVram() { // Measure per-GPU free VRAM (returns array indexed by device) static std::vector GetPerGpuFreeMiB() { std::vector result; + if (!IsCudaAvailable()) return result; int deviceCount = 0; if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) return result; int prevDevice; @@ -712,6 +726,11 @@ static ThreadSafeLog g_log; // Log GPU info using CUDA runtime static void LogGpuInfo() { + if (!IsCudaAvailable()) { + g_log.add("No NVIDIA GPU detected — running in CPU mode"); + printf("[GPU] No NVIDIA GPU detected — running in CPU mode\n"); + return; + } int deviceCount = 0; cudaError_t err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess) { @@ -749,6 +768,12 @@ static void LogGpuInfo() { printf("============================================================\n"); } +// Global inference mutex: serializes inference on non-NVIDIA GPUs (DirectML/OpenVINO). +// DirectML is not thread-safe when multiple ORT sessions run concurrently on the +// same integrated GPU — causes access violations on 4K frames. +// On NVIDIA, each task has its own CUDA context so no serialization needed. +static std::mutex g_inferenceMutex; + // Worker thread: reads RTSP frames and runs ALPR inference // RTSP client and ALPR engine are pre-created on the main thread to avoid // race conditions in CreateANSRTSPHandle / CreateANSALPRHandle. 
@@ -845,12 +870,18 @@ static void ALPRWorkerThread(int taskId, if (grabMs > maxGrabMs) maxGrabMs = grabMs; // Run ALPR inference + bool isNvidia = (ANSCENTER::EPLoader::Current().type == ANSCENTER::EngineType::NVIDIA_GPU); + fprintf(stderr, "[Worker T%d] frame %d: calling inference %dx%d...\n", + taskId, state.frameCount + 1, framePtr->cols, framePtr->rows); auto infStart = std::chrono::steady_clock::now(); std::string lpnResult, jpegImage; - // Pass framePtr directly — NOT a copy. ANSGpuFrameRegistry::lookup() - // matches by cv::Mat* pointer, so `new cv::Mat(*framePtr)` would create - // a different pointer the registry doesn't know, breaking NV12 zero-copy. - ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); + { + std::unique_lock infLock(g_inferenceMutex, std::defer_lock); + if (!isNvidia) infLock.lock(); + ANSALPR_RunInferenceComplete_CPP(&alprHandle, &framePtr, cameraId.c_str(), 0, 0, lpnResult, jpegImage); + } + fprintf(stderr, "[Worker T%d] frame %d: inference done, result len=%zu\n", + taskId, state.frameCount + 1, lpnResult.size()); // Release stream lock — inference is done, CHAOS can now safely destroy. streamLock.unlock(); @@ -950,25 +981,454 @@ static void ALPRWorkerThread(int taskId, g_log.add(prefix + " Worker loop exited"); } +// ============================================================================= +// ANSLPR_SingleTask_Test — 1 stream, 1 AI task. For isolating DirectML/ORT +// issues on non-NVIDIA GPUs. If this works but 2-task crashes, it's concurrency. 
+// ============================================================================= +int ANSLPR_SingleTask_Test() { + ANSCENTER::ANSOPENCV::InitCameraNetwork(); + g_log.init(); + + printf("\n"); + printf("============================================================\n"); + printf(" ANSLPR Single-Task Test — 1 Stream, 1 AI Task\n"); + printf(" Press ESC to stop\n"); + printf(" Log file: %s\n", LOG_FILE_PATH); + printf("============================================================\n\n"); + + g_log.add("============================================================"); + g_log.add(" ANSLPR Single-Task Test — 1 Stream, 1 AI Task"); + g_log.add("============================================================"); + + const std::string streamUrl = "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0"; + g_log.add("Stream: " + streamUrl); + + // --- Create RTSP client --- + ANSCENTER::ANSRTSPClient* rtspClient = nullptr; + printf("[Stream0] Creating RTSP handle...\n"); + int rtspResult = CreateANSRTSPHandle(&rtspClient, "", "", "", streamUrl.c_str()); + if (rtspResult != 1 || rtspClient == nullptr) { + printf("[Stream0] FAILED to create RTSP handle\n"); + ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); + return -1; + } + SetRTSPImageQuality(&rtspClient, 0); + SetRTSPHWDecoding(&rtspClient, -1); // Force software decoding + StartRTSP(&rtspClient); + g_log.add("[Stream0] RTSP started (software decode)"); + + // --- Create single ALPR engine --- + ANSCENTER::ANSALPR* alprHandle = nullptr; + std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; + printf("[Task0] Creating ALPR handle...\n"); + auto engineStart = std::chrono::steady_clock::now(); + int createResult = CreateANSALPRHandle(&alprHandle, "", modelZipFile.c_str(), "", + 1, 0.5, 0.5, 0.5); + if (createResult != 1 || alprHandle == nullptr) { + printf("[Task0] FAILED to create ALPR handle (result=%d)\n", createResult); + StopRTSP(&rtspClient); 
ReleaseANSRTSPHandle(&rtspClient); + ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); + return -1; + } + + printf("[Task0] Loading ALPR engine...\n"); + int loadResult = LoadANSALPREngineHandle(&alprHandle); + auto engineEnd = std::chrono::steady_clock::now(); + double loadMs = std::chrono::duration(engineEnd - engineStart).count(); + if (loadResult != 1) { + printf("[Task0] FAILED to load ALPR engine (result=%d)\n", loadResult); + ReleaseANSALPRHandle(&alprHandle); + StopRTSP(&rtspClient); ReleaseANSRTSPHandle(&rtspClient); + ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); + return -1; + } + printf("[Task0] Engine loaded in %.0f ms\n", loadMs); + g_log.add("[Task0] Engine loaded in " + std::to_string((int)loadMs) + " ms"); + + // --- Single-task worker + display --- + TaskState state; + state.engineLoaded = true; + state.streamOk = true; + state.statusMsg = "Running"; + + std::mutex streamGuard; + std::thread worker(ALPRWorkerThread, 0, &rtspClient, &streamGuard, alprHandle, std::ref(state)); + + const int cellW = 800, cellH = 600; + const int logPanelH = 80; + std::string windowTitle = "ANSLPR Single-Task Test"; + cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); + cv::resizeWindow(windowTitle, cellW, cellH + logPanelH); + + auto testStart = std::chrono::steady_clock::now(); + + while (g_running.load()) { + cv::Mat canvas(cellH + logPanelH, cellW, CV_8UC3, cv::Scalar(30, 30, 30)); + + cv::Mat cell; + double fps = 0, infMs = 0; + int fCount = 0, dCount = 0; + std::string lastPlate; + { + std::lock_guard lk(state.mtx); + if (!state.displayFrame.empty()) + cv::resize(state.displayFrame, cell, cv::Size(cellW, cellH)); + fps = state.fps; + infMs = state.inferenceMs; + fCount = state.frameCount; + dCount = state.detectionCount; + lastPlate = state.lastPlate; + } + + if (cell.empty()) + cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); + + cv::rectangle(cell, cv::Rect(0, cellH - 40, cellW, 40), cv::Scalar(0, 0, 0), cv::FILLED); + char bar[256]; + snprintf(bar, 
sizeof(bar), "T0 | %.1f FPS | %.0fms | F:%d | D:%d | %s", + fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); + cv::putText(cell, bar, cv::Point(5, cellH - 12), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1); + cell.copyTo(canvas(cv::Rect(0, 0, cellW, cellH))); + + cv::Mat logPanel = canvas(cv::Rect(0, cellH, cellW, logPanelH)); + logPanel.setTo(cv::Scalar(20, 20, 20)); + auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); + char header[256]; + snprintf(header, sizeof(header), "Elapsed: %.0fs | 1 camera, 1 AI task | %.1f FPS | Press ESC to stop", + elapsed, fps); + cv::putText(logPanel, header, cv::Point(10, 20), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); + + cv::imshow(windowTitle, canvas); + if (cv::waitKey(30) == 27) { + g_log.add("ESC pressed — stopping..."); + printf("\nESC pressed — stopping...\n"); + g_running.store(false); + } + } + + if (worker.joinable()) worker.join(); + + printf("\n============================================================\n"); + printf(" FINAL SUMMARY\n"); + printf(" Frames: %d | Detections: %d | FPS: %.1f | InfMs: %.0f\n", + state.frameCount, state.detectionCount, state.fps, state.inferenceMs); + printf("============================================================\n"); + + ReleaseANSALPRHandle(&alprHandle); + StopRTSP(&rtspClient); + ReleaseANSRTSPHandle(&rtspClient); + g_log.close(); + cv::destroyAllWindows(); + ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); + return 0; +} + +// ============================================================================= +// ANSLPR_CPU_StressTest — Lightweight 2-task stress test for CPU-only PCs +// Uses ANSALPR_OD (engineType=1) which auto-falls-back to ONNX Runtime on CPU. +// No VRAM tracking, no NVDEC alignment, no chaos thread. 
+// ============================================================================= +int ANSLPR_CPU_StressTest() { + ANSCENTER::ANSOPENCV::InitCameraNetwork(); + g_log.init(); + + const int NUM_STREAMS = 2; + const int NUM_TASKS = 2; + + printf("\n"); + printf("============================================================\n"); + printf(" ANSLPR CPU Stress Test — %d Parallel ALPR Tasks\n", NUM_TASKS); + printf(" Press ESC to stop\n"); + printf(" Log file: %s\n", LOG_FILE_PATH); + printf("============================================================\n\n"); + + g_log.add("============================================================"); + g_log.add(" ANSLPR CPU Stress Test — " + std::to_string(NUM_TASKS) + " Tasks"); + g_log.add("============================================================"); + + // --- RTSP URLs (2 camera streams) --- + const std::string streamUrls[NUM_STREAMS] = { + "rtsp://admin:admin123@103.156.0.133:8010/cam/realmonitor?channel=1&subtype=0", + "rtsp://nhathuocngoclinh.zapto.org:600/rtsp/streaming?channel=01&subtype=0" + }; + const int taskStreamMap[NUM_TASKS] = { 0, 1 }; + + for (int i = 0; i < NUM_STREAMS; i++) + g_log.add("Stream " + std::to_string(i) + ": " + streamUrls[i]); + + // --- Task states --- + TaskState taskStates[NUM_TASKS]; + + // --- Create RTSP clients (software decoding) --- + ANSCENTER::ANSRTSPClient* rtspClients[NUM_STREAMS] = {}; + for (int s = 0; s < NUM_STREAMS; s++) { + printf("[Stream%d] Creating RTSP handle...\n", s); + int result = CreateANSRTSPHandle(&rtspClients[s], "", "", "", streamUrls[s].c_str()); + if (result != 1 || rtspClients[s] == nullptr) { + printf("[Stream%d] FAILED to create RTSP handle\n", s); + g_log.add("[Stream" + std::to_string(s) + "] RTSP create FAILED"); + rtspClients[s] = nullptr; + continue; + } + SetRTSPImageQuality(&rtspClients[s], 0); + SetRTSPHWDecoding(&rtspClients[s], -1); // HW_DECODING_DISABLE: force software decoding + StartRTSP(&rtspClients[s]); + g_log.add("[Stream" + std::to_string(s) + "] 
RTSP started (software decode)"); + } + + // --- Create ALPR engines (engineType=1 → ANSALPR_OD, auto CPU/GPU) --- + ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {}; + std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; + int engineType = 1; // ANSALPR_OD: auto CPU/GPU + double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; + + for (int i = 0; i < NUM_TASKS; i++) { + char tag[32]; + snprintf(tag, sizeof(tag), "[Task%d]", i); + int streamIdx = taskStreamMap[i]; + if (rtspClients[streamIdx] == nullptr) { + printf("%s Skipped — Stream%d not available\n", tag, streamIdx); + continue; + } + + { + std::lock_guard lk(taskStates[i].mtx); + taskStates[i].streamOk = true; + taskStates[i].statusMsg = "Loading ALPR engine..."; + } + + printf("%s Creating ALPR handle...\n", tag); + auto engineStart = std::chrono::steady_clock::now(); + int createResult = CreateANSALPRHandle(&alprHandles[i], "", modelZipFile.c_str(), "", + engineType, detThresh, ocrThresh, colThresh); + if (createResult != 1 || alprHandles[i] == nullptr) { + printf("%s FAILED to create ALPR handle (result=%d)\n", tag, createResult); + g_log.add(std::string(tag) + " ALPR create FAILED"); + continue; + } + + printf("%s Loading ALPR engine...\n", tag); + int loadResult = LoadANSALPREngineHandle(&alprHandles[i]); + auto engineEnd = std::chrono::steady_clock::now(); + double loadMs = std::chrono::duration(engineEnd - engineStart).count(); + + if (loadResult != 1) { + printf("%s FAILED to load ALPR engine (result=%d)\n", tag, loadResult); + g_log.add(std::string(tag) + " Engine load FAILED"); + ReleaseANSALPRHandle(&alprHandles[i]); + alprHandles[i] = nullptr; + continue; + } + + char buf[256]; + snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms (Stream%d)", tag, loadMs, streamIdx); + printf("%s\n", buf); + g_log.add(buf); + + { + std::lock_guard lk(taskStates[i].mtx); + taskStates[i].engineLoaded = true; + taskStates[i].statusMsg = "Running"; + } + } + + // 
--- Launch worker threads --- + std::mutex streamGuards[NUM_STREAMS]; + std::thread workers[NUM_TASKS]; + for (int i = 0; i < NUM_TASKS; i++) { + int streamIdx = taskStreamMap[i]; + if (rtspClients[streamIdx] && alprHandles[i]) { + workers[i] = std::thread(ALPRWorkerThread, i, + &rtspClients[streamIdx], + &streamGuards[streamIdx], + alprHandles[i], + std::ref(taskStates[i])); + } + } + + // --- Display loop --- + const int cellW = 640, cellH = 480; + const int logPanelH = 120; + const int gridCols = 2, gridRows = 1; + std::string windowTitle = "ANSLPR CPU Stress Test"; + cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); + cv::resizeWindow(windowTitle, cellW * gridCols, cellH * gridRows + logPanelH); + + auto testStart = std::chrono::steady_clock::now(); + + while (g_running.load()) { + cv::Mat canvas(cellH * gridRows + logPanelH, cellW * gridCols, CV_8UC3, cv::Scalar(30, 30, 30)); + + for (int i = 0; i < NUM_TASKS; i++) { + int col = i % gridCols, row = i / gridCols; + cv::Rect roi(col * cellW, row * cellH, cellW, cellH); + + cv::Mat cell; + double fps = 0, infMs = 0; + int fCount = 0, dCount = 0; + std::string statusMsg, lastPlate; + bool engineLoaded = false; + { + std::lock_guard lk(taskStates[i].mtx); + if (!taskStates[i].displayFrame.empty()) + cv::resize(taskStates[i].displayFrame, cell, cv::Size(cellW, cellH)); + fps = taskStates[i].fps; + infMs = taskStates[i].inferenceMs; + fCount = taskStates[i].frameCount; + dCount = taskStates[i].detectionCount; + statusMsg = taskStates[i].statusMsg; + lastPlate = taskStates[i].lastPlate; + engineLoaded = taskStates[i].engineLoaded; + } + + if (cell.empty()) { + cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40)); + cv::putText(cell, "Task " + std::to_string(i) + ": " + statusMsg, + cv::Point(20, cellH / 2), + cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2); + } + + // Status bar + cv::rectangle(cell, cv::Rect(0, cellH - 40, cellW, 40), cv::Scalar(0, 0, 0), cv::FILLED); + char bar[256]; + 
snprintf(bar, sizeof(bar), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s", + i, taskStreamMap[i], fps, infMs, fCount, dCount, + lastPlate.empty() ? "-" : lastPlate.c_str()); + cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255); + cv::putText(cell, bar, cv::Point(5, cellH - 12), + cv::FONT_HERSHEY_SIMPLEX, 0.45, barColor, 1); + cell.copyTo(canvas(roi)); + } + + // Grid line + if (gridCols > 1) + cv::line(canvas, cv::Point(cellW, 0), cv::Point(cellW, cellH * gridRows), + cv::Scalar(100, 100, 100), 1); + + // Log panel + cv::Rect logRoi(0, cellH * gridRows, cellW * gridCols, logPanelH); + cv::Mat logPanel = canvas(logRoi); + logPanel.setTo(cv::Scalar(20, 20, 20)); + + auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - testStart).count(); + double totalFps = 0; + for (int i = 0; i < NUM_TASKS; i++) { + std::lock_guard lk(taskStates[i].mtx); + totalFps += taskStates[i].fps; + } + char header[256]; + snprintf(header, sizeof(header), + "Elapsed: %.0fs | %d cameras, %d AI tasks | Total: %.1f FPS | Press ESC to stop", + elapsed, NUM_STREAMS, NUM_TASKS, totalFps); + cv::putText(logPanel, header, cv::Point(10, 20), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 0), 1); + + // Per-task summary + for (int i = 0; i < NUM_TASKS; i++) { + std::lock_guard lk(taskStates[i].mtx); + char tLine[256]; + snprintf(tLine, sizeof(tLine), + "T%d(S%d): FPS=%.1f Inf=%.0fms Frames=%d Det=%d", + i, taskStreamMap[i], taskStates[i].fps, taskStates[i].inferenceMs, + taskStates[i].frameCount, taskStates[i].detectionCount); + cv::putText(logPanel, tLine, cv::Point(10, 42 + i * 18), + cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(200, 200, 200), 1); + } + + // Recent log + auto recentLogs = g_log.getRecent(3); + int logY = 42 + NUM_TASKS * 18 + 5; + for (const auto& line : recentLogs) { + if (logY > logPanelH - 5) break; + std::string display = (line.size() > 120) ? line.substr(0, 117) + "..." 
: line; + cv::putText(logPanel, display, cv::Point(10, logY), + cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(140, 140, 140), 1); + logY += 15; + } + + cv::imshow(windowTitle, canvas); + int key = cv::waitKey(30); + if (key == 27) { + g_log.add("ESC pressed — stopping..."); + printf("\nESC pressed — stopping...\n"); + g_running.store(false); + } + } + + // --- Wait for workers --- + for (int i = 0; i < NUM_TASKS; i++) { + if (workers[i].joinable()) workers[i].join(); + } + + // --- Final summary --- + double totalElapsed = std::chrono::duration( + std::chrono::steady_clock::now() - testStart).count(); + printf("\n============================================================\n"); + printf(" FINAL SUMMARY (runtime: %.0fs)\n", totalElapsed); + printf("============================================================\n"); + double totalFpsFinal = 0; + for (int i = 0; i < NUM_TASKS; i++) { + char buf[256]; + snprintf(buf, sizeof(buf), " Task %d (Stream %d): %d frames, %d detections, FPS=%.1f, InfMs=%.0f", + i, taskStreamMap[i], taskStates[i].frameCount, taskStates[i].detectionCount, + taskStates[i].fps, taskStates[i].inferenceMs); + printf("%s\n", buf); + g_log.add(buf); + totalFpsFinal += taskStates[i].fps; + } + printf(" Total throughput: %.1f FPS\n", totalFpsFinal); + printf("============================================================\n"); + + // --- Cleanup --- + for (int i = 0; i < NUM_TASKS; i++) { + if (alprHandles[i]) ReleaseANSALPRHandle(&alprHandles[i]); + } + for (int s = 0; s < NUM_STREAMS; s++) { + if (rtspClients[s]) { + StopRTSP(&rtspClients[s]); + ReleaseANSRTSPHandle(&rtspClients[s]); + } + } + + g_log.close(); + cv::destroyAllWindows(); + ANSCENTER::ANSOPENCV::DeinitCameraNetwork(); + return 0; +} + int ANSLPR_MultiGPU_StressTest() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); // --- Initialize log file --- g_log.init(); + printf("\n"); + // --- Auto-detect GPU availability (safe on CPU-only PCs without CUDA runtime) --- + int gpuCount = 0; + bool hasGpu = false; 
+ if (IsCudaAvailable()) { + cudaGetDeviceCount(&gpuCount); + hasGpu = (gpuCount > 0); + } + const char* modeStr = hasGpu ? "GPU (NVIDIA CUDA)" : "CPU (Software Decoding)"; + printf("\n"); printf("============================================================\n"); - printf(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks\n"); + printf(" ANSLPR Multi-Engine Stress Test — 5 Parallel ALPR Tasks\n"); + printf(" Mode: %s\n", modeStr); printf(" (4 cameras, 5 AI tasks — Task 4 shares Stream 2)\n"); printf(" Press ESC to stop\n"); printf(" Log file: %s\n", LOG_FILE_PATH); printf("============================================================\n\n"); g_log.add("============================================================"); - g_log.add(" ANSLPR Multi-GPU Stress Test — 5 Parallel ALPR Tasks"); + g_log.add(" ANSLPR Multi-Engine Stress Test — 5 Parallel ALPR Tasks"); + g_log.add(" Mode: " + std::string(modeStr)); g_log.add("============================================================"); - // --- Log GPU info for diagnostics --- + // --- Log GPU info for diagnostics (safe on CPU — prints "no GPU found") --- LogGpuInfo(); // --- RTSP URLs (4 independent camera streams) --- @@ -1027,7 +1487,7 @@ int ANSLPR_MultiGPU_StressTest() { continue; } SetRTSPImageQuality(&rtspClients[s], 0); - SetRTSPHWDecoding(&rtspClients[s], 7); // HW_DECODING_CUDA: force CUDA/NVDEC zero-copy path + if (hasGpu) SetRTSPHWDecoding(&rtspClients[s], 7); // CUDA HW decode only with GPU StartRTSP(&rtspClients[s]); g_log.add("[Stream" + std::to_string(s) + "] RTSP started"); } @@ -1040,7 +1500,7 @@ int ANSLPR_MultiGPU_StressTest() { // ========================================================================= ANSCENTER::ANSALPR* alprHandles[NUM_TASKS] = {}; std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; - int engineType = 1; // NVIDIA_GPU + int engineType = 1; // ANSALPR_OD: auto-detects GPU/CPU, uses ONNX Runtime on CPU double detThresh = 0.5, ocrThresh = 
0.5, colThresh = 0.5;

     for (int i = 0; i < NUM_TASKS; i++) {
@@ -1074,11 +1534,12 @@ int ANSLPR_MultiGPU_StressTest() {
             continue;
         }

-        printf("%s Loading ALPR engine (TensorRT)...\n", tag);
+        printf("%s Loading ALPR engine (%s)...\n", tag, hasGpu ? "TensorRT" : "CPU");
         g_log.add(std::string(tag) + " Loading ALPR engine...");

-        // Snapshot VRAM before engine load to measure consumption
-        auto vramBefore = GetPerGpuFreeMiB();
+        // Snapshot VRAM before engine load to measure consumption (GPU only)
+        std::vector<size_t> vramBefore;
+        if (hasGpu) vramBefore = GetPerGpuFreeMiB();

         int loadResult = LoadANSALPREngineHandle(&alprHandles[i]);
         auto engineEnd = std::chrono::steady_clock::now();
@@ -1094,40 +1555,47 @@ int ANSLPR_MultiGPU_StressTest() {
             continue;
         }

-        // Snapshot VRAM after engine load — find which GPU lost the most VRAM
-        auto vramAfter = GetPerGpuFreeMiB();
-        int bestGpu = 0;
+        int bestGpu = -1;
         size_t maxDelta = 0;
-        size_t gpuCount = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size();
-        for (size_t g = 0; g < gpuCount; g++) {
-            size_t delta = (vramBefore[g] > vramAfter[g]) ? (vramBefore[g] - vramAfter[g]) : 0;
-            if (delta > maxDelta) {
-                maxDelta = delta;
-                bestGpu = (int)g;
+
+        if (hasGpu) {
+            // Snapshot VRAM after engine load — find which GPU lost the most VRAM
+            auto vramAfter = GetPerGpuFreeMiB();
+            size_t gpuCnt = vramBefore.size() < vramAfter.size() ? vramBefore.size() : vramAfter.size();
+            bestGpu = 0;
+            for (size_t g = 0; g < gpuCnt; g++) {
+                size_t delta = (vramBefore[g] > vramAfter[g]) ?
(vramBefore[g] - vramAfter[g]) : 0; + if (delta > maxDelta) { + maxDelta = delta; + bestGpu = (int)g; + } } - } - char buf[512]; - snprintf(buf, sizeof(buf), - "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Stream%d)", - tag, loadMs, bestGpu, maxDelta, streamIdx); - printf("%s\n", buf); - g_log.add(buf); + char buf[512]; + snprintf(buf, sizeof(buf), + "%s Engine loaded in %.0f ms | GPU[%d] | VRAM used: %zu MiB (Stream%d)", + tag, loadMs, bestGpu, maxDelta, streamIdx); + printf("%s\n", buf); + g_log.add(buf); - // Log per-GPU VRAM state after this engine load - for (size_t g = 0; g < vramAfter.size(); g++) { - size_t total = 0; - if (g < vramBefore.size()) { - // Compute total from free + used + // Log per-GPU VRAM state after this engine load + for (size_t g = 0; g < vramAfter.size(); g++) { + size_t total = 0; auto gpus = QueryGpuVram(); if (g < gpus.size()) total = gpus[g].totalMiB; + char vbuf[256]; + snprintf(vbuf, sizeof(vbuf), + " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", + g, vramAfter[g], total); + printf("%s\n", vbuf); + g_log.add(vbuf); } - char vbuf[256]; - snprintf(vbuf, sizeof(vbuf), - " GPU[%zu] VRAM: %zu MiB free (of %zu MiB)", - g, vramAfter[g], total); - printf("%s\n", vbuf); - g_log.add(vbuf); + } else { + char buf[256]; + snprintf(buf, sizeof(buf), "%s Engine loaded in %.0f ms (CPU mode, Stream%d)", + tag, loadMs, streamIdx); + printf("%s\n", buf); + g_log.add(buf); } { @@ -1140,6 +1608,8 @@ int ANSLPR_MultiGPU_StressTest() { } // --- Align NVDEC decode GPU with inference GPU for NV12 zero-copy --- + // (GPU only — software decoding on CPU doesn't use NVDEC) + if (hasGpu) // Each stream should decode on the same GPU as its inference engine to enable // direct NVDEC→TensorRT zero-copy (0.5ms vs 17ms preprocess per frame). 
// @@ -1343,7 +1813,7 @@ int ANSLPR_MultiGPU_StressTest() { streamUrls[streamIdx].c_str()); if (result == 1 && rtspClients[streamIdx]) { SetRTSPImageQuality(&rtspClients[streamIdx], 0); - SetRTSPHWDecoding(&rtspClients[streamIdx], 7); + if (hasGpu) SetRTSPHWDecoding(&rtspClients[streamIdx], 7); StartRTSP(&rtspClients[streamIdx]); auto chaosEnd = std::chrono::steady_clock::now(); @@ -1368,8 +1838,9 @@ int ANSLPR_MultiGPU_StressTest() { const int cellW = 480, cellH = 360; // Smaller cells for 3-column layout const int logPanelH = 220; const int gridCols = 3, gridRows = 2; - cv::namedWindow("ANSLPR Multi-GPU Stress Test", cv::WINDOW_NORMAL); - cv::resizeWindow("ANSLPR Multi-GPU Stress Test", cellW * gridCols, cellH * gridRows + logPanelH); + std::string windowTitle = hasGpu ? "ANSLPR Multi-GPU Stress Test" : "ANSLPR CPU Stress Test"; + cv::namedWindow(windowTitle, cv::WINDOW_NORMAL); + cv::resizeWindow(windowTitle, cellW * gridCols, cellH * gridRows + logPanelH); auto testStart = std::chrono::steady_clock::now(); auto lastGpuSnapshot = std::chrono::steady_clock::now(); @@ -1468,7 +1939,9 @@ int ANSLPR_MultiGPU_StressTest() { snprintf(bar1, sizeof(bar1), "T%d(S%d) | %.1f FPS | %.0fms | F:%d | D:%d | %s", i, taskStreamMap[i], fps, infMs, fCount, dCount, lastPlate.empty() ? "-" : lastPlate.c_str()); - if (gpuId >= 0) { + if (!hasGpu) { + snprintf(bar2, sizeof(bar2), "CPU mode (software decoding)"); + } else if (gpuId >= 0) { snprintf(bar2, sizeof(bar2), "GPU[%d] | VRAM: %zu MiB", gpuId, vramMiB); } else { snprintf(bar2, sizeof(bar2), "GPU: N/A"); @@ -1572,7 +2045,7 @@ int ANSLPR_MultiGPU_StressTest() { gpuLineY += 15; } - cv::imshow("ANSLPR Multi-GPU Stress Test", canvas); + cv::imshow(windowTitle, canvas); int key = cv::waitKey(30); if (key == 27) { // ESC g_log.add("ESC pressed — stopping all tasks..."); @@ -2930,6 +3403,136 @@ int ANSLPR_MultiGPU_StressTest_FilePlayer() { return 0; } +// ANSLPR_OD_CPU_VideoTest — Uses ANSALPR_OD (engineType=1) on Intel CPU/iGPU. 
+// ANSALPR_OD auto-detects hardware (OpenVINO on Intel, DirectML on AMD, etc.) +// No CUDA calls — safe on non-NVIDIA systems. +int ANSLPR_OD_CPU_VideoTest() { + std::cout << "\n============================================================" << std::endl; + std::cout << " ANSLPR CPU/iGPU Test (ANSALPR_OD with auto-detect)" << std::endl; + std::cout << "============================================================\n" << std::endl; + + std::string modelZipFile = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_ALPR_v1.2.zip"; + std::string videoFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\classroom.mp4"; + + std::cout << "Model: " << modelZipFile << std::endl; + std::cout << "Video: " << videoFilePath << std::endl; + + ANSCENTER::ANSALPR* infHandle = nullptr; + int engineType = 1; // ANSALPR_OD (auto-detects HW internally) + double detThresh = 0.5, ocrThresh = 0.5, colThresh = 0.5; + + // Step 1: Create handle + std::cout << "[LPR-CPU] Step 1: Creating handle..." << std::endl; + int createResult = CreateANSALPRHandle(&infHandle, "", modelZipFile.c_str(), "", + engineType, detThresh, ocrThresh, colThresh); + std::cout << "[LPR-CPU] CreateANSALPRHandle result: " << createResult << std::endl; + if (createResult != 1 || infHandle == nullptr) { + std::cerr << "[LPR-CPU] FAILED: CreateANSALPRHandle returned " << createResult << std::endl; + return -1; + } + + // Step 2: Load engine + std::cout << "[LPR-CPU] Step 2: Loading engine..." << std::endl; + int loadResult = LoadANSALPREngineHandle(&infHandle); + std::cout << "[LPR-CPU] LoadANSALPREngineHandle result: " << loadResult << std::endl; + if (loadResult != 1) { + std::cerr << "[LPR-CPU] FAILED: LoadANSALPREngineHandle returned " << loadResult << std::endl; + ReleaseANSALPRHandle(&infHandle); + return -2; + } + + // Step 3: Open video + std::cout << "[LPR-CPU] Step 3: Opening video..." 
<< std::endl;
+    cv::VideoCapture capture(videoFilePath);
+    if (!capture.isOpened()) {
+        std::cerr << "[LPR-CPU] FAILED: Could not open video: " << videoFilePath << std::endl;
+        ReleaseANSALPRHandle(&infHandle);
+        return -3;
+    }
+
+    int totalFrames = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_COUNT));
+    std::cout << "[LPR-CPU] Video opened: " << totalFrames << " frames" << std::endl;
+
+    // Step 4: Run inference
+    std::cout << "[LPR-CPU] Step 4: Running inference..." << std::endl;
+    boost::property_tree::ptree pt;
+    int frameIndex = 0;
+    int totalDetections = 0;
+    double totalInferenceMs = 0.0;
+    int maxFrames = 200;
+
+    while (frameIndex < maxFrames) {
+        cv::Mat frame;
+        if (!capture.read(frame)) {
+            std::cout << "[LPR-CPU] End of video at frame " << frameIndex << std::endl;
+            break;
+        }
+        frameIndex++;
+
+        unsigned int bufferLength = 0;
+        unsigned char* jpeg_bytes = CVMatToBytes(frame, bufferLength);
+        int height = frame.rows;
+        int width = frame.cols;
+
+        auto start = std::chrono::system_clock::now();
+        std::string detectionResult = ANSALPR_RunInferenceBinary(&infHandle, jpeg_bytes, width, height);
+        auto end = std::chrono::system_clock::now();
+        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+        totalInferenceMs += static_cast<double>(elapsed.count());
+
+        delete[] jpeg_bytes;
+
+        if (!detectionResult.empty()) {
+            try {
+                pt.clear();
+                std::stringstream ss;
+                ss << detectionResult;
+                boost::property_tree::read_json(ss, pt);
+                int detCount = 0;
+                BOOST_FOREACH(const boost::property_tree::ptree::value_type& child, pt.get_child("results")) {
+                    const boost::property_tree::ptree& r = child.second;
+                    const auto class_name = GetData(r, "class_name");
+                    const auto x = GetData(r, "x");
+                    const auto y = GetData(r, "y");
+                    const auto w = GetData(r, "width");
+                    const auto h = GetData(r, "height");
+                    detCount++;
+                    cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), 2);
+                    cv::putText(frame, class_name, cv::Point(x, y - 5),
+                        0, 0.6, cv::Scalar(0, 0, 255), 1,
cv::LINE_AA);
+                }
+                totalDetections += detCount;
+            }
+            catch (...) {}
+        }
+
+        if (frameIndex % 10 == 0) {
+            double avgSoFar = totalInferenceMs / frameIndex;
+            std::cout << "[LPR-CPU] Frame " << frameIndex << "/" << maxFrames
+                << " | Time: " << elapsed.count() << "ms"
+                << " | Avg: " << static_cast<int>(avgSoFar) << "ms"
+                << " | Detections: " << totalDetections << std::endl;
+        }
+
+        cv::imshow("ANSLPR CPU Test", frame);
+        if (cv::waitKey(1) == 27) break;
+    }
+
+    // Summary
+    double avgMs = (frameIndex > 0) ? (totalInferenceMs / frameIndex) : 0.0;
+    std::cout << "\n=== LPR CPU Test Summary ===" << std::endl;
+    std::cout << "Frames processed: " << frameIndex << std::endl;
+    std::cout << "Total detections: " << totalDetections << std::endl;
+    std::cout << "Avg inference: " << avgMs << " ms/frame" << std::endl;
+    std::cout << "Total time: " << totalInferenceMs << " ms" << std::endl;
+    std::cout << (frameIndex > 0 ? "[LPR-CPU] PASSED" : "[LPR-CPU] FAILED") << std::endl;
+
+    capture.release();
+    cv::destroyAllWindows();
+    ReleaseANSALPRHandle(&infHandle);
+    return (frameIndex > 0) ?
0 : -4; +} + int main() { // ANSLPR_OD_INDOInferences_FileTest(); @@ -2940,9 +3543,12 @@ int main() //for (int i = 0; i < 100; i++) { // ANSLPR_CPU_Inferences_FileTest(); //} - ANSLPR_MultiGPU_StressTest(); + //ANSLPR_SingleTask_Test(); + ANSLPR_CPU_StressTest(); + //ANSLPR_MultiGPU_StressTest(); //ANSLPR_MultiGPU_StressTest_SimulatedCam(); // ANSLPR_MultiGPU_StressTest_FilePlayer(); + //ANSLPR_OD_CPU_VideoTest(); return 0; } diff --git a/tests/ANSLPR-UnitTest/CMakeLists.txt b/tests/ANSLPR-UnitTest/CMakeLists.txt index e02b88c..dc63c68 100644 --- a/tests/ANSLPR-UnitTest/CMakeLists.txt +++ b/tests/ANSLPR-UnitTest/CMakeLists.txt @@ -7,6 +7,7 @@ target_include_directories(ANSLPR-UnitTest PRIVATE ${CMAKE_SOURCE_DIR}/modules/ANSLPR ${CMAKE_SOURCE_DIR}/modules/ANSLPR/include ${CMAKE_SOURCE_DIR}/modules/ANSODEngine + ${CMAKE_SOURCE_DIR}/core/ANSLibsLoader/include ${CMAKE_SOURCE_DIR}/modules/ANSCV ${CMAKE_SOURCE_DIR}/MediaClient ${CMAKE_SOURCE_DIR}/MediaClient/media @@ -36,6 +37,7 @@ target_link_libraries(ANSLPR-UnitTest PRIVATE ANSODEngine PRIVATE ANSCV PRIVATE ANSLicensingSystem + PRIVATE ANSLibsLoader PRIVATE anslicensing PRIVATE ANSMOT PRIVATE opencv diff --git a/tests/ANSODEngine-UnitTest/ANSODEngine-UnitTest.cpp b/tests/ANSODEngine-UnitTest/ANSODEngine-UnitTest.cpp index c041718..8967308 100644 --- a/tests/ANSODEngine-UnitTest/ANSODEngine-UnitTest.cpp +++ b/tests/ANSODEngine-UnitTest/ANSODEngine-UnitTest.cpp @@ -1449,8 +1449,8 @@ int YOLO26ODYolo12Test() { } int YOLO26ODYolo11Test() { std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_VehicleDetection_v2.0.zip"; - std::string videoFile = "E:\\Programs\\DemoAssets\\Videos\\road.mp4"; - int modelType = 31; // ONNX YOLO (30) RT YOLO (31) + std::string videoFile = "C:\\ProgramData\\ANSCENTER\\Shared\\road.mp4"; + int modelType = 3; // ONNX YOLO (30) RT YOLO (31) VideoDetectorEngine(modelFilePath, videoFile, modelType); return 0; } @@ -1861,12 +1861,12 @@ int main() //YOLO26POSEYolo11Test(); 
//YOLO26CLYolo11Test(); //YOLO26ODYolo12Test(); - //YOLO26ODYolo11Test(); + YOLO26ODYolo11Test(); //YOLO26ODYolo10Test(); //YOLO26OBBYolo11Test(); //SAM3ONNX_ImageTest(); // ORT reference — runs first, prints decoder input stats //SAM3TRT_ImageTest(); // TRT under test — compare decoder input stats with above - CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow) + //CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow) //SAM3TRT_UnitTest(); // TensorRT SAM3 test (in ANSSAM3-UnitTest.cpp) //TensorRT10Test(); //FireNSmokeCustomDetection();