Support UTF8 to UTF16 LE

2026-03-31 14:10:21 +11:00
parent 0c24096c80
commit 70be68d0fc
14 changed files with 790 additions and 11 deletions
--- a/modules/ANSODEngine/ANSEngineCommon.cpp
+++ b/modules/ANSODEngine/ANSEngineCommon.cpp
@@ -1,4 +1,8 @@
 #pragma once
+#ifdef _WIN32
+#define NOMINMAX
+#include <windows.h>
+#endif
 #include "ANSODEngine.h"
 #include "ANSYOLOOD.h"
 #include "ANSTENSORRTOD.h"
@@ -1078,6 +1082,32 @@ namespace ANSCENTER {
 		return boundingBox;
 	}

+	static std::string DoubleEscapeUnicode(const std::string& utf8Str) { // Replaces every multi-byte UTF-8 sequence in utf8Str with literal "\uXXXX" text (lowercase hex); ASCII bytes are kept as-is.
+		bool hasNonAscii = false;
+		for (unsigned char c : utf8Str) { // Probe for any byte >= 0x80 so pure-ASCII input can skip the rewrite.
+			if (c >= 0x80) { hasNonAscii = true; break; }
+		}
+		if (!hasNonAscii) return utf8Str; // Nothing to escape: return the input unchanged.
+		std::string result;
+		result.reserve(utf8Str.size() * 2); // Initial capacity estimate only; a 2-byte sequence expands to 6 output chars, so the string may still grow.
+		size_t i = 0;
+		while (i < utf8Str.size()) {
+			unsigned char c = static_cast<unsigned char>(utf8Str[i]);
+			if (c < 0x80) { result += utf8Str[i++]; continue; } // ASCII byte: copy verbatim.
+			uint32_t cp = 0; // Decoded code point of the current multi-byte sequence.
+			if ((c & 0xE0) == 0xC0 && i + 1 < utf8Str.size()) { // 2-byte sequence (lead 110xxxxx). Continuation bytes are masked with 0x3F but never checked for the 10xxxxxx pattern.
+				cp = ((c & 0x1F) << 6) | (static_cast<unsigned char>(utf8Str[i + 1]) & 0x3F); i += 2;
+			} else if ((c & 0xF0) == 0xE0 && i + 2 < utf8Str.size()) { // 3-byte sequence (lead 1110xxxx).
+				cp = ((c & 0x0F) << 12) | ((static_cast<unsigned char>(utf8Str[i + 1]) & 0x3F) << 6) | (static_cast<unsigned char>(utf8Str[i + 2]) & 0x3F); i += 3;
+			} else if ((c & 0xF8) == 0xF0 && i + 3 < utf8Str.size()) { // 4-byte sequence (lead 11110xxx).
+				cp = ((c & 0x07) << 18) | ((static_cast<unsigned char>(utf8Str[i + 1]) & 0x3F) << 12) | ((static_cast<unsigned char>(utf8Str[i + 2]) & 0x3F) << 6) | (static_cast<unsigned char>(utf8Str[i + 3]) & 0x3F); i += 4;
+			} else { i++; continue; } // Stray continuation byte, unsupported lead byte, or sequence cut off by end of input: the byte is dropped with no output.
+			if (cp <= 0xFFFF) { char buf[8]; snprintf(buf, sizeof(buf), "\\u%04x", cp); result += buf; } // Up to U+FFFF: one escape, 6 chars + NUL fits buf[8].
+			else { cp -= 0x10000; char buf[16]; snprintf(buf, sizeof(buf), "\\u%04x\\u%04x", 0xD800 + (uint16_t)(cp >> 10), 0xDC00 + (uint16_t)(cp & 0x3FF)); result += buf; } // Above U+FFFF: UTF-16 surrogate pair, high surrogate first.
+		}
+		return result; // NOTE(review): the name suggests a JSON writer later escapes the backslash again - confirm against the serializer used by the caller.
+	}
+
 	std::string ANSCENTER::ANSUtilityHelper::VectorDetectionToJsonString(const std::vector<Object>& dets)
 	{
 		if (dets.empty()) {
@@ -1091,14 +1121,14 @@ namespace ANSCENTER {
 					{"class_id", std::to_string(det.classId)},
 					//{"track_id", std::to_string(det.trackId)},
 					{"track_id", "0"},
-					{"class_name", det.className},
+					{"class_name", DoubleEscapeUnicode(det.className)},
 					{"prob", std::to_string(det.confidence)},
 					{"x", std::to_string(det.box.x)},
 					{"y", std::to_string(det.box.y)},
 					{"width", std::to_string(det.box.width)},
 					{"height", std::to_string(det.box.height)},
-					{"mask", ""},  // TODO: convert masks to comma separated string
-					{"extra_info",det.extraInfo},
+					{"mask", ""},
+					{"extra_info", det.extraInfo},
 					{"camera_id", det.cameraId},
 					{"polygon", PolygonToString(det.polygon)},
 					{"kps", KeypointsToString(det.kps)}
@@ -2212,6 +2242,144 @@ namespace ANSCENTER {
 		return polygon;
 	}

+// Unicode conversion utilities for LabVIEW wrapper classes
+extern "C" ANSENGINE_API int ANSEngine_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result) { // Writes the converted form of NUL-terminated utf8Str into result. Returns 1 on success, 0 for empty input or a failed/unsupported conversion, -1 for null arguments or a caught exception, -2 if the handle cannot be resized.
+	try {
+		if (!utf8Str || !result) return -1;
+		int len = (int)strlen(utf8Str);
+		if (len == 0) return 0; // Empty input: result is left untouched.
+		bool hasUnicodeEscapes = false;
+		bool hasNonAscii = false;
+		for (int i = 0; i < len; i++) { // Single scan that classifies the input; it decides which of the three paths below runs.
+			if ((unsigned char)utf8Str[i] >= 0x80) hasNonAscii = true;
+			if (i + 1 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') hasUnicodeEscapes = true; // Any backslash followed by 'u' counts, even when no hex digits follow.
+		}
+		if (!hasNonAscii && !hasUnicodeEscapes) { // Path 1: plain ASCII. The bytes are copied unchanged, one byte per character - they are NOT widened to UTF-16.
+			MgErr error = DSSetHandleSize(result, sizeof(int32) + len * sizeof(uChar));
+			if (error != noErr) return -2;
+			(*result)->cnt = len;
+			memcpy((*result)->str, utf8Str, len);
+			return 1;
+		}
+		if (hasUnicodeEscapes) { // Path 2: takes priority over raw UTF-8. NOTE(review): bytes >= 0x80 that reach this path are widened bytewise below, not decoded - confirm mixed input cannot occur.
+			std::string utf16le;
+			utf16le.reserve(len * 2);
+			for (int i = 0; i < len; ) {
+				if (i + 5 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') { // i + 5 < len keeps all four digit positions inside the string.
+					char hex[5] = { utf8Str[i + 2], utf8Str[i + 3], utf8Str[i + 4], utf8Str[i + 5], 0 };
+					uint16_t cp = (uint16_t)strtoul(hex, nullptr, 16); // Digits are not validated: strtoul stops at the first non-hex character, yet all six input chars are consumed.
+					utf16le += static_cast<char>(cp & 0xFF); // Little-endian: low byte first.
+					utf16le += static_cast<char>((cp >> 8) & 0xFF);
+					i += 6;
+				} else {
+					utf16le += utf8Str[i]; // Any other byte becomes the code unit (byte, 0x00).
+					utf16le += '\0';
+					i++;
+				}
+			}
+			int size = (int)utf16le.size();
+			MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
+			if (error != noErr) return -2;
+			(*result)->cnt = size; // cnt holds the byte count, not the number of code units.
+			memcpy((*result)->str, utf16le.data(), size);
+			return 1;
+		}
+#ifdef _WIN32
+		int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, nullptr, 0); // Path 3: raw UTF-8 without escapes. First call only measures the output length.
+		if (wideLen <= 0) return 0; // Conversion failure is reported as 0; result is left untouched.
+		std::wstring wideStr(wideLen, 0);
+		MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, &wideStr[0], wideLen); // Second call performs the conversion; its return value is not checked.
+		int size = wideLen * (int)sizeof(wchar_t);
+		MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
+		if (error != noErr) return -2;
+		(*result)->cnt = size;
+		memcpy((*result)->str, wideStr.data(), size); // Copies the wchar_t buffer as raw bytes.
+		return 1;
+#else
+		return 0; // No conversion is implemented for non-Windows builds.
+#endif
+	}
+	catch (...) { return -1; } // Keeps C++ exceptions from crossing the extern "C" boundary.
+}
+
+extern "C" ANSENGINE_API int ANSEngine_ConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result) { // Converts byteLen bytes of UTF-16LE into UTF-8 stored in result. Returns 1 on success, 0 for a failed/unsupported conversion, -1 for invalid arguments or a caught exception, -2 if the handle cannot be resized.
+	try {
+		if (!utf16leBytes || byteLen <= 0 || !result) return -1;
+		bool isUtf16le = (byteLen >= 2 && byteLen % 2 == 0); // Only even-length buffers are eligible for the ASCII fast path.
+		if (isUtf16le) {
+			bool isAscii = true;
+			for (int i = 1; i < byteLen; i += 2) { // i is the high byte of each code unit; i - 1 is its low byte.
+				if (utf16leBytes[i] != 0x00 || utf16leBytes[i - 1] >= 0x80) { isAscii = false; break; } // U+0080..U+00FF also have a zero high byte but need two UTF-8 bytes, so they must not take the byte-copy path.
+			}
+			if (isAscii) { // Every code unit is below U+0080: its UTF-8 form is just the low byte.
+				int asciiLen = byteLen / 2;
+				MgErr error = DSSetHandleSize(result, sizeof(int32) + asciiLen * sizeof(uChar));
+				if (error != noErr) return -2;
+				(*result)->cnt = asciiLen;
+				for (int i = 0; i < asciiLen; i++) (*result)->str[i] = utf16leBytes[i * 2];
+				return 1;
+			}
+		}
+#ifdef _WIN32
+		int wideLen = byteLen / (int)sizeof(wchar_t); // Integer division: a trailing odd byte is ignored.
+		const wchar_t* wideStr = reinterpret_cast<const wchar_t*>(utf16leBytes);
+		int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wideStr, wideLen, nullptr, 0, nullptr, nullptr); // First call only measures the UTF-8 length.
+		if (utf8Len <= 0) return 0; // Conversion failure is reported as 0; result is left untouched.
+		std::string utf8Str(utf8Len, 0);
+		WideCharToMultiByte(CP_UTF8, 0, wideStr, wideLen, &utf8Str[0], utf8Len, nullptr, nullptr); // Second call performs the conversion.
+		MgErr error = DSSetHandleSize(result, sizeof(int32) + utf8Len * sizeof(uChar));
+		if (error != noErr) return -2;
+		(*result)->cnt = utf8Len;
+		memcpy((*result)->str, utf8Str.data(), utf8Len);
+		return 1;
+#else
+		return 0; // No general conversion is implemented for non-Windows builds; only the ASCII fast path above can succeed there.
+#endif
+	}
+	catch (...) { return -1; } // Keeps C++ exceptions from crossing the extern "C" boundary.
+}
+
+	std::string ANSUtilityHelper::DecodeJsonUnicodeToUTF16LE(const std::string& escapedStr) { // Turns text containing "\uXXXX" escapes into UTF-16LE bytes held in a std::string (two bytes per code unit, may contain embedded NULs).
+		std::string result;
+		result.reserve(escapedStr.size() * 2); // Upper bound: each input byte produces at most two output bytes.
+		size_t i = 0;
+		while (i < escapedStr.size()) {
+			if (i + 5 < escapedStr.size() && escapedStr[i] == '\\' && escapedStr[i + 1] == 'u') { // i + 5 keeps all four digit positions in range.
+				// Parse \uXXXX (digits are not validated; strtoul stops at the first non-hex character)
+				char hex[5] = { escapedStr[i + 2], escapedStr[i + 3], escapedStr[i + 4], escapedStr[i + 5], 0 };
+				uint16_t codepoint = (uint16_t)strtoul(hex, nullptr, 16);
+				// UTF-16LE: low byte first, high byte second
+				result += static_cast<char>(codepoint & 0xFF);
+				result += static_cast<char>((codepoint >> 8) & 0xFF);
+				i += 6; // Surrogate halves arrive as two separate escapes and are emitted in input order.
+			} else {
+				// Any other byte -> one code unit (2 bytes: byte, 0x00); only correct for ASCII, bytes >= 0x80 are not decoded as UTF-8
+				result += escapedStr[i];
+				result += '\0';
+				i++;
+			}
+		}
+		return result;
+	}
+
+	std::string ANSUtilityHelper::ConvertUTF8ToUTF16LE(const std::string& utf8Str) { // Windows: returns the wchar_t encoding of utf8Str as raw bytes, or "" for empty input or a failed conversion. Other platforms: returns the input unchanged.
+#ifdef _WIN32
+		if (utf8Str.empty()) return "";
+		// First call: get required buffer size (in wchar_t units, no terminator because an explicit length is passed)
+		int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8Str.c_str(), (int)utf8Str.size(), nullptr, 0);
+		if (wideLen <= 0) return ""; // Conversion failure is indistinguishable from empty input for the caller.
+		// Allocate wide string buffer
+		std::wstring wideStr(wideLen, 0);
+		MultiByteToWideChar(CP_UTF8, 0, utf8Str.c_str(), (int)utf8Str.size(), &wideStr[0], wideLen); // Second call performs the conversion; its return value is not checked.
+		// Convert wchar_t buffer to raw bytes (UTF-16LE on Windows)
+		const char* rawBytes = reinterpret_cast<const char*>(wideStr.data());
+		return std::string(rawBytes, wideLen * sizeof(wchar_t)); // Length is given explicitly so embedded 0x00 bytes are preserved.
+#else
+		// Non-Windows: return UTF-8 as-is (LabVIEW is primarily Windows); the result is therefore NOT UTF-16LE on these builds
+		return utf8Str;
+#endif
+	}
+
 	float ANSUtilityHelper::calculate_intersection_area(const cv::Rect& box1, const cv::Rect& box2) {
 		int xx1 = std::max(box1.x, box2.x);
 		int yy1 = std::max(box1.y, box2.y);