Support UTF8 to UTF16 LE.

Support Unicode helper. Fix ANSFR to show 2 same faces in 1 image.
2026-03-31 21:52:47 +11:00
parent 70be68d0fc
commit ccfc5964d4
21 changed files with 379 additions and 104 deletions
--- a/modules/ANSOCR/dllmain.cpp
+++ b/modules/ANSOCR/dllmain.cpp
@@ -427,33 +427,25 @@ extern "C" ANSOCR_API int SetANSOCRALPRFormat(ANSCENTER::ANSOCRBase** Handle, co
 // - JSON Unicode escapes (\uXXXX) from ensure_ascii=true output
 // - Raw UTF-8 encoded strings
 // Pure ASCII input is passed through directly (no conversion overhead).
-extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result) {
+extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result, int includeBOM) {
 	try {
 		if (!utf8Str || !result) return -1;
 		int len = (int)strlen(utf8Str);
 		if (len == 0) return 0;

-		// Check if input contains \uXXXX escapes or non-ASCII bytes
+		// Always output UTF-16LE (required for LabVIEW "Force Unicode Text" indicators)
+		const char bom[2] = { '\xFF', '\xFE' };
+
+		// Check if input contains \uXXXX escapes
 		bool hasUnicodeEscapes = false;
-		bool hasNonAscii = false;
-		for (int i = 0; i < len; i++) {
-			if ((unsigned char)utf8Str[i] >= 0x80) hasNonAscii = true;
-			if (i + 1 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') hasUnicodeEscapes = true;
+		for (int i = 0; i + 1 < len; i++) {
+			if (utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') { hasUnicodeEscapes = true; break; }
 		}

-		// Pure ASCII with no escapes — pass through directly
-		if (!hasNonAscii && !hasUnicodeEscapes) {
-			MgErr error = DSSetHandleSize(result, sizeof(int32) + len * sizeof(uChar));
-			if (error != noErr) return -2;
-			(*result)->cnt = len;
-			memcpy((*result)->str, utf8Str, len);
-			return 1;
-		}
-
-		// If contains \uXXXX escapes, decode them to UTF-16LE directly
 		if (hasUnicodeEscapes) {
 			std::string utf16le;
-			utf16le.reserve(len * 2);
+			if (includeBOM) utf16le.assign(bom, 2);
+			utf16le.reserve(len * 2 + 2);
 			for (int i = 0; i < len; ) {
 				if (i + 5 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') {
 					char hex[5] = { utf8Str[i + 2], utf8Str[i + 3], utf8Str[i + 4], utf8Str[i + 5], 0 };
@@ -462,7 +454,6 @@ extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrH
 					utf16le += static_cast<char>((cp >> 8) & 0xFF);
 					i += 6;
 				} else {
-					// ASCII or raw UTF-8 byte — convert as single char
 					utf16le += utf8Str[i];
 					utf16le += '\0';
 					i++;
@@ -476,17 +467,19 @@ extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrH
 			return 1;
 		}

-		// Raw UTF-8 — convert via Windows API
 #ifdef _WIN32
 		int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, nullptr, 0);
 		if (wideLen <= 0) return 0;
 		std::wstring wideStr(wideLen, 0);
 		MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, &wideStr[0], wideLen);
-		int size = wideLen * (int)sizeof(wchar_t);
-		MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
+		int dataSize = wideLen * (int)sizeof(wchar_t);
+		int bomSize = includeBOM ? 2 : 0;
+		int totalSize = bomSize + dataSize;
+		MgErr error = DSSetHandleSize(result, sizeof(int32) + totalSize * sizeof(uChar));
 		if (error != noErr) return -2;
-		(*result)->cnt = size;
-		memcpy((*result)->str, wideStr.data(), size);
+		(*result)->cnt = totalSize;
+		if (includeBOM) memcpy((*result)->str, bom, 2);
+		memcpy((*result)->str + bomSize, wideStr.data(), dataSize);
 		return 1;
 #else
 		return 0;