Fix AMD and OpenVINO

2026-04-08 13:45:52 +10:00
parent a4a8caaa86
commit 69787b0ff0
15 changed files with 1209 additions and 132 deletions
--- a/modules/ANSUtilities/dllmain.cpp
+++ b/modules/ANSUtilities/dllmain.cpp
@@ -804,34 +804,54 @@ extern "C" ANSULT_API int ANSConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandl
 		int len = (int)strlen(utf8Str);
 		if (len == 0) return 0;
 		const char bom[2] = { '\xFF', '\xFE' };
+
+		// Check if input contains \uXXXX escape sequences
 		bool hasUnicodeEscapes = false;
 		for (int i = 0; i + 1 < len; i++) {
 			if (utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') { hasUnicodeEscapes = true; break; }
 		}
+
 		if (hasUnicodeEscapes) {
-			std::string utf16le;
-			if (includeBOM) utf16le.assign(bom, 2);
-			utf16le.reserve(len * 2 + 2);
+			// Two-pass approach: first decode \uXXXX escapes to UTF-8, then convert to UTF-16LE.
+			// This correctly handles mixed input (raw UTF-8 + \uXXXX escapes) by producing
+			// clean UTF-8 first, then using MultiByteToWideChar for proper UTF-16LE conversion.
+			std::string utf8Decoded;
+			utf8Decoded.reserve(len);
 			for (int i = 0; i < len; ) {
 				if (i + 5 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') {
 					char hex[5] = { utf8Str[i + 2], utf8Str[i + 3], utf8Str[i + 4], utf8Str[i + 5], 0 };
-					uint16_t cp = (uint16_t)strtoul(hex, nullptr, 16);
-					utf16le += static_cast<char>(cp & 0xFF);
-					utf16le += static_cast<char>((cp >> 8) & 0xFF);
+					uint32_t cp = (uint32_t)strtoul(hex, nullptr, 16);
+					// Encode codepoint as UTF-8
+					if (cp <= 0x7F) {
+						utf8Decoded += static_cast<char>(cp);
+					} else if (cp <= 0x7FF) {
+						utf8Decoded += static_cast<char>(0xC0 | (cp >> 6));
+						utf8Decoded += static_cast<char>(0x80 | (cp & 0x3F));
+					} else {
+						utf8Decoded += static_cast<char>(0xE0 | (cp >> 12));
+						utf8Decoded += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+						utf8Decoded += static_cast<char>(0x80 | (cp & 0x3F));
+					}
 					i += 6;
 				} else {
-					utf16le += utf8Str[i];
-					utf16le += '\0';
+					utf8Decoded += utf8Str[i];
 					i++;
 				}
 			}
-			int size = (int)utf16le.size();
-			MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
+			// Now convert the clean UTF-8 to UTF-16LE
+			std::string converted = ANSCENTER::ANSUtilities::ConvertUTF8ToUTF16LE(utf8Decoded);
+			if (converted.empty()) return 0;
+			int dataSize = static_cast<int>(converted.size());
+			int bomSize = includeBOM ? 2 : 0;
+			int totalSize = bomSize + dataSize;
+			MgErr error = DSSetHandleSize(result, sizeof(int32) + totalSize * sizeof(uChar));
 			if (error != noErr) return -2;
-			(*result)->cnt = size;
-			memcpy((*result)->str, utf16le.data(), size);
+			(*result)->cnt = totalSize;
+			if (includeBOM) memcpy((*result)->str, bom, 2);
+			memcpy((*result)->str + bomSize, converted.data(), dataSize);
 			return 1;
 		}
+
 		std::string converted = ANSCENTER::ANSUtilities::ConvertUTF8ToUTF16LE(utf8Str);
 		if (converted.empty()) return 0;
 		int dataSize = static_cast<int>(converted.size());
@@ -850,23 +870,31 @@ extern "C" ANSULT_API int ANSConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandl
 extern "C" ANSULT_API int ANSConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result) {
 	try {
 		if (!utf16leBytes || byteLen <= 0 || !result) return -1;
-		bool isUtf16le = (byteLen >= 2 && byteLen % 2 == 0);
+		const unsigned char* data = utf16leBytes;
+		int dataLen = byteLen;
+		// Strip BOM (FF FE) if present
+		if (dataLen >= 2 && data[0] == 0xFF && data[1] == 0xFE) {
+			data += 2;
+			dataLen -= 2;
+		}
+		if (dataLen <= 0) return 0;
+		bool isUtf16le = (dataLen >= 2 && dataLen % 2 == 0);
 		if (isUtf16le) {
 			bool isAscii = true;
-			for (int i = 1; i < byteLen; i += 2) {
-				if (utf16leBytes[i] != 0x00) { isAscii = false; break; }
+			for (int i = 1; i < dataLen; i += 2) {
+				if (data[i] != 0x00) { isAscii = false; break; }
 			}
 			if (isAscii) {
-				int asciiLen = byteLen / 2;
+				int asciiLen = dataLen / 2;
 				MgErr error = DSSetHandleSize(result, sizeof(int32) + asciiLen * sizeof(uChar));
 				if (error != noErr) return -2;
 				(*result)->cnt = asciiLen;
-				for (int i = 0; i < asciiLen; i++) (*result)->str[i] = utf16leBytes[i * 2];
+				for (int i = 0; i < asciiLen; i++) (*result)->str[i] = data[i * 2];
 				return 1;
 			}
 		}
 		std::string converted = ANSCENTER::ANSUtilities::ConvertUTF16LEToUTF8(
-			reinterpret_cast<const char*>(utf16leBytes), byteLen);
+			reinterpret_cast<const char*>(data), dataLen);
 		if (converted.empty()) return 0;
 		int size = static_cast<int>(converted.size());
 		MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
@@ -909,6 +937,168 @@ extern "C" ANSULT_API int ANSConvertUTF16LEToUnicodeEscapes(const unsigned char*
 	catch (...) { return -1; }
 }

+// Helper: writes the bytes of `str` into a LabVIEW string handle.
+// Returns 1 on success, 0 if `str` is empty (handle left untouched), -2 if the resize fails.
+static int CopyStringToLStrHandle(LStrHandle handle, const std::string& str) {
+	if (str.empty()) return 0;
+	const int byteCount = static_cast<int>(str.size());
+	if (DSSetHandleSize(handle, sizeof(int32) + byteCount * sizeof(uChar)) != noErr) return -2;
+	(*handle)->cnt = byteCount;
+	memcpy((*handle)->str, str.data(), byteCount);
+	return 1;
+}
+
+// Helper: writes `len` raw bytes from `data` into a LabVIEW string handle.
+// Returns 1 on success, 0 for null/empty input (handle left untouched), -2 if the resize fails.
+static int CopyBytesToLStrHandle(LStrHandle handle, const unsigned char* data, int len) {
+	if (data == nullptr || len <= 0) return 0;
+	if (DSSetHandleSize(handle, sizeof(int32) + len * sizeof(uChar)) != noErr) return -2;
+	(*handle)->cnt = len;
+	memcpy((*handle)->str, data, len);
+	return 1;
+}
+
+// Helper: heuristic UTF-16LE check. True when the buffer starts with the FF FE BOM
+// or contains at least one 0x00 byte; false otherwise (including byteLen <= 0).
+static bool DetectUTF16LE(const unsigned char* data, int byteLen) {
+	if (byteLen >= 2 && data[0] == 0xFF && data[1] == 0xFE) return true;
+	int pos = 0;
+	while (pos < byteLen && data[pos] != 0x00) ++pos;  // stop at the first NUL byte, if any
+	return pos < byteLen;
+}
+
+// Helper: skips a leading FF FE BOM. Returns the first byte after it and reduces `len` by 2; no-op without a BOM.
+static const unsigned char* StripBOM(const unsigned char* data, int& len) {
+	if (len < 2 || data[0] != 0xFF || data[1] != 0xFE) return data;  // no BOM: pointer and length unchanged
+	len -= 2; return data + 2;
+}
+
+// LabVIEW-facing converter to UTF-8 that reads the bytes straight out of `input`.
+// Dispatch, in order:
+//   * BOM (FF FE) or any 0x00 byte → UTF-16LE: strip BOM, RepairLabVIEWUTF16LE, convert to UTF-8
+//   * otherwise valid UTF-8         → copied through unchanged
+//   * otherwise                     → Windows: CP_ACP → UTF-8; raw copy if that yields nothing or off Windows
+// Returns 1 on success, 0 when there is nothing to write, -1 on a null handle or exception, -2 on resize failure.
+extern "C" ANSULT_API int ANSConvertUTF16LEToUTF8_LV(LStrHandle input, LStrHandle result) {
+	try {
+		if (input == nullptr || result == nullptr) return -1;
+		int byteLen = (*input)->cnt;
+		if (byteLen <= 0) return 0;
+
+		// Snapshot the bytes up front: the caller may pass one handle as both input and result.
+		const std::vector<unsigned char> snapshot((*input)->str, (*input)->str + byteLen);
+		const unsigned char* data = snapshot.data();
+
+		if (DetectUTF16LE(data, byteLen)) {
+			// UTF-16LE route: drop the BOM, repair the mixed encoding, then transcode.
+			int payloadLen = byteLen;
+			const unsigned char* payload = StripBOM(data, payloadLen);
+			if (payloadLen <= 0) return 0;
+			auto repaired = ANSCENTER::ANSUtilities::RepairLabVIEWUTF16LE(payload, payloadLen);
+			const char* repairedChars = reinterpret_cast<const char*>(repaired.data());
+			return CopyStringToLStrHandle(result, ANSCENTER::ANSUtilities::ConvertUTF16LEToUTF8(
+				repairedChars, static_cast<int>(repaired.size())));
+		}
+
+		if (ANSCENTER::ANSUtilities::IsValidUTF8(data, byteLen)) {
+			// Already UTF-8: hand the bytes back untouched.
+			return CopyBytesToLStrHandle(result, data, byteLen);
+		}
+
+		// Neither UTF-16LE nor valid UTF-8: interpret the bytes as the system ANSI codepage.
+		std::string fromCodepage;
+#ifdef _WIN32
+		const char* narrow = reinterpret_cast<const char*>(data);
+		const int wideCount = MultiByteToWideChar(CP_ACP, 0, narrow, byteLen, nullptr, 0);
+		if (wideCount > 0) {
+			std::wstring wide(wideCount, 0);
+			MultiByteToWideChar(CP_ACP, 0, narrow, byteLen, &wide[0], wideCount);
+			const int utf8Count = WideCharToMultiByte(CP_UTF8, 0,
+				wide.c_str(), wideCount, nullptr, 0, nullptr, nullptr);
+			if (utf8Count > 0) {
+				fromCodepage.assign(utf8Count, '\0');
+				WideCharToMultiByte(CP_UTF8, 0,
+					wide.c_str(), wideCount, &fromCodepage[0], utf8Count, nullptr, nullptr);
+			}
+		}
+#endif
+		// An empty conversion (or a non-Windows build) falls back to a raw byte copy.
+		if (!fromCodepage.empty()) return CopyStringToLStrHandle(result, fromCodepage);
+		return CopyBytesToLStrHandle(result, data, byteLen);
+	}
+	catch (...) { return -1; }
+}
+
+// LabVIEW-facing converter to \uXXXX escapes; auto-detects the encoding of `input`.
+// Dispatch, in order:
+//   * BOM (FF FE) or any 0x00 byte → UTF-16LE: strip BOM, RepairLabVIEWUTF16LE, then escape
+//   * otherwise valid UTF-8         → escape the bytes as they are
+//   * otherwise                     → Windows: CP_ACP → UTF-8, then escape; the raw bytes are
+//                                     escaped if that yields nothing or on other platforms
+// Returns 1 on success, 0 when there is nothing to write, -1 on a null handle or exception, -2 on resize failure.
+extern "C" ANSULT_API int ANSConvertUTF16LEToUnicodeEscapes_LV(LStrHandle input, LStrHandle result) {
+	try {
+		if (input == nullptr || result == nullptr) return -1;
+		int byteLen = (*input)->cnt;
+		if (byteLen <= 0) return 0;
+
+		// Snapshot the bytes up front: the caller may pass one handle as both input and result.
+		const std::vector<unsigned char> snapshot((*input)->str, (*input)->str + byteLen);
+		const unsigned char* data = snapshot.data();
+		const char* chars = reinterpret_cast<const char*>(data);
+
+		if (DetectUTF16LE(data, byteLen)) {
+			// UTF-16LE route: drop the BOM and repair the mixed encoding before escaping.
+			int payloadLen = byteLen;
+			const unsigned char* payload = StripBOM(data, payloadLen);
+			if (payloadLen <= 0) return 0;
+
+			auto repaired = ANSCENTER::ANSUtilities::RepairLabVIEWUTF16LE(payload, payloadLen);
+
+			// Re-attach the FF FE BOM in front of the repaired bytes before escaping
+			// (ConvertUTF16LEToUnicodeEscapes accepts an optional BOM).
+			const unsigned char bomBytes[2] = { 0xFF, 0xFE };
+			std::vector<unsigned char> bomPrefixed;
+			bomPrefixed.reserve(repaired.size() + 2);
+			bomPrefixed.insert(bomPrefixed.end(), bomBytes, bomBytes + 2);
+			bomPrefixed.insert(bomPrefixed.end(), repaired.begin(), repaired.end());
+
+			const std::string escapedUtf16 = ANSCENTER::ANSUtilities::ConvertUTF16LEToUnicodeEscapes(
+				reinterpret_cast<const char*>(bomPrefixed.data()), static_cast<int>(bomPrefixed.size()));
+			return CopyStringToLStrHandle(result, escapedUtf16);
+		}
+
+		// No UTF-16LE markers: obtain UTF-8 text first, then escape it.
+		std::string utf8Text;
+		if (ANSCENTER::ANSUtilities::IsValidUTF8(data, byteLen)) {
+			utf8Text.assign(chars, byteLen);
+		}
+#ifdef _WIN32
+		else {
+			// Not valid UTF-8: interpret the bytes as the system ANSI codepage.
+			const int wideCount = MultiByteToWideChar(CP_ACP, 0, chars, byteLen, nullptr, 0);
+			if (wideCount > 0) {
+				std::wstring wide(wideCount, 0);
+				MultiByteToWideChar(CP_ACP, 0, chars, byteLen, &wide[0], wideCount);
+				const int utf8Count = WideCharToMultiByte(CP_UTF8, 0,
+					wide.c_str(), wideCount, nullptr, 0, nullptr, nullptr);
+				if (utf8Count > 0) {
+					utf8Text.resize(utf8Count);
+					WideCharToMultiByte(CP_UTF8, 0,
+						wide.c_str(), wideCount, &utf8Text[0], utf8Count, nullptr, nullptr);
+				}
+			}
+		}
+#endif
+
+		// Last resort (conversion produced nothing, or non-Windows build): escape the raw bytes.
+		if (utf8Text.empty()) utf8Text.assign(chars, byteLen);
+		const std::string escapedUtf8 = ANSCENTER::ANSUtilities::ConvertUTF8ToUnicodeEscapes(utf8Text);
+		return CopyStringToLStrHandle(result, escapedUtf8);
+	}
+	catch (...) { return -1; }
+}
+
 extern "C" ANSULT_API int ANSConvertUnicodeEscapesToUTF8(const char* escapedStr, LStrHandle result) {
 	try {
 		if (!escapedStr || !result) return -1;