Support UTF-8 to UTF-16LE conversion
This commit is contained in:
@@ -177,6 +177,60 @@ namespace ANSCENTER {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Helper: rewrite every non-ASCII UTF-8 sequence as the literal six-character
// text \uXXXX (backslash, 'u', four lowercase hex digits) so that JSON parsers
// (e.g., LabVIEW) preserve it as a displayable escape sequence. Code points
// above U+FFFF are written as a UTF-16 surrogate pair (\uXXXX\uXXXX).
//
// @param utf8Str  Input text, expected to be UTF-8.
// @return         The input unchanged when it is pure ASCII; otherwise a copy in
//                 which ASCII bytes are kept and each well-formed multi-byte
//                 sequence is replaced by its escape text.
//
// Bytes that do not form a well-formed sequence (stray continuation byte,
// truncated sequence, lead byte not followed by 10xxxxxx bytes, overlong form,
// value above U+10FFFF) are dropped one byte at a time, so the ASCII text that
// follows a broken lead byte is never swallowed.
static std::string DoubleEscapeUnicode(const std::string& utf8Str) {
    // Fast path: nothing to escape, so return the input as-is.
    bool hasNonAscii = false;
    for (unsigned char c : utf8Str) {
        if (c >= 0x80) { hasNonAscii = true; break; }
    }
    if (!hasNonAscii) return utf8Str;

    const size_t n = utf8Str.size();
    std::string result;
    result.reserve(n * 2);

    size_t i = 0;
    while (i < n) {
        const unsigned char lead = static_cast<unsigned char>(utf8Str[i]);
        if (lead < 0x80) {
            result += utf8Str[i++];
            continue;
        }

        // Classify the lead byte: number of continuation bytes, payload bits,
        // and the smallest code point that legitimately needs this length.
        size_t extra = 0;
        uint32_t cp = 0;
        uint32_t minCp = 0;
        if ((lead & 0xE0) == 0xC0) {
            extra = 1; cp = lead & 0x1F; minCp = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            extra = 2; cp = lead & 0x0F; minCp = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            extra = 3; cp = lead & 0x07; minCp = 0x10000;
        }

        // i < n holds here, so (n - i) cannot underflow; this is i + extra < n.
        bool valid = (extra != 0) && (extra < n - i);
        for (size_t k = 1; valid && k <= extra; ++k) {
            const unsigned char cont = static_cast<unsigned char>(utf8Str[i + k]);
            if ((cont & 0xC0) != 0x80) {
                valid = false;               // not a 10xxxxxx continuation byte
            } else {
                cp = (cp << 6) | (cont & 0x3F);
            }
        }
        if (valid && (cp < minCp || cp > 0x10FFFF)) {
            valid = false;                   // overlong form or beyond Unicode
        }

        if (!valid) {
            ++i;                             // skip just the offending byte
            continue;
        }
        i += extra + 1;

        // Encode as \uXXXX (literal backslash + u + 4 hex digits)
        if (cp <= 0xFFFF) {
            char buf[8];
            snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(cp));
            result += buf;
        } else {
            // Surrogate pair for codepoints > 0xFFFF
            const uint32_t offset = cp - 0x10000;
            const unsigned int hi = 0xD800u + static_cast<unsigned int>(offset >> 10);
            const unsigned int lo = 0xDC00u + static_cast<unsigned int>(offset & 0x3FF);
            char buf[16];
            snprintf(buf, sizeof(buf), "\\u%04x\\u%04x", hi, lo);
            result += buf;
        }
    }
    return result;
}
|
||||
|
||||
std::string ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(const std::vector<OCRObject>& dets)
|
||||
{
|
||||
if (dets.empty()) {
|
||||
@@ -191,7 +245,7 @@ namespace ANSCENTER {
|
||||
results.push_back({
|
||||
{"class_id", std::to_string(det.classId)},
|
||||
{"track_id", std::to_string(det.trackId)},
|
||||
{"class_name", det.className},
|
||||
{"class_name", DoubleEscapeUnicode(det.className)},
|
||||
{"prob", std::to_string(det.confidence)},
|
||||
{"x", std::to_string(det.box.x)},
|
||||
{"y", std::to_string(det.box.y)},
|
||||
@@ -205,7 +259,6 @@ namespace ANSCENTER {
|
||||
});
|
||||
}
|
||||
|
||||
// ensure_ascii=true escapes non-ASCII chars as \uXXXX for LabVIEW compatibility
|
||||
return root.dump(-1, ' ', true);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
@@ -846,7 +899,7 @@ namespace ANSCENTER {
|
||||
jsonResults.push_back({
|
||||
{"class_id", "0"},
|
||||
{"track_id", "0"},
|
||||
{"class_name", res.fullPlateText},
|
||||
{"class_name", DoubleEscapeUnicode(res.fullPlateText)},
|
||||
{"prob", std::to_string(res.confidence)},
|
||||
{"x", std::to_string(res.plateBox.x)},
|
||||
{"y", std::to_string(res.plateBox.y)},
|
||||
|
||||
@@ -243,6 +243,10 @@ extern "C" ANSOCR_API int SetANSOCRMode(ANSCENTER::ANSOCRBase** Handle,
|
||||
extern "C" ANSOCR_API int SetANSOCRCountry(ANSCENTER::ANSOCRBase** Handle, int country);
|
||||
extern "C" ANSOCR_API int SetANSOCRALPRFormat(ANSCENTER::ANSOCRBase** Handle, const char* formatJson);
|
||||
|
||||
// Unicode conversion utilities for LabVIEW wrapper classes
|
||||
// ANSOCR_ConvertUTF8ToUTF16LE: converts the NUL-terminated string utf8Str and
// stores the bytes in the LabVIEW string handle `result` (resized by the call).
// Returns 1 on success, 0 when the input is empty or the conversion produced
// nothing, -1 when an argument is null or an exception was caught, and -2 when
// the handle could not be resized.
extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result);
|
||||
// ANSOCR_ConvertUTF16LEToUTF8: converts byteLen bytes at utf16leBytes and stores
// the UTF-8 bytes in the LabVIEW string handle `result` (resized by the call).
// Returns 1 on success, 0 when the conversion produced nothing, -1 when an
// argument is null, byteLen <= 0, or an exception was caught, and -2 when the
// handle could not be resized.
extern "C" ANSOCR_API int ANSOCR_ConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result);
|
||||
|
||||
// V2 Create / Release — handle as uint64_t by value (no pointer-to-pointer)
|
||||
extern "C" ANSOCR_API uint64_t CreateANSOCRHandleEx_V2(const char* licenseKey, const char* modelFilePath,
|
||||
const char* modelFileZipPassword, int language, int engineMode, int gpuId,
|
||||
|
||||
@@ -422,6 +422,123 @@ extern "C" ANSOCR_API int SetANSOCRALPRFormat(ANSCENTER::ANSOCRBase** Handle, co
|
||||
}
|
||||
}
|
||||
|
||||
// Unicode conversion utilities for LabVIEW wrapper classes
|
||||
// Converts input string to UTF-16LE. Handles both:
|
||||
// - JSON Unicode escapes (\uXXXX) from ensure_ascii=true output
|
||||
// - Raw UTF-8 encoded strings
|
||||
// Pure ASCII input is passed through directly (no conversion overhead).
|
||||
extern "C" ANSOCR_API int ANSOCR_ConvertUTF8ToUTF16LE(const char* utf8Str, LStrHandle result) {
|
||||
try {
|
||||
if (!utf8Str || !result) return -1;
|
||||
int len = (int)strlen(utf8Str);
|
||||
if (len == 0) return 0;
|
||||
|
||||
// Check if input contains \uXXXX escapes or non-ASCII bytes
|
||||
bool hasUnicodeEscapes = false;
|
||||
bool hasNonAscii = false;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if ((unsigned char)utf8Str[i] >= 0x80) hasNonAscii = true;
|
||||
if (i + 1 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') hasUnicodeEscapes = true;
|
||||
}
|
||||
|
||||
// Pure ASCII with no escapes — pass through directly
|
||||
if (!hasNonAscii && !hasUnicodeEscapes) {
|
||||
MgErr error = DSSetHandleSize(result, sizeof(int32) + len * sizeof(uChar));
|
||||
if (error != noErr) return -2;
|
||||
(*result)->cnt = len;
|
||||
memcpy((*result)->str, utf8Str, len);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// If contains \uXXXX escapes, decode them to UTF-16LE directly
|
||||
if (hasUnicodeEscapes) {
|
||||
std::string utf16le;
|
||||
utf16le.reserve(len * 2);
|
||||
for (int i = 0; i < len; ) {
|
||||
if (i + 5 < len && utf8Str[i] == '\\' && utf8Str[i + 1] == 'u') {
|
||||
char hex[5] = { utf8Str[i + 2], utf8Str[i + 3], utf8Str[i + 4], utf8Str[i + 5], 0 };
|
||||
uint16_t cp = (uint16_t)strtoul(hex, nullptr, 16);
|
||||
utf16le += static_cast<char>(cp & 0xFF);
|
||||
utf16le += static_cast<char>((cp >> 8) & 0xFF);
|
||||
i += 6;
|
||||
} else {
|
||||
// ASCII or raw UTF-8 byte — convert as single char
|
||||
utf16le += utf8Str[i];
|
||||
utf16le += '\0';
|
||||
i++;
|
||||
}
|
||||
}
|
||||
int size = (int)utf16le.size();
|
||||
MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
|
||||
if (error != noErr) return -2;
|
||||
(*result)->cnt = size;
|
||||
memcpy((*result)->str, utf16le.data(), size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Raw UTF-8 — convert via Windows API
|
||||
#ifdef _WIN32
|
||||
int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, nullptr, 0);
|
||||
if (wideLen <= 0) return 0;
|
||||
std::wstring wideStr(wideLen, 0);
|
||||
MultiByteToWideChar(CP_UTF8, 0, utf8Str, len, &wideStr[0], wideLen);
|
||||
int size = wideLen * (int)sizeof(wchar_t);
|
||||
MgErr error = DSSetHandleSize(result, sizeof(int32) + size * sizeof(uChar));
|
||||
if (error != noErr) return -2;
|
||||
(*result)->cnt = size;
|
||||
memcpy((*result)->str, wideStr.data(), size);
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
catch (...) { return -1; }
|
||||
}
|
||||
|
||||
// Converts byteLen bytes of UTF-16LE text at utf16leBytes to UTF-8 and stores
// the result in the LabVIEW string handle `result`.
//
// Fast path: when byteLen is even and every code unit is below 0x80 (low byte
// < 0x80, high byte == 0x00) the text is plain ASCII, so the low bytes are
// copied out directly. Code units U+0080..U+00FF also have a zero high byte but
// need two UTF-8 bytes, so they must go through the full conversion instead.
//
// Everything else is converted with WideCharToMultiByte (Windows builds only;
// other builds return 0). A trailing odd byte is ignored by that conversion.
//
// Returns 1 on success, 0 when the conversion is unavailable/failed, -1 when an
// argument is null, byteLen <= 0, or an exception was caught, and -2 when
// DSSetHandleSize fails.
extern "C" ANSOCR_API int ANSOCR_ConvertUTF16LEToUTF8(const unsigned char* utf16leBytes, int byteLen, LStrHandle result) {
    try {
        if (!utf16leBytes || byteLen <= 0 || !result) return -1;

        const bool isUtf16le = (byteLen >= 2 && byteLen % 2 == 0);
        if (isUtf16le) {
            // ASCII-only means every (low, high) byte pair is (< 0x80, 0x00).
            bool isAlreadyAscii = true;
            for (int i = 0; i + 1 < byteLen; i += 2) {
                if (utf16leBytes[i] >= 0x80 || utf16leBytes[i + 1] != 0x00) {
                    isAlreadyAscii = false;
                    break;
                }
            }
            if (isAlreadyAscii) {
                // Extract just the low bytes (ASCII characters)
                const int asciiLen = byteLen / 2;
                MgErr error = DSSetHandleSize(result, sizeof(int32) + asciiLen * sizeof(uChar));
                if (error != noErr) return -2;
                (*result)->cnt = asciiLen;
                for (int i = 0; i < asciiLen; i++) {
                    (*result)->str[i] = utf16leBytes[i * 2];
                }
                return 1;
            }
        }

#ifdef _WIN32
        const int wideLen = byteLen / static_cast<int>(sizeof(wchar_t));
        if (wideLen <= 0) return 0;
        // Copy into properly aligned wchar_t storage; the caller's byte buffer
        // carries no alignment guarantee for a direct wchar_t* view.
        std::wstring wideStr(wideLen, L'\0');
        memcpy(&wideStr[0], utf16leBytes, static_cast<size_t>(wideLen) * sizeof(wchar_t));

        const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wideStr.data(), wideLen, nullptr, 0, nullptr, nullptr);
        if (utf8Len <= 0) return 0;
        std::string utf8Str(utf8Len, 0);
        // The sizing call succeeded, but the converting call can still fail.
        if (WideCharToMultiByte(CP_UTF8, 0, wideStr.data(), wideLen, &utf8Str[0], utf8Len, nullptr, nullptr) <= 0) return 0;

        MgErr error = DSSetHandleSize(result, sizeof(int32) + utf8Len * sizeof(uChar));
        if (error != noErr) return -2;
        (*result)->cnt = utf8Len;
        memcpy((*result)->str, utf8Str.data(), utf8Len);
        return 1;
#else
        return 0;
#endif
    }
    catch (...) { return -1; }
}
|
||||
|
||||
extern "C" ANSOCR_API std::string RunInferenceImagePath(ANSCENTER::ANSOCRBase** Handle, const char* imageFilePath) {
|
||||
if (!Handle || !*Handle) return "";
|
||||
OCRHandleGuard guard(AcquireOCRHandle(*Handle));
|
||||
|
||||
Reference in New Issue
Block a user