From b735931c55211c945fb73234c9fb87ad3bdc84cc Mon Sep 17 00:00:00 2001 From: Tuan Nghia Nguyen Date: Sun, 29 Mar 2026 22:51:39 +1100 Subject: [PATCH] Initial OCR to support ALPR mode with country support --- .claude/settings.local.json | 8 + modules/ANSOCR/ANSOCRBase.cpp | 569 ++++++++++++++++++ modules/ANSOCR/ANSOCRBase.h | 97 +++ .../ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp | 6 + modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h | 3 + modules/ANSOCR/ANSOnnxOCR.cpp | 7 + modules/ANSOCR/ANSOnnxOCR.h | 1 + .../ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp | 6 + modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h | 3 + modules/ANSOCR/ANSRtOCR.cpp | 7 + modules/ANSOCR/ANSRtOCR.h | 1 + modules/ANSOCR/dllmain.cpp | 80 ++- tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp | 150 ++++- 13 files changed, 911 insertions(+), 27 deletions(-) create mode 100644 .claude/settings.local.json diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..480850e --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(find C:ProjectsCLionProjectsANSCORE -type f \\\\\\(-name *.h -o -name *.hpp -o -name *.cpp -o -name *.cc \\\\\\))", + "Bash(xargs grep:*)" + ] + } +} diff --git a/modules/ANSOCR/ANSOCRBase.cpp b/modules/ANSOCR/ANSOCRBase.cpp index 2582e1a..bbcb24f 100644 --- a/modules/ANSOCR/ANSOCRBase.cpp +++ b/modules/ANSOCR/ANSOCRBase.cpp @@ -293,6 +293,575 @@ namespace ANSCENTER { return polygon; } + + // ── ALPR Configuration Methods ────────────────────────────────────── + + void ANSOCRBase::SetOCRMode(OCRMode mode) { _ocrMode = mode; } + OCRMode ANSOCRBase::GetOCRMode() const { return _ocrMode; } + void ANSOCRBase::SetALPRCountry(ALPRCountry country) { + _alprCountry = country; + LoadDefaultFormats(country); + } + ALPRCountry ANSOCRBase::GetALPRCountry() const { return _alprCountry; } + void ANSOCRBase::SetALPRFormat(const ALPRPlateFormat& format) { + _alprFormats.clear(); + _alprFormats.push_back(format); + } 
+ void ANSOCRBase::AddALPRFormat(const ALPRPlateFormat& format) { + _alprFormats.push_back(format); + } + void ANSOCRBase::ClearALPRFormats() { _alprFormats.clear(); } + const std::vector& ANSOCRBase::GetALPRFormats() const { return _alprFormats; } + + void ANSOCRBase::LoadDefaultFormats(ALPRCountry country) { + _alprFormats.clear(); + if (country == ALPR_JAPAN) { + ALPRPlateFormat fmt; + fmt.name = "JAPAN_STANDARD"; + fmt.country = ALPR_JAPAN; + fmt.numRows = 2; + fmt.rowSplitThreshold = 0.3f; + + ALPRZone region; + region.name = "region"; + region.row = 0; region.col = 0; + region.charClass = CHAR_KANJI; + region.minLength = 1; region.maxLength = 4; + region.corrections = { {"#", "\xe4\xba\x95"} }; // # -> 井 + + ALPRZone classification; + classification.name = "classification"; + classification.row = 0; classification.col = 1; + classification.charClass = CHAR_DIGIT; + classification.minLength = 1; classification.maxLength = 3; + classification.validationRegex = R"(^\d{1,3}$)"; + + ALPRZone kana; + kana.name = "kana"; + kana.row = 1; kana.col = 0; + kana.charClass = CHAR_HIRAGANA; + kana.minLength = 1; kana.maxLength = 1; + + ALPRZone designation; + designation.name = "designation"; + designation.row = 1; designation.col = 1; + designation.charClass = CHAR_DIGIT; + designation.minLength = 2; designation.maxLength = 5; + designation.validationRegex = R"(^\d{2}-\d{2}$)"; + // On Japanese plates, ・ (middle dot) represents 0 + designation.corrections = { + {"\xe3\x83\xbb", "0"}, // ・ (U+30FB fullwidth middle dot) + {"\xc2\xb7", "0"}, // · (U+00B7 middle dot) + {".", "0"} // ASCII dot + }; + + fmt.zones = { region, classification, kana, designation }; + _alprFormats.push_back(fmt); + } + } + + // ── UTF-8 Helpers ─────────────────────────────────────────────────── + + uint32_t ANSOCRUtility::NextUTF8Codepoint(const std::string& str, size_t& pos) { + if (pos >= str.size()) return 0; + uint32_t cp = 0; + unsigned char c = static_cast(str[pos]); + if (c < 0x80) { + cp = 
c; pos += 1; + } else if ((c & 0xE0) == 0xC0) { + cp = c & 0x1F; + if (pos + 1 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 1]) & 0x3F); + pos += 2; + } else if ((c & 0xF0) == 0xE0) { + cp = c & 0x0F; + if (pos + 1 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 1]) & 0x3F); + if (pos + 2 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 2]) & 0x3F); + pos += 3; + } else if ((c & 0xF8) == 0xF0) { + cp = c & 0x07; + if (pos + 1 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 1]) & 0x3F); + if (pos + 2 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 2]) & 0x3F); + if (pos + 3 < str.size()) cp = (cp << 6) | (static_cast(str[pos + 3]) & 0x3F); + pos += 4; + } else { + pos += 1; // skip invalid byte + } + return cp; + } + + bool ANSOCRUtility::IsCharClass(uint32_t cp, ALPRCharClass charClass) { + switch (charClass) { + case CHAR_DIGIT: + return (cp >= 0x30 && cp <= 0x39); + case CHAR_LATIN_ALPHA: + return (cp >= 0x41 && cp <= 0x5A) || (cp >= 0x61 && cp <= 0x7A); + case CHAR_ALPHANUMERIC: + return (cp >= 0x30 && cp <= 0x39) || (cp >= 0x41 && cp <= 0x5A) || (cp >= 0x61 && cp <= 0x7A); + case CHAR_HIRAGANA: + return (cp >= 0x3040 && cp <= 0x309F); + case CHAR_KATAKANA: + return (cp >= 0x30A0 && cp <= 0x30FF); + case CHAR_KANJI: + return (cp >= 0x4E00 && cp <= 0x9FFF) || (cp >= 0x3400 && cp <= 0x4DBF); + case CHAR_CJK_ANY: + return (cp >= 0x3040 && cp <= 0x30FF) || (cp >= 0x4E00 && cp <= 0x9FFF) || (cp >= 0x3400 && cp <= 0x4DBF); + case CHAR_ANY: + return true; + default: + return false; + } + } + + // Helper: encode a single codepoint back to UTF-8 + static std::string CodepointToUTF8(uint32_t cp) { + std::string result; + if (cp < 0x80) { + result += static_cast(cp); + } else if (cp < 0x800) { + result += static_cast(0xC0 | (cp >> 6)); + result += static_cast(0x80 | (cp & 0x3F)); + } else if (cp < 0x10000) { + result += static_cast(0xE0 | (cp >> 12)); + result += static_cast(0x80 | ((cp >> 6) & 0x3F)); + result += static_cast(0x80 | (cp & 
0x3F)); + } else { + result += static_cast(0xF0 | (cp >> 18)); + result += static_cast(0x80 | ((cp >> 12) & 0x3F)); + result += static_cast(0x80 | ((cp >> 6) & 0x3F)); + result += static_cast(0x80 | (cp & 0x3F)); + } + return result; + } + + // Helper: check if a codepoint is a separator/punctuation that should stay with digits + static bool IsDigitSeparator(uint32_t cp) { + return cp == '-' || cp == '.' || cp == 0xB7 || cp == 0x30FB; // hyphen, dot, middle dot (U+00B7, U+30FB) + } + + // Helper: split a UTF-8 string by character class, returning parts matching and not matching + // For CHAR_DIGIT, hyphens and dots are kept with digits (common in plate numbers like "20-46") + static void SplitByCharClass(const std::string& text, ALPRCharClass targetClass, + std::string& matched, std::string& remainder) { + matched.clear(); + remainder.clear(); + size_t pos = 0; + while (pos < text.size()) { + size_t startPos = pos; + uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); + if (cp == 0) break; + std::string ch = text.substr(startPos, pos - startPos); + bool belongs = ANSOCRUtility::IsCharClass(cp, targetClass); + // Keep separators with digits + if (!belongs && targetClass == CHAR_DIGIT && IsDigitSeparator(cp)) { + belongs = true; + } + if (belongs) { + matched += ch; + } else { + remainder += ch; + } + } + } + + // ── ALPR Post-Processing ──────────────────────────────────────────── + + std::vector ANSOCRUtility::ALPRPostProcessing( + const std::vector& ocrResults, + const std::vector& formats, + int imageWidth, int imageHeight, + ANSOCRBase* engine, + const cv::Mat& originalImage) + { + std::vector results; + if (ocrResults.empty() || formats.empty()) return results; + + // Use the first format for now (extensible to try multiple) + const ALPRPlateFormat& fmt = formats[0]; + + // Step 1: Compute the bounding box encompassing all detections + // Then expand it by 20% on each side to account for tight detection crops + // that may cut off kana characters or edge 
digits + cv::Rect plateBox = ocrResults[0].box; + for (size_t i = 1; i < ocrResults.size(); i++) { + plateBox |= ocrResults[i].box; + } + { + int expandX = (int)(plateBox.width * 0.20f); + int expandY = (int)(plateBox.height * 0.05f); + plateBox.x = std::max(0, plateBox.x - expandX); + plateBox.y = std::max(0, plateBox.y - expandY); + plateBox.width = std::min(imageWidth - plateBox.x, plateBox.width + expandX * 2); + plateBox.height = std::min(imageHeight - plateBox.y, plateBox.height + expandY * 2); + } + + // Step 2: Split OCR results into rows based on vertical center + float plateCenterY = plateBox.y + plateBox.height * 0.5f; + // For 2-row plates, use the midpoint of the plate as the row boundary + float rowBoundary = plateBox.y + plateBox.height * fmt.rowSplitThreshold + + (plateBox.height * (1.0f - fmt.rowSplitThreshold)) * 0.5f; + + // Find the actual gap: sort by Y center, find largest gap + std::vector> yCenters; // (y_center, index) + for (int i = 0; i < (int)ocrResults.size(); i++) { + float yc = ocrResults[i].box.y + ocrResults[i].box.height * 0.5f; + yCenters.push_back({ yc, i }); + } + std::sort(yCenters.begin(), yCenters.end()); + + if (yCenters.size() >= 2) { + float maxGap = 0; + float bestBoundary = rowBoundary; + for (size_t i = 1; i < yCenters.size(); i++) { + float gap = yCenters[i].first - yCenters[i - 1].first; + if (gap > maxGap) { + maxGap = gap; + bestBoundary = (yCenters[i].first + yCenters[i - 1].first) * 0.5f; + } + } + rowBoundary = bestBoundary; + } + + // Step 3: Assign each OCR result to a row and collect text per row + struct RowItem { + int ocrIndex; + float xCenter; + std::string text; + float confidence; + cv::Rect box; + }; + std::vector topRow, bottomRow; + + for (int i = 0; i < (int)ocrResults.size(); i++) { + float yc = ocrResults[i].box.y + ocrResults[i].box.height * 0.5f; + RowItem item; + item.ocrIndex = i; + item.xCenter = ocrResults[i].box.x + ocrResults[i].box.width * 0.5f; + item.text = ocrResults[i].className; + 
item.confidence = ocrResults[i].confidence; + item.box = ocrResults[i].box; + if (yc < rowBoundary) { + topRow.push_back(item); + } else { + bottomRow.push_back(item); + } + } + + // Sort each row left-to-right + auto sortByX = [](const RowItem& a, const RowItem& b) { return a.xCenter < b.xCenter; }; + std::sort(topRow.begin(), topRow.end(), sortByX); + std::sort(bottomRow.begin(), bottomRow.end(), sortByX); + + // Step 4: Concatenate text per row + std::string topText, bottomText; + float minConfidence = 1.0f; + for (auto& item : topRow) { + topText += item.text; + minConfidence = std::min(minConfidence, item.confidence); + } + for (auto& item : bottomRow) { + bottomText += item.text; + minConfidence = std::min(minConfidence, item.confidence); + } + + // Step 5: For each zone, extract text using character class splitting + ALPRResult alprResult; + alprResult.formatName = fmt.name; + alprResult.plateBox = plateBox; + alprResult.confidence = minConfidence; + alprResult.valid = true; + + // Process top row zones + std::string topRemaining = topText; + std::vector topZones, bottomZones; + for (const auto& zone : fmt.zones) { + if (zone.row == 0) topZones.push_back(&zone); + else bottomZones.push_back(&zone); + } + std::sort(topZones.begin(), topZones.end(), [](const ALPRZone* a, const ALPRZone* b) { return a->col < b->col; }); + std::sort(bottomZones.begin(), bottomZones.end(), [](const ALPRZone* a, const ALPRZone* b) { return a->col < b->col; }); + + // Split top row text by character class + for (const auto* zone : topZones) { + std::string matched, remainder; + SplitByCharClass(topRemaining, zone->charClass, matched, remainder); + // Apply corrections + for (const auto& corr : zone->corrections) { + size_t pos = 0; + while ((pos = matched.find(corr.first, pos)) != std::string::npos) { + matched.replace(pos, corr.first.length(), corr.second); + pos += corr.second.length(); + } + } + alprResult.parts[zone->name] = matched; + topRemaining = remainder; + } + + // Split 
bottom row text by character class + std::string bottomRemaining = bottomText; + for (const auto* zone : bottomZones) { + std::string matched, remainder; + SplitByCharClass(bottomRemaining, zone->charClass, matched, remainder); + // Apply corrections + for (const auto& corr : zone->corrections) { + size_t pos = 0; + while ((pos = matched.find(corr.first, pos)) != std::string::npos) { + matched.replace(pos, corr.first.length(), corr.second); + pos += corr.second.length(); + } + } + alprResult.parts[zone->name] = matched; + bottomRemaining = remainder; + } + + // Step 5b: Kana re-crop — if kana zone is empty and we have the original image, + // crop the left portion of the bottom row and run recognizer-only (no detection) + if (engine && !originalImage.empty()) { + const ALPRZone* kanaZone = nullptr; + for (const auto* zone : bottomZones) { + if (zone->charClass == CHAR_HIRAGANA || zone->charClass == CHAR_KATAKANA) { + kanaZone = zone; + break; + } + } + if (kanaZone && alprResult.parts[kanaZone->name].empty() && !bottomRow.empty()) { + cv::Rect bottomBox = bottomRow[0].box; + for (const auto& item : bottomRow) { + bottomBox |= item.box; + } + + // Crop the kana area: left ~20% of the expanded plate box, square crop. 
+ int cropW = (int)(plateBox.width * 0.20f); + int cropH = cropW; // Square crop — kana is a square character + int cropX = std::max(0, plateBox.x); + if (cropW < 30) cropW = 30; + + // Try vertical offsets: 50% (center), 30%, 15% from top of bottom row + const float yOffsets[] = { 0.50f, 0.30f, 0.15f }; + bool kanaFound = false; + for (float yOff : yOffsets) { + if (kanaFound) break; + + int centerY = bottomBox.y + (int)(bottomBox.height * yOff); + int cy = centerY - cropH / 2; + int cw = cropW, ch = cropH; + // Clamp to image bounds + if (cy < 0) cy = 0; + if (cropX + cw > originalImage.cols) cw = originalImage.cols - cropX; + if (cy + ch > originalImage.rows) ch = originalImage.rows - cy; + if (cw <= 0 || ch <= 0) continue; + + cv::Mat kanaCrop = originalImage(cv::Rect(cropX, cy, cw, ch)).clone(); + + // Resize to recognizer format: height=48, min width=160 + int recH = 48; + double scale = (double)recH / kanaCrop.rows; + cv::Mat resized; + cv::resize(kanaCrop, resized, cv::Size(), scale, scale, cv::INTER_CUBIC); + int minWidth = 160; + if (resized.cols < minWidth) { + int padLeft = (minWidth - resized.cols) / 2; + int padRight = minWidth - resized.cols - padLeft; + cv::copyMakeBorder(resized, resized, 0, 0, padLeft, padRight, + cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255)); + } + + auto [recText, recConf] = engine->RecognizeText(resized); + + if (!recText.empty()) { + std::string kanaText; + size_t pos = 0; + while (pos < recText.size()) { + size_t startPos = pos; + uint32_t cp = NextUTF8Codepoint(recText, pos); + if (cp == 0) break; + if (IsCharClass(cp, kanaZone->charClass)) { + kanaText += recText.substr(startPos, pos - startPos); + } + } + if (!kanaText.empty()) { + alprResult.parts[kanaZone->name] = kanaText; + kanaFound = true; + } + } + } + } + } + + // Step 5c: Designation re-crop — if designation has too few digits, + // crop the right portion of the bottom row and run recognizer directly + if (engine && !originalImage.empty()) { + const ALPRZone* 
desigZone = nullptr; + for (const auto* zone : bottomZones) { + if (zone->name == "designation") { + desigZone = zone; + break; + } + } + if (desigZone && !desigZone->validationRegex.empty()) { + std::string& desigVal = alprResult.parts[desigZone->name]; + try { + std::regex re(desigZone->validationRegex); + if (!std::regex_match(desigVal, re)) { + // Crop the right ~75% of the plate's bottom row + cv::Rect bottomBox = bottomRow[0].box; + for (const auto& item : bottomRow) bottomBox |= item.box; + + int cropX = plateBox.x + (int)(plateBox.width * 0.25f); + int cropY = bottomBox.y; + int cropW = plateBox.x + plateBox.width - cropX; + int cropH = bottomBox.height; + // Clamp + if (cropX + cropW > originalImage.cols) cropW = originalImage.cols - cropX; + if (cropY + cropH > originalImage.rows) cropH = originalImage.rows - cropY; + + if (cropW > 0 && cropH > 0) { + cv::Mat desigCrop = originalImage(cv::Rect(cropX, cropY, cropW, cropH)).clone(); + // Resize to recognizer format + int recH = 48; + double scale = (double)recH / desigCrop.rows; + cv::Mat resized; + cv::resize(desigCrop, resized, cv::Size(), scale, scale, cv::INTER_CUBIC); + int minWidth = 320; + if (resized.cols < minWidth) { + cv::copyMakeBorder(resized, resized, 0, 0, 0, minWidth - resized.cols, + cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255)); + } + auto [recText, recConf] = engine->RecognizeText(resized); + + if (!recText.empty()) { + // Apply corrections (dots to zeros) + for (const auto& corr : desigZone->corrections) { + size_t pos = 0; + while ((pos = recText.find(corr.first, pos)) != std::string::npos) { + recText.replace(pos, corr.first.length(), corr.second); + pos += corr.second.length(); + } + } + // Extract digits and separators + std::string desigText; + size_t pos = 0; + while (pos < recText.size()) { + size_t startPos = pos; + uint32_t cp = NextUTF8Codepoint(recText, pos); + if (cp == 0) break; + if (IsCharClass(cp, CHAR_DIGIT) || IsDigitSeparator(cp)) { + desigText += 
recText.substr(startPos, pos - startPos); + } + } + if (!desigText.empty() && desigText.size() > desigVal.size()) { + desigVal = desigText; + } + } + } + } + } catch (...) {} + } + } + + // Step 6: Validate and auto-fix zones that fail regex + for (const auto& zone : fmt.zones) { + if (zone.validationRegex.empty() || alprResult.parts[zone.name].empty()) continue; + try { + std::regex re(zone.validationRegex); + std::string& val = alprResult.parts[zone.name]; + if (!std::regex_match(val, re)) { + bool fixed = false; + // For designation: try trimming leading digits (leaked from classification row) + if (zone.row == 1 && zone.charClass == CHAR_DIGIT) { + for (size_t trim = 1; trim < val.size() && !fixed; trim++) { + size_t pos = 0; + for (size_t t = 0; t < trim; t++) { + NextUTF8Codepoint(val, pos); + } + std::string trimmed = val.substr(pos); + if (std::regex_match(trimmed, re)) { + val = trimmed; + fixed = true; + } + } + } + // For designation: if too few digits, pad with leading zeros + // Japanese plates use ・ for zero, so "12" means "00-12" + if (!fixed && zone.name == "designation") { + // Extract only digits from val + std::string digitsOnly; + for (char c : val) { + if (c >= '0' && c <= '9') digitsOnly += c; + } + if (digitsOnly.size() >= 1 && digitsOnly.size() <= 3) { + // Pad to 4 digits and insert hyphen + while (digitsOnly.size() < 4) digitsOnly = "0" + digitsOnly; + std::string padded = digitsOnly.substr(0, 2) + "-" + digitsOnly.substr(2, 2); + if (std::regex_match(padded, re)) { + val = padded; + fixed = true; + } + } + } + if (!fixed) { + alprResult.valid = false; + } + } + } catch (...) 
{} + } + + // Step 7: Build full plate text (after validation/fix so values are corrected) + alprResult.fullPlateText.clear(); + for (const auto* zone : topZones) { + if (!alprResult.fullPlateText.empty()) alprResult.fullPlateText += " "; + alprResult.fullPlateText += alprResult.parts[zone->name]; + } + alprResult.fullPlateText += " "; + for (const auto* zone : bottomZones) { + if (zone != bottomZones[0]) alprResult.fullPlateText += " "; + alprResult.fullPlateText += alprResult.parts[zone->name]; + } + + results.push_back(alprResult); + return results; + } + + // ── ALPR JSON Serialization ───────────────────────────────────────── + + std::string ANSOCRUtility::ALPRResultToJsonString(const std::vector& results) { + if (results.empty()) { + return R"({"results":[]})"; + } + try { + nlohmann::json root; + auto& jsonResults = root["results"] = nlohmann::json::array(); + + for (const auto& res : results) { + nlohmann::json alprInfo; + alprInfo["valid"] = res.valid; + alprInfo["format"] = res.formatName; + for (const auto& part : res.parts) { + alprInfo[part.first] = part.second; + } + + jsonResults.push_back({ + {"class_id", "0"}, + {"track_id", "0"}, + {"class_name", res.fullPlateText}, + {"prob", std::to_string(res.confidence)}, + {"x", std::to_string(res.plateBox.x)}, + {"y", std::to_string(res.plateBox.y)}, + {"width", std::to_string(res.plateBox.width)}, + {"height", std::to_string(res.plateBox.height)}, + {"mask", ""}, + {"extra_info", ""}, + {"camera_id", ""}, + {"polygon", ""}, + {"kps", ""}, + {"alpr_info", alprInfo} + }); + } + return root.dump(); + } catch (const std::exception&) { + return R"({"results":[],"error":"ALPR serialization failed"})"; + } + } + }; diff --git a/modules/ANSOCR/ANSOCRBase.h b/modules/ANSOCR/ANSOCRBase.h index 95a04c5..42629af 100644 --- a/modules/ANSOCR/ANSOCRBase.h +++ b/modules/ANSOCR/ANSOCRBase.h @@ -9,7 +9,65 @@ #include #include "LabVIEWHeader/extcode.h" #include "ANSLicense.h" +#include +#include namespace ANSCENTER { + + // 
── ALPR Enums ────────────────────────────────────────────────────── + enum OCRMode { + OCR_GENERAL = 0, + OCR_ALPR = 1 + }; + + enum ALPRCountry { + ALPR_JAPAN = 0, + ALPR_VIETNAM = 1, + ALPR_CHINA = 2, + ALPR_USA = 3, + ALPR_AUSTRALIA = 4, + ALPR_CUSTOM = 99 + }; + + enum ALPRCharClass { + CHAR_DIGIT = 0, + CHAR_LATIN_ALPHA = 1, + CHAR_ALPHANUMERIC = 2, + CHAR_HIRAGANA = 3, + CHAR_KATAKANA = 4, + CHAR_KANJI = 5, + CHAR_CJK_ANY = 6, + CHAR_ANY = 7 + }; + + // ── ALPR Structs ──────────────────────────────────────────────────── + struct ALPRZone { + std::string name; + int row = 0; + int col = 0; + ALPRCharClass charClass = CHAR_ANY; + int minLength = 1; + int maxLength = 10; + std::string validationRegex; + std::map corrections; + }; + + struct ALPRPlateFormat { + std::string name; + ALPRCountry country = ALPR_JAPAN; + int numRows = 2; + std::vector zones; + float rowSplitThreshold = 0.3f; + }; + + struct ALPRResult { + bool valid = false; + std::string formatName; + std::string fullPlateText; + std::map parts; + float confidence = 0.0f; + cv::Rect plateBox; + }; + struct OCRModelConfig { bool userGPU = true; bool useTensorRT = false; @@ -91,6 +149,12 @@ namespace ANSCENTER { OCRModelConfig _modelConfig; int _engineMode; //0: Auto detect, 1 GPU, 2 CPU SPDLogger& _logger = SPDLogger::GetInstance("OCR", false); + + // ALPR settings + OCRMode _ocrMode = OCR_GENERAL; + ALPRCountry _alprCountry = ALPR_JAPAN; + std::vector _alprFormats; + void CheckLicense(); [[nodiscard]] bool Init(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode); public: @@ -100,6 +164,21 @@ namespace ANSCENTER { [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input, const std::vector& Bbox) = 0; [[nodiscard]] virtual std::vector RunInference(const cv::Mat& input, const std::vector& Bbox, const std::string &cameraId) = 0; + // Run recognizer only on a pre-cropped text image (skips 
detection) + // Returns recognized text and confidence. Default returns empty. + virtual std::pair RecognizeText(const cv::Mat& croppedImage) { return {"", 0.0f}; } + + // ALPR configuration methods + void SetOCRMode(OCRMode mode); + OCRMode GetOCRMode() const; + void SetALPRCountry(ALPRCountry country); + ALPRCountry GetALPRCountry() const; + void SetALPRFormat(const ALPRPlateFormat& format); + void AddALPRFormat(const ALPRPlateFormat& format); + void ClearALPRFormats(); + void LoadDefaultFormats(ALPRCountry country); + const std::vector& GetALPRFormats() const; + ~ANSOCRBase() { try { @@ -119,6 +198,19 @@ namespace ANSCENTER { [[nodiscard]] static std::string PolygonToString(const std::vector& polygon); [[nodiscard]] static std::vector RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight); [[nodiscard]] static std::string KeypointsToString(const std::vector& kps); + + // ALPR post-processing + [[nodiscard]] static std::vector ALPRPostProcessing( + const std::vector& ocrResults, + const std::vector& formats, + int imageWidth, int imageHeight, + ANSOCRBase* engine = nullptr, + const cv::Mat& originalImage = cv::Mat()); + [[nodiscard]] static std::string ALPRResultToJsonString(const std::vector& results); + + // UTF-8 character classification helpers + static uint32_t NextUTF8Codepoint(const std::string& str, size_t& pos); + static bool IsCharClass(uint32_t codepoint, ALPRCharClass charClass); private: }; } @@ -155,6 +247,11 @@ extern "C" ANSOCR_API int RunInferenceInCroppedImages_LVWithCamID(ANSCENTER: extern "C" ANSOCR_API int RunInferenceComplete_LV(ANSCENTER::ANSOCRBase** Handle, cv::Mat** cvImage, const char* cameraId, int getJpegString, int jpegImageSize, LStrHandle detectionResult, LStrHandle imageStr); extern "C" ANSOCR_API int RunInferencesComplete_LV(ANSCENTER::ANSOCRBase** Handle, cv::Mat** cvImage, const char* cameraId, int maxImageSize, const char* strBboxes, LStrHandle detectionResult); +// ALPR configuration API +extern 
"C" ANSOCR_API int SetANSOCRMode(ANSCENTER::ANSOCRBase** Handle, int ocrMode); +extern "C" ANSOCR_API int SetANSOCRALPRCountry(ANSCENTER::ANSOCRBase** Handle, int country); +extern "C" ANSOCR_API int SetANSOCRALPRFormat(ANSCENTER::ANSOCRBase** Handle, const char* formatJson); + // V2 Create / Release — handle as uint64_t by value (no pointer-to-pointer) extern "C" ANSOCR_API uint64_t CreateANSOCRHandleEx_V2(const char* licenseKey, const char* modelFilePath, const char* modelFileZipPassword, int language, int engineMode, int gpuId, diff --git a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp index c5fae49..22582f2 100644 --- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp +++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp @@ -126,5 +126,11 @@ std::vector PaddleOCRV5Engine::ocr(const cv::Mat& img) { return results; } +TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) { + std::lock_guard lock(_mutex); + if (!_initialized || !recognizer_ || croppedImage.empty()) return { "", 0.0f }; + return recognizer_->Recognize(croppedImage); +} + } // namespace onnxocr } // namespace ANSCENTER diff --git a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h index e69bc8c..a99e53f 100644 --- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h +++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h @@ -31,6 +31,9 @@ public: // Returns results matching PaddleOCR::OCRPredictResult format std::vector ocr(const cv::Mat& img); + // Run recognizer only on a pre-cropped text image (no detection step) + TextLine recognizeOnly(const cv::Mat& croppedImage); + // Configuration setters (matching OCRModelConfig parameters) void SetDetMaxSideLen(int val) { _maxSideLen = val; } void SetDetDbThresh(float val) { _detDbThresh = val; } diff --git a/modules/ANSOCR/ANSOnnxOCR.cpp b/modules/ANSOCR/ANSOnnxOCR.cpp index 1e017e8..be017d6 100644 --- a/modules/ANSOCR/ANSOnnxOCR.cpp +++ 
b/modules/ANSOCR/ANSOnnxOCR.cpp @@ -384,4 +384,11 @@ bool ANSONNXOCR::Destroy() { } } +std::pair ANSONNXOCR::RecognizeText(const cv::Mat& croppedImage) { + std::lock_guard lock(_mutex); + if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f}; + auto result = _engine->recognizeOnly(croppedImage); + return {result.text, result.score}; +} + } // namespace ANSCENTER diff --git a/modules/ANSOCR/ANSOnnxOCR.h b/modules/ANSOCR/ANSOnnxOCR.h index cdcb70f..b6b1f17 100644 --- a/modules/ANSOCR/ANSOnnxOCR.h +++ b/modules/ANSOCR/ANSOnnxOCR.h @@ -23,6 +23,7 @@ namespace ANSCENTER { std::vector RunInference(const cv::Mat& input, const std::vector& Bbox) override; std::vector RunInference(const cv::Mat& input, const std::vector& Bbox, const std::string& cameraId) override; + std::pair RecognizeText(const cv::Mat& croppedImage) override; ~ANSONNXOCR(); bool Destroy() override; diff --git a/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp b/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp index e76ba7d..5313a96 100644 --- a/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp +++ b/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp @@ -147,5 +147,11 @@ std::vector PaddleOCRV5RTEngine::ocr(const cv::Mat& image) { } } +TextLine PaddleOCRV5RTEngine::recognizeOnly(const cv::Mat& croppedImage) { + std::lock_guard lock(_mutex); + if (!recognizer_ || croppedImage.empty()) return { "", 0.0f }; + return recognizer_->Recognize(croppedImage); +} + } // namespace rtocr } // namespace ANSCENTER diff --git a/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h b/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h index 75d9f47..929952b 100644 --- a/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h +++ b/modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h @@ -31,6 +31,9 @@ public: // Run full OCR pipeline: detect → crop → [classify →] recognize std::vector ocr(const cv::Mat& image); + // Run recognizer only on a pre-cropped text image (no detection step) + TextLine recognizeOnly(const cv::Mat& croppedImage); + 
// Configuration setters void SetDetMaxSideLen(int v) { detMaxSideLen_ = v; } void SetDetDbThresh(float v) { detDbThresh_ = v; } diff --git a/modules/ANSOCR/ANSRtOCR.cpp b/modules/ANSOCR/ANSRtOCR.cpp index a04f2b5..528a7d0 100644 --- a/modules/ANSOCR/ANSRtOCR.cpp +++ b/modules/ANSOCR/ANSRtOCR.cpp @@ -378,6 +378,13 @@ std::vector ANSRTOCR::RunInference(const cv::Mat& input, c } } +std::pair<std::string, float> ANSRTOCR::RecognizeText(const cv::Mat& croppedImage) { + std::lock_guard<std::mutex> lock(_mutex); + if (!_isInitialized || !_engine || croppedImage.empty()) return {"", 0.0f}; + auto result = _engine->recognizeOnly(croppedImage); + return {result.text, result.score}; +} + ANSRTOCR::~ANSRTOCR() { try { Destroy(); diff --git a/modules/ANSOCR/ANSRtOCR.h b/modules/ANSOCR/ANSRtOCR.h index aff9b6a..63f8535 100644 --- a/modules/ANSOCR/ANSRtOCR.h +++ b/modules/ANSOCR/ANSRtOCR.h @@ -21,6 +21,7 @@ public: std::vector RunInference(const cv::Mat& input, const std::vector& Bbox) override; std::vector RunInference(const cv::Mat& input, const std::vector& Bbox, const std::string& cameraId) override; + std::pair<std::string, float> RecognizeText(const cv::Mat& croppedImage) override; bool Destroy() override; private: diff --git a/modules/ANSOCR/dllmain.cpp b/modules/ANSOCR/dllmain.cpp index 1bb8e44..7e6fa03 100644 --- a/modules/ANSOCR/dllmain.cpp +++ b/modules/ANSOCR/dllmain.cpp @@ -6,6 +6,7 @@ #include "ANSRtOCR.h" #include "ANSLibsLoader.h" #include "ANSGpuFrameRegistry.h" +#include <nlohmann/json.hpp> #include "NV12PreprocessHelper.h" #include #include @@ -234,6 +235,19 @@ extern "C" ANSOCR_API int CreateANSOCRHandle(ANSCENTER::ANSOCRBase** Handle, co classifierThreshold, useDilation, 960); } +// Helper: serialize OCR results with optional ALPR post-processing +static std::string SerializeOCRResults(ANSCENTER::ANSOCRBase* engine, + const std::vector& outputs, int imageWidth, int imageHeight, + const cv::Mat& originalImage = cv::Mat()) { + if (engine->GetOCRMode() == ANSCENTER::OCR_ALPR && !engine->GetALPRFormats().empty()) { + auto 
alprResults = ANSCENTER::ANSOCRUtility::ALPRPostProcessing( + outputs, engine->GetALPRFormats(), imageWidth, imageHeight, + engine, originalImage); + return ANSCENTER::ANSOCRUtility::ALPRResultToJsonString(alprResults); + } + return ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); +} + extern "C" ANSOCR_API std::string RunInference(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength) { if (!Handle || !*Handle) return ""; OCRHandleGuard guard(AcquireOCRHandle(*Handle)); @@ -243,7 +257,7 @@ extern "C" ANSOCR_API std::string RunInference(ANSCENTER::ANSOCRBase** Handle, cv::Mat frame = cv::imdecode(cv::Mat(1, bufferLength, CV_8UC1, jpeg_string), cv::IMREAD_COLOR); if (frame.empty()) return ""; std::vector outputs = engine->RunInference(frame); - std::string stResult = ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); + std::string stResult = SerializeOCRResults(engine, outputs, frame.cols, frame.rows, frame); frame.release(); outputs.clear(); return stResult; @@ -260,7 +274,7 @@ extern "C" ANSOCR_API std::string RunInferenceWithCamID(ANSCENTER::ANSOCRBase** cv::Mat frame = cv::imdecode(cv::Mat(1, bufferLength, CV_8UC1, jpeg_string), cv::IMREAD_COLOR); if (frame.empty()) return ""; std::vector outputs = engine->RunInference(frame, cameraId); - std::string stResult = ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); + std::string stResult = SerializeOCRResults(engine, outputs, frame.cols, frame.rows, frame); frame.release(); outputs.clear(); return stResult; @@ -276,7 +290,7 @@ extern "C" ANSOCR_API int RunInferenceCV(ANSCENTER::ANSOCRBase** Handle, const c try { if (image.empty()) return 0; std::vector outputs = engine->RunInference(image, "cameraId"); - ocrResult = ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); + ocrResult = SerializeOCRResults(engine, outputs, image.cols, image.rows, image); return 1; } catch (...) 
{ return -2; } @@ -291,7 +305,7 @@ extern "C" ANSOCR_API std::string RunInferenceBinary(ANSCENTER::ANSOCRBase** Ha cv::Mat frame = cv::Mat(height, width, CV_8UC3, jpeg_bytes).clone(); if (frame.empty()) return ""; std::vector outputs = engine->RunInference(frame); - std::string stResult = ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); + std::string stResult = SerializeOCRResults(engine, outputs, width, height, frame); frame.release(); outputs.clear(); return stResult; @@ -325,6 +339,62 @@ extern "C" ANSOCR_API int ReleaseANSOCRHandle(ANSCENTER::ANSOCRBase** Handle) { } } +// ── ALPR Configuration API ────────────────────────────────────────── + +extern "C" ANSOCR_API int SetANSOCRMode(ANSCENTER::ANSOCRBase** Handle, int ocrMode) { + if (!Handle || !*Handle) return -1; + (*Handle)->SetOCRMode(static_cast<ANSCENTER::OCRMode>(ocrMode)); + return 0; +} + +extern "C" ANSOCR_API int SetANSOCRALPRCountry(ANSCENTER::ANSOCRBase** Handle, int country) { + if (!Handle || !*Handle) return -1; + (*Handle)->SetALPRCountry(static_cast<ANSCENTER::ALPRCountry>(country)); + return 0; +} + +extern "C" ANSOCR_API int SetANSOCRALPRFormat(ANSCENTER::ANSOCRBase** Handle, const char* formatJson) { + if (!Handle || !*Handle || !formatJson) return -1; + try { + nlohmann::json j = nlohmann::json::parse(formatJson); + ANSCENTER::ALPRPlateFormat fmt; + fmt.name = j.value("name", "CUSTOM"); + fmt.country = static_cast<ANSCENTER::ALPRCountry>(j.value("country", 99)); + fmt.numRows = j.value("num_rows", 2); + fmt.rowSplitThreshold = j.value("row_split_threshold", 0.3f); + + static const std::map<std::string, ANSCENTER::CharClass> classMap = { + {"digit", ANSCENTER::CHAR_DIGIT}, {"latin_alpha", ANSCENTER::CHAR_LATIN_ALPHA}, + {"alphanumeric", ANSCENTER::CHAR_ALPHANUMERIC}, {"hiragana", ANSCENTER::CHAR_HIRAGANA}, + {"katakana", ANSCENTER::CHAR_KATAKANA}, {"kanji", ANSCENTER::CHAR_KANJI}, + {"cjk_any", ANSCENTER::CHAR_CJK_ANY}, {"any", ANSCENTER::CHAR_ANY} + }; + + for (const auto& zj : j["zones"]) { + ANSCENTER::ALPRZone zone; + zone.name = zj.value("name", ""); + zone.row = 
zj.value("row", 0); + zone.col = zj.value("col", 0); + std::string ccStr = zj.value("char_class", "any"); + auto it = classMap.find(ccStr); + zone.charClass = (it != classMap.end()) ? it->second : ANSCENTER::CHAR_ANY; + zone.minLength = zj.value("min_length", 1); + zone.maxLength = zj.value("max_length", 10); + zone.validationRegex = zj.value("regex", ""); + if (zj.contains("corrections")) { + for (auto& [key, val] : zj["corrections"].items()) { + zone.corrections[key] = val.get<std::string>(); + } + } + fmt.zones.push_back(zone); + } + (*Handle)->SetALPRFormat(fmt); + return 0; + } catch (...) { + return -2; + } +} + extern "C" ANSOCR_API std::string RunInferenceImagePath(ANSCENTER::ANSOCRBase** Handle, const char* imageFilePath) { if (!Handle || !*Handle) return ""; OCRHandleGuard guard(AcquireOCRHandle(*Handle)); @@ -335,7 +405,7 @@ extern "C" ANSOCR_API std::string RunInferenceImagePath(ANSCENTER::ANSOCRBase** cv::Mat frame = cv::imread(stImageFileName, cv::ImreadModes::IMREAD_COLOR); if (frame.empty()) return ""; std::vector outputs = engine->RunInference(frame); - std::string stResult = ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(outputs); + std::string stResult = SerializeOCRResults(engine, outputs, frame.cols, frame.rows, frame); frame.release(); outputs.clear(); return stResult; diff --git a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp index f6ef0d5..a2520b6 100644 --- a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp +++ b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp @@ -16,6 +16,12 @@ #include #include #include +#include "C:/ANSLibs/nlohmann/json.hpp" + +#ifdef WIN32 +#define NOMINMAX +#include <windows.h> +#endif #ifdef WIN32 const char sep = '\\'; @@ -193,6 +199,70 @@ struct ImageViewerState { bool dirty = true; }; +#ifdef WIN32 +// Render Unicode text onto a cv::Mat using Windows GDI +static void putTextUnicode(cv::Mat& img, const std::string& text, cv::Point org, + double fontScale, cv::Scalar color, int thickness) { + // Convert UTF-8 
to wide string + int wlen = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, nullptr, 0); + std::wstring wtext(wlen - 1, 0); + MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, &wtext[0], wlen); + + // Create a compatible DC and bitmap + HDC hdc = CreateCompatibleDC(nullptr); + int fontHeight = (int)(fontScale * 30); // approximate pixel height + + HFONT hFont = CreateFontW(fontHeight, 0, 0, 0, + (thickness > 2) ? FW_BOLD : FW_NORMAL, + FALSE, FALSE, FALSE, + DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS, + ANTIALIASED_QUALITY, DEFAULT_PITCH | FF_SWISS, L"Yu Gothic UI"); + HFONT hOldFont = (HFONT)SelectObject(hdc, hFont); + + // Measure text size + SIZE sz; + GetTextExtentPoint32W(hdc, wtext.c_str(), (int)wtext.size(), &sz); + + // Create a DIB section so we can read pixels back + BITMAPINFO bmi = {}; + bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bmi.bmiHeader.biWidth = sz.cx; + bmi.bmiHeader.biHeight = -sz.cy; // top-down + bmi.bmiHeader.biPlanes = 1; + bmi.bmiHeader.biBitCount = 32; + bmi.bmiHeader.biCompression = BI_RGB; + void* bits = nullptr; + HBITMAP hBmp = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0); + HBITMAP hOldBmp = (HBITMAP)SelectObject(hdc, hBmp); + + // Draw text onto the bitmap + SetBkMode(hdc, TRANSPARENT); + SetTextColor(hdc, RGB((int)color[2], (int)color[1], (int)color[0])); // BGR to RGB + TextOutW(hdc, 0, 0, wtext.c_str(), (int)wtext.size()); + + // Copy rendered text onto the cv::Mat + cv::Mat textImg(sz.cy, sz.cx, CV_8UC4, bits); + for (int row = 0; row < sz.cy; ++row) { + for (int col = 0; col < sz.cx; ++col) { + cv::Vec4b px = textImg.at<cv::Vec4b>(row, col); + if (px[0] != 0 || px[1] != 0 || px[2] != 0) { + int dy = org.y + row; + int dx = org.x + col; + if (dy >= 0 && dy < img.rows && dx >= 0 && dx < img.cols) { + img.at<cv::Vec3b>(dy, dx) = cv::Vec3b(px[0], px[1], px[2]); + } + } + } + } + + SelectObject(hdc, hOldBmp); + SelectObject(hdc, hOldFont); + DeleteObject(hBmp); + DeleteObject(hFont); + DeleteDC(hdc); +} +#endif 
+ static void onViewerMouse(int event, int x, int y, int flags, void* userdata) { ImageViewerState& s = *(ImageViewerState*)userdata; if (event == cv::EVENT_MOUSEWHEEL) { @@ -234,23 +304,27 @@ int TestOCRv5mage() { std::cout << "Current working directory: " << currentPath << std::endl; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip"; - std::string imagePath = "E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp"; + std::string imagePath = "C:\\Programs\\ModelTraining\\JALPR\\data\\20260329_174127_834.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp"; int language = 0; // CUSTOM - int engine = 1;// GPU + int engine = 0;// GPU // For high-resolution images with PP-OCRv5 server models, use higher limitSideLen // (default 960 downscales large images too aggressively, missing small text) int gpuId = 0; - double detDBThresh = 0.3, detBoxThresh = 0.6, detUnclipRatio = 1.5; + double detDBThresh = 0.5, detBoxThresh = 0.3, detUnclipRatio = 1.2; double clsThresh = 0.9; - int useDilation = 0; + int useDilation = 1; int limitSideLen = 2560; // 2560 Higher resolution for server-grade detection int createResult = CreateANSOCRHandleEx(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine, gpuId, detDBThresh, detBoxThresh, detUnclipRatio, clsThresh, useDilation, limitSideLen); std::cout << "ANSOCR Engine Creation:" << createResult << std::endl; + // Enable ALPR mode with Japanese plate format + SetANSOCRMode(&infHandle, 1); // OCR_ALPR + SetANSOCRALPRCountry(&infHandle, 0); // ALPR_JAPAN + cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR); if (input.empty()) { std::cerr << "Failed to load image: " << imagePath << std::endl; @@ -269,7 +343,7 @@ int TestOCRv5mage() { auto warmupEnd = std::chrono::high_resolution_clock::now(); double warmupMs = std::chrono::duration(warmupEnd - warmupStart).count(); std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl; - std::cout << 
"Result:" << detectionResult << std::endl; + std::cout << "ALPR Result:" << detectionResult << std::endl; // --- Benchmark: run N iterations and report stats --- const int benchmarkIterations = 10; @@ -305,24 +379,50 @@ int TestOCRv5mage() { int textOffset = 8; if (!detectionResult.empty()) { - pt.clear(); - std::stringstream ss; - ss.clear(); - ss << detectionResult; - boost::property_tree::read_json(ss, pt); - BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) - { - const boost::property_tree::ptree& result = child.second; - const auto class_id = GetData(result, "class_id"); - const auto class_name = GetData(result, "class_name"); - const auto x = GetData(result, "x"); - const auto y = GetData(result, "y"); - const auto w = GetData(result, "width"); - const auto h = GetData(result, "height"); - cv::rectangle(frame, cv::Rect((int)x, (int)y, (int)w, (int)h), + // Use nlohmann::json for proper parsing of nested alpr_info + nlohmann::json jsonResult = nlohmann::json::parse(detectionResult); + for (const auto& result : jsonResult["results"]) { + const std::string class_name = result.value("class_name", ""); + const int x = std::stoi(result.value("x", "0")); + const int y = std::stoi(result.value("y", "0")); + const int w = std::stoi(result.value("width", "0")); + const int h = std::stoi(result.value("height", "0")); + + cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), boxThickness); - cv::putText(frame, class_name, cv::Point((int)x, (int)y - textOffset), + + // Display ALPR structured info if available + std::string displayText = class_name; + if (result.contains("alpr_info")) { + const auto& alpr = result["alpr_info"]; + std::cout << "\n=== ALPR Result ===" << std::endl; + std::cout << " Format: " << alpr.value("format", "") << std::endl; + std::cout << " Valid: " << (alpr.value("valid", false) ? 
"YES" : "NO") << std::endl; + std::cout << " Region: " << alpr.value("region", "") << std::endl; + std::cout << " Classification: " << alpr.value("classification", "") << std::endl; + std::cout << " Kana: " << alpr.value("kana", "") << std::endl; + std::cout << " Designation: " << alpr.value("designation", "") << std::endl; + std::cout << " Full Plate: " << class_name << std::endl; + + // Build a compact display string for the viewer + displayText = alpr.value("region", "") + " " + + alpr.value("classification", "") + " " + + alpr.value("kana", "") + " " + + alpr.value("designation", ""); + } + +#ifdef WIN32 + { + int textH = (int)(fontScale * 30); + int ty = y - textOffset - textH; + if (ty < 0) ty = y + boxThickness + 2; + putTextUnicode(frame, displayText, cv::Point(x, ty), + fontScale, cv::Scalar(0, 0, 255), fontThickness); + } +#else + cv::putText(frame, displayText, cv::Point(x, y - textOffset), cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 255), fontThickness, cv::LINE_AA); +#endif } } @@ -384,15 +484,21 @@ int TestOCRv5mage() { if (cv::getWindowProperty(winName, cv::WND_PROP_VISIBLE) < 1) break; } + // Release OCR handle BEFORE OpenCV cleanup to avoid CUDA teardown errors + // (TensorRT needs the CUDA context alive to free GPU resources cleanly) + ReleaseANSOCRHandle(&infHandle); cv::destroyAllWindows(); frame.release(); input.release(); - ReleaseANSOCRHandle(&infHandle); return 0; } int main() { +#ifdef WIN32 + SetConsoleOutputCP(CP_UTF8); + SetConsoleCP(CP_UTF8); +#endif TestOCRv5mage(); //ANSOCR_VideoTest();