ANSCORE/modules/ANSOCR/ANSOCRBase.cpp

#include "ANSOCRBase.h"
#include "Utility.h"
#include <opencv2/highgui.hpp>
#include <omp.h>
#include <json.hpp>
#include "ANSLibsLoader.h"

static bool ansocrLicenceValid = false;
// Global once_flag to protect license checking
static std::once_flag ansocrLicenseOnceFlag;
/// Reads the value stored under `key` in `pt`, converted to T.
/// Returns a value-initialised T (0, 0.0f, empty string, ...) when
/// get_optional yields no value for the key, so callers never receive
/// an indeterminate value.
template <typename T>
T GetData(const boost::property_tree::ptree& pt, const std::string& key)
{
	T ret{};
	if (boost::optional<T> data = pt.get_optional<T>(key))
	{
		ret = data.get();
	}
	return ret;
}

namespace ANSCENTER {
	/// <summary>
	/// Verifies the licence key and stores the outcome in the file-level
	/// ansocrLicenceValid flag. The key is first checked against product id 1005
	/// ("ANSOCR"); if that is rejected it is checked against product id 1003
	/// ("ANSVIS"). Any std::exception raised by the verification counts as an
	/// invalid licence.
	/// </summary>
	static void VerifyGlobalANSOCRLicense(const std::string& licenseKey) {
		bool accepted = false;
		try {
			// Short-circuit: the ANSVIS check only runs when the ANSOCR check fails.
			accepted = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1005, "ANSOCR")
				|| ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1003, "ANSVIS");
		}
		catch (const std::exception&) {
			accepted = false;
		}
		ansocrLicenceValid = accepted;
	}
	void ANSOCRBase::CheckLicense() {
		try {
			// Check once globally
			std::call_once(ansocrLicenseOnceFlag, [this]() {
				VerifyGlobalANSOCRLicense(_licenseKey);
				});

			// Update this instance's local license flag
			_licenseValid = ansocrLicenceValid;
		}
		catch (const std::exception& e) {
			this->_logger.LogFatal("ANSOCRBase::CheckLicense. Error:", e.what(), __FILE__, __LINE__);
		}
	}

	/// <summary>
	/// Initialises the library loader, validates the licence and extracts the model archive.
	/// </summary>
	/// <param name="licenseKey">Key handed to CheckLicense().</param>
	/// <param name="modelConfig">Configuration copied into _modelConfig.</param>
	/// <param name="modelZipFilePath">Path of the password-protected model archive.</param>
	/// <param name="modelZipPassword">Optional password tried before the built-in ones.</param>
	/// <param name="engineMode">Stored in _engineMode.</param>
	/// <returns>
	/// true when the licence is valid, the archive exists and _modelFolder exists after
	/// the extraction attempts; false otherwise (including on any std::exception).
	/// </returns>
	bool ANSOCRBase::Init(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
		try {
			ANSCENTER::ANSLibsLoader::Initialize();

			// Reset per-instance state before doing any work.
			_licenseKey = licenseKey;
			_engineMode = engineMode;
			_licenseValid = false;
			_modelConfig = modelConfig;
			_modelFolder.clear();
			_modelConfigFile.clear();

			CheckLicense();
			if (!_licenseValid) {
				// NOTE(review): log tags below say "Initialize" although this is Init; kept unchanged.
				this->_logger.LogError("ANSOCRBase::Initialize", "Invalid License", __FILE__, __LINE__);
				return false;
			}

			// 0. The archive must exist.
			if (!FileExist(modelZipFilePath)) {
				this->_logger.LogFatal("ANSOCRBase::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
				return false;
			}

			// 1. Extract the archive, trying the caller's password first and then the
			//    built-in fallbacks; the first successful extraction wins.
			// NOTE(review): archive passwords are hard-coded in the binary — consider
			// moving them to protected configuration.
			std::vector<std::string> passwordArray;
			if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
			passwordArray.push_back("AnsDemoModels20@!");
			passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
			passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
			std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
			for (auto& password : passwordArray) {
				if (ExtractPasswordProtectedZip(modelZipFilePath, password, modelName, _modelFolder, false))
					break;
			}

			// 2. A missing folder means the archive could not be extracted
			//    (wrong password or unreadable archive).
			if (!std::filesystem::exists(_modelFolder)) {
				this->_logger.LogError("ANSOCRBase::Initialize. Output model folder is not exist", modelName, __FILE__, __LINE__);
				return false;
			}

			return true;
		}
		catch (const std::exception& e) {
			this->_logger.LogFatal("ANSOCRBase::Initialize", e.what(), __FILE__, __LINE__);
			return false;
		}
	}

	/// <summary>
	/// Validates the licence, extracts the model archive and points every path in
	/// _modelConfig at the extracted folder.
	/// </summary>
	/// <param name="licenseKey">Key handed to CheckLicense().</param>
	/// <param name="modelConfig">Configuration copied into _modelConfig before paths are filled in.</param>
	/// <param name="modelZipFilePath">Path of the password-protected model archive.</param>
	/// <param name="modelZipPassword">Optional password tried before the built-in ones.</param>
	/// <param name="engineMode">Stored in _engineMode.</param>
	/// <returns>
	/// true when the licence is valid, the archive exists and _modelFolder exists after
	/// the extraction attempts; false otherwise (including on any std::exception).
	/// </returns>
	bool ANSOCRBase::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
		try {
			// Reset per-instance state before doing any work.
			_licenseKey = licenseKey;
			_engineMode = engineMode;
			_licenseValid = false;
			_modelConfig = modelConfig;
			_modelFolder.clear();
			_modelConfigFile.clear();

			CheckLicense();
			if (!_licenseValid) {
				this->_logger.LogError("ANSOCRBase::Initialize", "Invalid License", __FILE__, __LINE__);
				return false;
			}

			// 0. The archive must exist.
			if (!FileExist(modelZipFilePath)) {
				this->_logger.LogFatal("ANSOCRBase::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
				return false;
			}

			// 1. Extract the archive, trying the caller's password first and then the
			//    built-in fallbacks; the first successful extraction wins.
			// NOTE(review): archive passwords are hard-coded in the binary — consider
			// moving them to protected configuration.
			std::vector<std::string> passwordArray;
			if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
			passwordArray.push_back("AnsDemoModels20@!");
			passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
			passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
			std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
			for (auto& password : passwordArray) {
				if (ExtractPasswordProtectedZip(modelZipFilePath, password, modelName, _modelFolder, false))
					break;
			}

			// 2. A missing folder means the archive could not be extracted
			//    (wrong password or unreadable archive).
			if (!std::filesystem::exists(_modelFolder)) {
				this->_logger.LogError("ANSOCRBase::Initialize. Output model folder is not exist", modelName, __FILE__, __LINE__);
				return false;
			}

			// 3. Record where the model configuration file is expected
			//    (its existence is not checked here).
			_modelConfigFile = CreateFilePath(_modelFolder, "model_config.json");

			// 4. Every model directory is the extracted folder itself.
			_modelConfig.detectionModelDir = _modelFolder;
			_modelConfig.recognizerModelDir = _modelFolder;
			_modelConfig.clsModelDir = _modelFolder;
			_modelConfig.layoutModelDir = _modelFolder;
			_modelConfig.layourDictionaryPath = _modelFolder;
			_modelConfig.tableModelDir = _modelFolder;
			_modelConfig.tableCharDictionaryPath = _modelFolder;
			_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ch.txt");

			// 5. Model and parameter entries point at the same .onnx file.
			_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "ansocrdec.onnx");
			_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "ansocrdec.onnx");
			_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "ansocrcls.onnx");
			_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "ansocrcls.onnx");
			_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ansocrrec.onnx");
			_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "ansocrrec.onnx");
			return true;
		}
		catch (const std::exception& e) {
			this->_logger.LogFatal("ANSOCRBase::Initialize", e.what(), __FILE__, __LINE__);
			return false;
		}
	}
	std::string ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(const std::vector<OCRObject>& dets)
	{
		if (dets.empty()) {
			return R"({"results":[]})";
		}

		try {
			nlohmann::json root;
			auto& results = root["results"] = nlohmann::json::array();

			for (const auto& det : dets) {
				results.push_back({
					{"class_id", std::to_string(det.classId)},
					{"track_id", std::to_string(det.trackId)},
					{"class_name", det.className},
					{"prob", std::to_string(det.confidence)},
					{"x", std::to_string(det.box.x)},
					{"y", std::to_string(det.box.y)},
					{"width", std::to_string(det.box.width)},
					{"height", std::to_string(det.box.height)},
					{"mask", ""},  // TODO: convert masks to comma separated string
					{"extra_info", det.extraInfo},
					{"camera_id", det.cameraId},
					{"polygon", PolygonToString(det.polygon)},
					{"kps", KeypointsToString(det.kps)}
					});
			}

			return root.dump();
		}
		catch (const std::exception& e) {
			// Add your error logging here if needed
			return R"({"results":[],"error":"Serialization failed"})";
		}
	}

	std::vector<cv::Rect> ANSCENTER::ANSOCRUtility::GetBoundingBoxes(const std::string& strBBoxes) {
		std::vector<cv::Rect> bBoxes;
		bBoxes.clear();
		std::stringstream ss;
		ss << strBBoxes;
		boost::property_tree::ptree pt;
		boost::property_tree::read_json(ss, pt);
		BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
		{
			const boost::property_tree::ptree& result = child.second;
			const auto x = GetData<float>(result, "x");
			const auto y = GetData<float>(result, "y");
			const auto width = GetData<float>(result, "width");
			const auto height = GetData<float>(result, "height");
			cv::Rect rectTemp;
			rectTemp.x = x;
			rectTemp.y = y;
			rectTemp.width = width;
			rectTemp.height = height;
			bBoxes.push_back(rectTemp);
		}
		return bBoxes;
	}

	/// <summary>
	/// Joins polygon vertices as "x0;y0;x1;y1;..." with three decimals per coordinate.
	/// Returns an empty string for an empty polygon. Each vertex is formatted through a
	/// 64-byte buffer, so snprintf truncates a vertex whose text would be longer.
	/// </summary>
	std::string ANSCENTER::ANSOCRUtility::PolygonToString(const std::vector<cv::Point2f>& polygon) {
		std::string joined;
		if (polygon.empty()) {
			return joined;
		}
		joined.reserve(polygon.size() * 20);

		char text[64];
		bool leading = true;
		for (const cv::Point2f& vertex : polygon) {
			if (leading) {
				snprintf(text, sizeof(text), "%.3f;%.3f", vertex.x, vertex.y);
				leading = false;
			}
			else {
				// Every vertex after the first is preceded by a separator.
				snprintf(text, sizeof(text), ";%.3f;%.3f", vertex.x, vertex.y);
			}
			joined += text;
		}

		return joined;
	}
	/// <summary>
	/// Joins keypoint values as "v0;v1;..." with three decimals each.
	/// Returns an empty string for an empty vector. Each value is formatted through a
	/// 32-byte buffer, so snprintf truncates a value whose text would be longer.
	/// </summary>
	std::string ANSCENTER::ANSOCRUtility::KeypointsToString(const std::vector<float>& kps) {
		std::string joined;
		if (kps.empty()) {
			return joined;
		}
		joined.reserve(kps.size() * 10);

		char text[32];
		auto it = kps.begin();
		snprintf(text, sizeof(text), "%.3f", *it);
		joined += text;
		for (++it; it != kps.end(); ++it) {
			joined += ';';
			snprintf(text, sizeof(text), "%.3f", *it);
			joined += text;
		}

		return joined;
	}
	/// <summary>
	/// Converts a pixel rectangle into its four corners divided by the image size,
	/// ordered top-left, top-right, bottom-right, bottom-left.
	/// Returns an empty polygon when either image dimension is zero or negative.
	/// </summary>
	std::vector<cv::Point2f> ANSCENTER::ANSOCRUtility::RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight) {
		// Guard against division by zero (and nonsensical negative sizes).
		if (imageWidth <= 0 || imageHeight <= 0) {
			return std::vector<cv::Point2f>();
		}

		const float left = rect.x / imageWidth;
		const float right = (rect.x + rect.width) / imageWidth;
		const float top = rect.y / imageHeight;
		const float bottom = (rect.y + rect.height) / imageHeight;

		std::vector<cv::Point2f> corners = {
			{ left, top },
			{ right, top },
			{ right, bottom },
			{ left, bottom }
		};
		return corners;
	}


	// ── ALPR Configuration Methods ──────────────────────────────────────

	/// Stores the OCR mode for this engine.
	void ANSOCRBase::SetOCRMode(OCRMode mode) {
		this->_ocrMode = mode;
	}
	/// Returns the currently stored OCR mode.
	OCRMode ANSOCRBase::GetOCRMode() const {
		return this->_ocrMode;
	}
	/// Stores the ALPR country and replaces the current plate formats with that
	/// country's defaults (see LoadDefaultFormats).
	void ANSOCRBase::SetALPRCountry(ALPRCountry country) {
		this->_alprCountry = country;
		this->LoadDefaultFormats(country);
	}
	/// Returns the currently stored ALPR country.
	ALPRCountry ANSOCRBase::GetALPRCountry() const {
		return this->_alprCountry;
	}
	void ANSOCRBase::SetALPRFormat(const ALPRPlateFormat& format) {
		_alprFormats.clear();
		_alprFormats.push_back(format);
	}
	void ANSOCRBase::AddALPRFormat(const ALPRPlateFormat& format) {
		_alprFormats.push_back(format);
	}
	void ANSOCRBase::ClearALPRFormats() { _alprFormats.clear(); }
	const std::vector<ALPRPlateFormat>& ANSOCRBase::GetALPRFormats() const { return _alprFormats; }

	/// <summary>
	/// Replaces the configured plate formats with the built-in defaults for `country`.
	/// Only ALPR_JAPAN has a default (a two-row plate with region, classification,
	/// kana and designation zones); any other country leaves the list empty.
	/// </summary>
	void ANSOCRBase::LoadDefaultFormats(ALPRCountry country) {
		_alprFormats.clear();
		if (country != ALPR_JAPAN) {
			return;
		}

		// Top row, left: issuing region written in kanji.
		ALPRZone regionZone;
		regionZone.name = "region";
		regionZone.row = 0;
		regionZone.col = 0;
		regionZone.charClass = CHAR_KANJI;
		regionZone.minLength = 1;
		regionZone.maxLength = 4;
		regionZone.corrections = { {"#", "\xe4\xba\x95"} }; // # -> 井

		// Top row, right: 1-3 digit classification number.
		ALPRZone classificationZone;
		classificationZone.name = "classification";
		classificationZone.row = 0;
		classificationZone.col = 1;
		classificationZone.charClass = CHAR_DIGIT;
		classificationZone.minLength = 1;
		classificationZone.maxLength = 3;
		classificationZone.validationRegex = R"(^\d{1,3}$)";

		// Bottom row, left: a single hiragana character.
		ALPRZone kanaZone;
		kanaZone.name = "kana";
		kanaZone.row = 1;
		kanaZone.col = 0;
		kanaZone.charClass = CHAR_HIRAGANA;
		kanaZone.minLength = 1;
		kanaZone.maxLength = 1;

		// Bottom row, right: designation number in the form NN-NN.
		ALPRZone designationZone;
		designationZone.name = "designation";
		designationZone.row = 1;
		designationZone.col = 1;
		designationZone.charClass = CHAR_DIGIT;
		designationZone.minLength = 2;
		designationZone.maxLength = 5;
		designationZone.validationRegex = R"(^\d{2}-\d{2}$)";
		// On Japanese plates, ・ (middle dot) represents 0
		designationZone.corrections = {
			{"\xe3\x83\xbb", "0"},  // ・ (U+30FB fullwidth middle dot)
			{"\xc2\xb7", "0"},      // · (U+00B7 middle dot)
			{".", "0"}              // ASCII dot
		};

		ALPRPlateFormat japanFormat;
		japanFormat.name = "JAPAN_STANDARD";
		japanFormat.country = ALPR_JAPAN;
		japanFormat.numRows = 2;
		japanFormat.rowSplitThreshold = 0.3f;
		japanFormat.zones = { regionZone, classificationZone, kanaZone, designationZone };

		_alprFormats.push_back(japanFormat);
	}

	// ── UTF-8 Helpers ───────────────────────────────────────────────────

	/// <summary>
	/// Decodes the UTF-8 sequence starting at str[pos] and advances pos past it.
	/// </summary>
	/// <param name="str">UTF-8 encoded text.</param>
	/// <param name="pos">Byte offset to decode at; never advanced beyond str.size().</param>
	/// <returns>
	/// The decoded code point; 0 when pos is at or past the end of the string (or the byte
	/// at pos is a literal NUL); U+FFFD for malformed input (invalid lead byte, truncated
	/// sequence, missing continuation byte, overlong form, surrogate, or value above U+10FFFF).
	/// </returns>
	uint32_t ANSOCRUtility::NextUTF8Codepoint(const std::string& str, size_t& pos) {
		constexpr uint32_t kReplacement = 0xFFFD;
		if (pos >= str.size()) return 0;

		const unsigned char lead = static_cast<unsigned char>(str[pos]);
		if (lead < 0x80) {
			pos += 1;
			return lead;
		}

		size_t continuationCount = 0;
		uint32_t minValue = 0; // smallest value that legitimately needs this many bytes
		uint32_t cp = 0;
		if ((lead & 0xE0) == 0xC0) {
			continuationCount = 1; minValue = 0x80; cp = lead & 0x1F;
		} else if ((lead & 0xF0) == 0xE0) {
			continuationCount = 2; minValue = 0x800; cp = lead & 0x0F;
		} else if ((lead & 0xF8) == 0xF0) {
			continuationCount = 3; minValue = 0x10000; cp = lead & 0x07;
		} else {
			// Stray continuation byte or invalid lead: skip it, but do not return 0,
			// which callers interpret as end of text.
			pos += 1;
			return kReplacement;
		}

		pos += 1;
		for (size_t i = 0; i < continuationCount; ++i) {
			// Stop at a truncated sequence or a byte that is not 10xxxxxx; pos is left
			// on the offending byte so it can be decoded on its own next time.
			if (pos >= str.size()) return kReplacement;
			const unsigned char next = static_cast<unsigned char>(str[pos]);
			if ((next & 0xC0) != 0x80) return kReplacement;
			cp = (cp << 6) | (next & 0x3F);
			pos += 1;
		}

		const bool overlong = cp < minValue;
		const bool surrogate = (cp >= 0xD800 && cp <= 0xDFFF);
		if (overlong || surrogate || cp > 0x10FFFF) return kReplacement;
		return cp;
	}

	/// <summary>
	/// Tests whether a Unicode code point belongs to the given character class.
	/// Ranges used: digits U+0030-0039, Latin letters U+0041-005A / U+0061-007A,
	/// hiragana U+3040-309F, katakana U+30A0-30FF, kanji U+4E00-9FFF / U+3400-4DBF.
	/// CHAR_ANY accepts everything; unknown classes accept nothing.
	/// </summary>
	bool ANSOCRUtility::IsCharClass(uint32_t cp, ALPRCharClass charClass) {
		const auto within = [cp](uint32_t first, uint32_t last) {
			return cp >= first && cp <= last;
		};
		const bool isDigit = within(0x30, 0x39);
		const bool isLatin = within(0x41, 0x5A) || within(0x61, 0x7A);
		const bool isHiragana = within(0x3040, 0x309F);
		const bool isKatakana = within(0x30A0, 0x30FF);
		const bool isKanji = within(0x4E00, 0x9FFF) || within(0x3400, 0x4DBF);

		switch (charClass) {
		case CHAR_DIGIT:
			return isDigit;
		case CHAR_LATIN_ALPHA:
			return isLatin;
		case CHAR_ALPHANUMERIC:
			return isDigit || isLatin;
		case CHAR_HIRAGANA:
			return isHiragana;
		case CHAR_KATAKANA:
			return isKatakana;
		case CHAR_KANJI:
			return isKanji;
		case CHAR_CJK_ANY:
			// The two kana blocks are adjacent, so this equals U+3040-30FF plus kanji.
			return isHiragana || isKatakana || isKanji;
		case CHAR_ANY:
			return true;
		default:
			return false;
		}
	}

	// Helper: encodes one code point as UTF-8 (1 byte below U+0080, 2 below U+0800,
	// 3 below U+10000, otherwise 4). The value is not range-checked.
	static std::string CodepointToUTF8(uint32_t cp) {
		char bytes[4];
		size_t length = 0;
		if (cp < 0x80) {
			bytes[length++] = static_cast<char>(cp);
		}
		else if (cp < 0x800) {
			bytes[length++] = static_cast<char>(0xC0 | (cp >> 6));
			bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
		}
		else if (cp < 0x10000) {
			bytes[length++] = static_cast<char>(0xE0 | (cp >> 12));
			bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
			bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
		}
		else {
			bytes[length++] = static_cast<char>(0xF0 | (cp >> 18));
			bytes[length++] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
			bytes[length++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
			bytes[length++] = static_cast<char>(0x80 | (cp & 0x3F));
		}
		return std::string(bytes, length);
	}

	// Helper: true for the punctuation that is kept together with digits —
	// hyphen, ASCII dot and the two middle dots (U+00B7, U+30FB).
	static bool IsDigitSeparator(uint32_t cp) {
		switch (cp) {
		case '-':
		case '.':
		case 0xB7:
		case 0x30FB:
			return true;
		default:
			return false;
		}
	}

	// Helper: walks `text` one code point at a time and routes each character either to
	// `matched` (it belongs to targetClass) or to `remainder` (it does not); both outputs
	// are cleared first. For CHAR_DIGIT the digit separators (see IsDigitSeparator) count
	// as matching, so plate numbers like "20-46" stay in one piece. Scanning stops at the
	// first code point decoded as 0.
	static void SplitByCharClass(const std::string& text, ALPRCharClass targetClass,
		std::string& matched, std::string& remainder) {
		matched.clear();
		remainder.clear();
		const bool separatorsCount = (targetClass == CHAR_DIGIT);
		for (size_t cursor = 0; cursor < text.size(); ) {
			const size_t begin = cursor;
			const uint32_t codepoint = ANSOCRUtility::NextUTF8Codepoint(text, cursor);
			if (codepoint == 0) {
				break;
			}
			const bool keep = ANSOCRUtility::IsCharClass(codepoint, targetClass)
				|| (separatorsCount && IsDigitSeparator(codepoint));
			std::string& sink = keep ? matched : remainder;
			// Copy the character's original bytes rather than re-encoding it.
			sink.append(text, begin, cursor - begin);
		}
	}

	// ── ALPR Post-Processing ────────────────────────────────────────────

	/// <summary>
	/// Builds a structured ALPR result from the raw OCR detections of one plate.
	/// Only formats[0] is applied. Detections are split into a top and a bottom row at
	/// the largest vertical gap between box centres, each row is read left-to-right, and
	/// the row text is distributed over the format's zones by character class. When an
	/// engine and the source image are supplied, an empty kana zone and a designation that
	/// fails its regex are re-read from crops of the image. Zones with a validation regex
	/// are then checked (with limited auto-repair) and the full plate text is assembled.
	/// </summary>
	/// <param name="ocrResults">Detections; className is used as the recognised text.</param>
	/// <param name="formats">Candidate plate formats; only the first is used.</param>
	/// <param name="imageWidth">Image width used to clamp the expanded plate box.</param>
	/// <param name="imageHeight">Image height used to clamp the expanded plate box.</param>
	/// <param name="engine">Optional recogniser for the re-crop passes; may be null.</param>
	/// <param name="originalImage">Optional source image for the re-crop passes; may be empty.</param>
	/// <returns>Empty when ocrResults or formats is empty; otherwise exactly one result.</returns>
	std::vector<ALPRResult> ANSOCRUtility::ALPRPostProcessing(
		const std::vector<OCRObject>& ocrResults,
		const std::vector<ALPRPlateFormat>& formats,
		int imageWidth, int imageHeight,
		ANSOCRBase* engine,
		const cv::Mat& originalImage)
	{
		std::vector<ALPRResult> results;
		if (ocrResults.empty() || formats.empty()) return results;

		// Applies every (from -> to) substitution in `corrections` to `text`.
		// An empty search key is skipped: find("") matches at every position and the
		// replace loop would never terminate.
		const auto applyCorrections = [](std::string& text, const auto& corrections) {
			for (const auto& corr : corrections) {
				if (corr.first.empty()) continue;
				size_t pos = 0;
				while ((pos = text.find(corr.first, pos)) != std::string::npos) {
					text.replace(pos, corr.first.length(), corr.second);
					pos += corr.second.length();
				}
			}
		};

		// Returns the characters of `text` whose code point satisfies `keep`,
		// stopping at the first code point decoded as 0.
		const auto keepCodepoints = [](const std::string& text, const auto& keep) {
			std::string kept;
			size_t pos = 0;
			while (pos < text.size()) {
				const size_t startPos = pos;
				const uint32_t cp = NextUTF8Codepoint(text, pos);
				if (cp == 0) break;
				if (keep(cp)) kept += text.substr(startPos, pos - startPos);
			}
			return kept;
		};

		// Use the first format for now (extensible to try multiple)
		const ALPRPlateFormat& fmt = formats[0];

		// Step 1: Compute the bounding box encompassing all detections, then expand it
		// (20% of the width to each side, 5% of the height above and below) to account
		// for tight detection crops that may cut off kana characters or edge digits.
		// The expanded box is clamped to the image.
		cv::Rect plateBox = ocrResults[0].box;
		for (size_t i = 1; i < ocrResults.size(); i++) {
			plateBox |= ocrResults[i].box;
		}
		{
			int expandX = (int)(plateBox.width * 0.20f);
			int expandY = (int)(plateBox.height * 0.05f);
			plateBox.x = std::max(0, plateBox.x - expandX);
			plateBox.y = std::max(0, plateBox.y - expandY);
			plateBox.width = std::min(imageWidth - plateBox.x, plateBox.width + expandX * 2);
			plateBox.height = std::min(imageHeight - plateBox.y, plateBox.height + expandY * 2);
		}

		// Step 2: Choose the Y coordinate separating the two rows.
		// Fallback (used when there is a single detection): derived from the format's
		// rowSplitThreshold.
		float rowBoundary = plateBox.y + plateBox.height * fmt.rowSplitThreshold +
			(plateBox.height * (1.0f - fmt.rowSplitThreshold)) * 0.5f;

		// Preferred: sort the detections by vertical centre and split in the middle
		// of the largest gap.
		std::vector<std::pair<float, int>> yCenters; // (y_center, index)
		yCenters.reserve(ocrResults.size());
		for (int i = 0; i < (int)ocrResults.size(); i++) {
			float yc = ocrResults[i].box.y + ocrResults[i].box.height * 0.5f;
			yCenters.push_back({ yc, i });
		}
		std::sort(yCenters.begin(), yCenters.end());

		if (yCenters.size() >= 2) {
			float maxGap = 0;
			float bestBoundary = rowBoundary;
			for (size_t i = 1; i < yCenters.size(); i++) {
				float gap = yCenters[i].first - yCenters[i - 1].first;
				if (gap > maxGap) {
					maxGap = gap;
					bestBoundary = (yCenters[i].first + yCenters[i - 1].first) * 0.5f;
				}
			}
			rowBoundary = bestBoundary;
		}

		// Step 3: Assign each OCR result to a row and collect text per row
		struct RowItem {
			int ocrIndex;
			float xCenter;
			std::string text;
			float confidence;
			cv::Rect box;
		};
		std::vector<RowItem> topRow, bottomRow;

		for (int i = 0; i < (int)ocrResults.size(); i++) {
			float yc = ocrResults[i].box.y + ocrResults[i].box.height * 0.5f;
			RowItem item;
			item.ocrIndex = i;
			item.xCenter = ocrResults[i].box.x + ocrResults[i].box.width * 0.5f;
			item.text = ocrResults[i].className;
			item.confidence = ocrResults[i].confidence;
			item.box = ocrResults[i].box;
			if (yc < rowBoundary) {
				topRow.push_back(item);
			} else {
				bottomRow.push_back(item);
			}
		}

		// Sort each row left-to-right
		auto sortByX = [](const RowItem& a, const RowItem& b) { return a.xCenter < b.xCenter; };
		std::sort(topRow.begin(), topRow.end(), sortByX);
		std::sort(bottomRow.begin(), bottomRow.end(), sortByX);

		// Step 4: Concatenate text per row; the overall confidence is the weakest detection
		std::string topText, bottomText;
		float minConfidence = 1.0f;
		for (const auto& item : topRow) {
			topText += item.text;
			minConfidence = std::min(minConfidence, item.confidence);
		}
		for (const auto& item : bottomRow) {
			bottomText += item.text;
			minConfidence = std::min(minConfidence, item.confidence);
		}

		// Step 5: For each zone, extract text using character class splitting
		ALPRResult alprResult;
		alprResult.formatName = fmt.name;
		alprResult.plateBox = plateBox;
		alprResult.confidence = minConfidence;
		alprResult.valid = true;

		// Zones with row 0 belong to the top row, all others to the bottom row;
		// within a row they are processed by ascending column.
		std::vector<const ALPRZone*> topZones, bottomZones;
		for (const auto& zone : fmt.zones) {
			if (zone.row == 0) topZones.push_back(&zone);
			else bottomZones.push_back(&zone);
		}
		std::sort(topZones.begin(), topZones.end(), [](const ALPRZone* a, const ALPRZone* b) { return a->col < b->col; });
		std::sort(bottomZones.begin(), bottomZones.end(), [](const ALPRZone* a, const ALPRZone* b) { return a->col < b->col; });

		// Split top row text by character class; each zone consumes its characters and
		// hands the rest to the next zone.
		std::string topRemaining = topText;
		for (const auto* zone : topZones) {
			std::string matched, remainder;
			SplitByCharClass(topRemaining, zone->charClass, matched, remainder);
			applyCorrections(matched, zone->corrections);
			alprResult.parts[zone->name] = matched;
			topRemaining = remainder;
		}

		// Split bottom row text by character class
		std::string bottomRemaining = bottomText;
		for (const auto* zone : bottomZones) {
			std::string matched, remainder;
			SplitByCharClass(bottomRemaining, zone->charClass, matched, remainder);
			applyCorrections(matched, zone->corrections);
			alprResult.parts[zone->name] = matched;
			bottomRemaining = remainder;
		}

		// Step 5b: Kana re-crop — if kana zone is empty and we have the original image,
		// crop the left portion of the bottom row and run recognizer-only (no detection)
		if (engine && !originalImage.empty()) {
			const ALPRZone* kanaZone = nullptr;
			for (const auto* zone : bottomZones) {
				if (zone->charClass == CHAR_HIRAGANA || zone->charClass == CHAR_KATAKANA) {
					kanaZone = zone;
					break;
				}
			}
			if (kanaZone && alprResult.parts[kanaZone->name].empty() && !bottomRow.empty()) {
				cv::Rect bottomBox = bottomRow[0].box;
				for (const auto& item : bottomRow) {
					bottomBox |= item.box;
				}

				// Crop the kana area: left ~20% of the expanded plate box.
				// NOTE(review): cropH is taken before the 30 px minimum is applied to cropW,
				// so narrow plates get a 30-wide but shorter crop — confirm whether a
				// square crop was intended. Behaviour kept as-is.
				int cropW = (int)(plateBox.width * 0.20f);
				int cropH = cropW;
				int cropX = std::max(0, plateBox.x);
				if (cropW < 30) cropW = 30;

				// Try vertical offsets: 50% (center), 30%, 15% from top of bottom row
				const float yOffsets[] = { 0.50f, 0.30f, 0.15f };
				bool kanaFound = false;
				for (float yOff : yOffsets) {
					if (kanaFound) break;

					int centerY = bottomBox.y + (int)(bottomBox.height * yOff);
					int cy = centerY - cropH / 2;
					int cw = cropW, ch = cropH;
					// Clamp to image bounds
					if (cy < 0) cy = 0;
					if (cropX + cw > originalImage.cols) cw = originalImage.cols - cropX;
					if (cy + ch > originalImage.rows) ch = originalImage.rows - cy;
					if (cw <= 0 || ch <= 0) continue;

					cv::Mat kanaCrop = originalImage(cv::Rect(cropX, cy, cw, ch)).clone();

					// Resize to recognizer format: height=48, min width=160
					// (narrower crops are centred on a white canvas)
					int recH = 48;
					double scale = (double)recH / kanaCrop.rows;
					cv::Mat resized;
					cv::resize(kanaCrop, resized, cv::Size(), scale, scale, cv::INTER_CUBIC);
					int minWidth = 160;
					if (resized.cols < minWidth) {
						int padLeft = (minWidth - resized.cols) / 2;
						int padRight = minWidth - resized.cols - padLeft;
						cv::copyMakeBorder(resized, resized, 0, 0, padLeft, padRight,
							cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
					}

					auto [recText, recConf] = engine->RecognizeText(resized);

					if (!recText.empty()) {
						// Keep only characters of the kana zone's class.
						const std::string kanaText = keepCodepoints(recText,
							[kanaZone](uint32_t cp) { return IsCharClass(cp, kanaZone->charClass); });
						if (!kanaText.empty()) {
							alprResult.parts[kanaZone->name] = kanaText;
							kanaFound = true;
						}
					}
				}
			}
		}

		// Step 5c: Designation re-crop — if the designation fails its regex,
		// crop the right portion of the bottom row and run recognizer directly
		if (engine && !originalImage.empty()) {
			const ALPRZone* desigZone = nullptr;
			for (const auto* zone : bottomZones) {
				if (zone->name == "designation") {
					desigZone = zone;
					break;
				}
			}
			// Without any bottom-row detection there is no row box to crop from
			// (and bottomRow[0] below would be out of range).
			if (desigZone && !desigZone->validationRegex.empty() && !bottomRow.empty()) {
				std::string& desigVal = alprResult.parts[desigZone->name];
				try {
					std::regex re(desigZone->validationRegex);
					if (!std::regex_match(desigVal, re)) {
						// Crop the right ~75% of the plate's bottom row
						cv::Rect bottomBox = bottomRow[0].box;
						for (const auto& item : bottomRow) bottomBox |= item.box;

						int cropX = plateBox.x + (int)(plateBox.width * 0.25f);
						int cropY = bottomBox.y;
						int cropW = plateBox.x + plateBox.width - cropX;
						int cropH = bottomBox.height;
						// Clamp
						if (cropX + cropW > originalImage.cols) cropW = originalImage.cols - cropX;
						if (cropY + cropH > originalImage.rows) cropH = originalImage.rows - cropY;

						if (cropW > 0 && cropH > 0) {
							cv::Mat desigCrop = originalImage(cv::Rect(cropX, cropY, cropW, cropH)).clone();
							// Resize to recognizer format: height=48, min width=320
							// (narrower crops are padded with white on the right)
							int recH = 48;
							double scale = (double)recH / desigCrop.rows;
							cv::Mat resized;
							cv::resize(desigCrop, resized, cv::Size(), scale, scale, cv::INTER_CUBIC);
							int minWidth = 320;
							if (resized.cols < minWidth) {
								cv::copyMakeBorder(resized, resized, 0, 0, 0, minWidth - resized.cols,
									cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
							}
							auto [recText, recConf] = engine->RecognizeText(resized);

							if (!recText.empty()) {
								// Apply corrections (dots to zeros)
								applyCorrections(recText, desigZone->corrections);
								// Extract digits and separators
								const std::string desigText = keepCodepoints(recText,
									[](uint32_t cp) { return IsCharClass(cp, CHAR_DIGIT) || IsDigitSeparator(cp); });
								// Only accept the re-read when it yields more characters.
								if (!desigText.empty() && desigText.size() > desigVal.size()) {
									desigVal = desigText;
								}
							}
						}
					}
				} catch (...) {
					// Best effort: an invalid regex or a failure while cropping/recognising
					// leaves the designation as it was.
				}
			}
		}

		// Step 6: Validate and auto-fix zones that fail regex
		for (const auto& zone : fmt.zones) {
			if (zone.validationRegex.empty() || alprResult.parts[zone.name].empty()) continue;
			try {
				std::regex re(zone.validationRegex);
				std::string& val = alprResult.parts[zone.name];
				if (!std::regex_match(val, re)) {
					bool fixed = false;
					// For bottom-row digit zones: try trimming leading characters
					// (digits leaked from the classification row)
					if (zone.row == 1 && zone.charClass == CHAR_DIGIT) {
						for (size_t trim = 1; trim < val.size() && !fixed; trim++) {
							size_t pos = 0;
							for (size_t t = 0; t < trim; t++) {
								NextUTF8Codepoint(val, pos);
							}
							std::string trimmed = val.substr(pos);
							if (std::regex_match(trimmed, re)) {
								val = trimmed;
								fixed = true;
							}
						}
					}
					// For designation: if too few digits, pad with leading zeros
					// Japanese plates use ・ for zero, so "12" means "00-12"
					if (!fixed && zone.name == "designation") {
						// Extract only digits from val
						std::string digitsOnly;
						for (char c : val) {
							if (c >= '0' && c <= '9') digitsOnly += c;
						}
						if (digitsOnly.size() >= 1 && digitsOnly.size() <= 3) {
							// Pad to 4 digits and insert hyphen
							while (digitsOnly.size() < 4) digitsOnly = "0" + digitsOnly;
							std::string padded = digitsOnly.substr(0, 2) + "-" + digitsOnly.substr(2, 2);
							if (std::regex_match(padded, re)) {
								val = padded;
								fixed = true;
							}
						}
					}
					if (!fixed) {
						alprResult.valid = false;
					}
				}
			} catch (...) {
				// Best effort: a zone whose regex cannot be compiled is left unvalidated.
			}
		}

		// Step 7: Build full plate text (after validation/fix so values are corrected):
		// top-row zones, a space, then bottom-row zones, each separated by a space.
		alprResult.fullPlateText.clear();
		for (const auto* zone : topZones) {
			if (!alprResult.fullPlateText.empty()) alprResult.fullPlateText += " ";
			alprResult.fullPlateText += alprResult.parts[zone->name];
		}
		alprResult.fullPlateText += " ";
		for (const auto* zone : bottomZones) {
			if (zone != bottomZones[0]) alprResult.fullPlateText += " ";
			alprResult.fullPlateText += alprResult.parts[zone->name];
		}

		results.push_back(alprResult);
		return results;
	}

	// ── ALPR JSON Serialization ─────────────────────────────────────────

	std::string ANSOCRUtility::ALPRResultToJsonString(const std::vector<ALPRResult>& results) {
		if (results.empty()) {
			return R"({"results":[]})";
		}
		try {
			nlohmann::json root;
			auto& jsonResults = root["results"] = nlohmann::json::array();

			for (const auto& res : results) {
				nlohmann::json alprInfo;
				alprInfo["valid"] = res.valid;
				alprInfo["format"] = res.formatName;
				for (const auto& part : res.parts) {
					alprInfo[part.first] = part.second;
				}

				jsonResults.push_back({
					{"class_id", "0"},
					{"track_id", "0"},
					{"class_name", res.fullPlateText},
					{"prob", std::to_string(res.confidence)},
					{"x", std::to_string(res.plateBox.x)},
					{"y", std::to_string(res.plateBox.y)},
					{"width", std::to_string(res.plateBox.width)},
					{"height", std::to_string(res.plateBox.height)},
					{"mask", ""},
					{"extra_info", ""},
					{"camera_id", ""},
					{"polygon", ""},
					{"kps", ""},
					{"alpr_info", alprInfo}
				});
			}
			return root.dump();
		} catch (const std::exception&) {
			return R"({"results":[],"error":"ALPR serialization failed"})";
		}
	}

};