#include "ANSLPR_OCR.h"
#include "ANSRTYOLO.h"
#include "ANSONNXYOLO.h"
#include "ANSOnnxOCR.h"
#include "ANSOCRBase.h"
// NOTE(review): the three system includes below lost their bracketed header
// names in this snapshot ("#include <...>" was stripped to "#include").
// Reconstructed from usage in this file (std::ifstream, std::lock_guard /
// std::mutex, std::max_element / std::clamp) — confirm against the original.
#include <fstream>
#include <mutex>
#include <algorithm>

// ---------------------------------------------------------------------------
// SEH wrapper for loading ONNX models — identical to the one in ANSLPR_OD.cpp
// ---------------------------------------------------------------------------

/// Writes a message to the Windows event log (source "ANSLogger") and mirrors
/// it to the debugger output stream.
/// @param message   NUL-terminated text to report.
/// @param eventType EVENTLOG_* severity; defaults to informational.
static void WriteEventLog(const char* message, WORD eventType = EVENTLOG_INFORMATION_TYPE)
{
    // Registered once; the handle is intentionally kept for process lifetime.
    static HANDLE hEventLog = RegisterEventSourceA(NULL, "ANSLogger");
    if (hEventLog)
    {
        const char* msgs[1] = { message };
        ReportEventA(hEventLog, eventType, 0, 0, NULL, 1, 0, msgs, NULL);
    }
    OutputDebugStringA(message);
    OutputDebugStringA("\n");
}

// ---------------------------------------------------------------------------
// SEH wrapper for loading ANSRTYOLO (TensorRT) models — used when NVIDIA GPU
// is detected. Falls back to ANSONNXYOLO if TRT fails.
// ---------------------------------------------------------------------------

/// Parameter bundle for the detector loaders. All pointers are non-owning and
/// must outlive the call.
struct LoadRtParams_OCR
{
    const std::string* licenseKey;
    ANSCENTER::ModelConfig* config;
    const std::string* modelFolder;
    const char* modelName;      // model base name, e.g. "lpd" / "lpc"
    const char* classFile;      // class-name file, e.g. "lpd.names"
    std::string* labels;        // receives the loaded label list
    // NOTE(review): the unique_ptr element type was stripped from the
    // snapshot; it must match the member the caller passes (_lpDetector /
    // _lpColourDetector). Confirm the detector base type in ANSLPR_OCR.h.
    std::unique_ptr<ANSCENTER::ANSDetector>* detector;
    bool enableTracker;         // BYTETRACK on/off
    bool disableStabilization;  // true → SetStabilization(false)
};

// The ONNX loader takes exactly the same bundle — keep the original type
// name for the call sites below, but avoid duplicating the definition.
using LoadOnnxParams_OCR = LoadRtParams_OCR;

/// C++ half of the TensorRT loader. May throw, so it is wrapped by
/// LoadRtModel_OCR_SEH below. On success stores the detector in *p.detector
/// and returns true; on any failure resets it and returns false.
static bool LoadRtModel_OCR_Impl(const LoadRtParams_OCR& p)
{
    try
    {
        // NOTE(review): make_unique's template argument was stripped from the
        // snapshot; ANSRTYOLO reconstructed from the surrounding comments and
        // headers — confirm qualification against the original file.
        auto rtyolo = std::make_unique<ANSCENTER::ANSRTYOLO>();
        bool ok = rtyolo->LoadModelFromFolder(
            *p.licenseKey, *p.config, p.modelName, p.classFile,
            *p.modelFolder, *p.labels);
        if (!ok)
        {
            return false;
        }
        // Single call replaces the original duplicated if/else — the flag
        // is forwarded directly.
        rtyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, p.enableTracker);
        if (p.disableStabilization)
        {
            rtyolo->SetStabilization(false);
        }
        *p.detector = std::move(rtyolo);
        return true;
    }
    catch (...)
    {
        p.detector->reset();
        return false;
    }
}

/// SEH shim around LoadRtModel_OCR_Impl: catches structured exceptions
/// (e.g. access violations inside the TRT runtime) that C++ try/catch
/// cannot see.
/// @param outCode receives the SEH exception code, or 0 if none was raised.
static bool LoadRtModel_OCR_SEH(const LoadRtParams_OCR& p, DWORD* outCode)
{
    *outCode = 0;
    __try
    {
        return LoadRtModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        *outCode = GetExceptionCode();
        return false;
    }
}

/// C++ half of the ONNX Runtime loader — mirror image of the TRT version.
static bool LoadOnnxModel_OCR_Impl(const LoadOnnxParams_OCR& p)
{
    try
    {
        // NOTE(review): template argument stripped in snapshot — ANSONNXYOLO
        // reconstructed from the surrounding comments; confirm qualification.
        auto onnxyolo = std::make_unique<ANSCENTER::ANSONNXYOLO>();
        bool ok = onnxyolo->LoadModelFromFolder(
            *p.licenseKey, *p.config, p.modelName, p.classFile,
            *p.modelFolder, *p.labels);
        if (!ok)
        {
            return false;
        }
        onnxyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, p.enableTracker);
        if (p.disableStabilization)
        {
            onnxyolo->SetStabilization(false);
        }
        *p.detector = std::move(onnxyolo);
        return true;
    }
    catch (...)
    {
        p.detector->reset();
        return false;
    }
}

/// SEH shim around LoadOnnxModel_OCR_Impl; see LoadRtModel_OCR_SEH.
static bool LoadOnnxModel_OCR_SEH(const LoadOnnxParams_OCR& p, DWORD* outCode)
{
    *outCode = 0;
    __try
    {
        return LoadOnnxModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        *outCode = GetExceptionCode();
        return false;
    }
}

namespace ANSCENTER
{
    /// Engine defaults to CPU until LoadEngine probes the hardware.
    ANSALPR_OCR::ANSALPR_OCR()
    {
        engineType = EngineType::CPU;
    }

    /// Releases all engines. Destroy must never let an exception escape a
    /// destructor, hence the swallow-all catch.
    ANSALPR_OCR::~ANSALPR_OCR()
    {
        try { Destroy(); }
        catch (...)
{} } bool ANSALPR_OCR::Initialize(const std::string& licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) { std::lock_guard lock(_mutex); try { _licenseKey = licenseKey; _licenseValid = false; _detectorThreshold = detectorThreshold; _ocrThreshold = ocrThreshold; _colorThreshold = colourThreshold; _country = Country::JAPAN; // Default to JAPAN for OCR-based ALPR CheckLicense(); if (!_licenseValid) { this->_logger.LogError("ANSALPR_OCR::Initialize", "License is not valid.", __FILE__, __LINE__); return false; } // Extract model folder if (!FileExist(modelZipFilePath)) { this->_logger.LogFatal("ANSALPR_OCR::Initialize", "Model zip file does not exist: " + modelZipFilePath, __FILE__, __LINE__); return false; } this->_logger.LogInfo("ANSALPR_OCR::Initialize", "Model zip file found: " + modelZipFilePath, __FILE__, __LINE__); // Unzip model zip file std::vector passwordArray; if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword); passwordArray.push_back("AnsDemoModels20@!"); passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag=="); passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg=="); std::string modelName = GetFileNameWithoutExtension(modelZipFilePath); for (size_t i = 0; i < passwordArray.size(); i++) { if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false)) break; } if (!FolderExist(_modelFolder)) { this->_logger.LogError("ANSALPR_OCR::Initialize", "Output model folder does not exist: " + _modelFolder, __FILE__, __LINE__); return false; } // Check country from country.txt std::string countryFile = CreateFilePath(_modelFolder, "country.txt"); if (FileExist(countryFile)) { std::ifstream infile(countryFile); std::string countryStr; std::getline(infile, countryStr); infile.close(); if (countryStr == "0") _country = Country::VIETNAM; else if (countryStr 
== "1") _country = Country::CHINA; else if (countryStr == "2") _country = Country::AUSTRALIA; else if (countryStr == "3") _country = Country::USA; else if (countryStr == "4") _country = Country::INDONESIA; else if (countryStr == "5") _country = Country::JAPAN; else _country = Country::JAPAN; // Default for OCR mode } // Store the original model zip path — the OCR models (ansocrdec.onnx, // ansocrcls.onnx, ansocrrec.onnx, dict_ch.txt) are bundled inside the // same ALPR model zip, so we reuse it for ANSONNXOCR initialization. _modelZipFilePath = modelZipFilePath; // Initialize ALPRChecker alprChecker.Init(MAX_ALPR_FRAME); _lpColourModelConfig.detectionScoreThreshold = _colorThreshold; _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold; return true; } catch (std::exception& e) { this->_logger.LogFatal("ANSALPR_OCR::Initialize", e.what(), __FILE__, __LINE__); return false; } } bool ANSALPR_OCR::LoadEngine() { std::lock_guard lock(_mutex); try { WriteEventLog("ANSALPR_OCR::LoadEngine: Step 1 - Starting engine load"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 1: Starting engine load", __FILE__, __LINE__); // Detect hardware _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold; _lpColourModelConfig.detectionScoreThreshold = _colorThreshold; if (_lpdmodelConfig.detectionScoreThreshold < 0.25) _lpdmodelConfig.detectionScoreThreshold = 0.25; if (_lpdmodelConfig.detectionScoreThreshold > 0.95) _lpdmodelConfig.detectionScoreThreshold = 0.95; engineType = ANSLicenseHelper::CheckHardwareInformation(); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Detected engine type: " + std::to_string(static_cast(engineType)), __FILE__, __LINE__); float confThreshold = 0.5f; float MNSThreshold = 0.5f; _lpdmodelConfig.modelConfThreshold = confThreshold; _lpdmodelConfig.modelMNSThreshold = MNSThreshold; _lpColourModelConfig.modelConfThreshold = confThreshold; _lpColourModelConfig.modelMNSThreshold = MNSThreshold; std::string lprModel = 
CreateFilePath(_modelFolder, "lpd.onnx"); std::string colorModel = CreateFilePath(_modelFolder, "lpc.onnx"); bool valid = false; // ── Step 2: Load LP detector ───────────────────────────────── if (FileExist(lprModel)) { // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected if (engineType == EngineType::NVIDIA_GPU) { WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with TensorRT"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with TensorRT", __FILE__, __LINE__); _lpdmodelConfig.detectionType = DetectionType::DETECTION; _lpdmodelConfig.modelType = ModelType::RTYOLO; std::string _lprClasses; { LoadRtParams_OCR p{}; p.licenseKey = &_licenseKey; p.config = &_lpdmodelConfig; p.modelFolder = &_modelFolder; p.modelName = "lpd"; p.classFile = "lpd.names"; p.labels = &_lprClasses; p.detector = &_lpDetector; p.enableTracker = true; p.disableStabilization = true; DWORD sehCode = 0; bool lpSuccess = LoadRtModel_OCR_SEH(p, &sehCode); if (sehCode != 0) { char buf[256]; snprintf(buf, sizeof(buf), "ANSALPR_OCR::LoadEngine: Step 2 LPD TRT SEH exception 0x%08X — falling back to ONNX Runtime", sehCode); WriteEventLog(buf, EVENTLOG_ERROR_TYPE); this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Step 2: LP detector TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__); if (_lpDetector) _lpDetector.reset(); } else if (!lpSuccess) { this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Failed to load LP detector (TensorRT). 
Falling back to ONNX Runtime.", __FILE__, __LINE__); if (_lpDetector) _lpDetector.reset(); } } } // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed if (!_lpDetector) { WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with ONNX Runtime"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with ONNX Runtime", __FILE__, __LINE__); _lpdmodelConfig.detectionType = DetectionType::DETECTION; _lpdmodelConfig.modelType = ModelType::ONNXYOLO; std::string _lprClasses; { LoadOnnxParams_OCR p{}; p.licenseKey = &_licenseKey; p.config = &_lpdmodelConfig; p.modelFolder = &_modelFolder; p.modelName = "lpd"; p.classFile = "lpd.names"; p.labels = &_lprClasses; p.detector = &_lpDetector; p.enableTracker = true; p.disableStabilization = true; DWORD sehCode = 0; bool lpSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode); if (sehCode != 0) { char buf[256]; snprintf(buf, sizeof(buf), "ANSALPR_OCR::LoadEngine: Step 2 LPD SEH exception 0x%08X — LP detector disabled", sehCode); WriteEventLog(buf, EVENTLOG_ERROR_TYPE); this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Step 2: LP detector crashed (SEH). LP detector disabled.", __FILE__, __LINE__); if (_lpDetector) _lpDetector.reset(); } else if (!lpSuccess) { this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Failed to load LP detector (ONNX Runtime).", __FILE__, __LINE__); if (_lpDetector) _lpDetector.reset(); } } } } if (!_lpDetector) { this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "LP detector failed to load. Cannot proceed.", __FILE__, __LINE__); _isInitialized = false; return false; } // ── Step 3: Load OCR engine (ANSONNXOCR) ───────────────────── // The OCR models (ansocrdec.onnx, ansocrcls.onnx, ansocrrec.onnx, // dict_ch.txt) are bundled inside the same ALPR model zip, so we // pass the original ALPR zip path to ANSONNXOCR::Initialize. // ANSOCRBase::Initialize will extract it (no-op if already done) // and discover the OCR model files in the extracted folder. 
WriteEventLog("ANSALPR_OCR::LoadEngine: Step 3 - Loading OCR engine (ANSONNXOCR)"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: Loading OCR engine (ANSONNXOCR)", __FILE__, __LINE__); // Verify OCR model files exist in the already-extracted folder std::string ocrDetModel = CreateFilePath(_modelFolder, "ansocrdec.onnx"); std::string ocrRecModel = CreateFilePath(_modelFolder, "ansocrrec.onnx"); if (!FileExist(ocrDetModel) || !FileExist(ocrRecModel)) { this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "OCR model files not found in model folder: " + _modelFolder + " (expected ansocrdec.onnx, ansocrrec.onnx)", __FILE__, __LINE__); _isInitialized = false; return false; } _ocrEngine = std::make_unique(); // Determine OCR language based on country OCRLanguage ocrLang = OCRLanguage::ENGLISH; switch (_country) { case Country::JAPAN: ocrLang = OCRLanguage::JAPANESE; break; case Country::CHINA: ocrLang = OCRLanguage::CHINESE; break; case Country::VIETNAM: ocrLang = OCRLanguage::ENGLISH; break; case Country::AUSTRALIA: ocrLang = OCRLanguage::ENGLISH; break; case Country::USA: ocrLang = OCRLanguage::ENGLISH; break; case Country::INDONESIA: ocrLang = OCRLanguage::ENGLISH; break; default: ocrLang = OCRLanguage::ENGLISH; break; } OCRModelConfig ocrModelConfig; ocrModelConfig.ocrLanguage = ocrLang; ocrModelConfig.useDetector = true; ocrModelConfig.useRecognizer = true; // Skip the angle classifier for ALPR. License-plate boxes // from the YOLO detector are already axis-aligned, so the // 180° classifier is dead weight (one extra ORT call per // plate for no recall gain). 
ocrModelConfig.useCLS = false; ocrModelConfig.useLayout = false; ocrModelConfig.useTable = false; ocrModelConfig.useTensorRT = true; ocrModelConfig.enableMKLDNN = false; ocrModelConfig.useDilation = true; ocrModelConfig.useAngleCLS = false; ocrModelConfig.gpuId = 0; ocrModelConfig.detectionDBThreshold = 0.5; ocrModelConfig.detectionBoxThreshold = 0.3; ocrModelConfig.detectionDBUnclipRatio = 1.2; ocrModelConfig.clsThreshold = 0.9; ocrModelConfig.limitSideLen = 480; // Pass the original ALPR model zip path — ANSOCRBase::Initialize // will extract it to the same folder (already done, so extraction // is a no-op) and set up ansocrdec.onnx / ansocrcls.onnx / // ansocrrec.onnx / dict_ch.txt paths automatically. bool ocrSuccess = _ocrEngine->Initialize(_licenseKey, ocrModelConfig, _modelZipFilePath, "", 0); if (!ocrSuccess) { this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Failed to initialize OCR engine (ANSONNXOCR).", __FILE__, __LINE__); _ocrEngine.reset(); _isInitialized = false; return false; } // Set ALPR mode and country on the OCR engine _ocrEngine->SetOCRMode(OCRMode::OCR_ALPR); _ocrEngine->SetCountry(_country); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: OCR engine loaded successfully.", __FILE__, __LINE__); // ── Step 4: Load colour classifier (optional) ──────────────── if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) { // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected if (engineType == EngineType::NVIDIA_GPU) { WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with TensorRT"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with TensorRT", __FILE__, __LINE__); _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION; _lpColourModelConfig.modelType = ModelType::RTYOLO; { LoadRtParams_OCR p{}; p.licenseKey = &_licenseKey; p.config = &_lpColourModelConfig; p.modelFolder = &_modelFolder; p.modelName = "lpc"; p.classFile = "lpc.names"; 
p.labels = &_lpColourLabels; p.detector = &_lpColourDetector; p.enableTracker = false; p.disableStabilization = false; DWORD sehCode = 0; bool colourSuccess = LoadRtModel_OCR_SEH(p, &sehCode); if (sehCode != 0) { char buf[256]; snprintf(buf, sizeof(buf), "ANSALPR_OCR::LoadEngine: Step 4 LPC TRT SEH exception 0x%08X — falling back to ONNX Runtime", sehCode); WriteEventLog(buf, EVENTLOG_ERROR_TYPE); this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Step 4: Colour classifier TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__); if (_lpColourDetector) _lpColourDetector.reset(); } else if (!colourSuccess) { this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Failed to load colour classifier (TensorRT). Falling back to ONNX Runtime.", __FILE__, __LINE__); if (_lpColourDetector) _lpColourDetector.reset(); } } } // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed if (!_lpColourDetector) { WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with ONNX Runtime"); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with ONNX Runtime", __FILE__, __LINE__); _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION; _lpColourModelConfig.modelType = ModelType::ONNXYOLO; { LoadOnnxParams_OCR p{}; p.licenseKey = &_licenseKey; p.config = &_lpColourModelConfig; p.modelFolder = &_modelFolder; p.modelName = "lpc"; p.classFile = "lpc.names"; p.labels = &_lpColourLabels; p.detector = &_lpColourDetector; p.enableTracker = false; p.disableStabilization = false; DWORD sehCode = 0; bool colourSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode); if (sehCode != 0) { char buf[256]; snprintf(buf, sizeof(buf), "ANSALPR_OCR::LoadEngine: Step 4 LPC SEH exception 0x%08X — colour detection disabled", sehCode); WriteEventLog(buf, EVENTLOG_ERROR_TYPE); this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Step 4: Colour classifier crashed. 
Colour detection disabled.", __FILE__, __LINE__); if (_lpColourDetector) _lpColourDetector.reset(); } else if (!colourSuccess) { this->_logger.LogError("ANSALPR_OCR::LoadEngine", "Failed to load colour detector (ONNX Runtime). Colour detection disabled.", __FILE__, __LINE__); if (_lpColourDetector) _lpColourDetector.reset(); } } } } valid = true; _isInitialized = valid; WriteEventLog(("ANSALPR_OCR::LoadEngine: Step 5 - Engine load complete. Valid = " + std::to_string(valid)).c_str()); this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 5: Engine load complete. Valid = " + std::to_string(valid), __FILE__, __LINE__); return valid; } catch (std::exception& e) { WriteEventLog(("ANSALPR_OCR::LoadEngine: C++ exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE); this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", std::string("C++ exception: ") + e.what(), __FILE__, __LINE__); _isInitialized = false; return false; } catch (...) { WriteEventLog("ANSALPR_OCR::LoadEngine: Unknown exception", EVENTLOG_ERROR_TYPE); this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Unknown exception", __FILE__, __LINE__); _isInitialized = false; return false; } } // ── Colour detection (same pattern as ANSALPR_OD) ──────────────────── std::string ANSALPR_OCR::DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId) { if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f) return {}; if (!_lpColourDetector) return {}; if (lprROI.empty()) return {}; try { std::vector colourOutputs = _lpColourDetector->RunInference(lprROI, cameraId); if (colourOutputs.empty()) return {}; const auto& bestDetection = *std::max_element( colourOutputs.begin(), colourOutputs.end(), [](const Object& a, const Object& b) { return a.confidence < b.confidence; } ); return bestDetection.className; } catch (const std::exception& e) { this->_logger.LogFatal("ANSALPR_OCR::DetectLPColourDetector", e.what(), __FILE__, __LINE__); return {}; } } std::string ANSALPR_OCR::DetectLPColourCached(const 
cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText) { if (plateText.empty()) { return DetectLPColourDetector(lprROI, cameraId); } // Check cache first { std::lock_guard cacheLock(_colourCacheMutex); auto it = _colourCache.find(plateText); if (it != _colourCache.end()) { it->second.hitCount++; return it->second.colour; } } // Cache miss — run classifier std::string colour = DetectLPColourDetector(lprROI, cameraId); if (!colour.empty()) { std::lock_guard cacheLock(_colourCacheMutex); if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) { _colourCache.clear(); } _colourCache[plateText] = { colour, 0 }; } return colour; } // ── Classical perspective rectification ───────────────────────────── // Takes the axis-aligned LP YOLO bbox and tries to warp the plate to // a tight rectangle whose height is fixed and whose width preserves // the detected plate's actual aspect ratio. This removes camera // tilt/yaw, strips background margin, and normalizes character // spacing — which makes the recognizer see an image much closer to // its training distribution and reduces silent character drops. // // Works entirely in classical OpenCV (Canny + findContours + // approxPolyDP + getPerspectiveTransform + warpPerspective), so it // needs no new models and no retraining. Fails gracefully (returns // false) on plates where the border can't be isolated — caller falls // back to the padded axis-aligned crop in that case. std::vector ANSALPR_OCR::OrderQuadCorners(const std::vector& pts) { // Standard TL/TR/BR/BL ordering via x+y / y-x extrema. Robust to // input winding order (clockwise vs counter-clockwise) and to // approxPolyDP starting the polygon at an arbitrary corner. 
std::vector ordered(4); if (pts.size() != 4) return ordered; auto sum = [](const cv::Point& p) { return p.x + p.y; }; auto diff = [](const cv::Point& p) { return p.y - p.x; }; int idxMinSum = 0, idxMaxSum = 0, idxMinDiff = 0, idxMaxDiff = 0; for (int i = 1; i < 4; ++i) { if (sum(pts[i]) < sum(pts[idxMinSum])) idxMinSum = i; if (sum(pts[i]) > sum(pts[idxMaxSum])) idxMaxSum = i; if (diff(pts[i]) < diff(pts[idxMinDiff])) idxMinDiff = i; if (diff(pts[i]) > diff(pts[idxMaxDiff])) idxMaxDiff = i; } ordered[0] = cv::Point2f(static_cast(pts[idxMinSum].x), static_cast(pts[idxMinSum].y)); // TL ordered[1] = cv::Point2f(static_cast(pts[idxMinDiff].x), static_cast(pts[idxMinDiff].y)); // TR ordered[2] = cv::Point2f(static_cast(pts[idxMaxSum].x), static_cast(pts[idxMaxSum].y)); // BR ordered[3] = cv::Point2f(static_cast(pts[idxMaxDiff].x), static_cast(pts[idxMaxDiff].y)); // BL return ordered; } bool ANSALPR_OCR::RectifyPlateROI( const cv::Mat& source, const cv::Rect& bbox, cv::Mat& outRectified) const { if (source.empty()) return false; cv::Rect clamped = bbox & cv::Rect(0, 0, source.cols, source.rows); if (clamped.width <= 20 || clamped.height <= 10) return false; const cv::Mat roi = source(clamped); const double roiArea = static_cast(roi.rows) * roi.cols; const double minArea = roiArea * kRectifyAreaFraction; // Step 1: grayscale + blur + Canny to find plate border edges. cv::Mat gray; if (roi.channels() == 3) { cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); } else if (roi.channels() == 4) { cv::cvtColor(roi, gray, cv::COLOR_BGRA2GRAY); } else { gray = roi; } cv::GaussianBlur(gray, gray, cv::Size(5, 5), 0); cv::Mat edges; cv::Canny(gray, edges, 50, 150); // Close small gaps in the plate border so findContours sees it as // one closed shape rather than several broken line segments. cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)); cv::morphologyEx(edges, edges, cv::MORPH_CLOSE, kernel); // Step 2: find external contours. 
std::vector> contours; cv::findContours(edges, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); if (contours.empty()) return false; // Step 3: find the largest contour whose approxPolyDP collapses // to 4 vertices. That's most likely the plate border. std::vector bestQuad; double bestArea = 0.0; for (const auto& c : contours) { const double area = cv::contourArea(c); if (area < minArea) continue; // Sweep epsilon — tighter approximations require more vertices, // looser approximations collapse to fewer. We want the // smallest epsilon at which the contour becomes a quadrilateral. std::vector approx; const double perimeter = cv::arcLength(c, true); for (double eps = 0.02; eps <= 0.08; eps += 0.01) { cv::approxPolyDP(c, approx, eps * perimeter, true); if (approx.size() == 4) break; } if (approx.size() == 4 && area > bestArea) { // Verify the quadrilateral is convex — a non-convex // 4-point contour is almost certainly not a plate if (cv::isContourConvex(approx)) { bestArea = area; bestQuad = approx; } } } // Step 4: fallback — minAreaRect on the largest contour. This // handles pure rotation but not arbitrary perspective skew. if (bestQuad.empty()) { auto largest = std::max_element(contours.begin(), contours.end(), [](const std::vector& a, const std::vector& b) { return cv::contourArea(a) < cv::contourArea(b); }); if (largest == contours.end()) return false; if (cv::contourArea(*largest) < minArea) return false; cv::RotatedRect rr = cv::minAreaRect(*largest); cv::Point2f pts[4]; rr.points(pts); bestQuad.reserve(4); for (int i = 0; i < 4; ++i) { bestQuad.emplace_back(static_cast(pts[i].x), static_cast(pts[i].y)); } } // Step 5: order the 4 corners as TL/TR/BR/BL. std::vector srcCorners = OrderQuadCorners(bestQuad); // Measure the source quadrilateral's dimensions so the output // rectangle preserves the real plate aspect ratio. Without this, // a wide single-row plate would be squashed to 2:1 and a 2-row // plate would be stretched to wrong proportions. 
auto pointDist = [](const cv::Point2f& a, const cv::Point2f& b) -> float { const float dx = a.x - b.x; const float dy = a.y - b.y; return std::sqrt(dx * dx + dy * dy); }; const float topEdge = pointDist(srcCorners[0], srcCorners[1]); const float bottomEdge = pointDist(srcCorners[3], srcCorners[2]); const float leftEdge = pointDist(srcCorners[0], srcCorners[3]); const float rightEdge = pointDist(srcCorners[1], srcCorners[2]); const float srcW = std::max(topEdge, bottomEdge); const float srcH = std::max(leftEdge, rightEdge); if (srcW < 20.f || srcH < 10.f) return false; const float srcAspect = srcW / srcH; // Gate rectification on plausible plate aspect ratios. Anything // wildly outside the range isn't a plate; fall back to the axis- // aligned crop rather than produce a distorted warp. if (srcAspect < kMinPlateAspect || srcAspect > kMaxPlateAspect) { return false; } // Step 6: warp to a rectangle that preserves aspect ratio. Height // is fixed (kRectifiedHeight) so downstream sizing is predictable. 
const int outH = kRectifiedHeight; const int outW = std::clamp(static_cast(std::round(outH * srcAspect)), kRectifiedHeight, // min: square kRectifiedHeight * 6); // max: 6:1 long plates std::vector dstCorners = { { 0.f, 0.f }, { static_cast(outW - 1), 0.f }, { static_cast(outW - 1), static_cast(outH - 1) }, { 0.f, static_cast(outH - 1) } }; const cv::Mat M = cv::getPerspectiveTransform(srcCorners, dstCorners); cv::warpPerspective(roi, outRectified, M, cv::Size(outW, outH), cv::INTER_LINEAR, cv::BORDER_REPLICATE); return !outRectified.empty(); } // ── Japan-only: kana recovery on a plate where the fast path silently // dropped the hiragana from the bottom row ──────────────────────── ANSALPR_OCR::CodepointClassCounts ANSALPR_OCR::CountCodepointClasses(const std::string& text) { CodepointClassCounts counts; size_t pos = 0; while (pos < text.size()) { const size_t before = pos; uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); if (cp == 0 || pos == before) break; if (ANSOCRUtility::IsCharClass(cp, CHAR_DIGIT)) counts.digit++; if (ANSOCRUtility::IsCharClass(cp, CHAR_KANJI)) counts.kanji++; if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA)) counts.hiragana++; if (ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) counts.katakana++; } return counts; } bool ANSALPR_OCR::IsJapaneseIncomplete(const std::string& text) { // A valid Japanese plate has at least one kanji in the region // zone, at least one hiragana/katakana in the kana zone, and at // least four digits split between classification (top) and // designation (bottom). // // We only consider a plate "incomplete and worth recovering" // when it ALREADY LOOKS Japanese on the fast path — i.e. the // kanji region was found successfully. Gating on kanji > 0 // prevents the recovery path from firing on non-Japanese plates // (Latin-only, European, Macau, etc.) where there's no kana to // find anyway, which previously wasted ~35 ms per plate burning // all recovery attempts on a search that can never succeed. 
// // For non-Japanese plates the function returns false, recovery // is skipped, and latency is identical to the pre-recovery // baseline. const CodepointClassCounts c = CountCodepointClasses(text); if (c.kanji == 0) return false; // Not a Japanese plate if (c.digit < 4) return false; // Not enough digits — probably garbage const int kana = c.hiragana + c.katakana; return (kana == 0); // Kanji + digits present, kana missing } // Strip screws/rivets/dirt that the recognizer misreads as small // round punctuation glyphs. The blacklist is deliberately narrow: // only characters that are never legitimate plate content on any // country we support. Middle dots (・ and ·) are KEPT because they // are legitimate padding on Japanese plates with <4 designation // digits (e.g. "・274"), and they get normalised to "0" by // ALPRPostProcessing's zone corrections anyway. std::string ANSALPR_OCR::StripPlateArtifacts(const std::string& text) { if (text.empty()) return text; std::string stripped; stripped.reserve(text.size()); size_t pos = 0; while (pos < text.size()) { const size_t before = pos; uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); if (cp == 0 || pos == before) break; bool drop = false; switch (cp) { // Small round glyphs that mimic screws / rivets case 0x00B0: // ° degree sign case 0x02DA: // ˚ ring above case 0x2218: // ∘ ring operator case 0x25CB: // ○ white circle case 0x25CF: // ● black circle case 0x25E6: // ◦ white bullet case 0x2022: // • bullet case 0x2219: // ∙ bullet operator case 0x25A0: // ■ black square case 0x25A1: // □ white square // Quote-like glyphs picked up from plate border / dirt case 0x0022: // " ASCII double quote case 0x0027: // ' ASCII apostrophe case 0x201C: // " LEFT DOUBLE QUOTATION MARK (smart quote) case 0x201D: // " RIGHT DOUBLE QUOTATION MARK case 0x201E: // „ DOUBLE LOW-9 QUOTATION MARK case 0x201F: // ‟ DOUBLE HIGH-REVERSED-9 QUOTATION MARK case 0x2018: // ' LEFT SINGLE QUOTATION MARK case 0x2019: // ' RIGHT SINGLE 
QUOTATION MARK case 0x201A: // ‚ SINGLE LOW-9 QUOTATION MARK case 0x201B: // ‛ SINGLE HIGH-REVERSED-9 QUOTATION MARK case 0x00AB: // « LEFT-POINTING DOUBLE ANGLE QUOTATION case 0x00BB: // » RIGHT-POINTING DOUBLE ANGLE QUOTATION case 0x2039: // ‹ SINGLE LEFT-POINTING ANGLE QUOTATION case 0x203A: // › SINGLE RIGHT-POINTING ANGLE QUOTATION case 0x301D: // 〝 REVERSED DOUBLE PRIME QUOTATION case 0x301E: // 〞 DOUBLE PRIME QUOTATION case 0x301F: // 〟 LOW DOUBLE PRIME QUOTATION case 0x300A: // 《 LEFT DOUBLE ANGLE BRACKET case 0x300B: // 》 RIGHT DOUBLE ANGLE BRACKET case 0x3008: // 〈 LEFT ANGLE BRACKET case 0x3009: // 〉 RIGHT ANGLE BRACKET // Ideographic punctuation that isn't valid plate content case 0x3002: // 。 ideographic full stop case 0x3001: // 、 ideographic comma case 0x300C: // 「 left corner bracket case 0x300D: // 」 right corner bracket case 0x300E: // 『 left white corner bracket case 0x300F: // 』 right white corner bracket // ASCII punctuation noise picked up from plate borders case 0x0060: // ` grave accent case 0x007E: // ~ tilde case 0x005E: // ^ caret case 0x007C: // | vertical bar case 0x005C: // \ backslash case 0x002F: // / forward slash case 0x0028: // ( left paren case 0x0029: // ) right paren case 0x005B: // [ left bracket case 0x005D: // ] right bracket case 0x007B: // { left brace case 0x007D: // } right brace case 0x003C: // < less than case 0x003E: // > greater than // Misc symbols that round glyphs can collapse to case 0x00A9: // © copyright sign case 0x00AE: // ® registered sign case 0x2117: // ℗ sound recording copyright case 0x2122: // ™ trademark drop = true; break; default: break; } if (!drop) { stripped.append(text, before, pos - before); } } // Collapse runs of spaces introduced by stripping, and trim. 
// ---- StripPlateArtifacts (tail) ------------------------------------
// Collapse runs of spaces introduced by stripping, then trim leading
// and trailing spaces from the result.
std::string collapsed;
collapsed.reserve(stripped.size());
bool prevSpace = false;
for (char c : stripped) {
    if (c == ' ') {
        if (!prevSpace) collapsed.push_back(c);
        prevSpace = true;
    } else {
        collapsed.push_back(c);
        prevSpace = false;
    }
}
const size_t first = collapsed.find_first_not_of(' ');
if (first == std::string::npos) return "";
const size_t last = collapsed.find_last_not_of(' ');
return collapsed.substr(first, last - first + 1);
}

/// @brief Japan-only fallback: re-crop the kana region of a 2-row plate
///        and run the text recognizer on just that tile.
/// @param plateROI full (unsplit) plate crop.
/// @param halfH    row-split Y inside plateROI (top/bottom boundary).
/// @return exactly one hiragana/katakana codepoint as UTF-8, or "" when
///         no kana could be recovered.
std::string ANSALPR_OCR::RecoverKanaFromBottomHalf(
    const cv::Mat& plateROI, int halfH) const {
    if (!_ocrEngine || plateROI.empty()) return "";
    const int plateW = plateROI.cols;
    const int plateH = plateROI.rows;
    // Guard: recovery is pointless on tiny plates or a nonsensical split.
    if (plateW < 40 || plateH < 30 || halfH <= 0 || halfH >= plateH) {
        ANS_DBG("ALPR_Kana",
            "Recovery SKIP: plate too small (%dx%d, halfH=%d)",
            plateW, plateH, halfH);
        return "";
    }
    ANS_DBG("ALPR_Kana",
        "Recovery START: plate=%dx%d halfH=%d bottomHalf=%dx%d",
        plateW, plateH, halfH, plateW, plateH - halfH);
    // The kana on a Japanese plate sits in the left ~30% of the
    // bottom row and is roughly square. Try 3 well-chosen crop
    // positions — one center, one slightly high, one wider — and
    // bail out on the first that yields a hiragana/katakana hit.
    //
    // 3 attempts is the sweet spot: it catches the common row-split
    // variation without burning linear time on every fail-case.
    // Previous versions tried 7 attempts, which added ~20 ms/plate
    // of pure waste when recovery couldn't find any kana anyway.
    //
    // Tiles shorter than 48 px are upscaled to 48 px height before
    // recognition so the recognizer sees something close to its
    // training distribution. PaddleOCR's rec model expects 48 px
    // height and breaks down when given very small crops.
    struct TileSpec {
        float widthFraction;  // fraction of plateW
        float yOffset;        // 0.0 = top of bottom half, 1.0 = bottom
    };
    const TileSpec attempts[] = {
        { 0.30f, 0.50f },  // primary: 30% wide, centered vertically
        { 0.30f, 0.35f },  // row split landed too low — try higher
        { 0.35f, 0.50f },  // slightly wider crop for off-center kana
    };
    int attemptNo = 0;
    for (const TileSpec& spec : attempts) {
        attemptNo++;
        // NOTE(review): <int>/<cv::Mat> template arguments in this
        // function were restored from context — the extracted source
        // had lost all angle-bracket contents.
        int tileW = static_cast<int>(plateW * spec.widthFraction);
        if (tileW < 30) tileW = 30;
        if (tileW > plateW) tileW = plateW;
        // Prefer square tile, but allow non-square if the bottom
        // half is short. Clipped to bottom-half height.
        int tileH = tileW;
        const int bottomHalfH = plateH - halfH;
        if (tileH > bottomHalfH) tileH = bottomHalfH;
        if (tileH < 20) continue;
        const int centerY =
            halfH + static_cast<int>(bottomHalfH * spec.yOffset);
        int cy = centerY - tileH / 2;
        // Keep the tile fully inside the bottom half / plate bounds.
        if (cy < halfH) cy = halfH;
        if (cy + tileH > plateH) cy = plateH - tileH;
        if (cy < 0) cy = 0;
        const int cx = 0;  // kana zone is flush-left on the plate
        int cw = tileW;
        int ch = tileH;
        if (cx + cw > plateW) cw = plateW - cx;
        if (cy + ch > plateH) ch = plateH - cy;
        if (cw <= 10 || ch <= 10) continue;
        cv::Mat kanaTile = plateROI(cv::Rect(cx, cy, cw, ch));
        // Upscale tiles shorter than 48 px so the recognizer sees
        // something close to its training input size. Preserve
        // aspect ratio; cv::INTER_CUBIC keeps character strokes
        // sharper than bilinear.
        cv::Mat tileForRec;
        if (kanaTile.rows < 48) {
            const double scale = 48.0 / kanaTile.rows;
            cv::resize(kanaTile, tileForRec, cv::Size(), scale, scale,
                cv::INTER_CUBIC);
        } else {
            tileForRec = kanaTile;
        }
        std::vector<cv::Mat> tileBatch{ tileForRec };
        auto tileResults = _ocrEngine->RecognizeTextBatch(tileBatch);
        if (tileResults.empty()) {
            ANS_DBG("ALPR_Kana",
                "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) "
                "→ recognizer returned empty batch",
                attemptNo, cw, ch, tileForRec.cols, tileForRec.rows,
                spec.widthFraction, spec.yOffset);
            continue;
        }
        const std::string& text = tileResults[0].first;
        const float conf = tileResults[0].second;
        ANS_DBG("ALPR_Kana",
            "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) "
            "→ '%s' conf=%.3f",
            attemptNo, cw, ch, tileForRec.cols, tileForRec.rows,
            spec.widthFraction, spec.yOffset, text.c_str(), conf);
        if (text.empty()) continue;
        // Japanese plate kana is ALWAYS exactly 1 hiragana or
        // katakana character. We accept ONLY that — nothing else.
        // Kanji, Latin letters, digits, punctuation, everything
        // non-kana is rejected. The returned string is exactly the
        // one kana codepoint or empty.
        //
        // Strictness is deliberate: the relaxed "any letter class"
        // accept path was letting through kanji bleed from the
        // region-name zone when the tile positioning was slightly
        // off, producing wrong plate text like "59-V3 西 752.23" or
        // "JCL 三". With strict-only accept, a miss in the recovery
        // is silent and the fast-path result passes through unchanged.
        std::string firstKana;  // first CHAR_HIRAGANA / CHAR_KATAKANA hit
        int codepointCount = 0;
        size_t pos = 0;
        while (pos < text.size()) {
            const size_t before = pos;
            uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos);
            // cp == 0 or no advance → malformed UTF-8; stop scanning.
            if (cp == 0 || pos == before) break;
            codepointCount++;
            // Keep counting codepoints for the log even after a hit.
            if (!firstKana.empty()) continue;
            if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA) ||
                ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) {
                firstKana = text.substr(before, pos - before);
            }
        }
        if (!firstKana.empty()) {
            ANS_DBG("ALPR_Kana",
                "Recovery SUCCESS at attempt %d: extracted '%s' "
                "from raw '%s' (%d codepoints, conf=%.3f)",
                attemptNo, firstKana.c_str(), text.c_str(),
                codepointCount, conf);
            return firstKana;
        }
    }
    ANS_DBG("ALPR_Kana",
        "Recovery FAILED: no kana found in %d attempts", attemptNo);
    return "";
}

// ── Full-frame vs pipeline auto-detection ────────────────────────────
// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
// watches whether consecutive frames from a given camera have the exact
// same (width, height). Pre-cropped pipeline inputs vary by a few
// pixels per crop, so the exact-match check fails and we return false.
// Real video frames are pixel-identical across frames, so after a few
// consistent frames we flip into FULL-FRAME mode and start running the
// ALPRChecker voting + ensureUniquePlateText dedup.
bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
    const std::string& cameraId) {
    // Force disabled via SetALPRCheckerEnabled(false) → never use.
    if (!_enableALPRChecker) return false;
    // Small images are always pipeline crops — skip auto-detection.
if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) return false; auto& tracker = _imageSizeTrackers[cameraId]; bool wasFullFrame = tracker.detectedFullFrame; if (imageSize == tracker.lastSize) { tracker.consistentCount++; if (tracker.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) { tracker.detectedFullFrame = true; } } else { tracker.lastSize = imageSize; tracker.consistentCount = 1; tracker.detectedFullFrame = false; } if (tracker.detectedFullFrame != wasFullFrame) { ANS_DBG("ALPR_OCR_Checker", "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)", cameraId.c_str(), tracker.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)", imageSize.width, imageSize.height, tracker.consistentCount); } return tracker.detectedFullFrame; } // ── Spatial plate dedup with accumulated scoring ───────────────────── // Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one // detection in the same frame ends up with the same plate text (e.g. // tracker occlusion or two cars in a single frame reading the same // string), we resolve the ambiguity by accumulating confidence per // spatial location across frames. The location with the higher running // score keeps the plate text; the loser has its className cleared and // is dropped from the output. void ANSALPR_OCR::ensureUniquePlateText(std::vector& results, const std::string& cameraId) { std::lock_guard plateLock(_plateIdentitiesMutex); auto& identities = _plateIdentities[cameraId]; // Auto-detect mode by detection count. // 1 detection → pipeline/single-crop mode → no dedup needed. // 2+ detections → full-frame mode → apply accumulated scoring. 
    if (results.size() <= 1) {
        // Still age out stale spatial identities from previous
        // full-frame calls so the per-camera list stays bounded.
        if (!identities.empty()) {
            constexpr int MAX_UNSEEN_FRAMES = 30;
            for (auto& id : identities) id.framesSinceLastSeen++;
            for (auto it = identities.begin(); it != identities.end(); ) {
                if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
                    it = identities.erase(it);
                } else {
                    ++it;
                }
            }
        }
        return;
    }
    // Helper: IoU between two rects.
    auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
        int x1 = std::max(a.x, b.x);
        int y1 = std::max(a.y, b.y);
        int x2 = std::min(a.x + a.width, b.x + b.width);
        int y2 = std::min(a.y + a.height, b.y + b.height);
        if (x2 <= x1 || y2 <= y1) return 0.0f;
        float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
        float unionArea =
            static_cast<float>(a.area() + b.area()) - intersection;
        return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
    };
    // Helper: find matching spatial identity by bounding-box overlap.
    // The stored identity keeps only a center point, so we synthesize a
    // rect of the candidate's size around it before the IoU test.
    auto findSpatialMatch = [&](const cv::Rect& box,
        const std::string& plateText) -> SpatialPlateIdentity* {
        for (auto& id : identities) {
            if (id.plateText == plateText) {
                cv::Rect storedRect(
                    static_cast<int>(id.center.x - box.width * 0.5f),
                    static_cast<int>(id.center.y - box.height * 0.5f),
                    box.width, box.height);
                if (computeIoU(box, storedRect) >
                    PLATE_SPATIAL_MATCH_THRESHOLD) {
                    return &id;
                }
            }
        }
        return nullptr;
    };
    // Step 1: Build map of plateText → candidate indices
    std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
    for (size_t i = 0; i < results.size(); ++i) {
        if (results[i].className.empty()) continue;
        plateCandidates[results[i].className].push_back(i);
    }
    // Step 2: Resolve duplicates using spatial accumulated scores
    for (auto& [plateText, indices] : plateCandidates) {
        if (indices.size() <= 1) continue;
        size_t winner = indices[0];
        float bestScore = 0.0f;
        for (size_t idx : indices) {
            // Base score is this frame's confidence; a spatial match
            // adds the historical accumulated score for that location.
            float score = results[idx].confidence;
            auto* match = findSpatialMatch(results[idx].box, plateText);
            if (match) {
                score = match->accumulatedScore + results[idx].confidence;
            }
            if (score > bestScore) {
                bestScore = score;
                winner = idx;
            }
        }
        // Losers are marked by clearing className; removed in Step 4.
        for (size_t idx : indices) {
            if (idx != winner) {
                results[idx].className.clear();
            }
        }
    }
    // Step 3: Update spatial identities — winners accumulate, losers decay
    constexpr float DECAY_FACTOR = 0.8f;
    constexpr float MIN_SCORE = 0.1f;
    constexpr int MAX_UNSEEN_FRAMES = 30;
    for (auto& id : identities) id.framesSinceLastSeen++;
    for (auto& r : results) {
        if (r.className.empty()) continue;
        cv::Point2f center(
            r.box.x + r.box.width * 0.5f,
            r.box.y + r.box.height * 0.5f);
        auto* match = findSpatialMatch(r.box, r.className);
        if (match) {
            match->accumulatedScore += r.confidence;
            match->center = center;
            match->framesSinceLastSeen = 0;
        } else {
            identities.push_back({ center, r.className, r.confidence, 0 });
        }
    }
    // Decay unseen identities and remove stale ones
    for (auto it = identities.begin(); it != identities.end(); ) {
        if (it->framesSinceLastSeen > 0) {
            it->accumulatedScore *= DECAY_FACTOR;
        }
        if (it->accumulatedScore < MIN_SCORE ||
            it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
            it = identities.erase(it);
        } else {
            ++it;
        }
    }
    // Step 4: Remove entries with cleared plate text
    results.erase(
        std::remove_if(results.begin(), results.end(),
            [](const Object& o) { return o.className.empty(); }),
        results.end());
}

// ── OCR on a single plate ROI ────────────────────────────────────────
// Runs the OCR engine on one cropped plate image and returns the plate
// text (structured ALPR zone ordering when ALPR mode is active,
// otherwise a Y-then-X sorted concatenation). Returns "" on any failure.
std::string ANSALPR_OCR::RunOCROnPlate(const cv::Mat& plateROI,
    const std::string& cameraId) {
    if (!_ocrEngine || plateROI.empty()) return "";
    if (plateROI.cols < 10 || plateROI.rows < 10) return "";
    try {
        // Run the full ANSONNXOCR pipeline on the cropped plate image
        std::vector<OCRObject> ocrResults =
            _ocrEngine->RunInference(plateROI, cameraId);
        if (ocrResults.empty()) return "";
        // If ALPR mode is active and we have plate formats, use the
        // structured ALPR post-processing to get correct zone ordering
        // (e.g. "品川 302 ま 93-15" instead of "品川30293-15ま")
        const auto& alprFormats = _ocrEngine->GetALPRFormats();
        if (_ocrEngine->GetOCRMode() == OCRMode::OCR_ALPR &&
            !alprFormats.empty()) {
            auto alprResults = ANSOCRUtility::ALPRPostProcessing(
                ocrResults, alprFormats, plateROI.cols, plateROI.rows,
                _ocrEngine.get(), plateROI);
            if (!alprResults.empty()) {
                return alprResults[0].fullPlateText;
            }
        }
        // Fallback: simple concatenation sorted by Y then X.
        // Two boxes count as the same row when their Y delta is within
        // half the smaller box height.
        std::sort(ocrResults.begin(), ocrResults.end(),
            [](const OCRObject& a, const OCRObject& b) {
                int rowThreshold = std::min(a.box.height, b.box.height) / 2;
                if (std::abs(a.box.y - b.box.y) > rowThreshold) {
                    return a.box.y < b.box.y;
                }
                return a.box.x < b.box.x;
            }
        );
        std::string fullText;
        for (const auto& obj : ocrResults) {
            if (!obj.className.empty()) {
                fullText += obj.className;
            }
        }
        return fullText;
    } catch (const std::exception& e) {
        this->_logger.LogError("ANSALPR_OCR::RunOCROnPlate", e.what(),
            __FILE__, __LINE__);
        return "";
    }
}

// ── Main inference pipeline ──────────────────────────────────────────
std::vector<Object> ANSALPR_OCR::RunInference(const cv::Mat& input,
    const std::string& cameraId) {
    // Hard preconditions: license, init, usable input, live engines.
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OCR::RunInference",
            "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OCR::RunInference",
            "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OCR::RunInference",
            "Input image is empty or too small", __FILE__, __LINE__);
        return {};
    }
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference",
            "_lpDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!_ocrEngine) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference",
            "_ocrEngine is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary; localFrame only owns
        // data in the grayscale case and outlives all views below.
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const
        cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        // Step 1: Detect license plates
        std::vector<Object> lprOutput =
            _lpDetector->RunInference(frame, cameraId);
        if (lprOutput.empty()) {
            return {};
        }
        // Step 2: Collect crops from every valid plate. Wide plates
        // (aspect >= 2.1) are treated as a single text line; narrow
        // plates (2-row layouts like Japanese) are split horizontally
        // at H/2 into top and bottom rows. All crops go through a
        // single batched recognizer call, bypassing the OCR text-line
        // detector entirely — for ALPR the LP YOLO box already bounds
        // the text region precisely.
        //
        // Per-plate preprocessing pipeline:
        //   1. Pad the YOLO LP bbox by 5% on each side so the plate
        //      border is visible to the rectifier and edge characters
        //      aren't clipped by a tight detector output.
        //   2. Try classical perspective rectification (Canny +
        //      findContours + approxPolyDP + warpPerspective) to
        //      straighten tilted / skewed plates. Falls back to the
        //      padded axis-aligned crop on failure — no regression.
        //   3. Run the 2-row split heuristic on whichever plate image
        //      we ended up with, using an aspect threshold of 2.1 so
        //      perfect-2:1 rectified Japanese plates still split.
        //
        // Rectification is gated on _country == JAPAN at runtime.
        // For all other countries we skip the classical-CV pipeline
        // entirely and use the plain padded axis-aligned crop — this
        // keeps non-Japan inference on the original fast path and
        // lets SetCountry(nonJapan) take effect on the very next
        // frame without a restart.
        const bool useRectification = (_country == Country::JAPAN);
        struct PlateInfo {
            size_t origIndex;                 // into lprOutput
            std::vector<size_t> cropIndices;  // into allCrops
            cv::Mat plateROI;  // full (unsplit) ROI, kept for colour + kana recovery
            int halfH = 0;     // row-split Y inside plateROI (0 = single row)
        };
        std::vector<cv::Mat> allCrops;
        std::vector<PlateInfo> plateInfos;
        allCrops.reserve(lprOutput.size() * 2);
        plateInfos.reserve(lprOutput.size());
        for (size_t i = 0; i < lprOutput.size(); ++i) {
            const cv::Rect& box = lprOutput[i].box;
            // Calculate safe cropped region
            const int x1 = std::max(0, box.x);
            const int y1 = std::max(0, box.y);
            const int width = std::min(frameWidth - x1, box.width);
            const int height = std::min(frameHeight - y1, box.height);
            if (width <= 0 || height <= 0) continue;
            // Pad the YOLO LP bbox by 5% on each side. Gives the
            // rectifier some background for edge detection and helps
            // when the detector cropped a character edge.
            const int padX = std::max(2, width * 5 / 100);
            const int padY = std::max(2, height * 5 / 100);
            const int px = std::max(0, x1 - padX);
            const int py = std::max(0, y1 - padY);
            const int pw = std::min(frameWidth - px, width + 2 * padX);
            const int ph = std::min(frameHeight - py, height + 2 * padY);
            const cv::Rect paddedBox(px, py, pw, ph);
            // Perspective rectification is Japan-only to preserve
            // baseline latency on all other countries. On non-Japan
            // plates we go straight to the padded axis-aligned crop.
            cv::Mat plateROI;
            if (useRectification) {
                cv::Mat rectified;
                if (RectifyPlateROI(frame, paddedBox, rectified)) {
                    plateROI = rectified;        // owning 3-channel BGR
                } else {
                    plateROI = frame(paddedBox); // non-owning view
                }
            } else {
                plateROI = frame(paddedBox);     // non-owning view
            }
            PlateInfo info;
            info.origIndex = i;
            info.plateROI = plateROI;
            const int plateW = plateROI.cols;
            const int plateH = plateROI.rows;
            const float aspect =
                static_cast<float>(plateW) / std::max(1, plateH);
            // 2-row heuristic: aspect < 2.1 → split top/bottom.
            // Bumped from 2.0 so a perfectly rectified Japanese plate
            // (aspect == 2.0) still splits correctly despite floating-
            // point rounding. Threshold still excludes wide EU/VN
            // plates (aspect 3.0+).
            if (aspect < 2.1f && plateH >= 24) {
                const int halfH = plateH / 2;
                info.halfH = halfH;
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI(cv::Rect(0, 0, plateW, halfH)));
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(
                    plateROI(cv::Rect(0, halfH, plateW, plateH - halfH)));
            } else {
                info.halfH = 0;
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI);
            }
            plateInfos.push_back(std::move(info));
        }
        if (allCrops.empty()) {
            return {};
        }
        // Step 3: Single batched recognizer call for every crop.
        // ONNXOCRRecognizer groups crops by bucket width and issues
        // one ORT Run per bucket — typically 1–2 GPU calls for an
        // entire frame regardless of plate count.
        auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);
        // Step 4: Assemble per-plate output
        std::vector<Object> output;
        output.reserve(plateInfos.size());
        // Decide once per frame whether the tracker-based correction
        // layer should run. We auto-detect full-frame vs pipeline mode
        // by watching for pixel-identical consecutive frames, exactly
        // the same way ANSALPR_OD does it.
        const bool useChecker = shouldUseALPRChecker(
            cv::Size(frameWidth, frameHeight), cameraId);
        for (const auto& info : plateInfos) {
            // Reassemble row-by-row so we can target the bottom row
            // for kana recovery when the fast path silently dropped
            // the hiragana on a Japanese 2-row plate.
            std::string topText, bottomText;
            if (info.cropIndices.size() == 2) {
                if (info.cropIndices[0] < ocrResults.size())
                    topText = ocrResults[info.cropIndices[0]].first;
                if (info.cropIndices[1] < ocrResults.size())
                    bottomText = ocrResults[info.cropIndices[1]].first;
            } else if (!info.cropIndices.empty() &&
                       info.cropIndices[0] < ocrResults.size()) {
                topText = ocrResults[info.cropIndices[0]].first;
            }
            // Strip screw/rivet artifacts (°, ○, etc.) picked up from
            // plate fasteners before any downstream processing. Runs
            // on every row regardless of country — these glyphs are
            // never legitimate plate content anywhere.
            topText = StripPlateArtifacts(topText);
            bottomText = StripPlateArtifacts(bottomText);
            std::string combinedText = topText;
            if (!bottomText.empty()) {
                if (!combinedText.empty()) combinedText += " ";
                combinedText += bottomText;
            }
            // Japan-only kana recovery: if the fast-path output is
            // missing hiragana/katakana, re-crop the kana region and
            // run the recognizer on just that tile. Clean plates
            // pass the IsJapaneseIncomplete check and skip this
            // block entirely — zero cost.
            if (_country == Country::JAPAN && info.halfH > 0 &&
                IsJapaneseIncomplete(combinedText)) {
                ANS_DBG("ALPR_Kana",
                    "RunInference: firing recovery on plate '%s' "
                    "(plateROI=%dx%d halfH=%d)",
                    combinedText.c_str(), info.plateROI.cols,
                    info.plateROI.rows, info.halfH);
                std::string recovered = StripPlateArtifacts(
                    RecoverKanaFromBottomHalf(info.plateROI, info.halfH));
                if (!recovered.empty()) {
                    // Prepend the recovered kana to the bottom row
                    // text so the final combined string reads
                    // "region classification kana designation".
                    if (bottomText.empty()) {
                        bottomText = recovered;
                    } else {
                        bottomText = recovered + " " + bottomText;
                    }
                    combinedText = topText;
                    if (!bottomText.empty()) {
                        if (!combinedText.empty()) combinedText += " ";
                        combinedText += bottomText;
                    }
                    ANS_DBG("ALPR_Kana",
                        "RunInference: spliced result '%s'",
                        combinedText.c_str());
                }
            }
            if (combinedText.empty()) continue;
            Object lprObject = lprOutput[info.origIndex];
            lprObject.cameraId = cameraId;
            // Cross-frame stabilization: per-track majority vote in
            // full-frame mode, raw OCR text in pipeline mode.
            if (useChecker) {
                lprObject.className = alprChecker.checkPlateByTrackId(
                    cameraId, combinedText, lprObject.trackId);
            } else {
                lprObject.className = combinedText;
            }
            if (lprObject.className.empty()) continue;
            // Optional colour detection on the full plate ROI
            std::string colour = DetectLPColourCached(
                info.plateROI, cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }
            output.push_back(std::move(lprObject));
        }
        // Spatial dedup: if two detections in the same frame ended up
        // with the same plate text, keep only the one whose spatial
        // history has the higher accumulated confidence. Skip this in
        // pipeline mode because there's only ever one plate per call.
        if (useChecker) {
            ensureUniquePlateText(output, cameraId);
        }
        return output;
    } catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference",
            std::string("OpenCV Exception: ") + e.what(),
            __FILE__, __LINE__);
    } catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", e.what(),
            __FILE__, __LINE__);
    } catch (...) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}

// ── Stateless batched inference for pipeline mode ───────────────────
// Caller supplies a full frame + a list of vehicle ROIs in FRAME
// coordinates.
// We run ONE LP-detect call across all vehicle crops and
// ONE text-recognizer call across every resulting plate (with the same
// 2-row split heuristic as ANSALPR_OCR::RunInference), and NO tracker,
// voting, spatial dedup, or per-camera accumulating state. This is the
// drop-in replacement for the per-bbox loop inside
// ANSALPR_RunInferencesComplete_LV (pipeline mode) and is exported as
// ANSALPR_RunInferencesBatch_LV / _V2 in dllmain.cpp. Calling this on
// ANSALPR_OCR avoids the ORT/TRT per-shape allocator churn that
// causes unbounded memory growth when the loop version is used.
// NOTE(review): container template arguments in this function were
// restored from context — the extracted source had lost all
// angle-bracket contents.
std::vector<Object> ANSALPR_OCR::RunInferencesBatch(
    const cv::Mat& input,
    const std::vector<cv::Rect>& vehicleBoxes,
    const std::string& cameraId) {
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OCR::RunInferencesBatch",
            "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OCR::RunInferencesBatch",
            "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) return {};
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            "_lpDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!_ocrEngine) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            "_ocrEngine is null", __FILE__, __LINE__);
        return {};
    }
    if (vehicleBoxes.empty()) return {};
    try {
        // Promote grayscale input to BGR once (matches RunInference).
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame =
            (input.channels() == 1) ? localFrame : input;
        // ── 1. Clamp and crop vehicle ROIs ────────────────────────
        const cv::Rect frameRect(0, 0, frame.cols, frame.rows);
        std::vector<cv::Mat> vehicleCrops;
        std::vector<cv::Rect> clamped;
        vehicleCrops.reserve(vehicleBoxes.size());
        clamped.reserve(vehicleBoxes.size());
        for (const auto& r : vehicleBoxes) {
            // cv::Rect operator& intersects the ROI with the frame.
            cv::Rect c = r & frameRect;
            if (c.width <= 5 || c.height <= 5) continue;
            vehicleCrops.emplace_back(frame(c));
            clamped.push_back(c);
        }
        if (vehicleCrops.empty()) return {};
        // ── 2. ONE batched LP detection call across all vehicles ──
        std::vector<std::vector<Object>> lpBatch =
            _lpDetector->RunInferencesBatch(vehicleCrops, cameraId);
        // ── 3. Flatten plates, applying preprocessing per plate ───
        // For each detected plate we:
        //   1. Pad the LP bbox by 5% so the rectifier sees the
        //      plate border and tight detector crops don't clip
        //      edge characters.
        //   2. If country == JAPAN, try classical perspective
        //      rectification — if it succeeds the plateROI is a
        //      tight, straightened 2D warp of the real plate; if
        //      it fails we fall back to the padded axis-aligned
        //      crop. For non-Japan countries we skip rectification
        //      entirely to preserve baseline latency.
        //   3. Apply the same 2-row split heuristic as RunInference
        //      (aspect < 2.1 → split top/bottom).
        // The halfH field lets the assembly loop call the kana
        // recovery helper with the correct row-split boundary.
        const bool useRectification = (_country == Country::JAPAN);
        struct PlateMeta {
            size_t vehIdx;   // index into vehicleCrops / clamped
            Object lpObj;    // LP detection in VEHICLE-local coords
            cv::Mat plateROI; // full plate crop (kept for colour + kana recovery)
            int halfH = 0;   // row-split Y inside plateROI (0 = single row)
            std::vector<size_t> cropIndices; // indices into allCrops below
        };
        std::vector<cv::Mat> allCrops;
        std::vector<PlateMeta> metas;
        allCrops.reserve(lpBatch.size() * 2);
        metas.reserve(lpBatch.size());
        for (size_t v = 0; v < lpBatch.size() && v < vehicleCrops.size(); ++v) {
            const cv::Mat& veh = vehicleCrops[v];
            const cv::Rect vehRect(0, 0, veh.cols, veh.rows);
            for (const auto& lp : lpBatch[v]) {
                cv::Rect lpBox = lp.box & vehRect;
                if (lpBox.width <= 0 || lpBox.height <= 0) continue;
                // Pad by 5% on each side for the rectifier.
                const int padX = std::max(2, lpBox.width * 5 / 100);
                const int padY = std::max(2, lpBox.height * 5 / 100);
                cv::Rect paddedBox(
                    lpBox.x - padX, lpBox.y - padY,
                    lpBox.width + 2 * padX, lpBox.height + 2 * padY);
                paddedBox &= vehRect;
                if (paddedBox.width <= 0 || paddedBox.height <= 0) continue;
                // Perspective rectification is Japan-only to preserve
                // baseline latency on all other countries.
                cv::Mat plateROI;
                if (useRectification) {
                    cv::Mat rectified;
                    if (RectifyPlateROI(veh, paddedBox, rectified)) {
                        plateROI = rectified;      // owning canonical
                    } else {
                        plateROI = veh(paddedBox); // non-owning view
                    }
                } else {
                    plateROI = veh(paddedBox);     // non-owning view
                }
                PlateMeta pm;
                pm.vehIdx = v;
                pm.lpObj = lp;
                pm.plateROI = plateROI;
                const int plateW = plateROI.cols;
                const int plateH = plateROI.rows;
                const float aspect =
                    static_cast<float>(plateW) / std::max(1, plateH);
                // Same 2-row split heuristic as RunInference.
                if (aspect < 2.1f && plateH >= 24) {
                    const int halfH = plateH / 2;
                    pm.halfH = halfH;
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(
                        plateROI(cv::Rect(0, 0, plateW, halfH)));
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(
                        plateROI(cv::Rect(0, halfH, plateW, plateH - halfH)));
                } else {
                    pm.halfH = 0;
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(plateROI);
                }
                metas.push_back(std::move(pm));
            }
        }
        if (allCrops.empty()) return {};
        // ── 4. ONE batched recognizer call across every plate ────
        // ONNXOCRRecognizer buckets by width internally, so this is
        // typically 1-2 ORT Runs regardless of plate count.
        auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);
        // ── 5. Assemble — NO tracker, NO voting, NO dedup ────────
        std::vector<Object> output;
        output.reserve(metas.size());
        for (const auto& pm : metas) {
            // Reassemble row-by-row so Japan kana recovery can splice
            // the recovered hiragana into the bottom row specifically.
            std::string topText, bottomText;
            if (pm.cropIndices.size() == 2) {
                if (pm.cropIndices[0] < ocrResults.size())
                    topText = ocrResults[pm.cropIndices[0]].first;
                if (pm.cropIndices[1] < ocrResults.size())
                    bottomText = ocrResults[pm.cropIndices[1]].first;
            } else if (!pm.cropIndices.empty() &&
                       pm.cropIndices[0] < ocrResults.size()) {
                topText = ocrResults[pm.cropIndices[0]].first;
            }
            // Strip screw/rivet artifacts (°, ○, etc.) picked up from
            // plate fasteners before any downstream processing.
            topText = StripPlateArtifacts(topText);
            bottomText = StripPlateArtifacts(bottomText);
            std::string combined = topText;
            if (!bottomText.empty()) {
                if (!combined.empty()) combined += " ";
                combined += bottomText;
            }
            // Japan-only kana recovery fast-path fallback. Zero cost
            // on clean plates (gated by country and by UTF-8 codepoint
            // class count — clean plates return early).
            if (_country == Country::JAPAN && pm.halfH > 0 &&
                IsJapaneseIncomplete(combined)) {
                ANS_DBG("ALPR_Kana",
                    "RunInferencesBatch: firing recovery on plate "
                    "'%s' (plateROI=%dx%d halfH=%d)",
                    combined.c_str(), pm.plateROI.cols,
                    pm.plateROI.rows, pm.halfH);
                std::string recovered = StripPlateArtifacts(
                    RecoverKanaFromBottomHalf(pm.plateROI, pm.halfH));
                if (!recovered.empty()) {
                    if (bottomText.empty()) {
                        bottomText = recovered;
                    } else {
                        bottomText = recovered + " " + bottomText;
                    }
                    combined = topText;
                    if (!bottomText.empty()) {
                        if (!combined.empty()) combined += " ";
                        combined += bottomText;
                    }
                    ANS_DBG("ALPR_Kana",
                        "RunInferencesBatch: spliced result '%s'",
                        combined.c_str());
                }
            }
            if (combined.empty()) continue;
            Object out = pm.lpObj;
            out.className = combined; // raw OCR — no ALPRChecker
            out.cameraId = cameraId;
            // Map the vehicle-local box back to frame coordinates.
            out.box.x += clamped[pm.vehIdx].x;
            out.box.y += clamped[pm.vehIdx].y;
            // Colour lookup — text-keyed cache, bounded.
            std::string colour = DetectLPColourCached(
                pm.plateROI, cameraId, out.className);
            if (!colour.empty()) out.extraInfo = "color:" + colour;
            output.push_back(std::move(out));
        }
        return output;
    } catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            std::string("OpenCV Exception: ") + e.what(),
            __FILE__, __LINE__);
    } catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            e.what(), __FILE__, __LINE__);
    } catch (...)
    {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}

// ── Inference wrappers ───────────────────────────────────────────────
// Thin overloads that funnel into RunInference and serialize the
// detections to JSON via VectorDetectionToJsonString. Each returns
// true only when at least one plate was produced.

// Single-image overload; delegates with the default camera id.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult) {
    if (input.empty()) return false;
    if (input.cols < 5 || input.rows < 5) return false;
    return Inference(input, lprResult, "CustomCam");
}

// Single-image overload with explicit camera id.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult,
    const std::string& cameraId) {
    if (input.empty()) return false;
    if (input.cols < 5 || input.rows < 5) return false;
    try {
        std::vector<Object> results = RunInference(input, cameraId);
        lprResult = VectorDetectionToJsonString(results);
        return !results.empty();
    } catch (...) {
        return false;
    }
}

// Bbox-list overload; delegates with the default camera id.
bool ANSALPR_OCR::Inference(const cv::Mat& input,
    const std::vector<cv::Rect>& Bbox, std::string& lprResult) {
    return Inference(input, Bbox, lprResult, "CustomCam");
}

// Bbox-list overload: runs the full pipeline once per supplied ROI and
// merges the results back into frame coordinates.
// NOTE(review): std::vector<cv::Rect> / <Object> template arguments
// restored from context — the extracted source had lost them.
bool ANSALPR_OCR::Inference(const cv::Mat& input,
    const std::vector<cv::Rect>& Bbox, std::string& lprResult,
    const std::string& cameraId) {
    if (input.empty()) return false;
    if (input.cols < 5 || input.rows < 5) return false;
    try {
        if (Bbox.empty()) {
            return Inference(input, lprResult, cameraId);
        }
        // For cropped images, run OCR on each bounding box
        std::vector<Object> allResults;
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        } else {
            frame = input;
        }
        for (const auto& bbox : Bbox) {
            // Clamp the ROI to the frame before cropping.
            int x1 = std::max(0, bbox.x);
            int y1 = std::max(0, bbox.y);
            int w = std::min(frame.cols - x1, bbox.width);
            int h = std::min(frame.rows - y1, bbox.height);
            if (w < 5 || h < 5) continue;
            cv::Rect safeRect(x1, y1, w, h);
            cv::Mat cropped = frame(safeRect);
            std::vector<Object> results = RunInference(cropped, cameraId);
            // Adjust bounding boxes back to full image coordinates
            for (auto& obj : results) {
                obj.box.x += x1;
                obj.box.y += y1;
                allResults.push_back(std::move(obj));
            }
        }
        lprResult = VectorDetectionToJsonString(allResults);
        return !allResults.empty();
    } catch (...) {
        return false;
    }
}

// Switches the active country and propagates it to the OCR engine.
void ANSALPR_OCR::SetCountry(Country country) {
    const Country previous = _country;
    _country = country;
    if (_ocrEngine) {
        _ocrEngine->SetCountry(country);
    }
    // Log every SetCountry call so runtime country switches are
    // visible and we can confirm the update landed on the right
    // handle. The recovery + rectification gates read _country on
    // every frame, so this change takes effect on the very next
    // RunInference / RunInferencesBatch call — no restart needed.
    ANS_DBG("ALPR_SetCountry",
        "country changed %d -> %d (Japan=%d, Vietnam=%d, "
        "China=%d, Australia=%d, USA=%d, Indonesia=%d) — "
        "rectification+recovery gates update on next frame",
        static_cast<int>(previous), static_cast<int>(country),
        static_cast<int>(Country::JAPAN),
        static_cast<int>(Country::VIETNAM),
        static_cast<int>(Country::CHINA),
        static_cast<int>(Country::AUSTRALIA),
        static_cast<int>(Country::USA),
        static_cast<int>(Country::INDONESIA));
}

// Tears down detector, colour detector, and OCR engine; returns false
// only if a destroy step threw.
bool ANSALPR_OCR::Destroy() {
    try {
        if (_lpDetector) {
            _lpDetector->Destroy();
            _lpDetector.reset();
        }
        if (_lpColourDetector) {
            _lpColourDetector->Destroy();
            _lpColourDetector.reset();
        }
        if (_ocrEngine) {
            _ocrEngine->Destroy();
            _ocrEngine.reset();
        }
        _isInitialized = false;
        return true;
    } catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::Destroy", e.what(),
            __FILE__, __LINE__);
        return false;
    }
}
} // namespace ANSCENTER