Files
ANSCORE/modules/ANSLPR/ANSLPR_OCR.cpp

1059 lines
39 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ANSLPR_OCR.h"
#include "ANSRTYOLO.h"
#include "ANSONNXYOLO.h"
#include "ANSOnnxOCR.h"
#include "ANSOCRBase.h"
#include <json.hpp>
#include <algorithm>
#include <chrono>
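// ---------------------------------------------------------------------------
// Windows Event Log / debugger-output helper used for LoadEngine step logging.
// The SEH wrappers for loading TensorRT and ONNX models (counterparts of the
// ones in ANSLPR_OD.cpp) follow further below.
// ---------------------------------------------------------------------------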
static void WriteEventLog(const char* message, WORD eventType = EVENTLOG_INFORMATION_TYPE) {
static HANDLE hEventLog = RegisterEventSourceA(NULL, "ANSLogger");
if (hEventLog) {
const char* msgs[1] = { message };
ReportEventA(hEventLog, eventType, 0, 0, NULL, 1, 0, msgs, NULL);
}
OutputDebugStringA(message);
OutputDebugStringA("\n");
}
// ---------------------------------------------------------------------------
// SEH wrapper for loading ANSRTYOLO (TensorRT) models — used when NVIDIA GPU
// is detected. Falls back to ANSONNXYOLO if TRT fails.
// ---------------------------------------------------------------------------
// Argument bundle for LoadRtModel_OCR_Impl / LoadRtModel_OCR_SEH.
// The struct holds raw pointers only and never frees them; the loader
// dereferences them, so they must be valid for the duration of the call.
struct LoadRtParams_OCR {
    const std::string* licenseKey = nullptr;    // forwarded to LoadModelFromFolder
    ANSCENTER::ModelConfig* config = nullptr;   // forwarded to LoadModelFromFolder
    const std::string* modelFolder = nullptr;   // folder passed to LoadModelFromFolder
    const char* modelName = nullptr;            // model name passed to LoadModelFromFolder
    const char* classFile = nullptr;            // class file passed to LoadModelFromFolder
    std::string* labels = nullptr;              // passed through to LoadModelFromFolder
    std::unique_ptr<ANSCENTER::ANSODBase>* detector = nullptr;  // receives the loaded model on success
    bool enableTracker = false;                 // second argument of SetTracker(BYTETRACK, ...)
    bool disableStabilization = false;          // when true, SetStabilization(false) is called
};
// Creates an ANSRTYOLO instance, loads the model described by `p`, applies
// the tracker / stabilization flags and hands ownership to *p.detector.
// Returns false when loading fails; any C++ exception is swallowed, the
// target detector is reset and false is returned.
static bool LoadRtModel_OCR_Impl(const LoadRtParams_OCR& p) {
    try {
        auto engine = std::make_unique<ANSCENTER::ANSRTYOLO>();

        const bool loaded = engine->LoadModelFromFolder(
            *p.licenseKey, *p.config, p.modelName, p.classFile,
            *p.modelFolder, *p.labels);
        if (!loaded) {
            return false;
        }

        // The tracker type is always BYTETRACK; only the on/off flag varies.
        engine->SetTracker(ANSCENTER::TrackerType::BYTETRACK, p.enableTracker);

        if (p.disableStabilization) {
            engine->SetStabilization(false);
        }

        *p.detector = std::move(engine);
        return true;
    }
    catch (...) {
        p.detector->reset();
        return false;
    }
}
// Runs LoadRtModel_OCR_Impl inside a structured-exception guard.
// *outCode is 0 unless a structured exception was raised, in which case it
// receives the exception code and the function returns false.
static bool LoadRtModel_OCR_SEH(const LoadRtParams_OCR& p, DWORD* outCode) {
    bool loaded = false;
    *outCode = 0;
    __try {
        loaded = LoadRtModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        loaded = false;
    }
    return loaded;
}
// Argument bundle for LoadOnnxModel_OCR_Impl / LoadOnnxModel_OCR_SEH.
// The struct holds raw pointers only and never frees them; the loader
// dereferences them, so they must be valid for the duration of the call.
struct LoadOnnxParams_OCR {
    const std::string* licenseKey = nullptr;    // forwarded to LoadModelFromFolder
    ANSCENTER::ModelConfig* config = nullptr;   // forwarded to LoadModelFromFolder
    const std::string* modelFolder = nullptr;   // folder passed to LoadModelFromFolder
    const char* modelName = nullptr;            // model name passed to LoadModelFromFolder
    const char* classFile = nullptr;            // class file passed to LoadModelFromFolder
    std::string* labels = nullptr;              // passed through to LoadModelFromFolder
    std::unique_ptr<ANSCENTER::ANSODBase>* detector = nullptr;  // receives the loaded model on success
    bool enableTracker = false;                 // second argument of SetTracker(BYTETRACK, ...)
    bool disableStabilization = false;          // when true, SetStabilization(false) is called
};
// Creates an ANSONNXYOLO instance, loads the model described by `p`, applies
// the tracker / stabilization flags and hands ownership to *p.detector.
// Returns false when loading fails; any C++ exception is swallowed, the
// target detector is reset and false is returned.
static bool LoadOnnxModel_OCR_Impl(const LoadOnnxParams_OCR& p) {
    try {
        auto engine = std::make_unique<ANSCENTER::ANSONNXYOLO>();

        const bool loaded = engine->LoadModelFromFolder(
            *p.licenseKey, *p.config, p.modelName, p.classFile,
            *p.modelFolder, *p.labels);
        if (!loaded) {
            return false;
        }

        // The tracker type is always BYTETRACK; only the on/off flag varies.
        engine->SetTracker(ANSCENTER::TrackerType::BYTETRACK, p.enableTracker);

        if (p.disableStabilization) {
            engine->SetStabilization(false);
        }

        *p.detector = std::move(engine);
        return true;
    }
    catch (...) {
        p.detector->reset();
        return false;
    }
}
// Runs LoadOnnxModel_OCR_Impl inside a structured-exception guard.
// *outCode is 0 unless a structured exception was raised, in which case it
// receives the exception code and the function returns false.
static bool LoadOnnxModel_OCR_SEH(const LoadOnnxParams_OCR& p, DWORD* outCode) {
    bool loaded = false;
    *outCode = 0;
    __try {
        loaded = LoadOnnxModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        loaded = false;
    }
    return loaded;
}
namespace ANSCENTER
{
// Constructs the OCR-based ALPR object. The engine type starts as CPU;
// LoadEngine() later overwrites it with the detected hardware type.
ANSALPR_OCR::ANSALPR_OCR() {
engineType = EngineType::CPU;
}
// Releases all loaded engines via Destroy(). Any exception escaping
// Destroy() is swallowed so the destructor itself never throws.
ANSALPR_OCR::~ANSALPR_OCR() {
try {
Destroy();
}
catch (...) {}
}
// Validates the license, extracts the password-protected model archive and
// prepares the configuration later consumed by LoadEngine().
//
// @param licenseKey         License key; stored and validated via CheckLicense().
// @param modelZipFilePath   Path of the ALPR model zip. Also remembered so the
//                           OCR engine can be initialised from the same archive.
// @param modelZipPassword   Optional archive password; tried before the built-in ones.
// @param detectorThreshold  Score threshold copied into the plate-detector config.
// @param ocrThreshold       Stored in _ocrThreshold.
// @param colourThreshold    Score threshold copied into the colour-classifier config.
// @return true when the license is valid and the extracted model folder exists;
//         false otherwise (including when a std::exception is caught).
bool ANSALPR_OCR::Initialize(const std::string& licenseKey, const std::string& modelZipFilePath,
    const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _licenseKey = licenseKey;
        _licenseValid = false;
        _detectorThreshold = detectorThreshold;
        _ocrThreshold = ocrThreshold;
        _colorThreshold = colourThreshold;
        _country = Country::JAPAN; // Default to JAPAN for OCR-based ALPR

        CheckLicense();
        if (!_licenseValid) {
            this->_logger.LogError("ANSALPR_OCR::Initialize", "License is not valid.", __FILE__, __LINE__);
            return false;
        }

        // The archive must exist before extraction is attempted.
        if (!FileExist(modelZipFilePath)) {
            this->_logger.LogFatal("ANSALPR_OCR::Initialize", "Model zip file does not exist: " + modelZipFilePath, __FILE__, __LINE__);
            return false;
        }
        this->_logger.LogInfo("ANSALPR_OCR::Initialize", "Model zip file found: " + modelZipFilePath, __FILE__, __LINE__);

        // Candidate passwords: the caller-supplied one first, then the built-in ones.
        std::vector<std::string> passwordArray;
        if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
        passwordArray.push_back("AnsDemoModels20@!");
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");

        // Stop at the first password that extracts the archive.
        const std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
        for (const std::string& password : passwordArray) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, password, modelName, _modelFolder, false)) {
                break;
            }
        }
        if (!FolderExist(_modelFolder)) {
            this->_logger.LogError("ANSALPR_OCR::Initialize", "Output model folder does not exist: " + _modelFolder, __FILE__, __LINE__);
            return false;
        }

        // Optional country override: the first line of country.txt holds a numeric code.
        const std::string countryFile = CreateFilePath(_modelFolder, "country.txt");
        if (FileExist(countryFile)) {
            std::string countryStr;
            {
                std::ifstream infile(countryFile);
                std::getline(infile, countryStr);
            }

            // Strip surrounding whitespace (including a stray '\r') so that a
            // line such as "0 " or "0\r" is not silently treated as unknown.
            const char* const kWhitespace = " \t\r\n";
            const std::string::size_type first = countryStr.find_first_not_of(kWhitespace);
            if (first == std::string::npos) {
                countryStr.clear();
            }
            else {
                const std::string::size_type last = countryStr.find_last_not_of(kWhitespace);
                countryStr = countryStr.substr(first, last - first + 1);
            }

            if (countryStr == "0") _country = Country::VIETNAM;
            else if (countryStr == "1") _country = Country::CHINA;
            else if (countryStr == "2") _country = Country::AUSTRALIA;
            else if (countryStr == "3") _country = Country::USA;
            else if (countryStr == "4") _country = Country::INDONESIA;
            else _country = Country::JAPAN; // "5" and anything unrecognised: default for OCR mode
        }

        // Store the original model zip path — the OCR models (ansocrdec.onnx,
        // ansocrcls.onnx, ansocrrec.onnx, dict_ch.txt) are bundled inside the
        // same ALPR model zip, so we reuse it for ANSONNXOCR initialization.
        _modelZipFilePath = modelZipFilePath;

        // Initialize ALPRChecker
        alprChecker.Init(MAX_ALPR_FRAME);

        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Loads every model used by the OCR-based ALPR pipeline:
//   Step 2 - licence-plate detector (TensorRT when an NVIDIA GPU is detected,
//            otherwise / on failure ONNX Runtime). Mandatory.
//   Step 3 - OCR engine (ANSONNXOCR). Mandatory.
//   Step 4 - plate colour classifier. Optional: only attempted when lpc.onnx
//            exists and the colour threshold is > 0; failures are logged only.
//
// Progress is reported both to the Windows event log and to _logger.
// @return true when the detector and the OCR engine are loaded; false on any
//         mandatory failure or caught exception. _isInitialized mirrors the
//         return value.
bool ANSALPR_OCR::LoadEngine() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        WriteEventLog("ANSALPR_OCR::LoadEngine: Step 1 - Starting engine load");
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 1: Starting engine load", __FILE__, __LINE__);

        // Apply thresholds; the detector threshold is kept inside [0.25, 0.95].
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
        if (_lpdmodelConfig.detectionScoreThreshold < 0.25) _lpdmodelConfig.detectionScoreThreshold = 0.25;
        if (_lpdmodelConfig.detectionScoreThreshold > 0.95) _lpdmodelConfig.detectionScoreThreshold = 0.95;

        // Detect hardware
        engineType = ANSLicenseHelper::CheckHardwareInformation();
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Detected engine type: " + std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);

        const float confThreshold = 0.5f;
        const float MNSThreshold = 0.5f;
        _lpdmodelConfig.modelConfThreshold = confThreshold;
        _lpdmodelConfig.modelMNSThreshold = MNSThreshold;
        _lpColourModelConfig.modelConfThreshold = confThreshold;
        _lpColourModelConfig.modelMNSThreshold = MNSThreshold;

        const std::string lprModel = CreateFilePath(_modelFolder, "lpd.onnx");
        const std::string colorModel = CreateFilePath(_modelFolder, "lpc.onnx");

        // ── Step 2: Load LP detector ─────────────────────────────────
        if (FileExist(lprModel)) {
            // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected
            if (engineType == EngineType::NVIDIA_GPU) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with TensorRT");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with TensorRT", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::RTYOLO;
                std::string _lprClasses;   // labels are loaded but not kept
                {
                    LoadRtParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpdmodelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpd";
                    p.classFile = "lpd.names";
                    p.labels = &_lprClasses;
                    p.detector = &_lpDetector;
                    p.enableTracker = true;
                    p.disableStabilization = true;
                    DWORD sehCode = 0;
                    const bool lpSuccess = LoadRtModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        // DWORD is unsigned long; cast so the argument matches %X.
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 2 LPD TRT SEH exception 0x%08X — falling back to ONNX Runtime",
                            static_cast<unsigned int>(sehCode));
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 2: LP detector TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        _lpDetector.reset();
                    }
                    else if (!lpSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load LP detector (TensorRT). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        _lpDetector.reset();
                    }
                }
            }

            // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed
            if (!_lpDetector) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with ONNX Runtime", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::ONNXYOLO;
                std::string _lprClasses;   // labels are loaded but not kept
                {
                    LoadOnnxParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpdmodelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpd";
                    p.classFile = "lpd.names";
                    p.labels = &_lprClasses;
                    p.detector = &_lpDetector;
                    p.enableTracker = true;
                    p.disableStabilization = true;
                    DWORD sehCode = 0;
                    const bool lpSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 2 LPD SEH exception 0x%08X — LP detector disabled",
                            static_cast<unsigned int>(sehCode));
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine",
                            "Step 2: LP detector crashed (SEH). LP detector disabled.", __FILE__, __LINE__);
                        _lpDetector.reset();
                    }
                    else if (!lpSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load LP detector (ONNX Runtime).", __FILE__, __LINE__);
                        _lpDetector.reset();
                    }
                }
            }
        }

        // The plate detector is mandatory.
        if (!_lpDetector) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "LP detector failed to load. Cannot proceed.", __FILE__, __LINE__);
            _isInitialized = false;
            return false;
        }

        // ── Step 3: Load OCR engine (ANSONNXOCR) ─────────────────────
        // The OCR models (ansocrdec.onnx, ansocrcls.onnx, ansocrrec.onnx,
        // dict_ch.txt) are bundled inside the same ALPR model zip, so we
        // pass the original ALPR zip path to ANSONNXOCR::Initialize.
        // ANSOCRBase::Initialize will extract it (no-op if already done)
        // and discover the OCR model files in the extracted folder.
        WriteEventLog("ANSALPR_OCR::LoadEngine: Step 3 - Loading OCR engine (ANSONNXOCR)");
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: Loading OCR engine (ANSONNXOCR)", __FILE__, __LINE__);

        // Verify OCR model files exist in the already-extracted folder
        const std::string ocrDetModel = CreateFilePath(_modelFolder, "ansocrdec.onnx");
        const std::string ocrRecModel = CreateFilePath(_modelFolder, "ansocrrec.onnx");
        if (!FileExist(ocrDetModel) || !FileExist(ocrRecModel)) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine",
                "OCR model files not found in model folder: " + _modelFolder +
                " (expected ansocrdec.onnx, ansocrrec.onnx)", __FILE__, __LINE__);
            _isInitialized = false;
            return false;
        }

        _ocrEngine = std::make_unique<ANSONNXOCR>();

        // Determine OCR language based on country: Japan and China get their
        // own language, every other country uses English.
        OCRLanguage ocrLang = OCRLanguage::ENGLISH;
        switch (_country) {
        case Country::JAPAN: ocrLang = OCRLanguage::JAPANESE; break;
        case Country::CHINA: ocrLang = OCRLanguage::CHINESE; break;
        default:             ocrLang = OCRLanguage::ENGLISH; break;
        }

        OCRModelConfig ocrModelConfig;
        ocrModelConfig.ocrLanguage = ocrLang;
        ocrModelConfig.useDetector = true;
        ocrModelConfig.useRecognizer = true;
        // Skip the angle classifier for ALPR. License-plate boxes
        // from the YOLO detector are already axis-aligned, so the
        // 180° classifier is dead weight (one extra ORT call per
        // plate for no recall gain).
        ocrModelConfig.useCLS = false;
        ocrModelConfig.useLayout = false;
        ocrModelConfig.useTable = false;
        ocrModelConfig.useTensorRT = true;
        ocrModelConfig.enableMKLDNN = false;
        ocrModelConfig.useDilation = true;
        ocrModelConfig.useAngleCLS = false;
        ocrModelConfig.gpuId = 0;
        ocrModelConfig.detectionDBThreshold = 0.5;
        ocrModelConfig.detectionBoxThreshold = 0.3;
        ocrModelConfig.detectionDBUnclipRatio = 1.2;
        ocrModelConfig.clsThreshold = 0.9;
        ocrModelConfig.limitSideLen = 480;

        // Pass the original ALPR model zip path — ANSOCRBase::Initialize
        // will extract it to the same folder (already done, so extraction
        // is a no-op) and set up ansocrdec.onnx / ansocrcls.onnx /
        // ansocrrec.onnx / dict_ch.txt paths automatically.
        const bool ocrSuccess = _ocrEngine->Initialize(_licenseKey, ocrModelConfig, _modelZipFilePath, "", 0);
        if (!ocrSuccess) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Failed to initialize OCR engine (ANSONNXOCR).", __FILE__, __LINE__);
            _ocrEngine.reset();
            _isInitialized = false;
            return false;
        }

        // Set ALPR mode and country on the OCR engine
        _ocrEngine->SetOCRMode(OCRMode::OCR_ALPR);
        _ocrEngine->SetCountry(_country);
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: OCR engine loaded successfully.", __FILE__, __LINE__);

        // ── Step 4: Load colour classifier (optional) ────────────────
        if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
            // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected
            if (engineType == EngineType::NVIDIA_GPU) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with TensorRT");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with TensorRT", __FILE__, __LINE__);
                _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                _lpColourModelConfig.modelType = ModelType::RTYOLO;
                {
                    LoadRtParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpColourModelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpc";
                    p.classFile = "lpc.names";
                    p.labels = &_lpColourLabels;
                    p.detector = &_lpColourDetector;
                    p.enableTracker = false;
                    p.disableStabilization = false;
                    DWORD sehCode = 0;
                    const bool colourSuccess = LoadRtModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 4 LPC TRT SEH exception 0x%08X — falling back to ONNX Runtime",
                            static_cast<unsigned int>(sehCode));
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 4: Colour classifier TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        _lpColourDetector.reset();
                    }
                    else if (!colourSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load colour classifier (TensorRT). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        _lpColourDetector.reset();
                    }
                }
            }

            // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed
            if (!_lpColourDetector) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with ONNX Runtime", __FILE__, __LINE__);
                _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                _lpColourModelConfig.modelType = ModelType::ONNXYOLO;
                {
                    LoadOnnxParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpColourModelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpc";
                    p.classFile = "lpc.names";
                    p.labels = &_lpColourLabels;
                    p.detector = &_lpColourDetector;
                    p.enableTracker = false;
                    p.disableStabilization = false;
                    DWORD sehCode = 0;
                    const bool colourSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 4 LPC SEH exception 0x%08X — colour detection disabled",
                            static_cast<unsigned int>(sehCode));
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 4: Colour classifier crashed. Colour detection disabled.", __FILE__, __LINE__);
                        _lpColourDetector.reset();
                    }
                    else if (!colourSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load colour detector (ONNX Runtime). Colour detection disabled.", __FILE__, __LINE__);
                        _lpColourDetector.reset();
                    }
                }
            }
        }

        // Reaching this point means both mandatory engines are loaded.
        const bool valid = true;
        _isInitialized = valid;
        WriteEventLog(("ANSALPR_OCR::LoadEngine: Step 5 - Engine load complete. Valid = " + std::to_string(valid)).c_str());
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 5: Engine load complete. Valid = " + std::to_string(valid), __FILE__, __LINE__);
        return valid;
    }
    catch (const std::exception& e) {
        WriteEventLog(("ANSALPR_OCR::LoadEngine: C++ exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", std::string("C++ exception: ") + e.what(), __FILE__, __LINE__);
        _isInitialized = false;
        return false;
    }
    catch (...) {
        WriteEventLog("ANSALPR_OCR::LoadEngine: Unknown exception", EVENTLOG_ERROR_TYPE);
        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Unknown exception", __FILE__, __LINE__);
        _isInitialized = false;
        return false;
    }
}
// ── Colour detection (same pattern as ANSALPR_OD) ────────────────────
// Runs the colour classifier on a plate crop and returns the class name of
// the highest-confidence result. Returns an empty string when the classifier
// is disabled (threshold <= 0), not loaded, the crop is empty, nothing is
// detected, or a std::exception is caught.
std::string ANSALPR_OCR::DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId) {
    const bool classifierDisabled = _lpColourModelConfig.detectionScoreThreshold <= 0.0f;
    if (classifierDisabled || !_lpColourDetector || lprROI.empty()) {
        return {};
    }

    try {
        const std::vector<Object> candidates = _lpColourDetector->RunInference(lprROI, cameraId);
        if (candidates.empty()) {
            return {};
        }

        // Keep the first candidate with the greatest confidence.
        const Object* best = &candidates.front();
        for (const Object& candidate : candidates) {
            if (best->confidence < candidate.confidence) {
                best = &candidate;
            }
        }
        return best->className;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::DetectLPColourDetector", e.what(), __FILE__, __LINE__);
        return {};
    }
}
// Colour lookup with a cache keyed by plate text. A cache hit bumps the
// entry's hit counter and returns the stored colour; a miss runs the
// classifier and stores any non-empty result. An empty plate text bypasses
// the cache entirely. When the cache reaches COLOUR_CACHE_MAX_SIZE it is
// cleared before the new entry is stored.
std::string ANSALPR_OCR::DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText) {
    const bool cacheable = !plateText.empty();

    if (cacheable) {
        // Lookup is done in its own scope so the lock is released before
        // the classifier runs.
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        const auto hit = _colourCache.find(plateText);
        if (hit != _colourCache.end()) {
            hit->second.hitCount++;
            return hit->second.colour;
        }
    }

    // No usable cache entry — run the classifier.
    std::string colour = DetectLPColourDetector(lprROI, cameraId);
    if (!cacheable || colour.empty()) {
        return colour;
    }

    {
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
            _colourCache.clear();
        }
        _colourCache[plateText] = { colour, 0 };
    }
    return colour;
}
// ── Full-frame vs pipeline auto-detection ────────────────────────────
// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
// watches whether consecutive frames from a given camera have the exact
// same (width, height). Pre-cropped pipeline inputs vary by a few
// pixels per crop, so the exact-match check fails and we return false.
// Real video frames are pixel-identical across frames, so after a few
// consistent frames we flip into FULL-FRAME mode and start running the
// ALPRChecker voting + ensureUniquePlateText dedup.
// Decides, per camera, whether the ALPRChecker layer should be used for the
// current frame. Returns false when the checker is disabled or the image is
// narrower than MIN_FULLFRAME_WIDTH; otherwise returns true once the same
// image size has been seen CONFIRM_THRESHOLD times in a row. Any size change
// resets the per-camera state.
bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
                                       const std::string& cameraId) {
    // Disabled checker or small (pipeline-crop sized) image: never use it.
    if (!_enableALPRChecker || imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) {
        return false;
    }

    auto& state = _imageSizeTrackers[cameraId];
    const bool previousMode = state.detectedFullFrame;

    if (!(imageSize == state.lastSize)) {
        // Size changed: start counting again from this frame.
        state.lastSize = imageSize;
        state.consistentCount = 1;
        state.detectedFullFrame = false;
    }
    else {
        state.consistentCount++;
        if (state.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
            state.detectedFullFrame = true;
        }
    }

    // Only log when the detected mode flips.
    if (previousMode != state.detectedFullFrame) {
        ANS_DBG("ALPR_OCR_Checker",
            "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
            cameraId.c_str(),
            state.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
            imageSize.width, imageSize.height, state.consistentCount);
    }
    return state.detectedFullFrame;
}
// ── Spatial plate dedup with accumulated scoring ─────────────────────
// Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one
// detection in the same frame ends up with the same plate text (e.g.
// tracker occlusion or two cars in a single frame reading the same
// string), we resolve the ambiguity by accumulating confidence per
// spatial location across frames. The location with the higher running
// score keeps the plate text; the loser has its className cleared and
// is dropped from the output.
void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
const std::string& cameraId) {
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
auto& identities = _plateIdentities[cameraId];
// Auto-detect mode by detection count.
// 1 detection → pipeline/single-crop mode → no dedup needed.
// 2+ detections → full-frame mode → apply accumulated scoring.
if (results.size() <= 1) {
// Still age out stale spatial identities from previous full-frame calls
if (!identities.empty()) {
constexpr int MAX_UNSEEN_FRAMES = 30;
for (auto& id : identities) id.framesSinceLastSeen++;
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
}
return;
}
// Helper: IoU between two rects.
auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
int x1 = std::max(a.x, b.x);
int y1 = std::max(a.y, b.y);
int x2 = std::min(a.x + a.width, b.x + b.width);
int y2 = std::min(a.y + a.height, b.y + b.height);
if (x2 <= x1 || y2 <= y1) return 0.0f;
float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
};
// Helper: find matching spatial identity by bounding-box overlap.
auto findSpatialMatch = [&](const cv::Rect& box,
const std::string& plateText) -> SpatialPlateIdentity* {
for (auto& id : identities) {
if (id.plateText == plateText) {
cv::Rect storedRect(
static_cast<int>(id.center.x - box.width * 0.5f),
static_cast<int>(id.center.y - box.height * 0.5f),
box.width, box.height);
if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
return &id;
}
}
}
return nullptr;
};
// Step 1: Build map of plateText → candidate indices
std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
for (size_t i = 0; i < results.size(); ++i) {
if (results[i].className.empty()) continue;
plateCandidates[results[i].className].push_back(i);
}
// Step 2: Resolve duplicates using spatial accumulated scores
for (auto& [plateText, indices] : plateCandidates) {
if (indices.size() <= 1) continue;
size_t winner = indices[0];
float bestScore = 0.0f;
for (size_t idx : indices) {
float score = results[idx].confidence;
auto* match = findSpatialMatch(results[idx].box, plateText);
if (match) {
score = match->accumulatedScore + results[idx].confidence;
}
if (score > bestScore) {
bestScore = score;
winner = idx;
}
}
for (size_t idx : indices) {
if (idx != winner) {
results[idx].className.clear();
}
}
}
// Step 3: Update spatial identities — winners accumulate, losers decay
constexpr float DECAY_FACTOR = 0.8f;
constexpr float MIN_SCORE = 0.1f;
constexpr int MAX_UNSEEN_FRAMES = 30;
for (auto& id : identities) id.framesSinceLastSeen++;
for (auto& r : results) {
if (r.className.empty()) continue;
cv::Point2f center(
r.box.x + r.box.width * 0.5f,
r.box.y + r.box.height * 0.5f);
auto* match = findSpatialMatch(r.box, r.className);
if (match) {
match->accumulatedScore += r.confidence;
match->center = center;
match->framesSinceLastSeen = 0;
} else {
identities.push_back({ center, r.className, r.confidence, 0 });
}
}
// Decay unseen identities and remove stale ones
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > 0) {
it->accumulatedScore *= DECAY_FACTOR;
}
if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
// Step 4: Remove entries with cleared plate text
results.erase(
std::remove_if(results.begin(), results.end(),
[](const Object& o) { return o.className.empty(); }),
results.end());
}
// ── OCR on a single plate ROI ────────────────────────────────────────
// Returns the recognised plate text by value (empty string on failure).
// When ALPR mode is active and plate formats are available, the text comes
// from the structured ALPR post-processing (correct zone ordering).
// Runs the full OCR pipeline on one cropped plate image.
//
// @param plateROI  Cropped plate image; crops smaller than 10x10 are rejected.
// @param cameraId  Forwarded to the OCR engine.
// @return The plate text. In ALPR mode with plate formats available this is
//         the first ALPR post-processing result's fullPlateText; otherwise
//         the recognised fragments concatenated in reading order (rows top to
//         bottom, left to right within a row). Empty on failure or no text.
std::string ANSALPR_OCR::RunOCROnPlate(const cv::Mat& plateROI, const std::string& cameraId) {
    if (!_ocrEngine || plateROI.empty()) return "";
    if (plateROI.cols < 10 || plateROI.rows < 10) return "";
    try {
        // Run the full ANSONNXOCR pipeline on the cropped plate image
        std::vector<OCRObject> ocrResults = _ocrEngine->RunInference(plateROI, cameraId);
        if (ocrResults.empty()) return "";

        // If ALPR mode is active and we have plate formats, use the
        // structured ALPR post-processing to get correct zone ordering
        // (e.g. "品川 302 ま 93-15" instead of "品川30293-15ま")
        const auto& alprFormats = _ocrEngine->GetALPRFormats();
        if (_ocrEngine->GetOCRMode() == OCRMode::OCR_ALPR && !alprFormats.empty()) {
            auto alprResults = ANSOCRUtility::ALPRPostProcessing(
                ocrResults, alprFormats,
                plateROI.cols, plateROI.rows,
                _ocrEngine.get(), plateROI);
            if (!alprResults.empty()) {
                return alprResults[0].fullPlateText;
            }
        }

        // Fallback: simple concatenation in reading order.
        //
        // A single comparator of the form "same row if |dy| <= threshold,
        // else compare y" is not a strict weak ordering, which std::sort
        // requires. Instead: (1) sort strictly by (y, x), (2) walk the sorted
        // list and cut it into rows, (3) sort each row by x.
        std::sort(ocrResults.begin(), ocrResults.end(),
            [](const OCRObject& a, const OCRObject& b) {
                if (a.box.y != b.box.y) return a.box.y < b.box.y;
                return a.box.x < b.box.x;
            });

        auto rowBegin = ocrResults.begin();
        while (rowBegin != ocrResults.end()) {
            auto rowEnd = rowBegin + 1;
            while (rowEnd != ocrResults.end()) {
                // Same tolerance as before: half of the smaller box height,
                // measured against the first box of the row.
                const int rowThreshold = std::min(rowBegin->box.height, rowEnd->box.height) / 2;
                if (rowEnd->box.y - rowBegin->box.y > rowThreshold) break;
                ++rowEnd;
            }
            std::sort(rowBegin, rowEnd,
                [](const OCRObject& a, const OCRObject& b) { return a.box.x < b.box.x; });
            rowBegin = rowEnd;
        }

        std::string fullText;
        for (const auto& obj : ocrResults) {
            if (!obj.className.empty()) {
                fullText += obj.className;
            }
        }
        return fullText;
    }
    catch (const std::exception& e) {
        this->_logger.LogError("ANSALPR_OCR::RunOCROnPlate", e.what(), __FILE__, __LINE__);
        return "";
    }
}
// ── Main inference pipeline ──────────────────────────────────────────
// Main ALPR pipeline for one frame: detect plates, crop them (splitting
// narrow plates into two rows), recognise all crops in one batched call,
// optionally stabilise the text via ALPRChecker, attach the plate colour and
// de-duplicate identical texts.
//
// @param input     Frame (grayscale input is converted to BGR).
// @param cameraId  Camera identifier used for tracking / caching state.
// @return One Object per plate with non-empty text (className = plate text,
//         extraInfo = "color:<name>" when a colour was found). Empty on
//         invalid state, invalid input, no detections or any caught exception.
std::vector<Object> ANSALPR_OCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Input image is empty or too small", __FILE__, __LINE__);
        return {};
    }
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "_lpDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!_ocrEngine) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "_ocrEngine is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        const cv::Rect frameBounds(0, 0, frameWidth, frameHeight);

        // Step 1: Detect license plates
        std::vector<Object> lprOutput = _lpDetector->RunInference(frame, cameraId);
        if (lprOutput.empty()) {
            return {};
        }

        // Step 2: Collect crops from every valid plate. Wide plates
        // (aspect >= 2.0) are treated as a single text line; narrow
        // plates (2-row layouts like Japanese) are split horizontally
        // at H/2 into top and bottom rows. All crops go through a
        // single batched recognizer call, bypassing the OCR text-line
        // detector entirely — for ALPR the LP YOLO box already bounds
        // the text region precisely.
        struct PlateInfo {
            size_t origIndex;                 // into lprOutput
            std::vector<size_t> cropIndices;  // into allCrops
            cv::Mat plateROI;                 // full (unsplit) ROI, kept for colour
        };
        std::vector<cv::Mat> allCrops;
        std::vector<PlateInfo> plateInfos;
        allCrops.reserve(lprOutput.size() * 2);
        plateInfos.reserve(lprOutput.size());

        for (size_t i = 0; i < lprOutput.size(); ++i) {
            // Clip the detection to the frame. Using the rectangle
            // intersection (rather than clamping only the origin) keeps the
            // crop inside the detected box when the box starts at a negative
            // x or y.
            const cv::Rect clipped = lprOutput[i].box & frameBounds;
            const int width = clipped.width;
            const int height = clipped.height;
            if (width <= 0 || height <= 0) continue;

            cv::Mat plateROI = frame(clipped);
            PlateInfo info;
            info.origIndex = i;
            info.plateROI = plateROI;

            const float aspect = static_cast<float>(width) /
                std::max(1, height);
            // 2-row heuristic: aspect < 2.0 → split top/bottom.
            // Threshold tuned to catch Japanese square plates
            // (~1.5–1.9) while leaving wide EU/VN plates (3.0+)
            // untouched.
            if (aspect < 2.0f && height >= 24) {
                const int halfH = height / 2;
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI(cv::Rect(0, 0, width, halfH)));
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI(cv::Rect(0, halfH, width, height - halfH)));
            }
            else {
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI);
            }
            plateInfos.push_back(std::move(info));
        }
        if (allCrops.empty()) {
            return {};
        }

        // Step 3: Single batched recognizer call for every crop.
        // ONNXOCRRecognizer groups crops by bucket width and issues
        // one ORT Run per bucket — typically 1–2 GPU calls for an
        // entire frame regardless of plate count.
        auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);

        // Step 4: Assemble per-plate output
        std::vector<Object> output;
        output.reserve(plateInfos.size());

        // Decide once per frame whether the tracker-based correction
        // layer should run. We auto-detect full-frame vs pipeline mode
        // by watching for consecutive frames of identical size, exactly
        // the same way ANSALPR_OD does it.
        const bool useChecker = shouldUseALPRChecker(
            cv::Size(frameWidth, frameHeight), cameraId);

        for (const auto& info : plateInfos) {
            // Join the text of the plate's crops (top row first) with spaces.
            std::string combinedText;
            for (size_t cropIdx : info.cropIndices) {
                if (cropIdx >= ocrResults.size()) continue;
                const std::string& lineText = ocrResults[cropIdx].first;
                if (lineText.empty()) continue;
                if (!combinedText.empty()) combinedText += " ";
                combinedText += lineText;
            }
            if (combinedText.empty()) continue;

            Object lprObject = lprOutput[info.origIndex];
            lprObject.cameraId = cameraId;

            // Cross-frame stabilization: per-track majority vote in
            // full-frame mode, raw OCR text in pipeline mode.
            if (useChecker) {
                lprObject.className = alprChecker.checkPlateByTrackId(
                    cameraId, combinedText, lprObject.trackId);
            }
            else {
                lprObject.className = combinedText;
            }
            if (lprObject.className.empty()) continue;

            // Optional colour detection on the full plate ROI
            std::string colour = DetectLPColourCached(
                info.plateROI, cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }
            output.push_back(std::move(lprObject));
        }

        // Spatial dedup: if two detections in the same frame ended up
        // with the same plate text, keep only the one whose spatial
        // history has the higher accumulated confidence. Skip this in
        // pipeline mode because there's only ever one plate per call.
        if (useChecker) {
            ensureUniquePlateText(output, cameraId);
        }
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
// ── Inference wrappers ───────────────────────────────────────────────
// Convenience overload: runs inference with the camera id "CustomCam".
// Returns false without touching lprResult for empty or sub-5x5 images.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult) {
    const bool unusable = input.empty() || input.cols < 5 || input.rows < 5;
    if (unusable) {
        return false;
    }
    return Inference(input, lprResult, "CustomCam");
}
// Runs the ALPR pipeline on `input` and writes the detections, serialised by
// VectorDetectionToJsonString, into lprResult. Returns true when at least one
// plate was produced; false for empty / sub-5x5 images or on any exception.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
    const bool unusable = input.empty() || input.cols < 5 || input.rows < 5;
    if (unusable) {
        return false;
    }

    try {
        const std::vector<Object> plates = RunInference(input, cameraId);
        lprResult = VectorDetectionToJsonString(plates);
        const bool foundAny = !plates.empty();
        return foundAny;
    }
    catch (...) {
        return false;
    }
}
// Convenience overload: region-restricted inference using the camera id
// "CustomCam". Forwards to the four-argument overload.
bool ANSALPR_OCR::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult) {
return Inference(input, Bbox, lprResult, "CustomCam");
}
// Runs the ALPR pipeline inside each region of `Bbox` and reports the plates
// in full-image coordinates.
//
// @param input      Source image (grayscale input is converted to BGR).
// @param Bbox       Regions to search; when empty the whole image is used.
// @param lprResult  Receives the detections serialised by VectorDetectionToJsonString.
// @param cameraId   Camera identifier forwarded to RunInference.
// @return true when at least one plate was produced; false for empty /
//         sub-5x5 images or on any exception.
bool ANSALPR_OCR::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult, const std::string& cameraId) {
    if (input.empty()) return false;
    if (input.cols < 5 || input.rows < 5) return false;
    try {
        if (Bbox.empty()) {
            return Inference(input, lprResult, cameraId);
        }

        // For cropped images, run OCR on each bounding box
        std::vector<Object> allResults;
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input;
        }
        const cv::Rect frameBounds(0, 0, frame.cols, frame.rows);

        for (const auto& bbox : Bbox) {
            // Clip the region to the image. Using the rectangle intersection
            // (rather than clamping only the origin) keeps the crop inside
            // the requested region when it starts at a negative x or y.
            const cv::Rect safeRect = bbox & frameBounds;
            if (safeRect.width < 5 || safeRect.height < 5) continue;

            cv::Mat cropped = frame(safeRect);
            std::vector<Object> results = RunInference(cropped, cameraId);

            // Adjust bounding boxes back to full image coordinates
            for (auto& obj : results) {
                obj.box.x += safeRect.x;
                obj.box.y += safeRect.y;
                allResults.push_back(std::move(obj));
            }
        }
        lprResult = VectorDetectionToJsonString(allResults);
        return !allResults.empty();
    }
    catch (...) {
        return false;
    }
}
// Stores the country and, when an OCR engine is loaded, forwards it to the
// engine as well.
void ANSALPR_OCR::SetCountry(Country country) {
    _country = country;
    if (!_ocrEngine) {
        return;
    }
    _ocrEngine->SetCountry(country);
}
// Destroys and releases the plate detector, the colour classifier and the
// OCR engine (in that order) and marks the object as uninitialised.
// Returns true on success, false when a std::exception was caught.
bool ANSALPR_OCR::Destroy() {
    try {
        // Same teardown for every component: Destroy() first, then release.
        const auto release = [](auto& component) {
            if (component) {
                component->Destroy();
                component.reset();
            }
        };

        release(_lpDetector);
        release(_lpColourDetector);
        release(_ocrEngine);

        _isInitialized = false;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}
} // namespace ANSCENTER