// ANSLPR_OCR.cpp — OCR-based ALPR engine implementation (ANSCENTER).
#include "ANSLPR_OCR.h"
|
||
#include "ANSRTYOLO.h"
|
||
#include "ANSONNXYOLO.h"
|
||
#include "ANSOnnxOCR.h"
|
||
#include "ANSOCRBase.h"
|
||
|
||
#include <json.hpp>
|
||
#include <algorithm>
|
||
#include <chrono>
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// SEH wrapper for loading ONNX models — identical to the one in ANSLPR_OD.cpp
|
||
// ---------------------------------------------------------------------------
|
||
static void WriteEventLog(const char* message, WORD eventType = EVENTLOG_INFORMATION_TYPE) {
|
||
static HANDLE hEventLog = RegisterEventSourceA(NULL, "ANSLogger");
|
||
if (hEventLog) {
|
||
const char* msgs[1] = { message };
|
||
ReportEventA(hEventLog, eventType, 0, 0, NULL, 1, 0, msgs, NULL);
|
||
}
|
||
OutputDebugStringA(message);
|
||
OutputDebugStringA("\n");
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
// SEH wrapper for loading ANSRTYOLO (TensorRT) models — used when NVIDIA GPU
// is detected. Falls back to ANSONNXYOLO if TRT fails.
// ---------------------------------------------------------------------------
// Parameter bundle for LoadRtModel_OCR_Impl / LoadRtModel_OCR_SEH. Pointers
// are used for the C++ objects — presumably so the SEH (__try) frame never
// holds objects requiring unwinding (MSVC C2712); verify if refactoring.
struct LoadRtParams_OCR {
    const std::string* licenseKey;                    // license passed to LoadModelFromFolder
    ANSCENTER::ModelConfig* config;                   // detector configuration (thresholds, model type)
    const std::string* modelFolder;                   // folder containing the extracted model files
    const char* modelName;                            // model base name, e.g. "lpd"
    const char* classFile;                            // class-names file, e.g. "lpd.names"
    std::string* labels;                              // out: labels read from classFile
    std::unique_ptr<ANSCENTER::ANSODBase>* detector;  // out: receives the loaded detector on success
    bool enableTracker;                               // enable BYTETRACK tracking on the detector
    bool disableStabilization;                        // if true, turn detection stabilization off
};
||
static bool LoadRtModel_OCR_Impl(const LoadRtParams_OCR& p) {
|
||
try {
|
||
auto rtyolo = std::make_unique<ANSCENTER::ANSRTYOLO>();
|
||
bool ok = rtyolo->LoadModelFromFolder(
|
||
*p.licenseKey, *p.config, p.modelName, p.classFile,
|
||
*p.modelFolder, *p.labels);
|
||
if (!ok) {
|
||
return false;
|
||
}
|
||
if (p.enableTracker) {
|
||
rtyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, true);
|
||
} else {
|
||
rtyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, false);
|
||
}
|
||
if (p.disableStabilization) {
|
||
rtyolo->SetStabilization(false);
|
||
}
|
||
*p.detector = std::move(rtyolo);
|
||
return true;
|
||
}
|
||
catch (...) {
|
||
p.detector->reset();
|
||
return false;
|
||
}
|
||
}
|
||
|
||
// SEH guard around LoadRtModel_OCR_Impl. Catches structured (OS-level)
// exceptions — e.g. access violations raised inside the TensorRT runtime —
// that ordinary C++ try/catch cannot intercept. On such a crash *outCode
// receives the SEH exception code and false is returned; on a normal run
// *outCode stays 0. The __try frame deliberately contains no C++ objects
// with destructors (MSVC forbids mixing them), hence the separate Impl.
static bool LoadRtModel_OCR_SEH(const LoadRtParams_OCR& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return LoadRtModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
||
// Parameter bundle for LoadOnnxModel_OCR_Impl / LoadOnnxModel_OCR_SEH.
// Field-for-field mirror of LoadRtParams_OCR (see above); kept as a
// separate type so each loader's SEH wrapper stays self-contained.
struct LoadOnnxParams_OCR {
    const std::string* licenseKey;                    // license passed to LoadModelFromFolder
    ANSCENTER::ModelConfig* config;                   // detector configuration (thresholds, model type)
    const std::string* modelFolder;                   // folder containing the extracted model files
    const char* modelName;                            // model base name, e.g. "lpd"
    const char* classFile;                            // class-names file, e.g. "lpd.names"
    std::string* labels;                              // out: labels read from classFile
    std::unique_ptr<ANSCENTER::ANSODBase>* detector;  // out: receives the loaded detector on success
    bool enableTracker;                               // enable BYTETRACK tracking on the detector
    bool disableStabilization;                        // if true, turn detection stabilization off
};
||
static bool LoadOnnxModel_OCR_Impl(const LoadOnnxParams_OCR& p) {
|
||
try {
|
||
auto onnxyolo = std::make_unique<ANSCENTER::ANSONNXYOLO>();
|
||
bool ok = onnxyolo->LoadModelFromFolder(
|
||
*p.licenseKey, *p.config, p.modelName, p.classFile,
|
||
*p.modelFolder, *p.labels);
|
||
if (!ok) {
|
||
return false;
|
||
}
|
||
if (p.enableTracker) {
|
||
onnxyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, true);
|
||
} else {
|
||
onnxyolo->SetTracker(ANSCENTER::TrackerType::BYTETRACK, false);
|
||
}
|
||
if (p.disableStabilization) {
|
||
onnxyolo->SetStabilization(false);
|
||
}
|
||
*p.detector = std::move(onnxyolo);
|
||
return true;
|
||
}
|
||
catch (...) {
|
||
p.detector->reset();
|
||
return false;
|
||
}
|
||
}
|
||
|
||
// SEH guard around LoadOnnxModel_OCR_Impl — same contract as
// LoadRtModel_OCR_SEH: traps structured exceptions from the native
// inference runtime, reports the code through *outCode (0 on a clean run),
// and returns false when a crash was intercepted.
static bool LoadOnnxModel_OCR_SEH(const LoadOnnxParams_OCR& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return LoadOnnxModel_OCR_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
||
namespace ANSCENTER
|
||
{
|
||
// Constructs the OCR-based ALPR engine with CPU inference selected by
// default; LoadEngine() later re-detects the hardware and may switch the
// engine type (e.g. to NVIDIA GPU / TensorRT).
ANSALPR_OCR::ANSALPR_OCR() {
    engineType = EngineType::CPU;
}
|
||
// Destructor: releases engine resources via Destroy(). Destructors must
// never throw, so any exception escaping Destroy() is swallowed.
ANSALPR_OCR::~ANSALPR_OCR() {
    try {
        Destroy();
    }
    catch (...) {}
}
|
||
// Validates the license, extracts the password-protected model zip, reads
// the target country from country.txt, and records configuration for the
// later LoadEngine() call. Returns false (with a log entry) on any failure.
//
// Parameters:
//   licenseKey        - license string checked via CheckLicense().
//   modelZipFilePath  - path to the bundled ALPR model archive.
//   modelZipPassword  - optional caller-supplied archive password; tried
//                       before the built-in fallback passwords.
//   detectorThreshold - LP detector score threshold (clamped in LoadEngine).
//   ocrThreshold      - OCR confidence threshold.
//   colourThreshold   - colour classifier threshold; <= 0 disables colour.
bool ANSALPR_OCR::Initialize(const std::string& licenseKey, const std::string& modelZipFilePath,
    const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) {
    // Recursive mutex: Initialize/LoadEngine/Destroy share the same lock.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _licenseKey = licenseKey;
        _licenseValid = false;
        _detectorThreshold = detectorThreshold;
        _ocrThreshold = ocrThreshold;
        _colorThreshold = colourThreshold;
        _country = Country::JAPAN; // Default to JAPAN for OCR-based ALPR
        // CheckLicense() is expected to set _licenseValid as a side effect.
        CheckLicense();
        if (!_licenseValid) {
            this->_logger.LogError("ANSALPR_OCR::Initialize", "License is not valid.", __FILE__, __LINE__);
            return false;
        }

        // Extract model folder
        if (!FileExist(modelZipFilePath)) {
            this->_logger.LogFatal("ANSALPR_OCR::Initialize", "Model zip file does not exist: " + modelZipFilePath, __FILE__, __LINE__);
            return false;
        }
        this->_logger.LogInfo("ANSALPR_OCR::Initialize", "Model zip file found: " + modelZipFilePath, __FILE__, __LINE__);

        // Unzip model zip file. The caller-supplied password (if any) is
        // tried first, then the built-in fallback passwords in order.
        std::vector<std::string> passwordArray;
        if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
        passwordArray.push_back("AnsDemoModels20@!");
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
        std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);

        // First password that extracts successfully wins. If all fail, the
        // FolderExist() check below reports the failure.
        for (size_t i = 0; i < passwordArray.size(); i++) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
                break;
        }

        if (!FolderExist(_modelFolder)) {
            this->_logger.LogError("ANSALPR_OCR::Initialize", "Output model folder does not exist: " + _modelFolder, __FILE__, __LINE__);
            return false;
        }

        // Check country from country.txt (single digit on the first line;
        // anything unrecognised falls back to JAPAN).
        std::string countryFile = CreateFilePath(_modelFolder, "country.txt");
        if (FileExist(countryFile)) {
            std::ifstream infile(countryFile);
            std::string countryStr;
            std::getline(infile, countryStr);
            infile.close();
            if (countryStr == "0") _country = Country::VIETNAM;
            else if (countryStr == "1") _country = Country::CHINA;
            else if (countryStr == "2") _country = Country::AUSTRALIA;
            else if (countryStr == "3") _country = Country::USA;
            else if (countryStr == "4") _country = Country::INDONESIA;
            else if (countryStr == "5") _country = Country::JAPAN;
            else _country = Country::JAPAN; // Default for OCR mode
        }

        // Store the original model zip path — the OCR models (ansocrdec.onnx,
        // ansocrcls.onnx, ansocrrec.onnx, dict_ch.txt) are bundled inside the
        // same ALPR model zip, so we reuse it for ANSONNXOCR initialization.
        _modelZipFilePath = modelZipFilePath;

        // Initialize ALPRChecker
        alprChecker.Init(MAX_ALPR_FRAME);

        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;

        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
||
// Loads all inference engines after Initialize() succeeded:
//   Step 2 - LP detector ("lpd.onnx"): TensorRT first on NVIDIA GPUs, with
//            SEH-guarded fallback to ONNX Runtime. Mandatory — failure aborts.
//   Step 3 - OCR engine (ANSONNXOCR) from the original model zip. Mandatory.
//   Step 4 - colour classifier ("lpc.onnx"): optional; skipped when the file
//            is missing or the colour threshold is <= 0; failures only
//            disable colour detection.
// Returns true and sets _isInitialized on success. SEH wrappers are used so
// a native crash inside TensorRT/ONNX Runtime degrades gracefully instead
// of taking down the process.
bool ANSALPR_OCR::LoadEngine() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        WriteEventLog("ANSALPR_OCR::LoadEngine: Step 1 - Starting engine load");
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 1: Starting engine load", __FILE__, __LINE__);

        // Detect hardware
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;

        // Clamp the detector threshold to a sane [0.25, 0.95] range.
        if (_lpdmodelConfig.detectionScoreThreshold < 0.25) _lpdmodelConfig.detectionScoreThreshold = 0.25;
        if (_lpdmodelConfig.detectionScoreThreshold > 0.95) _lpdmodelConfig.detectionScoreThreshold = 0.95;

        engineType = ANSLicenseHelper::CheckHardwareInformation();
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Detected engine type: " + std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);

        float confThreshold = 0.5f;
        float MNSThreshold = 0.5f;
        _lpdmodelConfig.modelConfThreshold = confThreshold;
        _lpdmodelConfig.modelMNSThreshold = MNSThreshold;
        _lpColourModelConfig.modelConfThreshold = confThreshold;
        _lpColourModelConfig.modelMNSThreshold = MNSThreshold;

        std::string lprModel = CreateFilePath(_modelFolder, "lpd.onnx");
        std::string colorModel = CreateFilePath(_modelFolder, "lpc.onnx");

        bool valid = false;

        // ── Step 2: Load LP detector ─────────────────────────────────
        if (FileExist(lprModel)) {
            // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected
            if (engineType == EngineType::NVIDIA_GPU) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with TensorRT");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with TensorRT", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::RTYOLO;
                std::string _lprClasses;
                {
                    LoadRtParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpdmodelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpd";
                    p.classFile = "lpd.names";
                    p.labels = &_lprClasses;
                    p.detector = &_lpDetector;
                    p.enableTracker = true;
                    p.disableStabilization = true;

                    DWORD sehCode = 0;
                    bool lpSuccess = LoadRtModel_OCR_SEH(p, &sehCode);
                    // sehCode != 0 means a native crash was intercepted —
                    // distinct from an ordinary load failure below.
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 2 LPD TRT SEH exception 0x%08X — falling back to ONNX Runtime", sehCode);
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 2: LP detector TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        if (_lpDetector) _lpDetector.reset();
                    }
                    else if (!lpSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load LP detector (TensorRT). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        if (_lpDetector) _lpDetector.reset();
                    }
                }
            }

            // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed
            if (!_lpDetector) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 2 - Loading LP detector with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 2: Loading LP detector with ONNX Runtime", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::ONNXYOLO;
                std::string _lprClasses;
                {
                    LoadOnnxParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpdmodelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpd";
                    p.classFile = "lpd.names";
                    p.labels = &_lprClasses;
                    p.detector = &_lpDetector;
                    p.enableTracker = true;
                    p.disableStabilization = true;

                    DWORD sehCode = 0;
                    bool lpSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 2 LPD SEH exception 0x%08X — LP detector disabled", sehCode);
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine",
                            "Step 2: LP detector crashed (SEH). LP detector disabled.", __FILE__, __LINE__);
                        if (_lpDetector) _lpDetector.reset();
                    }
                    else if (!lpSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load LP detector (ONNX Runtime).", __FILE__, __LINE__);
                        if (_lpDetector) _lpDetector.reset();
                    }
                }
            }
        }

        // The LP detector is mandatory — bail out if neither backend loaded.
        if (!_lpDetector) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "LP detector failed to load. Cannot proceed.", __FILE__, __LINE__);
            _isInitialized = false;
            return false;
        }

        // ── Step 3: Load OCR engine (ANSONNXOCR) ─────────────────────
        // The OCR models (ansocrdec.onnx, ansocrcls.onnx, ansocrrec.onnx,
        // dict_ch.txt) are bundled inside the same ALPR model zip, so we
        // pass the original ALPR zip path to ANSONNXOCR::Initialize.
        // ANSOCRBase::Initialize will extract it (no-op if already done)
        // and discover the OCR model files in the extracted folder.
        WriteEventLog("ANSALPR_OCR::LoadEngine: Step 3 - Loading OCR engine (ANSONNXOCR)");
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: Loading OCR engine (ANSONNXOCR)", __FILE__, __LINE__);

        // Verify OCR model files exist in the already-extracted folder
        std::string ocrDetModel = CreateFilePath(_modelFolder, "ansocrdec.onnx");
        std::string ocrRecModel = CreateFilePath(_modelFolder, "ansocrrec.onnx");
        if (!FileExist(ocrDetModel) || !FileExist(ocrRecModel)) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine",
                "OCR model files not found in model folder: " + _modelFolder +
                " (expected ansocrdec.onnx, ansocrrec.onnx)", __FILE__, __LINE__);
            _isInitialized = false;
            return false;
        }

        _ocrEngine = std::make_unique<ANSONNXOCR>();

        // Determine OCR language based on country
        OCRLanguage ocrLang = OCRLanguage::ENGLISH;
        switch (_country) {
        case Country::JAPAN: ocrLang = OCRLanguage::JAPANESE; break;
        case Country::CHINA: ocrLang = OCRLanguage::CHINESE; break;
        case Country::VIETNAM: ocrLang = OCRLanguage::ENGLISH; break;
        case Country::AUSTRALIA: ocrLang = OCRLanguage::ENGLISH; break;
        case Country::USA: ocrLang = OCRLanguage::ENGLISH; break;
        case Country::INDONESIA: ocrLang = OCRLanguage::ENGLISH; break;
        default: ocrLang = OCRLanguage::ENGLISH; break;
        }

        OCRModelConfig ocrModelConfig;
        ocrModelConfig.ocrLanguage = ocrLang;
        ocrModelConfig.useDetector = true;
        ocrModelConfig.useRecognizer = true;
        // Skip the angle classifier for ALPR. License-plate boxes
        // from the YOLO detector are already axis-aligned, so the
        // 180° classifier is dead weight (one extra ORT call per
        // plate for no recall gain).
        ocrModelConfig.useCLS = false;
        ocrModelConfig.useLayout = false;
        ocrModelConfig.useTable = false;
        ocrModelConfig.useTensorRT = true;
        ocrModelConfig.enableMKLDNN = false;
        ocrModelConfig.useDilation = true;
        ocrModelConfig.useAngleCLS = false;
        ocrModelConfig.gpuId = 0;
        ocrModelConfig.detectionDBThreshold = 0.5;
        ocrModelConfig.detectionBoxThreshold = 0.3;
        ocrModelConfig.detectionDBUnclipRatio = 1.2;
        ocrModelConfig.clsThreshold = 0.9;
        ocrModelConfig.limitSideLen = 480;

        // Pass the original ALPR model zip path — ANSOCRBase::Initialize
        // will extract it to the same folder (already done, so extraction
        // is a no-op) and set up ansocrdec.onnx / ansocrcls.onnx /
        // ansocrrec.onnx / dict_ch.txt paths automatically.
        bool ocrSuccess = _ocrEngine->Initialize(_licenseKey, ocrModelConfig, _modelZipFilePath, "", 0);
        if (!ocrSuccess) {
            this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Failed to initialize OCR engine (ANSONNXOCR).", __FILE__, __LINE__);
            _ocrEngine.reset();
            _isInitialized = false;
            return false;
        }

        // Set ALPR mode and country on the OCR engine
        _ocrEngine->SetOCRMode(OCRMode::OCR_ALPR);
        _ocrEngine->SetCountry(_country);

        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 3: OCR engine loaded successfully.", __FILE__, __LINE__);

        // ── Step 4: Load colour classifier (optional) ────────────────
        if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
            // Try TensorRT (ANSRTYOLO) when NVIDIA GPU is detected
            if (engineType == EngineType::NVIDIA_GPU) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with TensorRT");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with TensorRT", __FILE__, __LINE__);
                _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                _lpColourModelConfig.modelType = ModelType::RTYOLO;
                {
                    LoadRtParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpColourModelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpc";
                    p.classFile = "lpc.names";
                    p.labels = &_lpColourLabels;
                    p.detector = &_lpColourDetector;
                    p.enableTracker = false;
                    p.disableStabilization = false;

                    DWORD sehCode = 0;
                    bool colourSuccess = LoadRtModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 4 LPC TRT SEH exception 0x%08X — falling back to ONNX Runtime", sehCode);
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 4: Colour classifier TensorRT crashed (SEH). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        if (_lpColourDetector) _lpColourDetector.reset();
                    }
                    else if (!colourSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load colour classifier (TensorRT). Falling back to ONNX Runtime.", __FILE__, __LINE__);
                        if (_lpColourDetector) _lpColourDetector.reset();
                    }
                }
            }

            // Fallback to ONNX Runtime (ANSONNXYOLO) if TRT was not attempted or failed
            if (!_lpColourDetector) {
                WriteEventLog("ANSALPR_OCR::LoadEngine: Step 4 - Loading colour classifier with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 4: Loading colour classifier with ONNX Runtime", __FILE__, __LINE__);
                _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                _lpColourModelConfig.modelType = ModelType::ONNXYOLO;
                {
                    LoadOnnxParams_OCR p{};
                    p.licenseKey = &_licenseKey;
                    p.config = &_lpColourModelConfig;
                    p.modelFolder = &_modelFolder;
                    p.modelName = "lpc";
                    p.classFile = "lpc.names";
                    p.labels = &_lpColourLabels;
                    p.detector = &_lpColourDetector;
                    p.enableTracker = false;
                    p.disableStabilization = false;

                    DWORD sehCode = 0;
                    bool colourSuccess = LoadOnnxModel_OCR_SEH(p, &sehCode);
                    if (sehCode != 0) {
                        char buf[256];
                        snprintf(buf, sizeof(buf),
                            "ANSALPR_OCR::LoadEngine: Step 4 LPC SEH exception 0x%08X — colour detection disabled", sehCode);
                        WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Step 4: Colour classifier crashed. Colour detection disabled.", __FILE__, __LINE__);
                        if (_lpColourDetector) _lpColourDetector.reset();
                    }
                    else if (!colourSuccess) {
                        this->_logger.LogError("ANSALPR_OCR::LoadEngine",
                            "Failed to load colour detector (ONNX Runtime). Colour detection disabled.", __FILE__, __LINE__);
                        if (_lpColourDetector) _lpColourDetector.reset();
                    }
                }
            }
        }

        valid = true;
        _isInitialized = valid;
        WriteEventLog(("ANSALPR_OCR::LoadEngine: Step 5 - Engine load complete. Valid = " + std::to_string(valid)).c_str());
        this->_logger.LogInfo("ANSALPR_OCR::LoadEngine", "Step 5: Engine load complete. Valid = " + std::to_string(valid), __FILE__, __LINE__);
        return valid;
    }
    catch (std::exception& e) {
        WriteEventLog(("ANSALPR_OCR::LoadEngine: C++ exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", std::string("C++ exception: ") + e.what(), __FILE__, __LINE__);
        _isInitialized = false;
        return false;
    }
    catch (...) {
        WriteEventLog("ANSALPR_OCR::LoadEngine: Unknown exception", EVENTLOG_ERROR_TYPE);
        this->_logger.LogFatal("ANSALPR_OCR::LoadEngine", "Unknown exception", __FILE__, __LINE__);
        _isInitialized = false;
        return false;
    }
}
||
|
||
// ── Colour detection (same pattern as ANSALPR_OD) ────────────────────
|
||
std::string ANSALPR_OCR::DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId) {
|
||
if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f) return {};
|
||
if (!_lpColourDetector) return {};
|
||
if (lprROI.empty()) return {};
|
||
|
||
try {
|
||
std::vector<Object> colourOutputs = _lpColourDetector->RunInference(lprROI, cameraId);
|
||
if (colourOutputs.empty()) return {};
|
||
|
||
const auto& bestDetection = *std::max_element(
|
||
colourOutputs.begin(), colourOutputs.end(),
|
||
[](const Object& a, const Object& b) { return a.confidence < b.confidence; }
|
||
);
|
||
return bestDetection.className;
|
||
}
|
||
catch (const std::exception& e) {
|
||
this->_logger.LogFatal("ANSALPR_OCR::DetectLPColourDetector", e.what(), __FILE__, __LINE__);
|
||
return {};
|
||
}
|
||
}
|
||
|
||
// Colour classification with a per-plate-text memo. Plates keep their colour,
// so once a text has been classified we return the cached result and skip
// inference entirely; empty results are never cached.
std::string ANSALPR_OCR::DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText) {
    // No plate text means no cache key — classify directly.
    if (plateText.empty()) {
        return DetectLPColourDetector(lprROI, cameraId);
    }

    // Fast path: reuse a previously classified colour for this plate.
    {
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        auto found = _colourCache.find(plateText);
        if (found != _colourCache.end()) {
            ++found->second.hitCount;
            return found->second.colour;
        }
    }

    // Slow path: run the classifier outside the lock.
    std::string detected = DetectLPColourDetector(lprROI, cameraId);

    if (!detected.empty()) {
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        // Crude eviction: wipe the whole cache once the cap is reached.
        if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
            _colourCache.clear();
        }
        _colourCache[plateText] = { detected, 0 };
    }

    return detected;
}
|
||
// ── Classical perspective rectification ─────────────────────────────
|
||
// Takes the axis-aligned LP YOLO bbox and tries to warp the plate to
|
||
// a tight rectangle whose height is fixed and whose width preserves
|
||
// the detected plate's actual aspect ratio. This removes camera
|
||
// tilt/yaw, strips background margin, and normalizes character
|
||
// spacing — which makes the recognizer see an image much closer to
|
||
// its training distribution and reduces silent character drops.
|
||
//
|
||
// Works entirely in classical OpenCV (Canny + findContours +
|
||
// approxPolyDP + getPerspectiveTransform + warpPerspective), so it
|
||
// needs no new models and no retraining. Fails gracefully (returns
|
||
// false) on plates where the border can't be isolated — caller falls
|
||
// back to the padded axis-aligned crop in that case.
|
||
std::vector<cv::Point2f>
|
||
ANSALPR_OCR::OrderQuadCorners(const std::vector<cv::Point>& pts) {
|
||
// Standard TL/TR/BR/BL ordering via x+y / y-x extrema. Robust to
|
||
// input winding order (clockwise vs counter-clockwise) and to
|
||
// approxPolyDP starting the polygon at an arbitrary corner.
|
||
std::vector<cv::Point2f> ordered(4);
|
||
if (pts.size() != 4) return ordered;
|
||
|
||
auto sum = [](const cv::Point& p) { return p.x + p.y; };
|
||
auto diff = [](const cv::Point& p) { return p.y - p.x; };
|
||
|
||
int idxMinSum = 0, idxMaxSum = 0, idxMinDiff = 0, idxMaxDiff = 0;
|
||
for (int i = 1; i < 4; ++i) {
|
||
if (sum(pts[i]) < sum(pts[idxMinSum])) idxMinSum = i;
|
||
if (sum(pts[i]) > sum(pts[idxMaxSum])) idxMaxSum = i;
|
||
if (diff(pts[i]) < diff(pts[idxMinDiff])) idxMinDiff = i;
|
||
if (diff(pts[i]) > diff(pts[idxMaxDiff])) idxMaxDiff = i;
|
||
}
|
||
ordered[0] = cv::Point2f(static_cast<float>(pts[idxMinSum].x), static_cast<float>(pts[idxMinSum].y)); // TL
|
||
ordered[1] = cv::Point2f(static_cast<float>(pts[idxMinDiff].x), static_cast<float>(pts[idxMinDiff].y)); // TR
|
||
ordered[2] = cv::Point2f(static_cast<float>(pts[idxMaxSum].x), static_cast<float>(pts[idxMaxSum].y)); // BR
|
||
ordered[3] = cv::Point2f(static_cast<float>(pts[idxMaxDiff].x), static_cast<float>(pts[idxMaxDiff].y)); // BL
|
||
return ordered;
|
||
}
|
||
|
||
// Attempts classical perspective rectification of a detected plate bbox:
// isolates the plate border with Canny + contours, finds the best convex
// quadrilateral (or falls back to minAreaRect), and warps it to an upright
// rectangle of height kRectifiedHeight preserving the measured aspect
// ratio. Returns false (leaving outRectified untouched on the early-return
// paths) whenever the border cannot be isolated plausibly — the caller is
// expected to fall back to the plain axis-aligned crop.
bool ANSALPR_OCR::RectifyPlateROI(
    const cv::Mat& source,
    const cv::Rect& bbox,
    cv::Mat& outRectified) const
{
    if (source.empty()) return false;
    // Clip the bbox to the frame and reject degenerate boxes.
    cv::Rect clamped = bbox & cv::Rect(0, 0, source.cols, source.rows);
    if (clamped.width <= 20 || clamped.height <= 10) return false;

    const cv::Mat roi = source(clamped);
    const double roiArea = static_cast<double>(roi.rows) * roi.cols;
    // A candidate contour must cover at least this fraction of the ROI.
    const double minArea = roiArea * kRectifyAreaFraction;

    // Step 1: grayscale + blur + Canny to find plate border edges.
    cv::Mat gray;
    if (roi.channels() == 3) {
        cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY);
    } else if (roi.channels() == 4) {
        cv::cvtColor(roi, gray, cv::COLOR_BGRA2GRAY);
    } else {
        gray = roi;
    }
    cv::GaussianBlur(gray, gray, cv::Size(5, 5), 0);
    cv::Mat edges;
    cv::Canny(gray, edges, 50, 150);

    // Close small gaps in the plate border so findContours sees it as
    // one closed shape rather than several broken line segments.
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
    cv::morphologyEx(edges, edges, cv::MORPH_CLOSE, kernel);

    // Step 2: find external contours.
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(edges, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
    if (contours.empty()) return false;

    // Step 3: find the largest contour whose approxPolyDP collapses
    // to 4 vertices. That's most likely the plate border.
    std::vector<cv::Point> bestQuad;
    double bestArea = 0.0;
    for (const auto& c : contours) {
        const double area = cv::contourArea(c);
        if (area < minArea) continue;

        // Sweep epsilon — tighter approximations require more vertices,
        // looser approximations collapse to fewer. We want the
        // smallest epsilon at which the contour becomes a quadrilateral.
        std::vector<cv::Point> approx;
        const double perimeter = cv::arcLength(c, true);
        for (double eps = 0.02; eps <= 0.08; eps += 0.01) {
            cv::approxPolyDP(c, approx, eps * perimeter, true);
            if (approx.size() == 4) break;
        }
        if (approx.size() == 4 && area > bestArea) {
            // Verify the quadrilateral is convex — a non-convex
            // 4-point contour is almost certainly not a plate
            if (cv::isContourConvex(approx)) {
                bestArea = area;
                bestQuad = approx;
            }
        }
    }

    // Step 4: fallback — minAreaRect on the largest contour. This
    // handles pure rotation but not arbitrary perspective skew.
    if (bestQuad.empty()) {
        auto largest = std::max_element(contours.begin(), contours.end(),
            [](const std::vector<cv::Point>& a, const std::vector<cv::Point>& b) {
                return cv::contourArea(a) < cv::contourArea(b);
            });
        if (largest == contours.end()) return false;
        if (cv::contourArea(*largest) < minArea) return false;

        cv::RotatedRect rr = cv::minAreaRect(*largest);
        cv::Point2f pts[4];
        rr.points(pts);
        bestQuad.reserve(4);
        for (int i = 0; i < 4; ++i) {
            bestQuad.emplace_back(static_cast<int>(pts[i].x),
                static_cast<int>(pts[i].y));
        }
    }

    // Step 5: order the 4 corners as TL/TR/BR/BL.
    std::vector<cv::Point2f> srcCorners = OrderQuadCorners(bestQuad);

    // Measure the source quadrilateral's dimensions so the output
    // rectangle preserves the real plate aspect ratio. Without this,
    // a wide single-row plate would be squashed to 2:1 and a 2-row
    // plate would be stretched to wrong proportions.
    auto pointDist = [](const cv::Point2f& a, const cv::Point2f& b) -> float {
        const float dx = a.x - b.x;
        const float dy = a.y - b.y;
        return std::sqrt(dx * dx + dy * dy);
    };
    const float topEdge = pointDist(srcCorners[0], srcCorners[1]);
    const float bottomEdge = pointDist(srcCorners[3], srcCorners[2]);
    const float leftEdge = pointDist(srcCorners[0], srcCorners[3]);
    const float rightEdge = pointDist(srcCorners[1], srcCorners[2]);
    const float srcW = std::max(topEdge, bottomEdge);
    const float srcH = std::max(leftEdge, rightEdge);
    if (srcW < 20.f || srcH < 10.f) return false;

    const float srcAspect = srcW / srcH;
    // Gate rectification on plausible plate aspect ratios. Anything
    // wildly outside the range isn't a plate; fall back to the axis-
    // aligned crop rather than produce a distorted warp.
    if (srcAspect < kMinPlateAspect || srcAspect > kMaxPlateAspect) {
        return false;
    }

    // Step 6: warp to a rectangle that preserves aspect ratio. Height
    // is fixed (kRectifiedHeight) so downstream sizing is predictable.
    const int outH = kRectifiedHeight;
    const int outW = std::clamp(static_cast<int>(std::round(outH * srcAspect)),
        kRectifiedHeight,      // min: square
        kRectifiedHeight * 6); // max: 6:1 long plates
    std::vector<cv::Point2f> dstCorners = {
        { 0.f, 0.f },
        { static_cast<float>(outW - 1), 0.f },
        { static_cast<float>(outW - 1), static_cast<float>(outH - 1) },
        { 0.f, static_cast<float>(outH - 1) }
    };

    const cv::Mat M = cv::getPerspectiveTransform(srcCorners, dstCorners);
    cv::warpPerspective(roi, outRectified, M, cv::Size(outW, outH),
        cv::INTER_LINEAR, cv::BORDER_REPLICATE);
    return !outRectified.empty();
}
||
|
||
// ── Japan-only: kana recovery on a plate where the fast path silently
|
||
// dropped the hiragana from the bottom row ────────────────────────
|
||
ANSALPR_OCR::CodepointClassCounts
|
||
ANSALPR_OCR::CountCodepointClasses(const std::string& text) {
|
||
CodepointClassCounts counts;
|
||
size_t pos = 0;
|
||
while (pos < text.size()) {
|
||
const size_t before = pos;
|
||
uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos);
|
||
if (cp == 0 || pos == before) break;
|
||
if (ANSOCRUtility::IsCharClass(cp, CHAR_DIGIT)) counts.digit++;
|
||
if (ANSOCRUtility::IsCharClass(cp, CHAR_KANJI)) counts.kanji++;
|
||
if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA)) counts.hiragana++;
|
||
if (ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) counts.katakana++;
|
||
}
|
||
return counts;
|
||
}
|
||
|
||
bool ANSALPR_OCR::IsJapaneseIncomplete(const std::string& text) {
|
||
// A valid Japanese plate has at least one kanji in the region
|
||
// zone, at least one hiragana/katakana in the kana zone, and at
|
||
// least four digits split between classification (top) and
|
||
// designation (bottom).
|
||
//
|
||
// We only consider a plate "incomplete and worth recovering"
|
||
// when it ALREADY LOOKS Japanese on the fast path — i.e. the
|
||
// kanji region was found successfully. Gating on kanji > 0
|
||
// prevents the recovery path from firing on non-Japanese plates
|
||
// (Latin-only, European, Macau, etc.) where there's no kana to
|
||
// find anyway, which previously wasted ~35 ms per plate burning
|
||
// all recovery attempts on a search that can never succeed.
|
||
//
|
||
// For non-Japanese plates the function returns false, recovery
|
||
// is skipped, and latency is identical to the pre-recovery
|
||
// baseline.
|
||
const CodepointClassCounts c = CountCodepointClasses(text);
|
||
if (c.kanji == 0) return false; // Not a Japanese plate
|
||
if (c.digit < 4) return false; // Not enough digits — probably garbage
|
||
const int kana = c.hiragana + c.katakana;
|
||
return (kana == 0); // Kanji + digits present, kana missing
|
||
}
|
||
|
||
// Strip screws/rivets/dirt that the recognizer misreads as small
|
||
// round punctuation glyphs. The blacklist is deliberately narrow:
|
||
// only characters that are never legitimate plate content on any
|
||
// country we support. Middle dots (・ and ·) are KEPT because they
|
||
// are legitimate padding on Japanese plates with <4 designation
|
||
// digits (e.g. "・274"), and they get normalised to "0" by
|
||
// ALPRPostProcessing's zone corrections anyway.
|
||
std::string ANSALPR_OCR::StripPlateArtifacts(const std::string& text) {
|
||
if (text.empty()) return text;
|
||
std::string stripped;
|
||
stripped.reserve(text.size());
|
||
size_t pos = 0;
|
||
while (pos < text.size()) {
|
||
const size_t before = pos;
|
||
uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos);
|
||
if (cp == 0 || pos == before) break;
|
||
|
||
bool drop = false;
|
||
switch (cp) {
|
||
// Small round glyphs that mimic screws / rivets
|
||
case 0x00B0: // ° degree sign
|
||
case 0x02DA: // ˚ ring above
|
||
case 0x2218: // ∘ ring operator
|
||
case 0x25CB: // ○ white circle
|
||
case 0x25CF: // ● black circle
|
||
case 0x25E6: // ◦ white bullet
|
||
case 0x2022: // • bullet
|
||
case 0x2219: // ∙ bullet operator
|
||
case 0x25A0: // ■ black square
|
||
case 0x25A1: // □ white square
|
||
// Quote-like glyphs picked up from plate border / dirt
|
||
case 0x0022: // " ASCII double quote
|
||
case 0x0027: // ' ASCII apostrophe
|
||
case 0x201C: // " LEFT DOUBLE QUOTATION MARK (smart quote)
|
||
case 0x201D: // " RIGHT DOUBLE QUOTATION MARK
|
||
case 0x201E: // „ DOUBLE LOW-9 QUOTATION MARK
|
||
case 0x201F: // ‟ DOUBLE HIGH-REVERSED-9 QUOTATION MARK
|
||
case 0x2018: // ' LEFT SINGLE QUOTATION MARK
|
||
case 0x2019: // ' RIGHT SINGLE QUOTATION MARK
|
||
case 0x201A: // ‚ SINGLE LOW-9 QUOTATION MARK
|
||
case 0x201B: // ‛ SINGLE HIGH-REVERSED-9 QUOTATION MARK
|
||
case 0x00AB: // « LEFT-POINTING DOUBLE ANGLE QUOTATION
|
||
case 0x00BB: // » RIGHT-POINTING DOUBLE ANGLE QUOTATION
|
||
case 0x2039: // ‹ SINGLE LEFT-POINTING ANGLE QUOTATION
|
||
case 0x203A: // › SINGLE RIGHT-POINTING ANGLE QUOTATION
|
||
case 0x301D: // 〝 REVERSED DOUBLE PRIME QUOTATION
|
||
case 0x301E: // 〞 DOUBLE PRIME QUOTATION
|
||
case 0x301F: // 〟 LOW DOUBLE PRIME QUOTATION
|
||
case 0x300A: // 《 LEFT DOUBLE ANGLE BRACKET
|
||
case 0x300B: // 》 RIGHT DOUBLE ANGLE BRACKET
|
||
case 0x3008: // 〈 LEFT ANGLE BRACKET
|
||
case 0x3009: // 〉 RIGHT ANGLE BRACKET
|
||
// Ideographic punctuation that isn't valid plate content
|
||
case 0x3002: // 。 ideographic full stop
|
||
case 0x3001: // 、 ideographic comma
|
||
case 0x300C: // 「 left corner bracket
|
||
case 0x300D: // 」 right corner bracket
|
||
case 0x300E: // 『 left white corner bracket
|
||
case 0x300F: // 』 right white corner bracket
|
||
// ASCII punctuation noise picked up from plate borders
|
||
case 0x0060: // ` grave accent
|
||
case 0x007E: // ~ tilde
|
||
case 0x005E: // ^ caret
|
||
case 0x007C: // | vertical bar
|
||
case 0x005C: // \ backslash
|
||
case 0x002F: // / forward slash
|
||
case 0x0028: // ( left paren
|
||
case 0x0029: // ) right paren
|
||
case 0x005B: // [ left bracket
|
||
case 0x005D: // ] right bracket
|
||
case 0x007B: // { left brace
|
||
case 0x007D: // } right brace
|
||
case 0x003C: // < less than
|
||
case 0x003E: // > greater than
|
||
// Misc symbols that round glyphs can collapse to
|
||
case 0x00A9: // © copyright sign
|
||
case 0x00AE: // ® registered sign
|
||
case 0x2117: // ℗ sound recording copyright
|
||
case 0x2122: // ™ trademark
|
||
drop = true;
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
if (!drop) {
|
||
stripped.append(text, before, pos - before);
|
||
}
|
||
}
|
||
|
||
// Collapse runs of spaces introduced by stripping, and trim.
|
||
std::string collapsed;
|
||
collapsed.reserve(stripped.size());
|
||
bool prevSpace = false;
|
||
for (char c : stripped) {
|
||
if (c == ' ') {
|
||
if (!prevSpace) collapsed.push_back(c);
|
||
prevSpace = true;
|
||
} else {
|
||
collapsed.push_back(c);
|
||
prevSpace = false;
|
||
}
|
||
}
|
||
const size_t first = collapsed.find_first_not_of(' ');
|
||
if (first == std::string::npos) return "";
|
||
const size_t last = collapsed.find_last_not_of(' ');
|
||
return collapsed.substr(first, last - first + 1);
|
||
}
|
||
|
||
/// Re-crop the kana zone of a 2-row Japanese plate and run the text
/// recognizer on just that tile.
///
/// @param plateROI  Full (unsplit) plate crop.
/// @param halfH     Row-split Y inside plateROI (top/bottom boundary).
/// @return          Exactly one recovered hiragana/katakana codepoint
///                  as a UTF-8 string, or "" when no kana was found.
std::string ANSALPR_OCR::RecoverKanaFromBottomHalf(
    const cv::Mat& plateROI, int halfH) const
{
    // Preconditions: need a recognizer, a usable plate image, and a
    // split boundary strictly inside the plate.
    if (!_ocrEngine || plateROI.empty()) return "";
    const int plateW = plateROI.cols;
    const int plateH = plateROI.rows;
    if (plateW < 40 || plateH < 30 || halfH <= 0 || halfH >= plateH) {
        ANS_DBG("ALPR_Kana",
            "Recovery SKIP: plate too small (%dx%d, halfH=%d)",
            plateW, plateH, halfH);
        return "";
    }

    ANS_DBG("ALPR_Kana",
        "Recovery START: plate=%dx%d halfH=%d bottomHalf=%dx%d",
        plateW, plateH, halfH, plateW, plateH - halfH);

    // The kana on a Japanese plate sits in the left ~30% of the
    // bottom row and is roughly square. Try 3 well-chosen crop
    // positions — one center, one slightly high, one wider — and
    // bail out on the first that yields a hiragana/katakana hit.
    //
    // 3 attempts is the sweet spot: it catches the common row-split
    // variation without burning linear time on every fail-case.
    // Previous versions tried 7 attempts, which added ~20 ms/plate
    // of pure waste when recovery couldn't find any kana anyway.
    //
    // Tiles shorter than 48 px are upscaled to 48 px height before
    // recognition so the recognizer sees something close to its
    // training distribution. PaddleOCR's rec model expects 48 px
    // height and breaks down when given very small crops.
    struct TileSpec {
        float widthFraction; // fraction of plateW
        float yOffset;       // 0.0 = top of bottom half, 1.0 = bottom
    };
    const TileSpec attempts[] = {
        { 0.30f, 0.50f }, // primary: 30% wide, centered vertically
        { 0.30f, 0.35f }, // row split landed too low — try higher
        { 0.35f, 0.50f }, // slightly wider crop for off-center kana
    };

    int attemptNo = 0;
    for (const TileSpec& spec : attempts) {
        attemptNo++;
        // Tile width: requested fraction, clamped to [30, plateW] px.
        int tileW = static_cast<int>(plateW * spec.widthFraction);
        if (tileW < 30) tileW = 30;
        if (tileW > plateW) tileW = plateW;

        // Prefer square tile, but allow non-square if the bottom
        // half is short. Clipped to bottom-half height.
        int tileH = tileW;
        const int bottomHalfH = plateH - halfH;
        if (tileH > bottomHalfH) tileH = bottomHalfH;
        if (tileH < 20) continue;

        // Center the tile vertically on the requested offset within
        // the bottom half, then clamp so it stays inside the plate.
        const int centerY = halfH + static_cast<int>(bottomHalfH * spec.yOffset);
        int cy = centerY - tileH / 2;
        if (cy < halfH) cy = halfH;
        if (cy + tileH > plateH) cy = plateH - tileH;
        if (cy < 0) cy = 0;

        // Tile is anchored to the left edge (kana zone is leftmost).
        const int cx = 0;
        int cw = tileW;
        int ch = tileH;
        if (cx + cw > plateW) cw = plateW - cx;
        if (cy + ch > plateH) ch = plateH - cy;
        if (cw <= 10 || ch <= 10) continue;

        cv::Mat kanaTile = plateROI(cv::Rect(cx, cy, cw, ch));

        // Upscale tiles shorter than 48 px so the recognizer sees
        // something close to its training input size. Preserve
        // aspect ratio; cv::INTER_CUBIC keeps character strokes
        // sharper than bilinear.
        cv::Mat tileForRec;
        if (kanaTile.rows < 48) {
            const double scale = 48.0 / kanaTile.rows;
            cv::resize(kanaTile, tileForRec, cv::Size(),
                scale, scale, cv::INTER_CUBIC);
        } else {
            tileForRec = kanaTile;
        }

        // Single-tile batch through the shared recognizer.
        std::vector<cv::Mat> tileBatch{ tileForRec };
        auto tileResults = _ocrEngine->RecognizeTextBatch(tileBatch);
        if (tileResults.empty()) {
            ANS_DBG("ALPR_Kana",
                "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) "
                "→ recognizer returned empty batch",
                attemptNo, cw, ch, tileForRec.cols, tileForRec.rows,
                spec.widthFraction, spec.yOffset);
            continue;
        }

        const std::string& text = tileResults[0].first;
        const float conf = tileResults[0].second;
        ANS_DBG("ALPR_Kana",
            "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) "
            "→ '%s' conf=%.3f",
            attemptNo, cw, ch, tileForRec.cols, tileForRec.rows,
            spec.widthFraction, spec.yOffset, text.c_str(), conf);

        if (text.empty()) continue;

        // Japanese plate kana is ALWAYS exactly 1 hiragana or
        // katakana character. We accept ONLY that — nothing else.
        // Kanji, Latin letters, digits, punctuation, everything
        // non-kana is rejected. The returned string is exactly the
        // one kana codepoint or empty.
        //
        // Strictness is deliberate: the relaxed "any letter class"
        // accept path was letting through kanji bleed from the
        // region-name zone when the tile positioning was slightly
        // off, producing wrong plate text like "59-V3 西 752.23" or
        // "JCL 三". With strict-only accept, a miss in the recovery
        // is silent and the fast-path result passes through unchanged.
        std::string firstKana; // first CHAR_HIRAGANA / CHAR_KATAKANA hit
        int codepointCount = 0;
        size_t pos = 0;
        while (pos < text.size()) {
            const size_t before = pos;
            uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos);
            if (cp == 0 || pos == before) break;
            codepointCount++;
            // Keep counting codepoints (for the debug log) even after
            // the first kana has been captured.
            if (!firstKana.empty()) continue;

            if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA) ||
                ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) {
                firstKana = text.substr(before, pos - before);
            }
        }

        if (!firstKana.empty()) {
            ANS_DBG("ALPR_Kana",
                "Recovery SUCCESS at attempt %d: extracted '%s' "
                "from raw '%s' (%d codepoints, conf=%.3f)",
                attemptNo, firstKana.c_str(), text.c_str(),
                codepointCount, conf);
            return firstKana;
        }
    }
    ANS_DBG("ALPR_Kana",
        "Recovery FAILED: no kana found in %d attempts",
        attemptNo);
    return "";
}
||
// ── Full-frame vs pipeline auto-detection ────────────────────────────
|
||
// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
|
||
// watches whether consecutive frames from a given camera have the exact
|
||
// same (width, height). Pre-cropped pipeline inputs vary by a few
|
||
// pixels per crop, so the exact-match check fails and we return false.
|
||
// Real video frames are pixel-identical across frames, so after a few
|
||
// consistent frames we flip into FULL-FRAME mode and start running the
|
||
// ALPRChecker voting + ensureUniquePlateText dedup.
|
||
bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
|
||
const std::string& cameraId) {
|
||
// Force disabled via SetALPRCheckerEnabled(false) → never use.
|
||
if (!_enableALPRChecker) return false;
|
||
|
||
// Small images are always pipeline crops — skip auto-detection.
|
||
if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) return false;
|
||
|
||
auto& tracker = _imageSizeTrackers[cameraId];
|
||
bool wasFullFrame = tracker.detectedFullFrame;
|
||
if (imageSize == tracker.lastSize) {
|
||
tracker.consistentCount++;
|
||
if (tracker.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
|
||
tracker.detectedFullFrame = true;
|
||
}
|
||
} else {
|
||
tracker.lastSize = imageSize;
|
||
tracker.consistentCount = 1;
|
||
tracker.detectedFullFrame = false;
|
||
}
|
||
if (tracker.detectedFullFrame != wasFullFrame) {
|
||
ANS_DBG("ALPR_OCR_Checker",
|
||
"cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
|
||
cameraId.c_str(),
|
||
tracker.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
|
||
imageSize.width, imageSize.height, tracker.consistentCount);
|
||
}
|
||
return tracker.detectedFullFrame;
|
||
}
|
||
|
||
// ── Spatial plate dedup with accumulated scoring ─────────────────────
// Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one
// detection in the same frame ends up with the same plate text (e.g.
// tracker occlusion or two cars in a single frame reading the same
// string), we resolve the ambiguity by accumulating confidence per
// spatial location across frames. The location with the higher running
// score keeps the plate text; the loser has its className cleared and
// is dropped from the output.
//
// Access to the per-camera identity store (_plateIdentities) is
// serialised with _plateIdentitiesMutex for the whole call.
void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
    const std::string& cameraId) {
    std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
    auto& identities = _plateIdentities[cameraId];

    // Auto-detect mode by detection count.
    // 1 detection → pipeline/single-crop mode → no dedup needed.
    // 2+ detections → full-frame mode → apply accumulated scoring.
    if (results.size() <= 1) {
        // Still age out stale spatial identities from previous full-frame calls
        if (!identities.empty()) {
            constexpr int MAX_UNSEEN_FRAMES = 30;
            for (auto& id : identities) id.framesSinceLastSeen++;
            for (auto it = identities.begin(); it != identities.end(); ) {
                if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
                    it = identities.erase(it);
                } else {
                    ++it;
                }
            }
        }
        return;
    }

    // Helper: IoU between two rects.
    auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
        int x1 = std::max(a.x, b.x);
        int y1 = std::max(a.y, b.y);
        int x2 = std::min(a.x + a.width, b.x + b.width);
        int y2 = std::min(a.y + a.height, b.y + b.height);
        if (x2 <= x1 || y2 <= y1) return 0.0f;
        float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
        float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
        return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
    };

    // Helper: find matching spatial identity by bounding-box overlap.
    // The stored identity only keeps a center point, so a comparison
    // rect is synthesised around it using the query box's dimensions.
    auto findSpatialMatch = [&](const cv::Rect& box,
        const std::string& plateText) -> SpatialPlateIdentity* {
        for (auto& id : identities) {
            if (id.plateText == plateText) {
                cv::Rect storedRect(
                    static_cast<int>(id.center.x - box.width * 0.5f),
                    static_cast<int>(id.center.y - box.height * 0.5f),
                    box.width, box.height);
                if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
                    return &id;
                }
            }
        }
        return nullptr;
    };

    // Step 1: Build map of plateText → candidate indices
    std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
    for (size_t i = 0; i < results.size(); ++i) {
        if (results[i].className.empty()) continue;
        plateCandidates[results[i].className].push_back(i);
    }

    // Step 2: Resolve duplicates using spatial accumulated scores.
    // Each candidate's score is its own confidence, boosted by the
    // accumulated history of a matching spatial identity if one exists.
    for (auto& [plateText, indices] : plateCandidates) {
        if (indices.size() <= 1) continue;

        size_t winner = indices[0];
        float bestScore = 0.0f;

        for (size_t idx : indices) {
            float score = results[idx].confidence;
            auto* match = findSpatialMatch(results[idx].box, plateText);
            if (match) {
                score = match->accumulatedScore + results[idx].confidence;
            }
            if (score > bestScore) {
                bestScore = score;
                winner = idx;
            }
        }

        // Losers are marked by clearing className; physically removed
        // in Step 4.
        for (size_t idx : indices) {
            if (idx != winner) {
                results[idx].className.clear();
            }
        }
    }

    // Step 3: Update spatial identities — winners accumulate, losers decay
    constexpr float DECAY_FACTOR = 0.8f;
    constexpr float MIN_SCORE = 0.1f;
    constexpr int MAX_UNSEEN_FRAMES = 30;

    for (auto& id : identities) id.framesSinceLastSeen++;

    for (auto& r : results) {
        if (r.className.empty()) continue;

        cv::Point2f center(
            r.box.x + r.box.width * 0.5f,
            r.box.y + r.box.height * 0.5f);

        auto* match = findSpatialMatch(r.box, r.className);
        if (match) {
            // Seen again at (roughly) the same spot: reinforce score,
            // refresh position, reset staleness counter.
            match->accumulatedScore += r.confidence;
            match->center = center;
            match->framesSinceLastSeen = 0;
        } else {
            // New spatial identity for this plate text.
            identities.push_back({ center, r.className, r.confidence, 0 });
        }
    }

    // Decay unseen identities and remove stale ones
    for (auto it = identities.begin(); it != identities.end(); ) {
        if (it->framesSinceLastSeen > 0) {
            it->accumulatedScore *= DECAY_FACTOR;
        }
        if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
            it = identities.erase(it);
        } else {
            ++it;
        }
    }

    // Step 4: Remove entries with cleared plate text
    results.erase(
        std::remove_if(results.begin(), results.end(),
            [](const Object& o) { return o.className.empty(); }),
        results.end());
}
||
// ── OCR on a single plate ROI ────────────────────────────────────────
|
||
// Returns the plate text via the out-parameter and populates alprExtraInfo
|
||
// with the structured ALPR JSON (zone parts) when ALPR mode is active.
|
||
std::string ANSALPR_OCR::RunOCROnPlate(const cv::Mat& plateROI, const std::string& cameraId) {
|
||
if (!_ocrEngine || plateROI.empty()) return "";
|
||
if (plateROI.cols < 10 || plateROI.rows < 10) return "";
|
||
|
||
try {
|
||
// Run the full ANSONNXOCR pipeline on the cropped plate image
|
||
std::vector<OCRObject> ocrResults = _ocrEngine->RunInference(plateROI, cameraId);
|
||
|
||
if (ocrResults.empty()) return "";
|
||
|
||
// If ALPR mode is active and we have plate formats, use the
|
||
// structured ALPR post-processing to get correct zone ordering
|
||
// (e.g. "品川 302 ま 93-15" instead of "品川30293-15ま")
|
||
const auto& alprFormats = _ocrEngine->GetALPRFormats();
|
||
if (_ocrEngine->GetOCRMode() == OCRMode::OCR_ALPR && !alprFormats.empty()) {
|
||
auto alprResults = ANSOCRUtility::ALPRPostProcessing(
|
||
ocrResults, alprFormats,
|
||
plateROI.cols, plateROI.rows,
|
||
_ocrEngine.get(), plateROI);
|
||
|
||
if (!alprResults.empty()) {
|
||
return alprResults[0].fullPlateText;
|
||
}
|
||
}
|
||
|
||
// Fallback: simple concatenation sorted by Y then X
|
||
std::sort(ocrResults.begin(), ocrResults.end(),
|
||
[](const OCRObject& a, const OCRObject& b) {
|
||
int rowThreshold = std::min(a.box.height, b.box.height) / 2;
|
||
if (std::abs(a.box.y - b.box.y) > rowThreshold) {
|
||
return a.box.y < b.box.y;
|
||
}
|
||
return a.box.x < b.box.x;
|
||
}
|
||
);
|
||
|
||
std::string fullText;
|
||
for (const auto& obj : ocrResults) {
|
||
if (!obj.className.empty()) {
|
||
fullText += obj.className;
|
||
}
|
||
}
|
||
|
||
return fullText;
|
||
}
|
||
catch (const std::exception& e) {
|
||
this->_logger.LogError("ANSALPR_OCR::RunOCROnPlate", e.what(), __FILE__, __LINE__);
|
||
return "";
|
||
}
|
||
}
|
||
|
||
// ── Main inference pipeline ──────────────────────────────────────────
// Detects license plates in `input`, OCRs all plates with a single
// batched recognizer call, optionally recovers missing Japanese kana,
// and — in auto-detected full-frame mode — applies cross-frame
// stabilization and spatial dedup.
//
// @param input     BGR or grayscale frame (grayscale is promoted to BGR).
// @param cameraId  Per-camera key for trackers, caches, and checker state.
// @return          One Object per plate (className = plate text), or an
//                  empty vector on any precondition failure or exception.
std::vector<Object> ANSALPR_OCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    // Guard clauses: valid license, initialized model, usable image,
    // and both engine pointers must be present.
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OCR::RunInference", "Input image is empty or too small", __FILE__, __LINE__);
        return {};
    }
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "_lpDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!_ocrEngine) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "_ocrEngine is null", __FILE__, __LINE__);
        return {};
    }

    try {
        // Convert grayscale to BGR if necessary
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;

        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;

        // Step 1: Detect license plates
        std::vector<Object> lprOutput = _lpDetector->RunInference(frame, cameraId);

        if (lprOutput.empty()) {
            return {};
        }

        // Step 2: Collect crops from every valid plate. Wide plates
        // (aspect >= 2.1) are treated as a single text line; narrow
        // plates (2-row layouts like Japanese) are split horizontally
        // at H/2 into top and bottom rows. All crops go through a
        // single batched recognizer call, bypassing the OCR text-line
        // detector entirely — for ALPR the LP YOLO box already bounds
        // the text region precisely.
        //
        // Per-plate preprocessing pipeline:
        //   1. Pad the YOLO LP bbox by 5% on each side so the plate
        //      border is visible to the rectifier and edge characters
        //      aren't clipped by a tight detector output.
        //   2. Try classical perspective rectification (Canny +
        //      findContours + approxPolyDP + warpPerspective) to
        //      straighten tilted / skewed plates. Falls back to the
        //      padded axis-aligned crop on failure — no regression.
        //   3. Run the 2-row split heuristic on whichever plate image
        //      we ended up with, using an aspect threshold of 2.1 so
        //      perfect-2:1 rectified Japanese plates still split.
        //
        // Rectification is gated on _country == JAPAN at runtime.
        // For all other countries we skip the classical-CV pipeline
        // entirely and use the plain padded axis-aligned crop — this
        // keeps non-Japan inference on the original fast path and
        // lets SetCountry(nonJapan) take effect on the very next
        // frame without a restart.
        const bool useRectification = (_country == Country::JAPAN);
        struct PlateInfo {
            size_t origIndex;                // into lprOutput
            std::vector<size_t> cropIndices; // into allCrops
            cv::Mat plateROI;                // full (unsplit) ROI, kept for colour + kana recovery
            int halfH = 0;                   // row-split Y inside plateROI (0 = single row)
        };
        std::vector<cv::Mat> allCrops;
        std::vector<PlateInfo> plateInfos;
        allCrops.reserve(lprOutput.size() * 2);
        plateInfos.reserve(lprOutput.size());

        for (size_t i = 0; i < lprOutput.size(); ++i) {
            const cv::Rect& box = lprOutput[i].box;

            // Calculate safe cropped region
            const int x1 = std::max(0, box.x);
            const int y1 = std::max(0, box.y);
            const int width = std::min(frameWidth - x1, box.width);
            const int height = std::min(frameHeight - y1, box.height);
            if (width <= 0 || height <= 0) continue;

            // Pad the YOLO LP bbox by 5% on each side. Gives the
            // rectifier some background for edge detection and helps
            // when the detector cropped a character edge.
            const int padX = std::max(2, width * 5 / 100);
            const int padY = std::max(2, height * 5 / 100);
            const int px = std::max(0, x1 - padX);
            const int py = std::max(0, y1 - padY);
            const int pw = std::min(frameWidth - px, width + 2 * padX);
            const int ph = std::min(frameHeight - py, height + 2 * padY);
            const cv::Rect paddedBox(px, py, pw, ph);

            // Perspective rectification is Japan-only to preserve
            // baseline latency on all other countries. On non-Japan
            // plates we go straight to the padded axis-aligned crop.
            cv::Mat plateROI;
            if (useRectification) {
                cv::Mat rectified;
                if (RectifyPlateROI(frame, paddedBox, rectified)) {
                    plateROI = rectified;           // owning 3-channel BGR
                } else {
                    plateROI = frame(paddedBox);    // non-owning view
                }
            } else {
                plateROI = frame(paddedBox);        // non-owning view
            }

            PlateInfo info;
            info.origIndex = i;
            info.plateROI = plateROI;

            const int plateW = plateROI.cols;
            const int plateH = plateROI.rows;
            const float aspect = static_cast<float>(plateW) /
                std::max(1, plateH);

            // 2-row heuristic: aspect < 2.1 → split top/bottom.
            // Bumped from 2.0 so a perfectly rectified Japanese plate
            // (aspect == 2.0) still splits correctly despite floating-
            // point rounding. Threshold still excludes wide EU/VN
            // plates (aspect 3.0+).
            if (aspect < 2.1f && plateH >= 24) {
                const int halfH = plateH / 2;
                info.halfH = halfH;
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI(cv::Rect(0, 0, plateW, halfH)));
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI(cv::Rect(0, halfH, plateW, plateH - halfH)));
            }
            else {
                info.halfH = 0;
                info.cropIndices.push_back(allCrops.size());
                allCrops.push_back(plateROI);
            }

            plateInfos.push_back(std::move(info));
        }

        if (allCrops.empty()) {
            return {};
        }

        // Step 3: Single batched recognizer call for every crop.
        // ONNXOCRRecognizer groups crops by bucket width and issues
        // one ORT Run per bucket — typically 1–2 GPU calls for an
        // entire frame regardless of plate count.
        auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);

        // Step 4: Assemble per-plate output
        std::vector<Object> output;
        output.reserve(plateInfos.size());

        // Decide once per frame whether the tracker-based correction
        // layer should run. We auto-detect full-frame vs pipeline mode
        // by watching for pixel-identical consecutive frames, exactly
        // the same way ANSALPR_OD does it.
        const bool useChecker = shouldUseALPRChecker(
            cv::Size(frameWidth, frameHeight), cameraId);

        for (const auto& info : plateInfos) {
            // Reassemble row-by-row so we can target the bottom row
            // for kana recovery when the fast path silently dropped
            // the hiragana on a Japanese 2-row plate.
            std::string topText, bottomText;
            if (info.cropIndices.size() == 2) {
                if (info.cropIndices[0] < ocrResults.size())
                    topText = ocrResults[info.cropIndices[0]].first;
                if (info.cropIndices[1] < ocrResults.size())
                    bottomText = ocrResults[info.cropIndices[1]].first;
            } else if (!info.cropIndices.empty() &&
                       info.cropIndices[0] < ocrResults.size()) {
                topText = ocrResults[info.cropIndices[0]].first;
            }

            // Strip screw/rivet artifacts (°, ○, etc.) picked up from
            // plate fasteners before any downstream processing. Runs
            // on every row regardless of country — these glyphs are
            // never legitimate plate content anywhere.
            topText = StripPlateArtifacts(topText);
            bottomText = StripPlateArtifacts(bottomText);

            std::string combinedText = topText;
            if (!bottomText.empty()) {
                if (!combinedText.empty()) combinedText += " ";
                combinedText += bottomText;
            }

            // Japan-only kana recovery: if the fast-path output is
            // missing hiragana/katakana, re-crop the kana region and
            // run the recognizer on just that tile. Clean plates
            // pass the IsJapaneseIncomplete check and skip this
            // block entirely — zero cost.
            if (_country == Country::JAPAN && info.halfH > 0 &&
                IsJapaneseIncomplete(combinedText)) {
                ANS_DBG("ALPR_Kana",
                    "RunInference: firing recovery on plate '%s' "
                    "(plateROI=%dx%d halfH=%d)",
                    combinedText.c_str(),
                    info.plateROI.cols, info.plateROI.rows,
                    info.halfH);
                std::string recovered = StripPlateArtifacts(
                    RecoverKanaFromBottomHalf(info.plateROI, info.halfH));
                if (!recovered.empty()) {
                    // Prepend the recovered kana to the bottom row
                    // text so the final combined string reads
                    // "region classification kana designation".
                    if (bottomText.empty()) {
                        bottomText = recovered;
                    } else {
                        bottomText = recovered + " " + bottomText;
                    }
                    combinedText = topText;
                    if (!bottomText.empty()) {
                        if (!combinedText.empty()) combinedText += " ";
                        combinedText += bottomText;
                    }
                    ANS_DBG("ALPR_Kana",
                        "RunInference: spliced result '%s'",
                        combinedText.c_str());
                }
            }

            if (combinedText.empty()) continue;

            Object lprObject = lprOutput[info.origIndex];
            lprObject.cameraId = cameraId;

            // Cross-frame stabilization: per-track majority vote in
            // full-frame mode, raw OCR text in pipeline mode.
            if (useChecker) {
                lprObject.className = alprChecker.checkPlateByTrackId(
                    cameraId, combinedText, lprObject.trackId);
            }
            else {
                lprObject.className = combinedText;
            }

            // The checker may return "" until its vote converges.
            if (lprObject.className.empty()) continue;

            // Optional colour detection on the full plate ROI
            std::string colour = DetectLPColourCached(
                info.plateROI, cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }

            output.push_back(std::move(lprObject));
        }

        // Spatial dedup: if two detections in the same frame ended up
        // with the same plate text, keep only the one whose spatial
        // history has the higher accumulated confidence. Skip this in
        // pipeline mode because there's only ever one plate per call.
        if (useChecker) {
            ensureUniquePlateText(output, cameraId);
        }

        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return {};
}
||
// ── Stateless batched inference for pipeline mode ───────────────────
|
||
// Caller supplies a full frame + a list of vehicle ROIs in FRAME
|
||
// coordinates. We run ONE LP-detect call across all vehicle crops and
|
||
// ONE text-recognizer call across every resulting plate (with the same
|
||
// 2-row split heuristic as ANSALPR_OCR::RunInference), and NO tracker,
|
||
// voting, spatial dedup, or per-camera accumulating state. This is the
|
||
// drop-in replacement for the per-bbox loop inside
|
||
// ANSALPR_RunInferencesComplete_LV (pipeline mode) and is exported as
|
||
// ANSALPR_RunInferencesBatch_LV / _V2 in dllmain.cpp. Calling this on
|
||
// ANSALPR_OCR avoids the ORT/TRT per-shape allocator churn that
// causes unbounded memory growth when the loop version is used.
//
// Batched ALPR over multiple vehicle ROIs in a single frame:
//   input        — full frame (1- or 3-channel; grayscale is promoted to BGR)
//   vehicleBoxes — vehicle ROIs in full-frame coordinates
//   cameraId     — passed through to detector/OCR and stamped on results
// Returns one Object per recognized plate, with box in FULL-FRAME
// coordinates, className = raw combined OCR text (no ALPRChecker),
// and extraInfo = "color:<colour>" when colour lookup succeeds.
// Returns {} on invalid license, uninitialized model, null engines,
// bad input, or any exception (all exceptions are logged and swallowed).
std::vector<Object> ANSALPR_OCR::RunInferencesBatch(
    const cv::Mat& input,
    const std::vector<cv::Rect>& vehicleBoxes,
    const std::string& cameraId)
{
    // ── Guard clauses: every failure path returns an empty vector ──
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OCR::RunInferencesBatch", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OCR::RunInferencesBatch", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    // Frames smaller than 5x5 cannot contain a usable plate.
    if (input.empty() || input.cols < 5 || input.rows < 5) return {};
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch", "_lpDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!_ocrEngine) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch", "_ocrEngine is null", __FILE__, __LINE__);
        return {};
    }
    if (vehicleBoxes.empty()) return {};

    try {
        // Promote grayscale input to BGR once (matches RunInference).
        // localFrame stays empty for 3-channel input; `frame` then
        // aliases `input` directly (no copy).
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;

        // ── 1. Clamp and crop vehicle ROIs ────────────────────────
        // Intersect each caller box with the frame; crops are
        // non-owning cv::Mat views into `frame`. `clamped` keeps the
        // frame-space origin of each crop so plate boxes can be
        // translated back to full-frame coordinates in step 5.
        const cv::Rect frameRect(0, 0, frame.cols, frame.rows);
        std::vector<cv::Mat> vehicleCrops;
        std::vector<cv::Rect> clamped;
        vehicleCrops.reserve(vehicleBoxes.size());
        clamped.reserve(vehicleBoxes.size());
        for (const auto& r : vehicleBoxes) {
            cv::Rect c = r & frameRect;
            if (c.width <= 5 || c.height <= 5) continue;
            vehicleCrops.emplace_back(frame(c));
            clamped.push_back(c);
        }
        if (vehicleCrops.empty()) return {};

        // ── 2. ONE batched LP detection call across all vehicles ──
        // lpBatch[v] holds the plates found in vehicleCrops[v], in
        // VEHICLE-local coordinates.
        std::vector<std::vector<Object>> lpBatch =
            _lpDetector->RunInferencesBatch(vehicleCrops, cameraId);

        // ── 3. Flatten plates, applying preprocessing per plate ───
        // For each detected plate we:
        //   1. Pad the LP bbox by 5% so the rectifier sees the
        //      plate border and tight detector crops don't clip
        //      edge characters.
        //   2. If country == JAPAN, try classical perspective
        //      rectification — if it succeeds the plateROI is a
        //      tight, straightened 2D warp of the real plate; if
        //      it fails we fall back to the padded axis-aligned
        //      crop. For non-Japan countries we skip rectification
        //      entirely to preserve baseline latency.
        //   3. Apply the same 2-row split heuristic as RunInference
        //      (aspect < 2.1 → split top/bottom).
        // The halfH field lets the assembly loop call the kana
        // recovery helper with the correct row-split boundary.
        const bool useRectification = (_country == Country::JAPAN);
        struct PlateMeta {
            size_t vehIdx;      // index into vehicleCrops / clamped
            Object lpObj;       // LP detection in VEHICLE-local coords
            cv::Mat plateROI;   // full plate crop (kept for colour + kana recovery)
            int halfH = 0;      // row-split Y inside plateROI (0 = single row)
            std::vector<size_t> cropIndices; // indices into allCrops below
        };
        std::vector<cv::Mat> allCrops;
        std::vector<PlateMeta> metas;
        allCrops.reserve(lpBatch.size() * 2); // worst case: every plate splits into 2 rows
        metas.reserve(lpBatch.size());
        // Guard on BOTH sizes in case the detector returns fewer/more
        // entries than crops submitted.
        for (size_t v = 0; v < lpBatch.size() && v < vehicleCrops.size(); ++v) {
            const cv::Mat& veh = vehicleCrops[v];
            const cv::Rect vehRect(0, 0, veh.cols, veh.rows);
            for (const auto& lp : lpBatch[v]) {
                cv::Rect lpBox = lp.box & vehRect;
                if (lpBox.width <= 0 || lpBox.height <= 0) continue;

                // Pad by 5% on each side (minimum 2 px) for the rectifier.
                const int padX = std::max(2, lpBox.width * 5 / 100);
                const int padY = std::max(2, lpBox.height * 5 / 100);
                cv::Rect paddedBox(
                    lpBox.x - padX, lpBox.y - padY,
                    lpBox.width + 2 * padX,
                    lpBox.height + 2 * padY);
                paddedBox &= vehRect; // re-clamp: padding may leave the vehicle crop
                if (paddedBox.width <= 0 || paddedBox.height <= 0) continue;

                // Perspective rectification is Japan-only to preserve
                // baseline latency on all other countries.
                cv::Mat plateROI;
                if (useRectification) {
                    cv::Mat rectified;
                    if (RectifyPlateROI(veh, paddedBox, rectified)) {
                        plateROI = rectified;      // owning canonical
                    } else {
                        plateROI = veh(paddedBox); // non-owning view
                    }
                } else {
                    plateROI = veh(paddedBox);     // non-owning view
                }

                PlateMeta pm;
                pm.vehIdx = v;
                pm.lpObj = lp;
                pm.plateROI = plateROI;

                // 2-row split heuristic: squarish plates (aspect < 2.1)
                // that are tall enough (>= 24 px) are OCR'd as two
                // separate row crops; wide plates go in whole.
                const int plateW = plateROI.cols;
                const int plateH = plateROI.rows;
                const float aspect =
                    static_cast<float>(plateW) / std::max(1, plateH);
                if (aspect < 2.1f && plateH >= 24) {
                    const int halfH = plateH / 2;
                    pm.halfH = halfH;
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(plateROI(cv::Rect(0, 0, plateW, halfH)));
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(plateROI(cv::Rect(0, halfH, plateW, plateH - halfH)));
                } else {
                    pm.halfH = 0;
                    pm.cropIndices.push_back(allCrops.size());
                    allCrops.push_back(plateROI);
                }
                metas.push_back(std::move(pm));
            }
        }
        if (allCrops.empty()) return {};

        // ── 4. ONE batched recognizer call across every plate ────
        // ONNXOCRRecognizer buckets by width internally, so this is
        // typically 1-2 ORT Runs regardless of plate count.
        // Each result is a pair whose .first is the recognized text.
        auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);

        // ── 5. Assemble — NO tracker, NO voting, NO dedup ────────
        std::vector<Object> output;
        output.reserve(metas.size());
        for (const auto& pm : metas) {
            // Reassemble row-by-row so Japan kana recovery can splice
            // the recovered hiragana into the bottom row specifically.
            // Index-bounds checks guard against a short ocrResults.
            std::string topText, bottomText;
            if (pm.cropIndices.size() == 2) {
                if (pm.cropIndices[0] < ocrResults.size())
                    topText = ocrResults[pm.cropIndices[0]].first;
                if (pm.cropIndices[1] < ocrResults.size())
                    bottomText = ocrResults[pm.cropIndices[1]].first;
            } else if (!pm.cropIndices.empty() &&
                       pm.cropIndices[0] < ocrResults.size()) {
                topText = ocrResults[pm.cropIndices[0]].first;
            }

            // Strip screw/rivet artifacts (°, ○, etc.) picked up from
            // plate fasteners before any downstream processing.
            topText = StripPlateArtifacts(topText);
            bottomText = StripPlateArtifacts(bottomText);

            // Combine rows with a single space between non-empty parts.
            std::string combined = topText;
            if (!bottomText.empty()) {
                if (!combined.empty()) combined += " ";
                combined += bottomText;
            }

            // Japan-only kana recovery fast-path fallback. Zero cost
            // on clean plates (gated by country and by UTF-8 codepoint
            // class count — clean plates return early).
            if (_country == Country::JAPAN && pm.halfH > 0 &&
                IsJapaneseIncomplete(combined)) {
                ANS_DBG("ALPR_Kana",
                        "RunInferencesBatch: firing recovery on plate "
                        "'%s' (plateROI=%dx%d halfH=%d)",
                        combined.c_str(),
                        pm.plateROI.cols, pm.plateROI.rows, pm.halfH);
                std::string recovered = StripPlateArtifacts(
                    RecoverKanaFromBottomHalf(pm.plateROI, pm.halfH));
                if (!recovered.empty()) {
                    // Recovered kana is prepended to the bottom row,
                    // then the combined string is rebuilt from rows.
                    if (bottomText.empty()) {
                        bottomText = recovered;
                    } else {
                        bottomText = recovered + " " + bottomText;
                    }
                    combined = topText;
                    if (!bottomText.empty()) {
                        if (!combined.empty()) combined += " ";
                        combined += bottomText;
                    }
                    ANS_DBG("ALPR_Kana",
                            "RunInferencesBatch: spliced result '%s'",
                            combined.c_str());
                }
            }

            // Plates with no recognized text are dropped silently.
            if (combined.empty()) continue;

            Object out = pm.lpObj;
            out.className = combined; // raw OCR — no ALPRChecker
            out.cameraId = cameraId;
            // Translate vehicle-local box back to full-frame coordinates
            // using the clamped origin of the parent vehicle crop.
            out.box.x += clamped[pm.vehIdx].x;
            out.box.y += clamped[pm.vehIdx].y;

            // Colour lookup — text-keyed cache, bounded.
            std::string colour = DetectLPColourCached(
                pm.plateROI, cameraId, out.className);
            if (!colour.empty()) out.extraInfo = "color:" + colour;

            output.push_back(std::move(out));
        }
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OCR::RunInferencesBatch",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
|
||
|
||
// ── Inference wrappers ───────────────────────────────────────────────
|
||
// Convenience overload: full-frame ALPR with the default camera id.
// Returns false for empty or sub-5x5 frames without running the model.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult) {
    const bool frameUsable =
        !input.empty() && input.cols >= 5 && input.rows >= 5;
    if (!frameUsable) {
        return false;
    }
    return Inference(input, lprResult, "CustomCam");
}
|
||
|
||
// Full-frame ALPR: runs RunInference on the whole image and serializes
// the detections into `lprResult` as JSON. Returns true only when at
// least one plate was detected; any exception yields false.
bool ANSALPR_OCR::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
    // Reject degenerate frames up front (mirrors the other overloads).
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        return false;
    }

    try {
        std::vector<Object> detections = RunInference(input, cameraId);
        lprResult = VectorDetectionToJsonString(detections);
        return !detections.empty();
    }
    catch (...) {
        // Best-effort API surface: callers get a plain failure flag.
        return false;
    }
}
|
||
|
||
// Convenience overload: per-box ALPR with the default camera id.
bool ANSALPR_OCR::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult) {
    const std::string defaultCameraId = "CustomCam";
    return Inference(input, Bbox, lprResult, defaultCameraId);
}
|
||
|
||
// Per-box ALPR: runs OCR on each supplied bounding box, translates the
// detections back to full-image coordinates, and serializes everything
// into `lprResult` as JSON. An empty box list falls back to full-frame
// inference. Returns true only when at least one plate was detected;
// any exception yields false.
bool ANSALPR_OCR::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult, const std::string& cameraId) {
    if (input.empty()) return false;
    if (input.cols < 5 || input.rows < 5) return false;

    try {
        if (Bbox.empty()) {
            return Inference(input, lprResult, cameraId);
        }

        // For cropped images, run OCR on each bounding box.
        std::vector<Object> allResults;
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        } else {
            frame = input;
        }

        const cv::Rect frameRect(0, 0, frame.cols, frame.rows);
        for (const auto& bbox : Bbox) {
            // BUGFIX: use rectangle intersection instead of clamping
            // x/y and width/height independently. The old
            //   x1 = max(0, bbox.x); w = min(cols - x1, bbox.width)
            // clamp shifted the crop when the box origin was negative
            // (e.g. x=-10,w=20 cropped [0,20) instead of the true
            // overlap [0,10)), which also corrupted the coordinate
            // restoration below. This now matches the `r & frameRect`
            // clamping used by RunInferencesBatch.
            const cv::Rect safeRect = bbox & frameRect;
            if (safeRect.width < 5 || safeRect.height < 5) continue;

            cv::Mat cropped = frame(safeRect);
            std::vector<Object> results = RunInference(cropped, cameraId);

            // Adjust bounding boxes back to full image coordinates.
            for (auto& obj : results) {
                obj.box.x += safeRect.x;
                obj.box.y += safeRect.y;
                allResults.push_back(std::move(obj));
            }
        }

        lprResult = VectorDetectionToJsonString(allResults);
        return !allResults.empty();
    }
    catch (...) {
        // Best-effort API surface: callers get a plain failure flag.
        return false;
    }
}
|
||
|
||
// Switch the active country profile at runtime. Updates the cached
// _country (read per-frame by the rectification and kana-recovery
// gates) and forwards the new value to the OCR engine when present,
// so the change takes effect on the very next inference call with no
// restart required. Every call is traced for diagnostics.
void ANSALPR_OCR::SetCountry(Country country) {
    const Country previousCountry = _country;
    _country = country;

    // Propagate to the recognizer handle if one is loaded.
    if (_ocrEngine) _ocrEngine->SetCountry(country);

    // Log every SetCountry call so runtime country switches are
    // visible and we can confirm the update landed on the right
    // handle. The recovery + rectification gates read _country on
    // every frame, so this change takes effect on the very next
    // RunInference / RunInferencesBatch call — no restart needed.
    ANS_DBG("ALPR_SetCountry",
            "country changed %d -> %d (Japan=%d, Vietnam=%d, "
            "China=%d, Australia=%d, USA=%d, Indonesia=%d) — "
            "rectification+recovery gates update on next frame",
            static_cast<int>(previousCountry),
            static_cast<int>(country),
            static_cast<int>(Country::JAPAN),
            static_cast<int>(Country::VIETNAM),
            static_cast<int>(Country::CHINA),
            static_cast<int>(Country::AUSTRALIA),
            static_cast<int>(Country::USA),
            static_cast<int>(Country::INDONESIA));
}
|
||
|
||
bool ANSALPR_OCR::Destroy() {
|
||
try {
|
||
if (_lpDetector) {
|
||
_lpDetector->Destroy();
|
||
_lpDetector.reset();
|
||
}
|
||
if (_lpColourDetector) {
|
||
_lpColourDetector->Destroy();
|
||
_lpColourDetector.reset();
|
||
}
|
||
if (_ocrEngine) {
|
||
_ocrEngine->Destroy();
|
||
_ocrEngine.reset();
|
||
}
|
||
_isInitialized = false;
|
||
return true;
|
||
}
|
||
catch (std::exception& e) {
|
||
this->_logger.LogFatal("ANSALPR_OCR::Destroy", e.what(), __FILE__, __LINE__);
|
||
return false;
|
||
}
|
||
}
|
||
|
||
} // namespace ANSCENTER
|