Files
ANSCORE/modules/ANSLPR/ANSLPR_OD.cpp
2026-04-01 17:01:05 +11:00

2858 lines
97 KiB
C++

#include "ANSLPR_OD.h"
#include "ANSTENSORRTOD.h"
#include "ANSYOLOOD.h"
#include "ANSTENSORRTCL.h"
#include "ANSOPENVINOCL.h"
#include "ANSRTYOLO.h"
#include "ANSONNXYOLO.h"
#include "ANSGpuFrameRegistry.h"
#include <json.hpp>
#include <future>
#include <thread>
#include <chrono>
#include <algorithm>
#include <unordered_map>
// ---------------------------------------------------------------------------
// Check ONNX model opset version by reading the protobuf header directly.
// No dependency on onnx_pb.h / libprotobuf — just reads the raw bytes.
// Returns 0 on failure. TRT 10.x supports up to opset ~17.
// ---------------------------------------------------------------------------
/// Read the default-domain opset version from an ONNX protobuf file.
/// Returns 0 on failure (treat as "unknown — try TRT").
///
/// ONNX protobuf layout (ModelProto):
/// field 8 (opset_import) = repeated OperatorSetIdProto {
/// field 1 (domain) = string (empty for default domain)
/// field 2 (version) = int64
/// }
///
/// We scan the last 4 KB of the file because ONNX protobuf places
/// opset_import (field 8) AFTER the graph (field 7), which can be
/// hundreds of megabytes. The opset_import entry is always a tiny
/// submessage (2-20 bytes) near the end of the file.
/// Falls back to scanning the first 8 KB for older ONNX formats.
/// Read the default-domain opset version from an ONNX protobuf file.
/// @param onnxPath  path to the .onnx file
/// @return the largest default-domain opset found, or 0 on failure
///         (callers treat 0 as "unknown — assume high opset").
///
/// No dependency on onnx_pb.h / libprotobuf — reads the raw bytes.
/// ONNX protobuf layout (ModelProto):
///   field 8 (opset_import) = repeated OperatorSetIdProto {
///       field 1 (domain)  = string (empty for the default domain)
///       field 2 (version) = int64
///   }
/// Scans the last 4 KB first because opset_import (field 8) is serialized
/// AFTER the graph (field 7), which can be hundreds of megabytes; the
/// opset_import entry is a tiny submessage near the end of the file.
/// Falls back to scanning the first 8 KB for older ONNX formats.
static int GetOnnxOpsetVersion(const std::string& onnxPath) {
    std::ifstream f(onnxPath, std::ios::binary | std::ios::ate);
    if (!f.good()) return 0;
    const auto fileSize = f.tellg();
    if (fileSize < 16) return 0;
    // Helper lambda: scan a raw byte window for opset_import submessages.
    // A candidate starts at tag byte 0x42 (field 8, wire type 2 = length-delimited).
    auto scanForOpset = [](const unsigned char* buf, int bytesRead) -> int {
        int maxDefaultOpset = 0;
        for (int i = 0; i < bytesRead - 2; ++i) {
            if (buf[i] != 0x42) continue;
            // Decode the varint submessage length (at most 3 length bytes).
            int subLen = 0, lenBytes = 0;
            for (int b = i + 1; b < bytesRead && b < i + 4; ++b) {
                subLen |= (buf[b] & 0x7F) << (7 * lenBytes);
                lenBytes++;
                if ((buf[b] & 0x80) == 0) break;
            }
            // OperatorSetIdProto is tiny; reject implausible lengths
            // (also filters out random 0x42 bytes inside weight data).
            if (subLen < 2 || subLen > 60) continue;
            int subStart = i + 1 + lenBytes;
            int subEnd = subStart + subLen;
            if (subEnd > bytesRead) continue;
            bool hasNonEmptyDomain = false;
            int version = 0;
            int pos = subStart;
            while (pos < subEnd) {
                unsigned char tag = buf[pos++];
                int fieldNum = tag >> 3;
                int wireType = tag & 0x07;
                if (fieldNum == 1 && wireType == 2) {        // domain (string)
                    if (pos >= subEnd) break;
                    int strLen = buf[pos++];
                    if (strLen > 0) hasNonEmptyDomain = true;
                    pos += strLen;
                } else if (fieldNum == 2 && wireType == 0) { // version (varint)
                    version = 0; int shift = 0;
                    while (pos < subEnd) {
                        unsigned char vb = buf[pos++];
                        version |= (vb & 0x7F) << shift;
                        shift += 7;
                        if ((vb & 0x80) == 0) break;
                    }
                } else { break; } // unexpected field: not an OperatorSetIdProto
            }
            // Only the default (empty-domain) entry carries the ONNX opset.
            if (!hasNonEmptyDomain && version > maxDefaultOpset)
                maxDefaultOpset = version;
        }
        return maxDefaultOpset;
    };
    // Scan TAIL of file first (where opset_import usually lives)
    constexpr int TAIL_SIZE = 4096;
    std::streampos tailOffset = 0;
    if (fileSize > TAIL_SIZE)
        tailOffset = fileSize - static_cast<std::streampos>(TAIL_SIZE);
    f.seekg(tailOffset, std::ios::beg);
    unsigned char buf[8192];
    f.read(reinterpret_cast<char*>(buf), TAIL_SIZE);
    int bytesRead = static_cast<int>(f.gcount());
    int result = scanForOpset(buf, bytesRead);
    if (result > 0) return result;
    // Fallback: scan HEAD of file (older ONNX formats).
    // BUGFIX: if the tail read hit EOF (file smaller than TAIL_SIZE) the
    // stream's eof/fail bits are set and the seekg/read below would silently
    // fail, making the head scan a no-op. Clear the state before rewinding.
    f.clear();
    f.seekg(0, std::ios::beg);
    f.read(reinterpret_cast<char*>(buf), 8192);
    bytesRead = static_cast<int>(f.gcount());
    return scanForOpset(buf, bytesRead);
}
// Write a message to Windows Event Log (Application log, source "ANSLogger").
// Visible in Event Viewer even when no console is attached (e.g. LabVIEW).
static void WriteEventLog(const char* message, WORD eventType = EVENTLOG_INFORMATION_TYPE) {
    // The event-source handle is registered once per process and intentionally
    // never deregistered: it must remain valid for the process lifetime and
    // the OS reclaims it on exit.
    static HANDLE hLog = RegisterEventSourceA(NULL, "ANSLogger");
    if (hLog) {
        const char* msgs[1] = { message };
        // Event ID 0 / category 0 / no SID: only the message text matters here.
        ReportEventA(hLog, eventType, 0, 0, NULL, 1, 0, msgs, NULL);
    }
}
// ---------------------------------------------------------------------------
// SEH wrapper for pre-building a single TRT engine from ONNX.
// MSVC forbids __try in functions that use C++ object unwinding,
// so the inner C++ logic lives in PreBuildOneModel_Impl (with try/catch)
// and the outer SEH lives in PreBuildOneModel_SEH (no C++ objects).
// ---------------------------------------------------------------------------
// Inputs for one throwaway TRT engine pre-build (see PreBuildOneModel_*).
struct PreBuildParams {
    const std::string* licenseKey;   // borrowed; owned by ANSALPR_OD
    const std::string* modelFolder;  // borrowed; extracted model directory
    std::string modelName;           // base model file name, e.g. "lpd"
    std::string className;           // class-names file, e.g. "lpd.names"
    std::string label;               // human-readable tag for log messages
    ANSCENTER::ModelConfig config;   // copy of the model configuration to build with
};
static bool PreBuildOneModel_Impl(const PreBuildParams& p) {
try {
auto tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
ANSCENTER::ModelConfig cfg = p.config;
cfg.modelType = ANSCENTER::ModelType::RTYOLO;
std::string tempLabels;
bool configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (!configured) return false;
// Try FP16 first
std::string optimizedFolder;
bool built = tempDetector->OptimizeModel(true /*fp16*/, optimizedFolder);
// FP16 failed — retry with FP32
// Some ONNX models (especially opset 19+) crash TRT during FP16 tactic selection.
if (!built) {
std::cout << "[ANSALPR] Pre-build: FP16 failed for " << p.label
<< ", retrying with FP32..." << std::endl;
// Recreate detector to get a clean Engine<float> with FP32 precision
tempDetector.reset();
tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
tempLabels.clear();
configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (configured) {
built = tempDetector->OptimizeModel(false /*fp32*/, optimizedFolder);
if (built) {
std::cout << "[ANSALPR] Pre-build: " << p.label
<< " FP32 fallback succeeded." << std::endl;
}
}
}
tempDetector.reset(); // free VRAM
return built;
}
catch (...) {
return false;
}
}
// FP32-only build — used as fallback when FP16 SEH-crashes
static bool PreBuildOneModel_FP32Only(const PreBuildParams& p) {
try {
auto tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
ANSCENTER::ModelConfig cfg = p.config;
cfg.modelType = ANSCENTER::ModelType::RTYOLO;
std::string tempLabels;
bool configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (!configured) return false;
std::string optimizedFolder;
bool built = tempDetector->OptimizeModel(false /*fp32*/, optimizedFolder);
tempDetector.reset();
return built;
}
catch (...) { return false; }
}
// Pure SEH guard around the FP32-only build.
// This function must not declare any C++ objects with destructors: MSVC
// (error C2712) forbids __try in functions that require object unwinding.
// On a structured exception (e.g. access violation inside TRT), the code is
// reported through *outCode and false is returned.
static bool PreBuildOneModel_FP32Only_SEH(const PreBuildParams& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return PreBuildOneModel_FP32Only(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
// Pure SEH wrapper — no C++ objects, no try/catch
// If FP16 SEH-crashes, automatically retries with FP32.
static bool PreBuildOneModel_SEH(const PreBuildParams& p, DWORD* outCode) {
    // NOTE: no locals with destructors may be declared here (MSVC C2712:
    // __try cannot coexist with C++ object unwinding in the same function).
    *outCode = 0;
    __try {
        // _Impl already performs its own C++-level FP16 -> FP32 retry.
        return PreBuildOneModel_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
    }
    // Reaching here means the FP16 attempt crashed at the SEH level, so the
    // C++-level FP32 retry inside _Impl never ran — retry FP32 in a fresh call.
    // FP16 crashed — try FP32 fallback
    if (*outCode != 0) {
        std::cout << "[ANSALPR] Pre-build: " << p.label
            << " FP16 SEH crash (0x" << std::hex << *outCode << std::dec
            << "), retrying with FP32..." << std::endl;
        DWORD fp32Code = 0;
        bool fp32Ok = PreBuildOneModel_FP32Only_SEH(p, &fp32Code);
        if (fp32Ok) {
            std::cout << "[ANSALPR] Pre-build: " << p.label
                << " FP32 fallback succeeded." << std::endl;
            *outCode = 0; // clear error — FP32 worked
            return true;
        }
        // FP32 also failed — restore original error code
        if (fp32Code != 0) *outCode = fp32Code;
    }
    return false;
}
// ---------------------------------------------------------------------------
// SEH wrapper for loading the LPC colour model (Step 5).
// ---------------------------------------------------------------------------
// Inputs/outputs for the SEH-protected LPC colour-model load (Step 5).
struct LoadLpcParams {
    const std::string* licenseKey;                    // borrowed from ANSALPR_OD
    ANSCENTER::ModelConfig* config;                   // mutated: detection/model type are set during load
    const std::string* modelFolder;                   // extracted model directory
    std::string* labels;                              // out: class labels read from lpc.names
    std::unique_ptr<ANSCENTER::ANSODBase>* detector;  // out: receives the loaded detector on success
};
// C++ half of the LPC colour-model load: configures the shared ModelConfig
// for classification, loads "lpc" via the TensorRT YOLO backend, and hands
// ownership to the caller's detector slot. Exceptions clear the slot.
static bool LoadLpcModel_Impl(const LoadLpcParams& p) {
    try {
        p.config->detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        p.config->modelType = ANSCENTER::ModelType::RTYOLO;

        auto classifier = std::make_unique<ANSCENTER::ANSRTYOLO>();
        const bool loaded = classifier->LoadModelFromFolder(
            *p.licenseKey, *p.config, "lpc", "lpc.names", *p.modelFolder, *p.labels);
        if (!loaded) return false;

        classifier->SetTracker(ANSCENTER::TrackerType::BYTETRACK, false);
        *p.detector = std::move(classifier); // upcast ANSRTYOLO -> ANSODBase
        return true;
    }
    catch (...) {
        p.detector->reset();
        return false;
    }
}
// Pure SEH guard for the optional LPC colour-model load. No C++ objects with
// destructors may live in this function (MSVC C2712). A crash inside TRT
// (e.g. the high-opset ONNX parser access violation) is reported via *outCode.
static bool LoadLpcModel_SEH(const LoadLpcParams& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return LoadLpcModel_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
//#define FNS_DEBUG
namespace ANSCENTER {
// ---- Tunable constants for license plate recognition ----
constexpr float ROW_SPLIT_MIN_GAP_FACTOR = 0.2f;  // maxGap < avgHeight * this => single row
constexpr float ROW_SPLIT_AVGY_FACTOR = 0.4f;     // avgY diff must exceed avgHeight * this
constexpr size_t ROW_SPLIT_MIN_GROUP_SIZE = 2;    // minimum chars per row
constexpr float DUPLICATE_DIST_THRESHOLD = 5.0f;  // pixels: chars closer than this are duplicates
constexpr int DUPLICATE_GRID_SIZE = 10;           // spatial hash grid cell size in pixels
constexpr int ASYNC_TIMEOUT_SECONDS = 30;         // timeout for async worker threads
// Construct an unconfigured recognizer; Initialize() + LoadEngine() must
// succeed before inference is allowed (valid stays false until then).
ANSALPR_OD::ANSALPR_OD() : valid(false) {}
// Destructor — releases all detectors via Destroy().
// Destructors are implicitly noexcept in C++11+, so ANY exception escaping
// here would call std::terminate(); the original only caught std::exception,
// leaving non-std exceptions fatal. Swallow everything.
ANSALPR_OD::~ANSALPR_OD() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::~ANSALPR_OD", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Non-std exception during teardown: nothing safe to do but suppress it.
    }
}
bool ANSALPR_OD::Destroy() {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (this->_ocrDetector) this->_ocrDetector.reset();
if (this->_lpDetector) this->_lpDetector.reset();
if (this->_lpColourDetector) this->_lpColourDetector.reset();
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSALPR_OD::Destroy", e.what(), __FILE__, __LINE__);
return false;
}
};
/// Validate the license, extract the password-protected model zip, detect the
/// target country, and record the detection thresholds. Engines themselves are
/// loaded later by LoadEngine().
/// @param licenseKey        ANS license key
/// @param modelZipFilePath  path to the encrypted model package
/// @param modelZipPassword  optional user-supplied zip password (built-in
///                          fallback passwords are tried afterwards)
/// @param detectorThreshold plate-detector score threshold
/// @param ocrThreshold      OCR score threshold
/// @param colourThreshold   colour-classifier score threshold (<= 0 disables)
/// @return true when the model folder was extracted successfully
bool ANSALPR_OD::Initialize(const std::string& licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _licenseKey = licenseKey;
        _licenseValid = false;
        _detectorThreshold = detectorThreshold;
        _ocrThreshold = ocrThreshold;
        _colorThreshold = colourThreshold;
        _country = Country::VIETNAM;
        CheckLicense();
        if (!_licenseValid) {
            this->_logger.LogError("ANSALPR_OD::Initialize.", "License is not valid.", __FILE__, __LINE__);
            return false;
        }
        // Register the known Vietnamese plate layouts ('d' = digit, 'l' = letter;
        // uppercase runs appear to be literal series codes — confirm with spec).
        // Extracted to one helper because the original duplicated this list in
        // three branches below.
        auto addVietnamPlateFormats = [this]() {
            static const char* const kVietnamFormats[] = {
                "ddlddddd", "ddldddd", "ddldddddd",
                "ddllddddd",
                "ddllddddd", // NOTE(review): duplicate entry preserved from original — confirm if intentional
                "ddMDdddddd", "dddddNGdd", "dddddQTdd",
                "dddddCVdd", "dddddNNdd", "lldddd"
            };
            for (const char* fmt : kVietnamFormats) _plateFormats.push_back(fmt);
        };
        // Extract model folder
        // 0. Check if the modelZipFilePath exist?
        // NOTE(review): a missing zip only logs fatal and falls through — the
        // FolderExist() check below is what actually aborts. Confirm intended.
        if (!FileExist(modelZipFilePath)) {
            this->_logger.LogFatal("ANSALPR_OD::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
        }
        else {
            this->_logger.LogInfo("ANSALPR_OD::Initialize. Model zip file found: ", modelZipFilePath, __FILE__, __LINE__);
        }
        // 1. Unzip model zip file to a special location with folder name as model file (and version)
        std::string outputFolder;
        std::vector<std::string> passwordArray;
        if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
        passwordArray.push_back("AnsDemoModels20@!");
        passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
        passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
        std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
        size_t vectorSize = passwordArray.size();
        for (size_t i = 0; i < vectorSize; i++) {
            if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
                break; // Break the loop when the condition is met.
        }
        // 2. Check if the outputFolder exist
        if (!FolderExist(_modelFolder)) {
            this->_logger.LogError("ANSALPR_OD::Initialize. Output model folder is not exist", _modelFolder, __FILE__, __LINE__);
            return false; // That means the model file is not exist or the password is not correct
        }
        // Check country: country.txt holds a numeric code
        // (0 = VN, 1 = CN, 2 = AU, 3 = US, 4 = ID; anything else defaults to VN).
        std::string countryFile = CreateFilePath(_modelFolder, "country.txt");
        if (FileExist(countryFile)) {
            std::ifstream infile(countryFile);
            std::string countryStr;
            std::getline(infile, countryStr);
            infile.close();
            if (countryStr == "0") {
                _country = Country::VIETNAM;
                addVietnamPlateFormats();
            }
            else if (countryStr == "1")
                _country = Country::CHINA;
            else if (countryStr == "2")
                _country = Country::AUSTRALIA;
            else if (countryStr == "3")
                _country = Country::USA;
            else if (countryStr == "4")
                _country = Country::INDONESIA;
            else {
                _country = Country::VIETNAM; // Default for unrecognized codes
                addVietnamPlateFormats();
            }
        }
        else {
            _country = Country::VIETNAM; // Default when country.txt is absent
            addVietnamPlateFormats();
        }
        // 3. Load LD and OCR models
        alprChecker.Init(MAX_ALPR_FRAME);
        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
        _ocrModelConfig.detectionScoreThreshold = _ocrThreshold;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Load (and if necessary build) all inference engines: LPD (plate detector),
/// OCR (character detector), and the optional LPC colour classifier.
/// Strategy: on NVIDIA GPUs run a VRAM-friendly TRT pre-build pass, then load
/// the cached engines; fall back to ONNX Runtime when TRT is unavailable or
/// fails. Returns true only when both critical models (LPD + OCR) loaded.
bool ANSALPR_OD::LoadEngine() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        WriteEventLog("ANSALPR_OD::LoadEngine: Step 1 - Starting engine load");
        this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 1: Starting engine load", __FILE__, __LINE__);
        // Check the hardware type
        // Clamp score thresholds and fix the network input geometry for all
        // three models (LPD/OCR: 640x640 detection; LPC: 224x224 classification).
        _lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
        _ocrModelConfig.detectionScoreThreshold = _ocrThreshold;
        _lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
        if (_lpdmodelConfig.detectionScoreThreshold < 0.25) _lpdmodelConfig.detectionScoreThreshold = 0.25;
        if (_ocrModelConfig.detectionScoreThreshold < 0.25) _ocrModelConfig.detectionScoreThreshold = 0.25;
        _lpdmodelConfig.modelConfThreshold = 0.5;
        _lpdmodelConfig.modelMNSThreshold = 0.5;
        _ocrModelConfig.modelConfThreshold = 0.5;
        _ocrModelConfig.modelMNSThreshold = 0.5;
        _lpColourModelConfig.modelConfThreshold = 0.5;
        _lpColourModelConfig.modelMNSThreshold = 0.5;
        _lpdmodelConfig.inpHeight = 640;
        _lpdmodelConfig.inpWidth = 640;
        _ocrModelConfig.inpHeight = 640;
        _ocrModelConfig.inpWidth = 640;
        _ocrModelConfig.gpuOptBatchSize = 8;
        _ocrModelConfig.gpuMaxBatchSize = 32; // desired max; engine builder auto-caps by GPU VRAM
        _ocrModelConfig.maxInputHeight = 640;
        _ocrModelConfig.maxInputWidth = 640;
        _ocrModelConfig.minInputHeight = 640;
        _ocrModelConfig.minInputWidth = 640;
        _ocrModelConfig.optInputHeight = 640;
        _ocrModelConfig.optInputWidth = 640;
        _lpColourModelConfig.inpHeight = 224;
        _lpColourModelConfig.inpWidth = 224;
        _lpColourModelConfig.gpuOptBatchSize = 8;
        _lpColourModelConfig.gpuMaxBatchSize = 32; // desired max; engine builder auto-caps by GPU VRAM
        _lpColourModelConfig.maxInputHeight = 224;
        _lpColourModelConfig.maxInputWidth = 224;
        _lpColourModelConfig.minInputHeight = 224;
        _lpColourModelConfig.minInputWidth = 224;
        _lpColourModelConfig.optInputHeight = 224;
        _lpColourModelConfig.optInputWidth = 224;
        // Expected files inside the extracted model folder.
        // NOTE(review): LPC is gated on lpc.xml (OpenVINO file) below, but all
        // LPC loads read lpc.onnx — confirm the .xml presence check is intended.
        std::string lprModel = CreateFilePath(_modelFolder, "lpd.onnx");
        std::string lprClassesFile = CreateFilePath(_modelFolder, "lpd.names");
        std::string ocrModel = CreateFilePath(_modelFolder, "ocr.onnx");
        std::string ocrClassesFile = CreateFilePath(_modelFolder, "ocr.names");
        std::string colorModel = CreateFilePath(_modelFolder, "lpc.xml");
        std::string colorClassesFile = CreateFilePath(_modelFolder, "lpc.names");
        WriteEventLog("ANSALPR_OD::LoadEngine: Step 2 - Checking hardware information");
        this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 2: Checking hardware information", __FILE__, __LINE__);
        engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation(); // NVIDIA_GPU vs CPU
        WriteEventLog(("ANSALPR_OD::LoadEngine: Step 2 complete - Engine type = " + std::to_string(static_cast<int>(engineType))).c_str());
        this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 2 complete: Engine type = " + std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
        // Drop any previously loaded engines before (re)loading.
        valid = false;
        if (_lpDetector) _lpDetector.reset();
        if (_ocrDetector) _ocrDetector.reset();
        if (_lpColourDetector) _lpColourDetector.reset();
        // ================================================================
        // PRE-BUILD PASS: Build all TRT engine files before loading any.
        //
        // ANSALPR loads 3 models sequentially (LPD, OCR, LPC). When cached
        // .engine files exist, each one is just deserialized (low VRAM).
        // But after a driver/TRT update the caches are invalidated and every
        // model must be built from ONNX — a process that requires 2-5x the
        // final model size in temporary GPU workspace.
        //
        // Problem: if model #1 is built AND kept loaded, its VRAM footprint
        // reduces the workspace available for building model #2, which can
        // cause OOM crashes (LabVIEW error 1097) or very long hangs.
        //
        // Solution: use ANSRTYOLO::OptimizeModel() to build each .engine
        // file in a throwaway instance (buildWithRetry only — no load).
        // The instance is destroyed after saving, guaranteeing each build
        // gets the full GPU VRAM. The subsequent loading pass finds the
        // cached files and only needs the smaller deserialization memory.
        // ================================================================
        if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
            if (FileExist(lprModel) && FileExist(ocrModel)) {
                // Collect ONNX models that need TRT engine builds.
                // Each entry: { modelConfig, onnxModelName, classFileName, label }
                struct PreBuildSpec {
                    ModelConfig config;
                    std::string modelName; // e.g. "lpd"
                    std::string className; // e.g. "lpd.names"
                    std::string label;     // for logging
                };
                std::vector<PreBuildSpec> specs;
                {
                    ModelConfig lpdCfg = _lpdmodelConfig;
                    lpdCfg.detectionType = DetectionType::DETECTION;
                    specs.push_back({ lpdCfg, "lpd", "lpd.names", "LPD" });
                }
                {
                    ModelConfig ocrCfg = _ocrModelConfig;
                    ocrCfg.detectionType = DetectionType::DETECTION;
                    specs.push_back({ ocrCfg, "ocr", "ocr.names", "OCR" });
                }
                // LPC is optional and may be .xml (OpenVINO), only include if ONNX exists.
                // IMPORTANT: TRT 10.x crashes on opset 19+ ONNX models (access violation
                // in the ONNX parser that corrupts the CUDA context). Skip TRT pre-build
                // for high-opset models — they will fall through to ONNX Runtime at Step 5.
                std::string lpcOnnx = CreateFilePath(_modelFolder, "lpc.onnx");
                // NOTE(review): this flag is written but never read in the pre-build
                // pass; the decision is carried by whether the spec is pushed below.
                bool lpcSkipTrt = false;
                if (FileExist(lpcOnnx) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
                    int lpcOpset = GetOnnxOpsetVersion(lpcOnnx);
                    this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                        "Pre-build: LPC ONNX opset detected = " + std::to_string(lpcOpset), __FILE__, __LINE__);
                    if (lpcOpset > 17 || lpcOpset == 0) {
                        // opset > 17: TRT crashes on these models
                        // opset == 0: detection failed, assume high opset (safer than crashing)
                        lpcSkipTrt = true;
                        this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                            "Pre-build: LPC opset " + std::to_string(lpcOpset) +
                            " > 17, skipping TRT (will use ONNX Runtime instead)", __FILE__, __LINE__);
                        WriteEventLog(("ANSALPR_OD::LoadEngine: LPC opset " +
                            std::to_string(lpcOpset) + " too high for TRT, using ORT").c_str());
                    } else {
                        ModelConfig lpcCfg = _lpColourModelConfig;
                        lpcCfg.detectionType = DetectionType::CLASSIFICATION;
                        specs.push_back({ lpcCfg, "lpc", "lpc.names", "LPC" });
                    }
                }
                // Quick check: do ANY engines need building?
                // If all are cached, skip entirely for zero overhead on normal launches.
                //
                // IMPORTANT: Apply the same GPU-tier batch cap that buildLoadNetwork()
                // applies internally. Without this, the probe looks for e.g.
                // "ocr.engine...b32" but the actual build saved "ocr.engine...b16"
                // (capped by VRAM), causing needless rebuilds every launch.
                int gpuMaxBatch = 1;
                {
                    auto gpus = Engine<float>::enumerateDevices();
                    if (!gpus.empty()) {
                        const size_t totalMiB = gpus[0].totalMemoryBytes / (1024ULL * 1024);
                        if (totalMiB >= 15800) gpuMaxBatch = 32;      // ~16 GiB+
                        else if (totalMiB >= 11800) gpuMaxBatch = 16; // ~12 GiB
                        else if (totalMiB >= 7900) gpuMaxBatch = 8;   // ~ 8 GiB (batch=16 OCR exec ctx ~987 MiB, too large for 4 concurrent tasks)
                        else if (totalMiB >= 3900) gpuMaxBatch = 4;   // ~ 4 GiB
                        else if (totalMiB >= 1900) gpuMaxBatch = 2;   // ~ 2 GiB
                        else gpuMaxBatch = 1;
                    }
                }
                bool anyNeedsBuild = false;
                for (auto& spec : specs) {
                    // Reconstruct the exact Options the loader will use so the
                    // probed engine-file name matches what a build would produce.
                    ANSCENTER::Options o;
                    o.optBatchSize = spec.config.gpuOptBatchSize;
                    o.maxBatchSize = spec.config.gpuMaxBatchSize;
                    o.deviceIndex = spec.config.gpuDeviceIndex;
                    o.maxInputHeight = spec.config.maxInputHeight;
                    o.minInputHeight = spec.config.minInputHeight;
                    o.optInputHeight = spec.config.optInputHeight;
                    o.maxInputWidth = spec.config.maxInputWidth;
                    o.minInputWidth = spec.config.minInputWidth;
                    o.optInputWidth = spec.config.optInputWidth;
                    o.engineFileDir = _modelFolder;
                    o.precision = ANSCENTER::Precision::FP16;
                    // Apply GPU-tier batch cap (must match buildLoadNetwork behavior)
                    if (o.maxBatchSize > gpuMaxBatch) {
                        o.maxBatchSize = gpuMaxBatch;
                        o.optBatchSize = std::min(o.optBatchSize, o.maxBatchSize);
                    }
                    // A cached engine at either precision counts as "built".
                    auto probe = std::make_unique<Engine<float>>(o);
                    std::string fp16File = probe->serializeEngineOptions(o, CreateFilePath(_modelFolder, spec.modelName + ".onnx"));
                    o.precision = ANSCENTER::Precision::FP32;
                    std::string fp32File = probe->serializeEngineOptions(o, CreateFilePath(_modelFolder, spec.modelName + ".onnx"));
                    probe.reset();
                    if (!FileExist(fp16File) && !FileExist(fp32File)) {
                        anyNeedsBuild = true;
                        this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                            "Pre-build: " + spec.label + " engine not cached, build required", __FILE__, __LINE__);
                    }
                }
                if (anyNeedsBuild) {
                    WriteEventLog("ANSALPR_OD::LoadEngine: Pre-build pass starting - optimizing engines one-at-a-time");
                    this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                        "Pre-build pass: optimizing engine files one-at-a-time with full GPU VRAM", __FILE__, __LINE__);
                    for (auto& spec : specs) {
                        this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                            "Pre-build: Optimizing " + spec.label + " engine...", __FILE__, __LINE__);
                        WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: Optimizing " + spec.label + " engine...").c_str());
                        PreBuildParams pbp;
                        pbp.licenseKey = &_licenseKey;
                        pbp.modelFolder = &_modelFolder;
                        pbp.modelName = spec.modelName;
                        pbp.className = spec.className;
                        pbp.label = spec.label;
                        pbp.config = spec.config;
                        DWORD sehCode = 0;
                        // SEH-protected: a TRT crash here must not take down the process.
                        bool built = PreBuildOneModel_SEH(pbp, &sehCode);
                        if (sehCode != 0) {
                            char buf[256];
                            snprintf(buf, sizeof(buf),
                                "ANSALPR_OD::LoadEngine: Pre-build: %s SEH exception 0x%08X - skipping",
                                spec.label.c_str(), sehCode);
                            WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Pre-build: " + spec.label + " SEH crash, skipping", __FILE__, __LINE__);
                        }
                        else if (built) {
                            this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                                "Pre-build: " + spec.label + " engine built and cached successfully", __FILE__, __LINE__);
                            WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: " + spec.label + " engine built OK").c_str());
                        }
                        else {
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Pre-build: " + spec.label + " engine build failed (will retry in load pass)", __FILE__, __LINE__);
                            WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: " + spec.label + " build FAILED").c_str(),
                                EVENTLOG_WARNING_TYPE);
                        }
                    }
                    this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                        "Pre-build pass complete. Proceeding to load all engines.", __FILE__, __LINE__);
                }
            }
        }
        // ---- TensorRT loading pass (engines should now be cached) ----
        if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
            if (FileExist(lprModel) && (FileExist(ocrModel)))
            {
                WriteEventLog("ANSALPR_OD::LoadEngine: Step 3 - Loading LP detector with TensorRT");
                this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 3: Loading LP detector with TensorRT", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::RTYOLO;
                _lpDetector = std::make_unique<ANSCENTER::ANSRTYOLO>(); // TensorRT
                bool lpSuccess = _lpDetector->LoadModelFromFolder(_licenseKey, _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels);
                if (!lpSuccess) {
                    this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load LP detector (TensorRT). GPU may not support this model.", __FILE__, __LINE__);
                    _lpDetector.reset();
                }
                else {
                    // Enable tracker on LP detector for stable bounding box tracking,
                    // but disable stabilization (no ghost plates — ALPRChecker handles text stabilization)
                    _lpDetector->SetTracker(TrackerType::BYTETRACK, true);
                    _lpDetector->SetStabilization(false);
                }
                WriteEventLog("ANSALPR_OD::LoadEngine: Step 4 - Loading OCR detector with TensorRT");
                this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 4: Loading OCR detector with TensorRT", __FILE__, __LINE__);
                _ocrModelConfig.detectionType = DetectionType::DETECTION;
                _ocrModelConfig.modelType = ModelType::RTYOLO;
                _ocrDetector = std::make_unique<ANSCENTER::ANSRTYOLO>(); // TensorRT
                bool ocrSuccess = _ocrDetector->LoadModelFromFolder(_licenseKey, _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels);
                if (!ocrSuccess) {
                    this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load OCR detector (TensorRT). GPU may not support this model.", __FILE__, __LINE__);
                    _ocrDetector.reset();
                }
                else {
                    _ocrDetector->SetTracker(TrackerType::BYTETRACK, false);
                }
                // Check if we need to load the color model (optional — SEH-protected)
                if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
                    // Route decision: use ONNX Runtime for high-opset models that crash TRT.
                    // NOTE(review): if lpc.onnx is absent, opset reads 0 and the ORT
                    // branch is taken; its load will then fail and colour is disabled.
                    int lpcOpsetCheck = GetOnnxOpsetVersion(CreateFilePath(_modelFolder, "lpc.onnx"));
                    // opset > 17: TRT crashes on these models
                    // opset == 0: detection failed, assume high opset (safer than crashing)
                    bool lpcSkipTrt = (lpcOpsetCheck > 17 || lpcOpsetCheck == 0);
                    this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                        "Step 5: LPC opset detected = " + std::to_string(lpcOpsetCheck) +
                        ", skipTrt = " + (lpcSkipTrt ? std::string("true") : std::string("false")), __FILE__, __LINE__);
                    if (lpcSkipTrt) {
                        WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - Loading colour classifier with ONNX Runtime (opset > 17)");
                        this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                            "Step 5: Loading colour classifier with ONNX Runtime (opset too high for TRT)", __FILE__, __LINE__);
                        try {
                            _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                            _lpColourModelConfig.modelType = ModelType::ONNXYOLO;
                            auto ortDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>();
                            bool ok = ortDetector->LoadModelFromFolder(
                                _licenseKey, _lpColourModelConfig, "lpc", "lpc.names", _modelFolder, _lpColourLabels);
                            if (ok) {
                                ortDetector->SetTracker(TrackerType::BYTETRACK, false);
                                _lpColourDetector = std::move(ortDetector);
                                WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC loaded via ONNX Runtime");
                                this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
                                    "Step 5: Colour classifier loaded via ONNX Runtime", __FILE__, __LINE__);
                            } else {
                                WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC ONNX Runtime load failed", EVENTLOG_ERROR_TYPE);
                                this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                    "Step 5: Colour classifier ONNX Runtime load failed. Colour detection disabled.", __FILE__, __LINE__);
                            }
                        }
                        catch (const std::exception& e) {
                            WriteEventLog(("ANSALPR_OD::LoadEngine: Step 5 - LPC exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Step 5: Colour classifier exception: " + std::string(e.what()), __FILE__, __LINE__);
                            _lpColourDetector.reset();
                        }
                        catch (...) {
                            WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC unknown exception", EVENTLOG_ERROR_TYPE);
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Step 5: Colour classifier unknown exception. Colour detection disabled.", __FILE__, __LINE__);
                            _lpColourDetector.reset();
                        }
                    } else {
                        // Normal TRT path (opset ≤ 17)
                        WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - Loading colour classifier with TensorRT");
                        this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 5: Loading colour classifier with TensorRT", __FILE__, __LINE__);
                        LoadLpcParams lpc;
                        lpc.licenseKey = &_licenseKey;
                        lpc.config = &_lpColourModelConfig;
                        lpc.modelFolder = &_modelFolder;
                        lpc.labels = &_lpColourLabels;
                        lpc.detector = &_lpColourDetector;
                        DWORD sehCode = 0;
                        bool colourOk = LoadLpcModel_SEH(lpc, &sehCode);
                        if (sehCode != 0) {
                            char buf[256];
                            snprintf(buf, sizeof(buf),
                                "ANSALPR_OD::LoadEngine: Step 5 LPC SEH exception 0x%08X - colour detection disabled", sehCode);
                            WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Step 5: Colour classifier crashed (SEH). Colour detection disabled.", __FILE__, __LINE__);
                            _lpColourDetector.reset();
                        }
                        else if (!colourOk) {
                            this->_logger.LogError("ANSALPR_OD::LoadEngine",
                                "Failed to load colour detector (TensorRT). Colour detection disabled.", __FILE__, __LINE__);
                        }
                    }
                }
                // TensorRT failed for both critical models — fall back to ONNX Runtime
                if (!lpSuccess || !ocrSuccess) {
                    this->_logger.LogError("ANSALPR_OD::LoadEngine", "TensorRT engine build failed. Falling back to ONNX Runtime...", __FILE__, __LINE__);
                    if (_lpDetector) _lpDetector.reset();
                    if (_ocrDetector) _ocrDetector.reset();
                    if (_lpColourDetector) _lpColourDetector.reset();
                    // Fall through to ONNX path below
                    engineType = ANSCENTER::EngineType::CPU;
                }
                else {
                    valid = true;
                }
            }
        }
        // ONNX Runtime fallback path (CPU or when TensorRT fails)
        if (!valid) {
            if (FileExist(lprModel) && (FileExist(ocrModel)))
            {
                WriteEventLog("ANSALPR_OD::LoadEngine: Step 6 - Loading LP detector with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 6: Loading LP detector with ONNX Runtime", __FILE__, __LINE__);
                _lpdmodelConfig.detectionType = DetectionType::DETECTION;
                _lpdmodelConfig.modelType = ModelType::ONNXYOLO;
                // NOTE(review): labels go to a local here, unlike the TRT path
                // which fills _lpdLabels — confirm _lpdLabels is not needed on CPU.
                std::string _lprClasses;
                _lpDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>(); // Yolo
                bool lpSuccess = _lpDetector->LoadModelFromFolder(_licenseKey, _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lprClasses);
                if (!lpSuccess) {
                    this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load LP detector (ONNX Runtime).", __FILE__, __LINE__);
                    _lpDetector.reset();
                }
                else {
                    // Enable tracker on LP detector for stable bounding box tracking,
                    // but disable stabilization (no ghost plates — ALPRChecker handles text stabilization)
                    _lpDetector->SetTracker(TrackerType::BYTETRACK, true);
                    _lpDetector->SetStabilization(false);
                }
                WriteEventLog("ANSALPR_OD::LoadEngine: Step 7 - Loading OCR detector with ONNX Runtime");
                this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 7: Loading OCR detector with ONNX Runtime", __FILE__, __LINE__);
                _ocrModelConfig.detectionType = DetectionType::DETECTION;
                _ocrModelConfig.modelType = ModelType::ONNXYOLO;
                _ocrDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>(); // Yolo
                bool ocrSuccess = _ocrDetector->LoadModelFromFolder(_licenseKey, _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels);
                if (!ocrSuccess) {
                    this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load OCR detector (ONNX Runtime).", __FILE__, __LINE__);
                    _ocrDetector.reset();
                }
                else {
                    _ocrDetector->SetTracker(TrackerType::BYTETRACK, false);
                }
                // Check if we need to load the color model
                if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
                    _lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
                    _lpColourModelConfig.modelType = ModelType::ONNXYOLO;
                    _lpColourDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>(); // Classification with ONNX
                    bool colourSuccess = _lpColourDetector->LoadModelFromFolder(_licenseKey, _lpColourModelConfig, "lpc", "lpc.names", _modelFolder, _lpColourLabels);
                    if (!colourSuccess) {
                        this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load colour detector (ONNX Runtime). Colour detection disabled.", __FILE__, __LINE__);
                        _lpColourDetector.reset();
                    }
                    else {
                        _lpColourDetector->SetTracker(TrackerType::BYTETRACK, false);
                    }
                }
                if (lpSuccess && ocrSuccess) {
                    valid = true;
                    if (engineType == ANSCENTER::EngineType::CPU) {
                        this->_logger.LogDebug("ANSALPR_OD::LoadEngine", "Successfully loaded models with ONNX Runtime fallback.", __FILE__, __LINE__);
                    }
                }
                else {
                    this->_logger.LogFatal("ANSALPR_OD::LoadEngine", "Failed to load critical models with both TensorRT and ONNX Runtime.", __FILE__, __LINE__);
                }
            }
        }
        _isInitialized = valid;
        WriteEventLog(("ANSALPR_OD::LoadEngine: Step 8 - Engine load complete. Valid = " + std::to_string(valid)).c_str());
        this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 8: Engine load complete. Valid = " + std::to_string(valid), __FILE__, __LINE__);
        return valid;
    }
    catch (std::exception& e) {
        WriteEventLog(("ANSALPR_OD::LoadEngine: C++ exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
        this->_logger.LogFatal("ANSALPR_OD::LoadEngine", std::string("C++ exception: ") + e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Run the complete ALPR pipeline on one frame for one camera:
/// plate detection -> per-plate OCR -> plate-text validation (alprChecker)
/// -> cached colour classification -> cross-track deduplication.
/// The whole pipeline is serialised under the recursive _mutex.
/// Returns the validated plate Objects, or an empty vector on any
/// precondition failure or exception.
std::vector<Object> ANSALPR_OD::RunInferenceSingleFrame(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Invalid model", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Input image size is too small", __FILE__, __LINE__);
        return {};
    }
    if (!this->_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "_lprDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!this->_ocrDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
#ifdef FNS_DEBUG
        cv::Mat draw = input.clone();
#endif
        // Detection region spans the whole frame; frames at or below 50px
        // in either dimension are ignored as too small for detection.
        _detectedArea = cv::Rect(0, 0, frameWidth, frameHeight);
        if (_detectedArea.width <= 50 || _detectedArea.height <= 50) {
            return {};
        }
#ifdef FNS_DEBUG
        cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2);
#endif
        // Run license plate detection
        cv::Mat activeFrame = frame(_detectedArea);
        std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
        if (lprOutput.empty()) {
#ifdef FNS_DEBUG
            cv::resize(draw, draw, cv::Size(1920, 1080));
            cv::imshow("Detected Areas", draw);
            cv::waitKey(1);
#endif
            return {};
        }
        std::vector<Object> output;
        output.reserve(lprOutput.size());
        for (auto& lprObject : lprOutput) {
            const cv::Rect& box = lprObject.box;
#ifdef FNS_DEBUG
            cv::rectangle(draw, box, cv::Scalar(0, 255, 255), 2);
#endif
            // Calculate cropped region (padding = 0); clamp to frame bounds.
            const int x1 = std::max(0, box.x);
            const int y1 = std::max(0, box.y);
            const int width = std::min(frameWidth - x1, box.width);
            const int height = std::min(frameHeight - y1, box.height);
            if (width <= 0 || height <= 0) {
                continue;
            }
            cv::Rect lprPos(x1, y1, width, height);
            // Shallow ROI view into the frame (no copy); valid only while
            // `frame` is alive, which holds for this loop.
            cv::Mat alignedLPR = frame(lprPos);// .clone();
            // OCR inference
            std::string ocrText = DetectLicensePlateString(alignedLPR, cameraId);
            if (ocrText.empty()) {
                continue;
            }
            lprObject.cameraId = cameraId;
            // checkPlate stabilises/validates the raw text; an empty result
            // means the plate is not yet confirmed — drop this detection.
            lprObject.className = alprChecker.checkPlate(cameraId, ocrText, lprObject.box);
            if (lprObject.className.empty()) {
                continue;
            }
            std::string colour = DetectLPColourCached(alignedLPR, cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }
            output.push_back(std::move(lprObject));
        }
#ifdef FNS_DEBUG
        cv::resize(draw, draw, cv::Size(1920, 1080));
        cv::imshow("Detected Areas", draw);
        cv::waitKey(1);
#endif
        // Deduplicate: if two trackIds claim the same plate text, keep the one
        // with the higher accumulated score to prevent plate flickering
        ensureUniquePlateText(output, cameraId);
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
/// OCR a cropped plate image into its raw text string.
/// Steps: grey conversion -> per-character detection (OCR model) ->
/// near-duplicate box suppression -> layout analysis (fit a regression line
/// through the character origins and split into one or two rows by the
/// largest gap in perpendicular distance to that line) -> per-row
/// left-to-right ordering -> concatenation -> country-specific
/// normalisation via AnalyseLicensePlateText.
/// Returns "" when no usable characters are detected or on exception.
std::string ANSALPR_OD::DetectLicensePlateString(const cv::Mat& lprROI, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // convert lprROI to greyscale if it is not already
        if (lprROI.empty()) {
            this->_logger.LogError("ANSALPR_OD::DetectLicensePlateString", "Input image is empty", __FILE__, __LINE__);
            return "";
        }
        cv::Mat grayLprROI;
        if (lprROI.channels() == 3) {
            cv::cvtColor(lprROI, grayLprROI, cv::COLOR_BGR2GRAY);
        }
        else {
            grayLprROI = lprROI;
        }
        std::vector<Object> ocrOutput = _ocrDetector->RunInference(grayLprROI, cameraId);
        std::string ocrText = "";
        if (ocrOutput.empty()) return ocrText;
        // Remove duplicates: two detections whose top-left corners lie within
        // DUPLICATE_DIST_THRESHOLD pixels on both axes count as the same
        // character. O(n^2) scan, but n is a handful of characters at most.
        std::vector<Object> uniqueOutput;
        for (const auto& obj : ocrOutput) {
            bool isDuplicate = false;
            for (const auto& unique : uniqueOutput) {
                if (std::abs(obj.box.x - unique.box.x) < DUPLICATE_DIST_THRESHOLD &&
                    std::abs(obj.box.y - unique.box.y) < DUPLICATE_DIST_THRESHOLD) {
                    isDuplicate = true;
                    break;
                }
            }
            if (!isDuplicate) {
                uniqueOutput.push_back(obj);
            }
        }
        if (uniqueOutput.empty()) return ocrText;
        if (uniqueOutput.size() == 1) return uniqueOutput[0].className;
        // Average character height sets the scale for the row-split test.
        float avgHeight = 0;
        for (const auto& obj : uniqueOutput) {
            avgHeight += obj.box.height;
        }
        avgHeight /= uniqueOutput.size();
        // Least-squares fit Y = mX + b through the character origins; the
        // fitted line tracks plate skew so the row split works when the
        // plate is tilted in the image.
        float sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0;
        int n = uniqueOutput.size();
        for (const auto& obj : uniqueOutput) {
            float x = obj.box.x;
            float y = obj.box.y;
            sumX += x;
            sumY += y;
            sumXY += x * y;
            sumX2 += x * x;
        }
        float denominator = n * sumX2 - sumX * sumX;
        float slope = (std::abs(denominator) > 1e-6f) ? (n * sumXY - sumX * sumY) / denominator : 0.0f;
        float intercept = (n > 0) ? (sumY - slope * sumX) / n : 0.0f;
        // Signed perpendicular distance of each character origin to the
        // fitted line (implicit form a*x + b*y + c = 0). The sign separates
        // characters above the line from those below it.
        float a = slope;
        float b = -1.0f;
        float c = intercept;
        float normFactor = std::sqrt(a * a + b * b);
        std::vector<std::pair<float, size_t>> distances;
        for (size_t i = 0; i < uniqueOutput.size(); ++i) {
            float x = uniqueOutput[i].box.x;
            float y = uniqueOutput[i].box.y;
            float dist = (a * x + b * y + c) / normFactor;
            distances.push_back({ dist, i });
        }
        // Sort by perpendicular distance
        std::sort(distances.begin(), distances.end(),
            [](const std::pair<float, size_t>& a, const std::pair<float, size_t>& b) {
                return a.first < b.first;
            });
        // Find largest gap in perpendicular distances
        float maxGap = 0;
        size_t splitIdx = distances.size() / 2;
        for (size_t i = 1; i < distances.size(); ++i) {
            float gap = distances[i].first - distances[i - 1].first;
            if (gap > maxGap) {
                maxGap = gap;
                splitIdx = i;
            }
        }
        // Check if this is actually a single row
        // If max gap is too small relative to character height, it's a single row
        std::vector<std::vector<Object>> rows;
        if (maxGap < avgHeight * ROW_SPLIT_MIN_GAP_FACTOR) {
            // Single row - all characters on one line
            rows.resize(1);
            rows[0] = uniqueOutput;
        }
        else {
            // Two rows
            rows.resize(2);
            // Split into two rows based on perpendicular distance
            // (splitIdx >= 1 here because the gap loop starts at i = 1,
            // so both rows are guaranteed non-empty).
            for (size_t i = 0; i < distances.size(); ++i) {
                size_t objIdx = distances[i].second;
                if (i < splitIdx) {
                    rows[0].push_back(uniqueOutput[objIdx]);
                }
                else {
                    rows[1].push_back(uniqueOutput[objIdx]);
                }
            }
            // Determine which row is on top (lower average Y = top row)
            float avgY0 = 0, avgY1 = 0;
            for (const auto& obj : rows[0]) {
                avgY0 += obj.box.y;
            }
            for (const auto& obj : rows[1]) {
                avgY1 += obj.box.y;
            }
            avgY0 /= rows[0].size();
            avgY1 /= rows[1].size();
            // Swap if needed (top row should be row 0)
            if (avgY0 > avgY1) {
                std::swap(rows[0], rows[1]);
            }
        }
        // Sort each row by X so characters read left-to-right.
        for (auto& row : rows) {
            std::sort(row.begin(), row.end(),
                [](const Object& a, const Object& b) {
                    return a.box.x < b.box.x;
                });
        }
        // Concatenate rows top-to-bottom, characters left-to-right.
        for (const auto& row : rows) {
            for (const auto& obj : row) {
                ocrText += obj.className;
            }
        }
        grayLprROI.release(); // Release the grayscale image to free memory
        std::string processedOcrText = AnalyseLicensePlateText(ocrText);
        return processedOcrText;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLicensePlateString", e.what(), __FILE__, __LINE__);
        return "";
    }
}
/// Normalise raw OCR output into a candidate plate string.
/// Strips non-alphanumeric characters, upper-cases the remainder, applies
/// country-specific post-processing, then validates against the configured
/// plate formats (when any are set). Returns "" for rejected/invalid text
/// or on exception.
std::string ANSALPR_OD::AnalyseLicensePlateText(const std::string& ocrText) {
    std::string analysedLP = "";
    try {
        // Keep alphanumerics only, upper-cased. <cctype> functions require
        // an argument representable as unsigned char (or EOF); passing a
        // plain char that happens to be negative (e.g. a UTF-8 byte) is
        // undefined behaviour, hence the explicit cast.
        std::string cleanOCRText;
        cleanOCRText.reserve(ocrText.size());
        for (const char raw : ocrText) {
            const unsigned char uc = static_cast<unsigned char>(raw);
            if (std::isalnum(uc)) {
                cleanOCRText += static_cast<char>(std::toupper(uc));
            }
        }
        switch (_country) {
        case Country::VIETNAM:
        case Country::INDONESIA:
        case Country::AUSTRALIA:
            // These countries currently use the cleaned text verbatim.
            analysedLP = cleanOCRText;
            break;
        case Country::USA:
            // Not implemented: analysedLP stays empty, so USA plates are
            // rejected (matches the original behaviour).
            break;
        }
        // Format validation: reject plates that don't match any configured format
        if (!analysedLP.empty() && !_plateFormats.empty() && !MatchesPlateFormat(analysedLP)) {
            return "";
        }
        return analysedLP;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::AnalyseLicensePlateText", e.what(), __FILE__, __LINE__);
        return "";
    }
}
/// Check a cleaned plate string against the configured format templates.
/// A template matches position-by-position: 'd' accepts any digit,
/// 'l' accepts any letter, and any other character must match literally.
/// With no templates configured, every plate is accepted.
bool ANSALPR_OD::MatchesPlateFormat(const std::string& plate) const {
    if (_plateFormats.empty()) {
        return true; // No formats configured - accept all
    }
    // Position predicate shared by every template comparison below.
    auto charMatches = [](char spec, char actual) {
        if (spec == 'd') {
            return std::isdigit(static_cast<unsigned char>(actual)) != 0;
        }
        if (spec == 'l') {
            return std::isalpha(static_cast<unsigned char>(actual)) != 0;
        }
        return spec == actual; // Fixed letter (A-Z) or other literal
    };
    for (const auto& format : _plateFormats) {
        if (format.size() != plate.size()) {
            continue;
        }
        if (std::equal(format.begin(), format.end(), plate.begin(), charMatches)) {
            return true;
        }
    }
    return false;
}
/// Classify the colour of a cropped plate image.
/// Returns the class name of the highest-confidence detection, or an
/// empty string when colour detection is disabled, the model is missing,
/// the input is empty, nothing is detected, or an exception occurs.
std::string ANSALPR_OD::DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId) {
    // Cheap bail-outs that need no lock: feature disabled or model absent.
    if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f || !_lpColourDetector) {
        return {};
    }
    if (lprROI.empty()) {
        this->_logger.LogError("ANSALPR_OD::DetectLPColourDetector", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        std::vector<Object> colourOutputs = _lpColourDetector->RunInference(lprROI, cameraId);
        if (colourOutputs.empty()) {
            return {};
        }
        // Keep the single most confident classification (first wins on ties,
        // matching std::max_element semantics).
        const Object* best = &colourOutputs.front();
        for (const Object& candidate : colourOutputs) {
            if (candidate.confidence > best->confidence) {
                best = &candidate;
            }
        }
        return best->className;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLPColourDetector", e.what(), __FILE__, __LINE__);
        return {};
    }
}
/// Colour classification memoised by plate text.
/// A prior result for the same plate string is returned without touching
/// the model; misses run the classifier and cache any non-empty answer.
/// An empty plateText cannot be used as a key, so it always classifies.
std::string ANSALPR_OD::DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText) {
    if (plateText.empty()) {
        // No cache key available — classify directly.
        return DetectLPColourDetector(lprROI, cameraId);
    }
    // Fast path: a previous frame already classified this plate.
    auto cached = _colourCache.find(plateText);
    if (cached != _colourCache.end()) {
        cached->second.hitCount++;
        return cached->second.colour;
    }
    // Slow path: run the actual classifier.
    std::string colour = DetectLPColourDetector(lprROI, cameraId);
    if (!colour.empty()) {
        // Crude eviction: wipe the whole cache once the cap is reached
        // rather than tracking per-entry recency.
        if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
            _colourCache.clear();
        }
        _colourCache[plateText] = { colour, 0 };
    }
    return colour;
}
/// Convenience overload: run full-frame inference with a placeholder
/// camera id. Returns false for empty or sub-5px frames.
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        return false;
    }
    return Inference(input, lprResult, "CustomCam");
}
/// Full-frame ALPR with optional GPU-assisted cropping and debug timing.
/// Pipeline per frame: LP detection -> per-plate crop (taken from the
/// full-resolution NV12 GPU frame when one is registered, otherwise from
/// the display-resolution frame) -> OCR -> plate-text validation ->
/// cached colour classification -> JSON serialisation into lprResult.
/// Emits a per-stage timing breakdown to the log when _debugFlag is set.
/// Returns true when the pipeline ran to completion (even with zero
/// plates); false on a precondition failure or exception.
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid license", __FILE__, __LINE__);
        return false;
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid model", __FILE__, __LINE__);
        return false;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Model is not initialized", __FILE__, __LINE__);
        return false;
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image is empty", __FILE__, __LINE__);
        return false;
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image size is too small", __FILE__, __LINE__);
        return false;
    }
    if (!this->_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::Inference", "_lpDetector is null", __FILE__, __LINE__);
        return false;
    }
    std::vector<Object> output;
    try {
        // --- Debug timer helper (zero-cost when _debugFlag == false) ---
        using Clock = std::chrono::steady_clock;
        const bool dbg = _debugFlag;
        auto t0 = dbg ? Clock::now() : Clock::time_point{};
        auto tPrev = t0;
        // elapsed() returns the milliseconds since the previous call (or
        // since t0 on the first call) and advances the reference point.
        auto elapsed = [&]() -> double {
            auto now = Clock::now();
            double ms = std::chrono::duration<double, std::milli>(now - tPrev).count();
            tPrev = now;
            return ms;
        };
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        double msColorConvert = dbg ? elapsed() : 0;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        // --- Step 1: LP Detection ---
        cv::Rect roi(0, 0, 0, 0);
        std::vector<Object> lprOutput = this->_lpDetector->RunStaticInference(frame, roi, cameraId);
        double msLPDetect = dbg ? elapsed() : 0;
        int numPlates = (int)lprOutput.size();
        double totalOcrMs = 0, totalValidateMs = 0, totalColourMs = 0, totalCropMs = 0;
        int ocrCount = 0, validCount = 0, colourCount = 0;
        if (!lprOutput.empty()) {
            output.reserve(lprOutput.size());
            constexpr int padding = 10;
            // --- Compute display→full-res scale (once per frame, cheap) ---
            // A registered GPU frame larger than the display frame implies a
            // higher-resolution source; crops are scaled up into it for
            // better OCR detail.
            float scaleX = 1.f, scaleY = 1.f;
            {
                auto* gpuData = tl_currentGpuFrame();
                if (gpuData && gpuData->width > frame.cols && gpuData->height > frame.rows) {
                    scaleX = static_cast<float>(gpuData->width) / frame.cols;
                    scaleY = static_cast<float>(gpuData->height) / frame.rows;
                }
            }
            for (auto& lprObject : lprOutput) {
                const cv::Rect& box = lprObject.box;
                // --- Step 2: Crop LP region ---
                auto tCrop = dbg ? Clock::now() : Clock::time_point{};
                cv::Mat lprImage;
                // Try GPU NV12 crop (NVIDIA decode: NV12 still in GPU VRAM)
                if (scaleX > 1.f) {
                    auto cropResult = _nv12Helper.tryNV12CropToBGR(
                        frame, 0, box, padding, scaleX, scaleY,
                        this->_logger, "LPR");
                    if (cropResult.succeeded)
                        lprImage = cropResult.bgrCrop;
                }
                // Fallback: crop from display-res frame
                if (lprImage.empty()) {
                    const int x1 = std::max(0, box.x - padding);
                    const int y1 = std::max(0, box.y - padding);
                    const int x2 = std::min(frameWidth, box.x + box.width + padding);
                    const int y2 = std::min(frameHeight, box.y + box.height + padding);
                    const int width = x2 - x1;
                    const int height = y2 - y1;
                    if (width <= padding || height <= padding) {
                        continue;
                    }
                    lprImage = frame(cv::Rect(x1, y1, width, height)).clone();
                }
                if (dbg) totalCropMs += std::chrono::duration<double, std::milli>(Clock::now() - tCrop).count();
                // --- Step 3: OCR inference ---
                auto tOcr = dbg ? Clock::now() : Clock::time_point{};
                std::string ocrText = DetectLicensePlateString(lprImage, cameraId);
                if (dbg) { totalOcrMs += std::chrono::duration<double, std::milli>(Clock::now() - tOcr).count(); ocrCount++; }
                if (ocrText.empty()) {
                    continue;
                }
                // --- Step 4: Plate validation ---
                auto tValidate = dbg ? Clock::now() : Clock::time_point{};
                lprObject.cameraId = cameraId;
                lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                // checkPlate stabilises the text across frames; empty means
                // the plate is not yet confirmed for this track.
                lprObject.className = alprChecker.checkPlate(cameraId, ocrText, lprObject.box);
                if (dbg) { totalValidateMs += std::chrono::duration<double, std::milli>(Clock::now() - tValidate).count(); }
                if (lprObject.className.empty()) {
                    continue;
                }
                validCount++;
                // --- Step 5: Colour classification (cached) ---
                auto tColour = dbg ? Clock::now() : Clock::time_point{};
                std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                if (!colour.empty()) {
                    lprObject.extraInfo = "color:" + colour;
                }
                if (dbg) { totalColourMs += std::chrono::duration<double, std::milli>(Clock::now() - tColour).count(); colourCount++; }
                output.push_back(std::move(lprObject));
            }
        }
        // --- Step 6: Serialize results ---
        auto tJson = dbg ? Clock::now() : Clock::time_point{};
        lprResult = VectorDetectionToJsonString(output);
        double msJson = dbg ? std::chrono::duration<double, std::milli>(Clock::now() - tJson).count() : 0;
        // --- Log full pipeline breakdown ---
        if (dbg) {
            double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
            char buf[1024];
            snprintf(buf, sizeof(buf),
                "[DEBUG] %s | ColorCvt=%.1fms LPDetect=%.1fms (plates=%d) "
                "Crop=%.1fms OCR=%.1fms (x%d) Validate=%.1fms Colour=%.1fms (x%d) "
                "JSON=%.1fms | TOTAL=%.1fms Output=%d",
                cameraId.c_str(), msColorConvert, msLPDetect, numPlates,
                totalCropMs, totalOcrMs, ocrCount, totalValidateMs,
                totalColourMs, colourCount, msJson, msTotal, (int)output.size());
            _logger.LogInfo("ANSALPR_OD::Inference", buf, __FILE__, __LINE__);
        }
        return true;
    }
    catch (const std::exception& e) {
        // Serialise whatever was collected before the failure so callers
        // still receive well-formed JSON.
        lprResult = VectorDetectionToJsonString(output);
        this->_logger.LogFatal("ANSALPR_OD::Inference", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Convenience overload: run Bbox-restricted inference with a placeholder
/// camera id. Returns false for empty or sub-5px frames.
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect> & Bbox, std::string& lprResult) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        return false;
    }
    return Inference(input, Bbox, lprResult, "CustomCam");
}
/// ALPR restricted to caller-supplied regions of interest.
/// For each box in Bbox: crop that region, detect plates inside it, map
/// plate coordinates back into full-frame space, OCR (with enhanceForOCR
/// pre-processing), validate the text, and classify colour. With an empty
/// Bbox list the whole frame is scanned instead. Results are deduplicated
/// by plate text and serialised to JSON in lprResult.
/// Returns true when the pipeline ran; false on a precondition failure or
/// exception (lprResult is cleared in those cases).
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,std::string& lprResult, const std::string& cameraId)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid license", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid model", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Model is not initialized", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image is empty", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image size is too small", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::Inference", "_lpDetector is null", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    try {
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        constexpr int padding = 10;
        // --- Compute display→full-res scale (once per frame, cheap) ---
        // A registered GPU frame larger than the display frame implies a
        // higher-resolution source available for sharper crops.
        float scaleX2 = 1.f, scaleY2 = 1.f;
        {
            auto* gpuData = tl_currentGpuFrame();
            if (gpuData && gpuData->width > frame.cols && gpuData->height > frame.rows) {
                scaleX2 = static_cast<float>(gpuData->width) / frame.cols;
                scaleY2 = static_cast<float>(gpuData->height) / frame.rows;
            }
        }
        std::vector<Object> detectedObjects;
        if (!Bbox.empty()) {
            // Process each bounding box region
            detectedObjects.reserve(Bbox.size());
            for (const auto& bbox : Bbox) {
                // Clamp the caller-supplied box to the frame bounds.
                const int x1c = std::max(0, bbox.x);
                const int y1c = std::max(0, bbox.y);
                const int cropWidth = std::min(frameWidth - x1c, bbox.width);
                const int cropHeight = std::min(frameHeight - y1c, bbox.height);
                if (cropWidth < 5 || cropHeight < 5) {
                    continue;
                }
                cv::Rect objectPos(x1c, y1c, cropWidth, cropHeight);
                cv::Mat croppedObject = frame(objectPos);
                std::vector<Object> lprOutput = _lpDetector->RunInference(croppedObject, cameraId);
                for (auto& lprObject : lprOutput) {
                    const cv::Rect& box = lprObject.box;
                    // Calculate padded region within cropped image
                    const int x1 = std::max(0, box.x - padding);
                    const int y1 = std::max(0, box.y - padding);
                    const int x2 = std::min(cropWidth, box.x + box.width + padding);
                    const int y2 = std::min(cropHeight, box.y + box.height + padding);
                    // Adjust to original frame coordinates
                    lprObject.box.x = std::max(0, x1c + x1);
                    lprObject.box.y = std::max(0, y1c + y1);
                    lprObject.box.width = std::min(frameWidth - lprObject.box.x, x2 - x1);
                    lprObject.box.height = std::min(frameHeight - lprObject.box.y, y2 - y1);
                    if (lprObject.box.width <= padding || lprObject.box.height <= padding) {
                        continue;
                    }
                    lprObject.cameraId = cameraId;
                    lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                    // Crop from full-res NV12 on GPU if available, otherwise display-res
                    cv::Mat lprImage;
                    if (scaleX2 > 1.f) {
                        auto cropResult = _nv12Helper.tryNV12CropToBGR(
                            frame, 0, lprObject.box, 0, scaleX2, scaleY2,
                            this->_logger, "LPR");
                        if (cropResult.succeeded)
                            lprImage = cropResult.bgrCrop;
                    }
                    if (lprImage.empty())
                        lprImage = frame(lprObject.box);
                    // Pre-process the crop before OCR.
                    cv::Mat alignedLPR = enhanceForOCR(lprImage);
                    std::string ocrText = DetectLicensePlateString(alignedLPR, cameraId);
                    if (ocrText.empty()) {
                        continue;
                    }
                    // checkPlate stabilises the text across frames; empty
                    // means the plate is not yet confirmed for this track.
                    lprObject.className = alprChecker.checkPlate(cameraId, ocrText, lprObject.box);
                    if (lprObject.className.empty()) {
                        continue;
                    }
                    std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                    if (!colour.empty()) {
                        lprObject.extraInfo = "color:" + colour;
                    }
                    detectedObjects.push_back(std::move(lprObject));
                }
            }
        }
        else {
            // No bounding boxes - run on full frame
            std::vector<Object> lprOutput = _lpDetector->RunInference(frame, cameraId);
            detectedObjects.reserve(lprOutput.size());
            for (auto& lprObject : lprOutput) {
                const cv::Rect& box = lprOutput.empty() ? lprObject.box : lprObject.box;
                // Calculate padded region
                const int x1 = std::max(0, box.x - padding);
                const int y1 = std::max(0, box.y - padding);
                const int width = std::min(frameWidth - x1, box.width + 2 * padding);
                const int height = std::min(frameHeight - y1, box.height + 2 * padding);
                if (width <= padding || height <= padding) {
                    continue;
                }
                lprObject.cameraId = cameraId;
                lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                // Crop from full-res NV12 on GPU if available, otherwise display-res
                cv::Rect lprPos(x1, y1, width, height);
                cv::Mat lprImage;
                if (scaleX2 > 1.f) {
                    auto cropResult = _nv12Helper.tryNV12CropToBGR(
                        frame, 0, lprPos, 0, scaleX2, scaleY2,
                        this->_logger, "LPR");
                    if (cropResult.succeeded)
                        lprImage = cropResult.bgrCrop;
                }
                if (lprImage.empty())
                    lprImage = frame(lprPos);
                cv::Mat alignedLPR = enhanceForOCR(lprImage);
                std::string rawText = DetectLicensePlateString(alignedLPR, cameraId);
                lprObject.className = alprChecker.checkPlate(cameraId, rawText, lprObject.box);
                if (lprObject.className.empty()) {
                    continue;
                }
                std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                if (!colour.empty()) {
                    lprObject.extraInfo = "color:" + colour;
                }
                detectedObjects.push_back(std::move(lprObject));
            }
        }
        // Deduplicate: same plate text should not appear on multiple vehicles
        // Note: in Bbox mode, internal LP trackIds overlap across crops, so
        // dedup uses plate bounding box position (via Object::box) to distinguish.
        // The ensureUniquePlateText method handles this by plate text grouping.
        ensureUniquePlateText(detectedObjects, cameraId);
        lprResult = VectorDetectionToJsonString(detectedObjects);
        return true;
    }
    catch (const std::exception& e) {
        lprResult.clear();
        this->_logger.LogFatal("ANSALPR_OD::Inference", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Locate the first diplomatic marker present in the string.
/// Markers are probed in a fixed priority order ("NN", "CV", "NG", "QT");
/// the position of the FIRST marker found anywhere in the string is
/// returned (note: priority order, not earliest position). Returns -1
/// when no marker is present or on exception.
int ANSALPR_OD::findSubstringIndex(const std::string& str) {
    try {
        static const std::string kMarkers[] = { "NN", "CV", "NG", "QT" };
        for (const auto& marker : kMarkers) {
            const std::size_t at = str.find(marker);
            if (at != std::string::npos) {
                return static_cast<int>(at);
            }
        }
        return -1;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::findSubstringIndex", e.what(), __FILE__, __LINE__);
        return -1;
    }
}
/// Repair lowercase letters that the OCR model commonly mistakes for
/// digits, mapping each onto the digit it most likely represents.
/// Characters without a known confusion are returned unchanged.
char ANSALPR_OD::fixLPDigit(char c) {
    try {
        if (c == 'b') return '6';
        if (c == 'c' || c == 'o') return '0';
        if (c == 'f' || c == 't') return '4';
        if (c == 'j' || c == 'i' || c == 'l') return '1';
        if (c == 's') return '5';
        if (c == 'g' || c == 'q' || c == 'y') return '9';
        return c; // No known confusion — keep as-is.
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::fixLPDigit", e.what(), __FILE__, __LINE__);
        return c;
    }
}
// Accepted plate letters: A, B, C, D, E, F, G, H, K, L, M, N, P, S, T, U,
// V, X, Y, Z. The letters I, J, O, Q, R, W never appear on plates and are
// remapped below to their closest accepted look-alike.
char ANSALPR_OD::convertDigitToLetter(char c) {
    try {
        // Map digits (and disallowed letters) onto the visually closest
        // letter from the accepted set; anything else passes through.
        if (c == '0' || c == 'o' || c == 'O' || c == 'Q') return 'C'; // rounded shapes -> 'C' ('O' is not in the accepted set)
        if (c == '1' || c == 'I' || c == 'i' || c == 'l' || c == 'J') return 'L'; // vertical strokes -> 'L'
        if (c == '2' || c == 'z') return 'Z';
        if (c == '3') return 'E';
        if (c == '4') return 'A';
        if (c == '5' || c == 's') return 'S';
        if (c == '6' || c == 'g') return 'G';
        if (c == '7') return 'T';
        if (c == '8' || c == 'b') return 'B';
        if (c == '9' || c == 'R') return 'P';
        if (c == 'W' || c == 'w') return 'V';
        return c; // No known confusion — keep as-is.
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertDigitToLetter", e.what(), __FILE__, __LINE__);
        return c;
    }
}
/// Map a letter onto the digit it most resembles on a plate.
/// NOTE(review): unlike fixLPDigit/convertDigitToLetter, characters with
/// no known confusion collapse to '0' rather than passing through —
/// confirm this forced-digit fallback is intended by callers
/// (convertStringToDigits relies on a digit always coming back).
char ANSALPR_OD::convertLetterToDigit(char c) {
    try {
        if (c == 'B' || c == 'b') return '8';
        if (c == 'I' || c == 'i' || c == 'J' || c == 'j' || c == 'L' || c == 'l') return '1';
        if (c == 'S' || c == 's') return '5';
        if (c == 'G' || c == 'g') return '6';
        if (c == 'O' || c == 'o' || c == 'Q' || c == 'U' || c == 'u') return '0';
        if (c == 'T') return '7';
        if (c == 'F' || c == 'f' || c == 't') return '4';
        if (c == 'Y' || c == 'y' || c == 'q') return '9';
        return '0'; // Forced-digit fallback for unmapped characters.
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertLetterToDigit", e.what(), __FILE__, __LINE__);
        return c;
    }
}
// Convert every letter in the string to its look-alike digit via
// convertLetterToDigit; characters that are already digits pass through.
// NOTE: <cctype> functions require an unsigned-char-representable value —
// calling std::isdigit on a plain (possibly negative) char is undefined
// behaviour, hence the explicit cast.
std::string ANSALPR_OD::convertStringToDigits(const std::string& input) {
    try {
        std::string result;
        result.reserve(input.size());
        for (char c : input) {
            if (std::isdigit(static_cast<unsigned char>(c))) {
                result += c; // Already a digit — keep as-is.
            }
            else {
                result += convertLetterToDigit(c); // Map look-alike letter.
            }
        }
        return result;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertStringToDigits", e.what(), __FILE__, __LINE__);
        return input;
    }
}
// Convert every digit in the string to its look-alike letter via
// convertDigitToLetter; characters that are already letters pass through.
// NOTE: <cctype> functions require an unsigned-char-representable value —
// calling std::isalpha on a plain (possibly negative) char is undefined
// behaviour, hence the explicit cast.
std::string ANSALPR_OD::convertStringToLetters(const std::string& input) {
    try {
        std::string result;
        result.reserve(input.size());
        for (char c : input) {
            if (std::isalpha(static_cast<unsigned char>(c))) {
                result += c; // Already a letter — keep as-is.
            }
            else {
                result += convertDigitToLetter(c); // Map look-alike digit.
            }
        }
        return result;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertStringToLetters", e.what(), __FILE__, __LINE__);
        return input;
    }
}
/// Find the EARLIEST occurrence of any diplomatic marker ("NN", "NG",
/// "CV", "QT") in the input and return its position, or -1 when none is
/// present or on exception.
/// Fix: the original compared a size_t position against an int best-index
/// (`index < foundIndex`, a signed/unsigned mismatch) and narrowed
/// size_t -> int implicitly; the best position is now tracked as size_t
/// and converted once at the end.
int ANSALPR_OD::searchDiplomacyLP(const std::string& input) {
    try {
        static const std::string kMarkers[] = { "NN", "NG", "CV", "QT" };
        std::size_t bestPos = std::string::npos;
        for (const auto& marker : kMarkers) {
            const std::size_t pos = input.find(marker);
            // Keep the earliest position across all markers.
            if (pos != std::string::npos && pos < bestPos) {
                bestPos = pos;
            }
        }
        return (bestPos == std::string::npos) ? -1 : static_cast<int>(bestPos);
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::searchDiplomacyLP", e.what(), __FILE__, __LINE__);
        return -1;
    }
}
bool ANSALPR_OD::ValidateVNMotobikeLP(const std::string& input) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// Search for the string in the list
auto it = std::find(ValidVNMotobikeList.begin(), ValidVNMotobikeList.end(), input);
// Check if found
if (it != ValidVNMotobikeList.end()) {
return true;
}
else {
return false;
}
}
// Check whether `input` is a valid Vietnamese car plate code by exact
// membership in ValidVNCarList.
// @return true when found; false otherwise or on exception (logged fatal).
bool ANSALPR_OD::ValidateVNCarLP(const std::string& input) {
    // std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // Linear membership scan over the whitelist.
        const auto match = std::find(ValidVNCarList.begin(), ValidVNCarList.end(), input);
        return match != ValidVNCarList.end();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::ValidateVNCarLP", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Crop the plate bbox out of the full frame, estimate its in-plane rotation
// from the dominant centered contour, deskew the crop, and return an
// OCR-enhanced version. Falls back to plain enhancement when no suitable
// contour is found, and to a raw clone on exception.
// @param fullImage  full BGR frame the bbox refers to
// @param bbox       plate bounding box (may partially exceed the frame)
// @return enhanced (and, when possible, deskewed) plate crop
cv::Mat ANSALPR_OD::alignPlateForOCR(const cv::Mat& fullImage, const cv::Rect& bbox) {
try {
// Clip bbox to the frame so the ROI access below is always valid.
const cv::Rect safeBox = bbox & cv::Rect(0, 0, fullImage.cols, fullImage.rows);
if (safeBox.width < 10 || safeBox.height < 10) {
// Too small to deskew reliably — return the raw clipped crop unchanged.
return fullImage(safeBox).clone();
}
cv::Mat roi = fullImage(safeBox);
// Convert to grayscale and create binary image
cv::Mat gray;
cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY);
cv::Mat binary;
// Inverted adaptive threshold: plate characters/border become foreground.
cv::adaptiveThreshold(gray, binary, 255, cv::ADAPTIVE_THRESH_MEAN_C,
cv::THRESH_BINARY_INV, 15, 10);
std::vector<std::vector<cv::Point>> contours;
cv::findContours(binary, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
if (contours.empty()) {
// No structure found — enhance the unrotated ROI instead.
return enhanceAndDebug(roi);
}
// Find best contour closest to center
const cv::Point2f roiCenter(static_cast<float>(roi.cols) / 2.0f,
static_cast<float>(roi.rows) / 2.0f);
// Candidate must span at least half the ROI in both dimensions …
const float minWidth = roi.cols * 0.5f;
const float minHeight = roi.rows * 0.5f;
// … and fill at least 30% of its own min-area rect (rejects thin noise).
constexpr float minAreaRatio = 0.3f;
float minDist = std::numeric_limits<float>::max();
int bestIdx = -1;
for (size_t i = 0; i < contours.size(); ++i) {
cv::RotatedRect rect = cv::minAreaRect(contours[i]);
const float width = rect.size.width;
const float height = rect.size.height;
if (width < minWidth || height < minHeight) {
continue;
}
const float areaRect = width * height;
const float areaContour = static_cast<float>(cv::contourArea(contours[i]));
if (areaContour / areaRect < minAreaRatio) {
continue;
}
const float dist = cv::norm(rect.center - roiCenter);
if (dist < minDist) {
minDist = dist;
bestIdx = static_cast<int>(i);
}
}
if (bestIdx == -1) {
// No candidate passed the filters — enhance the unrotated ROI.
return enhanceAndDebug(roi);
}
// Align using best rotated rect
cv::RotatedRect bestRect = cv::minAreaRect(contours[bestIdx]);
float angle = bestRect.angle;
// Normalize so the rect is landscape and the angle describes skew around 0.
if (bestRect.size.width < bestRect.size.height) {
angle += 90.0f;
std::swap(bestRect.size.width, bestRect.size.height);
}
// Never rotate more than 45° — beyond that the estimate is unreliable.
angle = std::clamp(angle, -45.0f, 45.0f);
// Rotate the image
const cv::Point2f rotationCenter(roi.cols / 2.0f, roi.rows / 2.0f);
cv::Mat rotationMatrix = cv::getRotationMatrix2D(rotationCenter, angle, 1.0);
cv::Mat rotated;
cv::warpAffine(roi, rotated, rotationMatrix, roi.size(), cv::INTER_LINEAR, cv::BORDER_REPLICATE);
// Transform rect center after rotation
// Apply the 2x3 affine matrix manually to the rect center point.
const double* rotData = rotationMatrix.ptr<double>(0);
const cv::Point2f newCenter(
static_cast<float>(rotData[0] * bestRect.center.x + rotData[1] * bestRect.center.y + rotData[2]),
static_cast<float>(rotData[3] * bestRect.center.x + rotData[4] * bestRect.center.y + rotData[5])
);
// Apply small padding and crop
constexpr int padding = 2;
const cv::Size paddedSize(
std::min(rotated.cols, static_cast<int>(bestRect.size.width) + 2 * padding),
std::min(rotated.rows, static_cast<int>(bestRect.size.height) + 2 * padding)
);
cv::Mat rawCropped;
cv::getRectSubPix(rotated, paddedSize, newCenter, rawCropped);
cv::Mat cropped = enhanceForOCR(rawCropped);
#ifdef FNS_DEBUG
showDebugComparison(roi, cropped, contours, bestIdx, bestRect);
#endif
return cropped;
}
catch (const std::exception& e) {
this->_logger.LogError("ANSALPR_OD::alignPlateForOCR",
std::string("Exception: ") + e.what(), __FILE__, __LINE__);
// Degrade gracefully: return the unprocessed clipped crop.
return fullImage(bbox & cv::Rect(0, 0, fullImage.cols, fullImage.rows)).clone();
}
}
#ifdef FNS_DEBUG
// Debug-only visualisation: shows the raw ROI (with the winning contour and
// its min-area rect overlaid) next to the processed/aligned crop in one window.
void ANSALPR_OD::showDebugComparison(const cv::Mat& roi, const cv::Mat& processed,
    const std::vector<std::vector<cv::Point>>& contours, int bestIdx,
    const cv::RotatedRect& bestRect)
{
    try {
        cv::Mat annotated = roi.clone();
        if (bestIdx >= 0) {
            // Winning contour in green, its rotated rect in blue.
            cv::drawContours(annotated, contours, bestIdx, cv::Scalar(0, 255, 0), 1);
            cv::Point2f corners[4];
            bestRect.points(corners);
            for (int k = 0; k < 4; ++k) {
                cv::line(annotated, corners[k], corners[(k + 1) % 4], cv::Scalar(255, 0, 0), 1);
            }
        }
        // Resize both panes to a common size and place them side by side.
        cv::Mat leftPane, rightPane;
        cv::resize(annotated, leftPane, cv::Size(240, 80));
        cv::resize(processed, rightPane, cv::Size(240, 80));
        cv::Mat sideBySide;
        cv::hconcat(leftPane, rightPane, sideBySide);
        cv::putText(sideBySide, "Raw", cv::Point(10, 15),
            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
        cv::putText(sideBySide, "Aligned", cv::Point(250, 15),
            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 1);
        cv::imshow("LPR Cropped + Rotated", sideBySide);
        cv::waitKey(1);
    }
    catch (const std::exception& e) {
        std::cerr << "LPR Debug Error: " << e.what() << std::endl;
    }
}
#endif
// Enhance a raw plate ROI for OCR; in debug builds also display the
// before/after comparison (with no contour overlay).
cv::Mat ANSALPR_OD::enhanceAndDebug(const cv::Mat& roi) {
    cv::Mat result = enhanceForOCR(roi);
#ifdef FNS_DEBUG
    showDebugComparison(roi, result, {}, -1, cv::RotatedRect());
#endif
    return result;
}
// Enhance a plate crop for OCR: upscale, denoise, sharpen (unsharp mask +
// Laplacian), and boost contrast with CLAHE.
// @param plateROIOriginal  plate crop (1-, 3-, or 4-channel)
// @return 3-channel BGR image ready for the OCR detector; empty Mat on
//         empty input (logged as error).
cv::Mat ANSALPR_OD::enhanceForOCR(const cv::Mat& plateROIOriginal) {
    if (plateROIOriginal.empty()) {
        this->_logger.LogError("ANSALPR_OD::enhanceForOCR", "plateROI is empty", __FILE__, __LINE__);
        return cv::Mat();
    }
    // Step 1: Upscale for OCR clarity
    cv::Mat plateROI;
    cv::resize(plateROIOriginal, plateROI, cv::Size(), 2.0, 2.0, cv::INTER_LANCZOS4);
    // Step 2: Grayscale
    cv::Mat gray;
    if (plateROI.channels() == 3) {
        cv::cvtColor(plateROI, gray, cv::COLOR_BGR2GRAY);
    }
    else if (plateROI.channels() == 4) {
        // Robustness fix: a BGRA frame previously fell into the plain
        // assignment below and reached bilateralFilter with 4 channels,
        // which bilateralFilter does not support (throws).
        cv::cvtColor(plateROI, gray, cv::COLOR_BGRA2GRAY);
    }
    else {
        gray = plateROI;
    }
    // Step 3: Gentle denoise to preserve edges
    cv::Mat denoised;
    cv::bilateralFilter(gray, denoised, 7, 50, 50);
    // Step 4: Unsharp masking
    cv::Mat blurred;
    cv::GaussianBlur(denoised, blurred, cv::Size(0, 0), 1.5);
    cv::Mat unsharp;
    cv::addWeighted(denoised, 1.8, blurred, -0.8, 0, unsharp);
    // Step 5: CLAHE contrast enhancement
    // NOTE(review): lazy init of the shared _clahe member is not
    // synchronized — a data race if this is called concurrently from
    // multiple camera threads; confirm single-threaded use or guard it.
    if (!_clahe) {
        _clahe = cv::createCLAHE(4.0, cv::Size(8, 8));
    }
    cv::Mat contrastEnhanced;
    _clahe->apply(unsharp, contrastEnhanced);
    // Step 6: Laplacian edge sharpening
    cv::Mat lap;
    cv::Laplacian(contrastEnhanced, lap, CV_16S, 3);
    cv::Mat lapAbs;
    cv::convertScaleAbs(lap, lapAbs);
    cv::Mat sharpened;
    cv::addWeighted(contrastEnhanced, 1.2, lapAbs, -0.3, 0, sharpened);
    // Step 7: Convert back to BGR for OCR
    cv::Mat ocrInput;
    cv::cvtColor(sharpened, ocrInput, cv::COLOR_GRAY2BGR);
    return ocrInput;
}
// Batch Inference
// Per-frame entry point: detect plates, batch-OCR the crops, stabilize the
// text via ALPRChecker, attach cached colour info, then deduplicate plate
// texts across detections. Returns one Object per accepted plate.
// @param input     full frame (1- or 3-channel); too-small frames are skipped
// @param cameraId  camera identifier used for per-camera state (tracking, cache)
// @return detected plate objects; empty vector on invalid state or error
std::vector<Object> ANSALPR_OD::RunInference(const cv::Mat& input, const std::string& cameraId) {
// Read-only validation without lock (immutable after initialization)
if (!_licenseValid || !valid || !_isInitialized) {
this->_logger.LogWarn("ANSALPR_OD::RunInference",
"Invalid state: license=" + std::to_string(_licenseValid) +
" valid=" + std::to_string(valid) +
" init=" + std::to_string(_isInitialized), __FILE__, __LINE__);
return {};
}
if (input.empty() || input.cols < 5 || input.rows < 5) {
this->_logger.LogWarn("ANSALPR_OD::RunInference",
"Skipped: input too small (" + std::to_string(input.cols) + "x" + std::to_string(input.rows) + ")",
__FILE__, __LINE__);
return {};
}
// Pointer checks (these should be immutable after initialization)
if (!this->_lpDetector || !this->_ocrDetector) {
this->_logger.LogFatal("ANSALPR_OD::RunInference",
"Detector instances are null", __FILE__, __LINE__);
return {};
}
try {
// Use local buffer instead of shared _frameBuffer
cv::Mat frame;
if (input.channels() == 1) {
cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
}
else {
frame = input; // No copy, just reference
}
const int frameWidth = frame.cols;
const int frameHeight = frame.rows;
// Use local variable instead of shared _detectedArea
cv::Rect detectedArea(0, 0, frameWidth, frameHeight);
if (detectedArea.width <= 50 || detectedArea.height <= 50) {
return {};
}
// Run license plate detection (should be thread-safe internally)
cv::Mat activeFrame = frame(detectedArea);
std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
if (lprOutput.empty()) {
return {};
}
// Prepare batch - pre-allocate and use move semantics
// validIndices maps each batch slot back to its index in lprOutput,
// since out-of-bounds / degenerate boxes are dropped here.
std::vector<cv::Mat> alignedLPRBatch;
std::vector<size_t> validIndices;
alignedLPRBatch.reserve(lprOutput.size());
validIndices.reserve(lprOutput.size());
for (size_t i = 0; i < lprOutput.size(); ++i) {
const cv::Rect& box = lprOutput[i].box;
// Calculate cropped region with bounds checking
const int x1 = std::max(0, box.x);
const int y1 = std::max(0, box.y);
const int x2 = std::min(frameWidth, box.x + box.width);
const int y2 = std::min(frameHeight, box.y + box.height);
const int width = x2 - x1;
const int height = y2 - y1;
if (width <= 0 || height <= 0) {
continue;
}
cv::Rect lprPos(x1, y1, width, height);
alignedLPRBatch.emplace_back(frame(lprPos)); // Use emplace_back
validIndices.push_back(i);
}
if (alignedLPRBatch.empty()) {
return {};
}
// Run OCR first, then use cached colour detection.
// Colour caching by plate text eliminates ~95% of LPC inferences
// (plate colour doesn't change frame-to-frame).
std::vector<std::string> ocrTextBatch = DetectLicensePlateStringBatch(alignedLPRBatch, cameraId);
// Build output — colour detection uses cache keyed by stabilized plate text
std::vector<Object> output;
output.reserve(validIndices.size());
for (size_t i = 0; i < validIndices.size(); ++i) {
const size_t origIdx = validIndices[i];
const std::string& ocrText = ocrTextBatch[i];
if (ocrText.empty()) {
continue;
}
Object lprObject = lprOutput[origIdx];
lprObject.cameraId = cameraId;
// Stabilize OCR text through ALPRChecker (spatial tracking + majority voting)
lprObject.className = alprChecker.checkPlate(cameraId, ocrText, lprObject.box);
if (lprObject.className.empty()) {
continue;
}
// Colour detection with cache — only runs inference on first
// occurrence of each plate text, subsequent frames get 0ms cache hit
std::string colour = DetectLPColourCached(alignedLPRBatch[i], cameraId, lprObject.className);
if (!colour.empty()) {
lprObject.extraInfo = "color:" + colour;
}
output.push_back(std::move(lprObject));
}
// Deduplicate: if two trackIds claim the same plate text, keep the one
// with the higher accumulated score to prevent plate flickering
ensureUniquePlateText(output, cameraId);
return output;
}
catch (const cv::Exception& e) {
this->_logger.LogFatal("ANSALPR_OD::RunInference",
std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSALPR_OD::RunInference",
e.what(), __FILE__, __LINE__);
}
catch (...) {
this->_logger.LogFatal("ANSALPR_OD::RunInference",
"Unknown exception occurred", __FILE__, __LINE__);
}
// All exception paths fall through to an empty result.
return {};
}
// Run the plate-colour classifier over a batch of plate crops.
// @param lprROIs   plate crops; empty entries are skipped and yield ""
// @param cameraId  forwarded to the detector for per-camera state
// @return one colour class name per input ROI (same order); "" when no
//         detection exceeded the configured threshold or on any error
std::vector<std::string> ANSALPR_OD::DetectLPColourDetectorBatch(const std::vector<cv::Mat>& lprROIs, const std::string& cameraId) {
// Early validation - no lock needed for immutable config
if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f || !_lpColourDetector) {
return std::vector<std::string>(lprROIs.size(), "");
}
try {
if (lprROIs.empty()) {
return {};
}
const size_t batchSize = lprROIs.size();
// Filter out empty ROIs — just a cheap .empty() check, no need for threads
// validIndices maps each compacted slot back to its original position.
std::vector<cv::Mat> validROIs;
std::vector<size_t> validIndices;
validROIs.reserve(batchSize);
validIndices.reserve(batchSize);
for (size_t i = 0; i < batchSize; ++i) {
if (!lprROIs[i].empty()) {
validROIs.push_back(lprROIs[i]);
validIndices.push_back(i);
}
}
if (validROIs.empty()) {
return std::vector<std::string>(batchSize, "");
}
// Run batch colour detection (GPU-accelerated, already optimized)
std::vector<std::vector<Object>> colourBatchOutputs =
_lpColourDetector->RunInferencesBatch(validROIs, cameraId);
if (colourBatchOutputs.size() != validROIs.size()) {
this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch",
"Colour detector batch size mismatch", __FILE__, __LINE__);
return std::vector<std::string>(batchSize, "");
}
// Prepare results vector (initialize all to empty)
std::vector<std::string> results(batchSize);
// Process results in parallel for large batches
const size_t validSize = colourBatchOutputs.size();
if (validSize > 10) {
const unsigned int hwThreads = std::thread::hardware_concurrency();
const unsigned int numThreads = std::min(hwThreads > 0 ? hwThreads : 4,
static_cast<unsigned int>(validSize));
const size_t chunkSize = (validSize + numThreads - 1) / numThreads;
std::vector<std::future<void>> futures;
futures.reserve(numThreads);
for (unsigned int t = 0; t < numThreads; ++t) {
const size_t startIdx = t * chunkSize;
const size_t endIdx = std::min(startIdx + chunkSize, validSize);
if (startIdx >= validSize) break;
// Each task writes to a disjoint range of `results` slots, so no
// synchronization is needed on the shared vectors.
futures.push_back(std::async(std::launch::async,
[&colourBatchOutputs, &validIndices, &results, startIdx, endIdx, this]() {
const float threshold = _lpColourModelConfig.detectionScoreThreshold;
for (size_t i = startIdx; i < endIdx; ++i) {
if (colourBatchOutputs[i].empty()) {
continue;
}
// Find detection with highest confidence above threshold
float maxConfidence = threshold;
std::string bestClassName;
for (const auto& detection : colourBatchOutputs[i]) {
if (detection.confidence > maxConfidence) {
maxConfidence = detection.confidence;
bestClassName = detection.className;
}
}
if (!bestClassName.empty()) {
results[validIndices[i]] = bestClassName;
}
}
}
));
}
// Wait for all processing to complete
// NOTE(review): a std::async future blocks in its destructor, so the
// timeout branch below only logs — the function still waits for any
// straggler task before returning. Confirm this is the intended
// behavior if the timeout is meant to bound latency.
for (auto& future : futures) {
try {
if (future.wait_for(std::chrono::seconds(ASYNC_TIMEOUT_SECONDS)) == std::future_status::timeout) {
this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch", "Async colour processing timed out (" + std::to_string(ASYNC_TIMEOUT_SECONDS) + "s)", __FILE__, __LINE__);
continue;
}
future.get();
}
catch (const std::exception& e) {
this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch", std::string("Async colour processing failed: ") + e.what(), __FILE__, __LINE__);
}
}
}
else {
// Sequential for small batches
const float threshold = _lpColourModelConfig.detectionScoreThreshold;
for (size_t i = 0; i < validSize; ++i) {
if (colourBatchOutputs[i].empty()) {
continue;
}
// Find detection with highest confidence above threshold
float maxConfidence = threshold;
std::string bestClassName;
for (const auto& detection : colourBatchOutputs[i]) {
if (detection.confidence > maxConfidence) {
maxConfidence = detection.confidence;
bestClassName = detection.className;
}
}
if (!bestClassName.empty()) {
results[validIndices[i]] = bestClassName;
}
}
}
return results;
}
catch (const std::exception& e) {
this->_logger.LogFatal("ANSALPR_OD::DetectLPColourDetectorBatch",
e.what(), __FILE__, __LINE__);
return std::vector<std::string>(lprROIs.size(), "");
}
}
// Run batched OCR over a set of plate crops and return one (possibly empty)
// text string per input ROI, preserving order.
// @param lprROIs   plate crops (1- or 3-channel; empty/other slots yield "")
// @param cameraId  forwarded to the OCR detector
// @return decoded plate strings, one per ROI; all "" on batch mismatch/error
std::vector<std::string> ANSALPR_OD::DetectLicensePlateStringBatch(const std::vector<cv::Mat>& lprROIs, const std::string& cameraId) {
    if (lprROIs.empty()) {
        return {};
    }
    const size_t batchSize = lprROIs.size();
    std::vector<std::string> results(batchSize);
    try {
        // Convert every crop to single-channel grayscale. The crops are tiny,
        // so sequential cvtColor beats any thread-spawn overhead.
        std::vector<cv::Mat> grayBatch(batchSize);
        for (size_t idx = 0; idx < batchSize; ++idx) {
            const cv::Mat& roi = lprROIs[idx];
            if (roi.empty()) {
                continue;
            }
            switch (roi.channels()) {
            case 3:
                cv::cvtColor(roi, grayBatch[idx], cv::COLOR_BGR2GRAY);
                break;
            case 1:
                grayBatch[idx] = roi;
                break;
            default:
                break; // unsupported channel count — leave slot empty
            }
        }
        // Batched OCR inference (GPU-accelerated, already optimized).
        std::vector<std::vector<Object>> ocrBatchOutputs = _ocrDetector->RunInferencesBatch(grayBatch, cameraId);
        if (ocrBatchOutputs.size() != batchSize) {
            this->_logger.LogWarn("ANSALPR_OD::DetectLicensePlateStringBatch",
                "Skipped: OCR batch size mismatch", __FILE__, __LINE__);
            return std::vector<std::string>(batchSize, "");
        }
        // Assemble the text per plate — pure CPU work on a few characters.
        for (size_t idx = 0; idx < batchSize; ++idx) {
            results[idx] = ProcessSingleOCRResult(ocrBatchOutputs[idx]);
        }
        return results;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLicensePlateStringBatch",
            e.what(), __FILE__, __LINE__);
        return std::vector<std::string>(batchSize, "");
    }
}
// New helper function - thread-safe, no shared state
// Turn a set of per-character OCR detections into a plate string:
// 1) drop near-duplicate character boxes via a spatial hash,
// 2) estimate the reading direction with PCA over character centers,
// 3) split into one or two rows using the largest perpendicular gap,
// 4) sort characters along the reading direction and concatenate.
// The concatenated text is post-processed by AnalyseLicensePlateText.
std::string ANSALPR_OD::ProcessSingleOCRResult(const std::vector<Object>& ocrOutput) {
if (ocrOutput.empty()) {
return "";
}
// Remove duplicates using spatial hashing for O(n) instead of O(n^2)
std::unordered_map<int64_t, std::vector<size_t>> spatialHash;
spatialHash.reserve(ocrOutput.size());
std::vector<Object> uniqueOutput;
uniqueOutput.reserve(ocrOutput.size());
for (size_t i = 0; i < ocrOutput.size(); ++i) {
const auto& obj = ocrOutput[i];
// Hash key packs (gridY, gridX) into one int64 (gridX assumed < 100000).
const int gridX = static_cast<int>(obj.box.x / DUPLICATE_GRID_SIZE);
const int gridY = static_cast<int>(obj.box.y / DUPLICATE_GRID_SIZE);
const int64_t hashKey = static_cast<int64_t>(gridY) * 100000LL + gridX;
bool isDuplicate = false;
// Check only nearby cells (current + 8 neighbors)
for (int dy = -1; dy <= 1 && !isDuplicate; ++dy) {
for (int dx = -1; dx <= 1; ++dx) {
const int64_t neighborKey = static_cast<int64_t>(gridY + dy) * 100000LL + (gridX + dx);
auto it = spatialHash.find(neighborKey);
if (it != spatialHash.end()) {
for (size_t idx : it->second) {
const auto& unique = uniqueOutput[idx];
// Two boxes closer than the threshold in both axes are the
// same character detected twice — keep the first one seen.
if (std::abs(obj.box.x - unique.box.x) < DUPLICATE_DIST_THRESHOLD &&
std::abs(obj.box.y - unique.box.y) < DUPLICATE_DIST_THRESHOLD) {
isDuplicate = true;
break;
}
}
if (isDuplicate) break;
}
}
}
if (!isDuplicate) {
spatialHash[hashKey].push_back(uniqueOutput.size());
uniqueOutput.push_back(obj);
}
}
if (uniqueOutput.empty()) {
return "";
}
if (uniqueOutput.size() == 1) {
// Single character — nothing to order.
return AnalyseLicensePlateText(uniqueOutput[0].className);
}
// ---- Character centers and average height ----
float avgHeight = 0.0f;
int n = static_cast<int>(uniqueOutput.size());
std::vector<float> cxs(n), cys(n);
for (int i = 0; i < n; ++i) {
cxs[i] = uniqueOutput[i].box.x + uniqueOutput[i].box.width * 0.5f;
cys[i] = uniqueOutput[i].box.y + uniqueOutput[i].box.height * 0.5f;
avgHeight += uniqueOutput[i].box.height;
}
avgHeight /= static_cast<float>(n);
// ---- Mean center ----
float meanX = 0.0f, meanY = 0.0f;
for (int i = 0; i < n; ++i) {
meanX += cxs[i];
meanY += cys[i];
}
meanX /= n;
meanY /= n;
// PCA to find the plate's reading direction.
// Robust to large tilt angles unlike OLS regression.
float cov_xx = 0.0f, cov_xy = 0.0f, cov_yy = 0.0f;
for (int i = 0; i < n; ++i) {
float dx = cxs[i] - meanX;
float dy = cys[i] - meanY;
cov_xx += dx * dx;
cov_xy += dx * dy;
cov_yy += dy * dy;
}
// Principal-axis angle of the 2x2 covariance matrix.
float theta = 0.5f * std::atan2(2.0f * cov_xy, cov_xx - cov_yy);
float dirX = std::cos(theta);
float dirY = std::sin(theta);
// Force the reading direction to point left-to-right.
if (dirX < 0) { dirX = -dirX; dirY = -dirY; }
float perpX = -dirY;
float perpY = dirX;
// Project centers onto both axes
std::vector<float> projAlong(n), projPerp(n);
for (int i = 0; i < n; ++i) {
float dx = cxs[i] - meanX;
float dy = cys[i] - meanY;
projAlong[i] = dx * dirX + dy * dirY;
projPerp[i] = dx * perpX + dy * perpY;
}
// Row splitting using perpendicular projections
std::vector<std::pair<float, size_t>> perpSorted;
perpSorted.reserve(n);
for (int i = 0; i < n; ++i)
perpSorted.push_back({ projPerp[i], static_cast<size_t>(i) });
std::sort(perpSorted.begin(), perpSorted.end());
// Find largest gap with validation:
// 1. Both groups must have >= 2 chars
// 2. Groups must be vertically separated (avgY check)
float maxGap = 0.0f;
size_t splitIdx = perpSorted.size() / 2;
const size_t minGroupSize = ROW_SPLIT_MIN_GROUP_SIZE;
for (size_t i = 1; i < perpSorted.size(); ++i) {
float gap = perpSorted[i].first - perpSorted[i - 1].first;
if (i < minGroupSize || (perpSorted.size() - i) < minGroupSize)
continue;
if (gap > maxGap) {
// Only accept the split if the two groups differ in average
// center-Y by a fraction of the character height.
float avgY_g1 = 0.0f, avgY_g2 = 0.0f;
for (size_t j = 0; j < i; ++j)
avgY_g1 += cys[perpSorted[j].second];
for (size_t j = i; j < perpSorted.size(); ++j)
avgY_g2 += cys[perpSorted[j].second];
avgY_g1 /= static_cast<float>(i);
avgY_g2 /= static_cast<float>(perpSorted.size() - i);
if (std::abs(avgY_g2 - avgY_g1) > avgHeight * ROW_SPLIT_AVGY_FACTOR) {
maxGap = gap;
splitIdx = i;
}
}
}
if (maxGap < avgHeight * ROW_SPLIT_MIN_GAP_FACTOR) {
// Single row - sort by projection along reading direction
std::vector<std::pair<float, size_t>> allProj;
for (int i = 0; i < n; ++i)
allProj.push_back({ projAlong[i], static_cast<size_t>(i) });
std::sort(allProj.begin(), allProj.end());
std::string ocrText;
ocrText.reserve(n);
for (const auto& p : allProj)
ocrText += uniqueOutput[p.second].className;
return AnalyseLicensePlateText(ocrText);
}
else {
// Two rows
std::vector<std::vector<size_t>> rowIndices(2);
for (size_t i = 0; i < perpSorted.size(); ++i) {
size_t objIdx = perpSorted[i].second;
if (i < splitIdx)
rowIndices[0].push_back(objIdx);
else
rowIndices[1].push_back(objIdx);
}
// Ensure row 0 is top row (lower average center-Y)
float avgY0 = 0.0f, avgY1 = 0.0f;
for (auto idx : rowIndices[0]) avgY0 += cys[idx];
for (auto idx : rowIndices[1]) avgY1 += cys[idx];
avgY0 /= static_cast<float>(rowIndices[0].size());
avgY1 /= static_cast<float>(rowIndices[1].size());
if (avgY0 > avgY1)
std::swap(rowIndices[0], rowIndices[1]);
// Sort each row by projection along reading direction
std::string ocrText;
ocrText.reserve(n);
for (const auto& row : rowIndices) {
std::vector<std::pair<float, size_t>> rowProj;
for (auto idx : row)
rowProj.push_back({ projAlong[idx], idx });
std::sort(rowProj.begin(), rowProj.end());
for (const auto& p : rowProj)
ocrText += uniqueOutput[p.second].className;
}
return AnalyseLicensePlateText(ocrText);
}
}
// Ensure each plate text appears at most once in `results` for this camera.
// Maintains per-camera spatial identities (center + accumulated score) so
// that when two detections claim the same text, the spatially established
// one wins. Entries whose text is cleared by dedup are removed in place.
// @param results   detections for one frame (modified in place)
// @param cameraId  selects the per-camera identity table in _plateIdentities
void ANSALPR_OD::ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId)
{
auto& identities = _plateIdentities[cameraId];
// Option B: Auto-detect mode by counting detections.
// 1 detection → crop/pipeline mode → return instant result, no accumulated scoring
// 2+ detections → full-frame mode → use accumulated scoring for dedup
if (results.size() <= 1) {
// Still prune stale spatial identities from previous full-frame calls
if (!identities.empty()) {
constexpr int MAX_UNSEEN_FRAMES = 30;
for (auto& id : identities) {
id.framesSinceLastSeen++;
}
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
}
return;
}
// --- Full-frame mode: 2+ detections, apply accumulated-score dedup ---
// Helper: compute IoU between two rects
auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
int x1 = std::max(a.x, b.x);
int y1 = std::max(a.y, b.y);
int x2 = std::min(a.x + a.width, b.x + b.width);
int y2 = std::min(a.y + a.height, b.y + b.height);
if (x2 <= x1 || y2 <= y1) return 0.0f;
float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
};
// Helper: find matching spatial identity by bounding box overlap
// Returns a pointer into `identities`; callers must not hold it across a
// push_back (it is only used immediately, before any insertion).
auto findSpatialMatch = [&](const cv::Rect& box, const std::string& plateText) -> SpatialPlateIdentity* {
for (auto& id : identities) {
if (id.plateText == plateText) {
// Reconstruct approximate rect from stored center
// NOTE: uses the CANDIDATE box's width/height, so this is an
// approximation of the identity's historical extent.
cv::Rect storedRect(
static_cast<int>(id.center.x - box.width * 0.5f),
static_cast<int>(id.center.y - box.height * 0.5f),
box.width, box.height);
if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
return &id;
}
}
}
return nullptr;
};
// Step 1: Build map of plateText → candidate indices
std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
for (size_t i = 0; i < results.size(); ++i) {
if (results[i].className.empty()) continue;
plateCandidates[results[i].className].push_back(i);
}
// Step 2: Resolve duplicates using spatial accumulated scores
for (auto& [plateText, indices] : plateCandidates) {
if (indices.size() <= 1) continue;
// Find which candidate has the best accumulated score at its location
size_t winner = indices[0];
float bestScore = 0.0f;
for (size_t idx : indices) {
float score = results[idx].confidence;
auto* match = findSpatialMatch(results[idx].box, plateText);
if (match) {
// A known location boosts the candidate by its history.
score = match->accumulatedScore + results[idx].confidence;
}
if (score > bestScore) {
bestScore = score;
winner = idx;
}
}
// Clear plate text from non-winners
for (size_t idx : indices) {
if (idx != winner) {
results[idx].className.clear();
}
}
}
// Step 3: Update spatial identities — winners accumulate, losers decay
constexpr float DECAY_FACTOR = 0.8f;
constexpr float MIN_SCORE = 0.1f;
constexpr int MAX_UNSEEN_FRAMES = 30;
// Age all existing identities
for (auto& id : identities) {
id.framesSinceLastSeen++;
}
for (auto& r : results) {
if (r.className.empty()) continue;
cv::Point2f center(
r.box.x + r.box.width * 0.5f,
r.box.y + r.box.height * 0.5f);
auto* match = findSpatialMatch(r.box, r.className);
if (match) {
// Same plate at same location — accumulate
match->accumulatedScore += r.confidence;
match->center = center; // update position
match->framesSinceLastSeen = 0;
} else {
// New plate location — add entry
// (push_back may invalidate earlier identity pointers; `match`
// is null on this branch so none is dereferenced afterwards)
identities.push_back({ center, r.className, r.confidence, 0 });
}
}
// Decay unseen identities and remove stale ones
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > 0) {
it->accumulatedScore *= DECAY_FACTOR;
}
if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
// Step 4: Remove entries with cleared plate text
results.erase(
std::remove_if(results.begin(), results.end(),
[](const Object& o) { return o.className.empty(); }),
results.end());
}
};