Files
ANSCORE/modules/ANSLPR/ANSLPR_OD.cpp
2026-04-21 09:26:02 +10:00

3259 lines
115 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ANSLPR_OD.h"
#include "ANSTENSORRTOD.h"
#include "ANSYOLOOD.h"
#include "ANSTENSORRTCL.h"
#include "ANSOPENVINOCL.h"
#include "ANSRTYOLO.h"
#include "ANSONNXYOLO.h"
#include "ANSGpuFrameRegistry.h"
#include <json.hpp>
#include <future>
#include <thread>
#include <chrono>
#include <algorithm>
#include <unordered_map>
// ---------------------------------------------------------------------------
// Check ONNX model opset version by reading the protobuf header directly.
// No dependency on onnx_pb.h / libprotobuf — just reads the raw bytes.
// Returns 0 on failure. TRT 10.x supports up to opset ~17.
// ---------------------------------------------------------------------------
/// Read the default-domain opset version from an ONNX protobuf file.
/// Returns 0 on failure (treat as "unknown — try TRT").
///
/// ONNX protobuf layout (ModelProto):
/// field 8 (opset_import) = repeated OperatorSetIdProto {
/// field 1 (domain) = string (empty for default domain)
/// field 2 (version) = int64
/// }
///
/// We scan the last 4 KB of the file because ONNX protobuf places
/// opset_import (field 8) AFTER the graph (field 7), which can be
/// hundreds of megabytes. The opset_import entry is always a tiny
/// submessage (2-20 bytes) near the end of the file.
/// Falls back to scanning the first 8 KB for older ONNX formats.
static int GetOnnxOpsetVersion(const std::string& onnxPath) {
    std::ifstream f(onnxPath, std::ios::binary | std::ios::ate);
    if (!f.good()) return 0;
    auto fileSize = f.tellg();
    if (fileSize < 16) return 0; // too small to be a real ModelProto
    // Helper lambda: scan a buffer for opset_import submessages.
    // 0x42 == (field 8 << 3) | wire type 2 (length-delimited) == opset_import.
    auto scanForOpset = [](const unsigned char* buf, int bytesRead) -> int {
        int maxDefaultOpset = 0;
        for (int i = 0; i < bytesRead - 2; ++i) {
            if (buf[i] != 0x42) continue;
            // Decode the submessage length varint (at most 3 bytes — anything
            // longer encodes a length far above our 60-byte sanity cap).
            int subLen = 0, lenBytes = 0;
            for (int b = i + 1; b < bytesRead && b < i + 4; ++b) {
                subLen |= (buf[b] & 0x7F) << (7 * lenBytes);
                lenBytes++;
                if ((buf[b] & 0x80) == 0) break;
            }
            // OperatorSetIdProto is tiny; reject implausible lengths (this also
            // filters random 0x42 bytes inside the graph weights).
            if (subLen < 2 || subLen > 60) continue;
            int subStart = i + 1 + lenBytes;
            int subEnd = subStart + subLen;
            if (subEnd > bytesRead) continue;
            bool hasNonEmptyDomain = false;
            int version = 0;
            int pos = subStart;
            while (pos < subEnd) {
                unsigned char tag = buf[pos++];
                int fieldNum = tag >> 3;
                int wireType = tag & 0x07;
                if (fieldNum == 1 && wireType == 2) {
                    // domain (string) — empty string means the default ONNX domain
                    if (pos >= subEnd) break;
                    int strLen = buf[pos++];
                    if (strLen > 0) hasNonEmptyDomain = true;
                    pos += strLen;
                } else if (fieldNum == 2 && wireType == 0) {
                    // version (varint). Accumulate in UNSIGNED arithmetic and cap
                    // the shift: the previous `int << shift` was undefined behavior
                    // once a malformed varint pushed shift to 28+ bits (signed
                    // overflow) or past the type width (shift >= 32).
                    unsigned int uv = 0;
                    int shift = 0;
                    while (pos < subEnd) {
                        unsigned char vb = buf[pos++];
                        if (shift < 32) uv |= static_cast<unsigned int>(vb & 0x7F) << shift;
                        shift += 7;
                        if ((vb & 0x80) == 0) break;
                    }
                    version = static_cast<int>(uv);
                } else { break; }
            }
            if (!hasNonEmptyDomain && version > maxDefaultOpset)
                maxDefaultOpset = version;
        }
        return maxDefaultOpset;
    };
    // Scan TAIL of file first (where opset_import usually lives)
    constexpr int TAIL_SIZE = 4096;
    std::streampos tailOffset = 0;
    if (fileSize > TAIL_SIZE)
        tailOffset = fileSize - static_cast<std::streampos>(TAIL_SIZE);
    f.seekg(tailOffset, std::ios::beg);
    unsigned char buf[8192];
    f.read(reinterpret_cast<char*>(buf), TAIL_SIZE);
    int bytesRead = static_cast<int>(f.gcount());
    int result = scanForOpset(buf, bytesRead);
    if (result > 0) return result;
    // Fallback: scan HEAD of file (older ONNX formats).
    // A short tail read (file < TAIL_SIZE) sets eofbit/failbit; without
    // clear(), the seekg/read below are silent no-ops (sentry fails).
    f.clear();
    f.seekg(0, std::ios::beg);
    f.read(reinterpret_cast<char*>(buf), 8192);
    bytesRead = static_cast<int>(f.gcount());
    return scanForOpset(buf, bytesRead);
}
// Write a message to Windows Event Log (Application log, source "ANSLogger").
// Visible in Event Viewer even when no console is attached (e.g. LabVIEW).
static void WriteEventLog(const char* message, WORD eventType = EVENTLOG_INFORMATION_TYPE) {
static HANDLE hLog = RegisterEventSourceA(NULL, "ANSLogger");
if (hLog) {
const char* msgs[1] = { message };
ReportEventA(hLog, eventType, 0, 0, NULL, 1, 0, msgs, NULL);
}
}
// ---------------------------------------------------------------------------
// SEH wrapper for pre-building a single TRT engine from ONNX.
// MSVC forbids __try in functions that use C++ object unwinding,
// so the inner C++ logic lives in PreBuildOneModel_Impl (with try/catch)
// and the outer SEH lives in PreBuildOneModel_SEH (no C++ objects).
// ---------------------------------------------------------------------------
// Parameter bundle handed to the pre-build helpers below. The pointer members
// reference strings owned by the caller (ANSALPR_OD) and must outlive the
// call; the remaining members are per-model value copies.
struct PreBuildParams {
    const std::string* licenseKey;   // license key used by LoadModelFromFolder
    const std::string* modelFolder;  // extracted model folder path
    std::string modelName;           // ONNX base name, e.g. "lpd"
    std::string className;           // class-label file name, e.g. "lpd.names"
    std::string label;               // human-readable tag used in log messages
    ANSCENTER::ModelConfig config;   // per-model config (modelType forced to RTYOLO by the helpers)
};
static bool PreBuildOneModel_Impl(const PreBuildParams& p) {
try {
auto tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
ANSCENTER::ModelConfig cfg = p.config;
cfg.modelType = ANSCENTER::ModelType::RTYOLO;
std::string tempLabels;
bool configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (!configured) return false;
// Try FP16 first
std::string optimizedFolder;
bool built = tempDetector->OptimizeModel(true /*fp16*/, optimizedFolder);
// FP16 failed — retry with FP32
// Some ONNX models (especially opset 19+) crash TRT during FP16 tactic selection.
if (!built) {
std::cout << "[ANSALPR] Pre-build: FP16 failed for " << p.label
<< ", retrying with FP32..." << std::endl;
// Recreate detector to get a clean Engine<float> with FP32 precision
tempDetector.reset();
tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
tempLabels.clear();
configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (configured) {
built = tempDetector->OptimizeModel(false /*fp32*/, optimizedFolder);
if (built) {
std::cout << "[ANSALPR] Pre-build: " << p.label
<< " FP32 fallback succeeded." << std::endl;
}
}
}
tempDetector.reset(); // free VRAM
return built;
}
catch (...) {
return false;
}
}
// FP32-only build — used as fallback when FP16 SEH-crashes
static bool PreBuildOneModel_FP32Only(const PreBuildParams& p) {
try {
auto tempDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();
tempDetector->SetLoadEngineOnCreation(false);
ANSCENTER::ModelConfig cfg = p.config;
cfg.modelType = ANSCENTER::ModelType::RTYOLO;
std::string tempLabels;
bool configured = tempDetector->LoadModelFromFolder(
*p.licenseKey, cfg, p.modelName, p.className, *p.modelFolder, tempLabels);
if (!configured) return false;
std::string optimizedFolder;
bool built = tempDetector->OptimizeModel(false /*fp32*/, optimizedFolder);
tempDetector.reset();
return built;
}
catch (...) { return false; }
}
// SEH wrapper for the FP32-only build. Contains no C++ objects so MSVC allows
// __try here; any structured exception (e.g. 0xC0000005 inside the TRT parser)
// is captured into *outCode and reported as a plain `false`.
static bool PreBuildOneModel_FP32Only_SEH(const PreBuildParams& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return PreBuildOneModel_FP32Only(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode(); // preserve the crash code for the caller's log
        return false;
    }
}
// Pure SEH wrapper — no C++ objects, no try/catch
// If FP16 SEH-crashes, automatically retries with FP32.
// Pure SEH wrapper — no C++ objects, no try/catch (MSVC forbids mixing
// object unwinding with __try in one function).
// If the FP16 build SEH-crashes, automatically retries once with FP32.
// On success *outCode is 0; otherwise it holds the last crash code seen.
static bool PreBuildOneModel_SEH(const PreBuildParams& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return PreBuildOneModel_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
    }
    // Reaching here means the FP16 path crashed — try the FP32 fallback.
    if (*outCode != 0) {
        std::cout << "[ANSALPR] Pre-build: " << p.label
            << " FP16 SEH crash (0x" << std::hex << *outCode << std::dec
            << "), retrying with FP32..." << std::endl;
        DWORD fp32Code = 0;
        bool fp32Ok = PreBuildOneModel_FP32Only_SEH(p, &fp32Code);
        if (fp32Ok) {
            std::cout << "[ANSALPR] Pre-build: " << p.label
                << " FP32 fallback succeeded." << std::endl;
            *outCode = 0; // clear error — FP32 worked
            return true;
        }
        // FP32 also failed — surface its crash code if it too SEH-crashed,
        // otherwise keep the original FP16 code.
        if (fp32Code != 0) *outCode = fp32Code;
    }
    return false;
}
// ---------------------------------------------------------------------------
// SEH wrapper for loading the LPC colour model (Step 5).
// ---------------------------------------------------------------------------
// Parameter bundle for the LPC (licence-plate colour) loader. All members
// point at state owned by ANSALPR_OD and must outlive the call.
struct LoadLpcParams {
    const std::string* licenseKey;                    // license key for LoadModelFromFolder
    ANSCENTER::ModelConfig* config;                   // mutated: detectionType/modelType are set by the loader
    const std::string* modelFolder;                   // folder containing the lpc model + lpc.names
    std::string* labels;                              // out: label list read from lpc.names
    std::unique_ptr<ANSCENTER::ANSODBase>* detector;  // out: receives the loaded detector on success
};
// Load the LPC colour model on the TensorRT (RTYOLO) backend, configured as a
// classifier with tracking disabled. On success *p.detector owns the instance;
// on any C++ exception the output pointer is cleared and false is returned.
static bool LoadLpcModel_Impl(const LoadLpcParams& p) {
    try {
        p.config->detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        p.config->modelType = ANSCENTER::ModelType::RTYOLO;
        auto classifier = std::make_unique<ANSCENTER::ANSRTYOLO>();
        const bool loaded = classifier->LoadModelFromFolder(
            *p.licenseKey, *p.config, "lpc", "lpc.names", *p.modelFolder, *p.labels);
        if (!loaded) return false;
        classifier->SetTracker(ANSCENTER::TrackerType::BYTETRACK, false);
        *p.detector = std::move(classifier); // upcast ANSRTYOLO -> ANSODBase
        return true;
    }
    catch (...) {
        p.detector->reset();
        return false;
    }
}
// SEH wrapper for the LPC load: keeps C++ objects (and their destructors)
// out of the __try scope. A structured exception is reported via *outCode
// (0 on the clean path) and turned into a `false` return.
static bool LoadLpcModel_SEH(const LoadLpcParams& p, DWORD* outCode) {
    *outCode = 0;
    __try {
        return LoadLpcModel_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
// ---------------------------------------------------------------------------
// Generic SEH wrapper for loading an ANSONNXYOLO model (used by the CPU /
// AMD / Intel fallback path where TensorRT is unavailable).
//
// Why SEH is required here
// ------------------------
// DirectML / OpenVINO / CUDA ORT session creation can crash with an
// asynchronous hardware fault (STATUS_ACCESS_VIOLATION 0xC0000005) when
// the underlying provider driver is in a bad state. C++ `try/catch` does
// NOT catch SEH exceptions on MSVC unless the translator is explicitly
// installed. Without this SEH wrapper the AV propagates up through
// ANSALPR_OD::LoadEngine into LoadANSALPREngineHandle, which logs
// "SEH exception 0xC0000005 caught during engine load" and returns 0 —
// the user sees a generic error with no way to tell which detector
// (LPD / OCR / LPC) failed.
//
// Wrapping each detector creation lets us:
// 1. Isolate the failing detector without taking down the whole load.
// 2. Log a precise error message indicating which model crashed.
// 3. Let the caller zero out the unique_ptr so Destroy() won't run a
// half-initialised engine during cleanup.
// ---------------------------------------------------------------------------
// Parameter bundle for the generic ONNX Runtime loader. Pointer members
// reference state owned by ANSALPR_OD and must outlive the call.
struct LoadOnnxParams {
    const std::string* licenseKey;                    // license key for LoadModelFromFolder
    ANSCENTER::ModelConfig* config;                   // model config (already routed by the caller)
    const std::string* modelFolder;                   // folder containing the ONNX model + names file
    const char* modelName;                            // ONNX base name, e.g. "lpd"
    const char* classFile;                            // class-label file name, e.g. "lpd.names"
    std::string* labels;                              // out: labels read from classFile
    std::unique_ptr<ANSCENTER::ANSODBase>* detector;  // out: receives the loaded detector on success
    bool enableTracker;                               // true => BYTETRACK tracking enabled
    bool disableStabilization;                        // true => turn stabilization off after load
};
// Load one ANSONNXYOLO detector (ONNX Runtime backend) described by p.
// On success *p.detector owns the instance; on a C++ exception the output
// pointer is cleared and false is returned.
static bool LoadOnnxModel_Impl(const LoadOnnxParams& p) {
    try {
        auto ortDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>();
        const bool loaded = ortDetector->LoadModelFromFolder(
            *p.licenseKey, *p.config, p.modelName, p.classFile,
            *p.modelFolder, *p.labels);
        if (!loaded) return false;
        // The tracker type is always BYTETRACK; the flag only toggles it.
        ortDetector->SetTracker(ANSCENTER::TrackerType::BYTETRACK, p.enableTracker);
        if (p.disableStabilization) {
            ortDetector->SetStabilization(false);
        }
        *p.detector = std::move(ortDetector); // upcast ANSONNXYOLO -> ANSODBase
        return true;
    }
    catch (...) {
        p.detector->reset();
        return false;
    }
}
// SEH wrapper for LoadOnnxModel_Impl. Returns false and sets *outCode on a
// structured exception; *outCode stays 0 on the clean path.
static bool LoadOnnxModel_SEH(const LoadOnnxParams& p, DWORD* outCode) {
    // IMPORTANT: a function containing __try/__except must not run C++
    // destructors in the handler body — the CRT's SEH unwind can collide
    // with C++ unwind and call std::terminate. We therefore defer any
    // cleanup (unique_ptr::reset) to the caller, which runs outside the
    // SEH context. This mirrors the LoadLpcModel_SEH pattern above.
    *outCode = 0;
    __try {
        return LoadOnnxModel_Impl(p);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
        *outCode = GetExceptionCode();
        return false;
    }
}
//#define FNS_DEBUG
namespace ANSCENTER {
// ---- Tunable constants for license plate recognition ----
// Row-split constants drive the two-row (motorbike-style) plate detection;
// duplicate constants drive the spatial de-duplication of OCR characters.
constexpr float ROW_SPLIT_MIN_GAP_FACTOR = 0.2f; // maxGap < avgHeight * this => single row
constexpr float ROW_SPLIT_AVGY_FACTOR = 0.4f; // avgY diff must exceed avgHeight * this
constexpr size_t ROW_SPLIT_MIN_GROUP_SIZE = 2; // minimum chars per row
constexpr float DUPLICATE_DIST_THRESHOLD = 5.0f; // pixels: chars closer than this are duplicates
constexpr int DUPLICATE_GRID_SIZE = 10; // spatial hash grid cell size in pixels
constexpr int ASYNC_TIMEOUT_SECONDS = 30; // timeout for async worker threads
// Constructor: marks the engine invalid and pins a safe default engine type.
// Full setup happens later in Initialize() + LoadEngine().
ANSALPR_OD::ANSALPR_OD() {
    valid = false;
    // Default to safest engine (CPU). LoadEngine() overrides this after
    // CheckHardwareInformation() runs. We must not leave engineType
    // uninitialised because vendor predicates (isNvidiaEngine() etc.)
    // gate NV12/CUDA paths and could otherwise activate the CUDA runtime
    // on AMD/Intel hardware.
    engineType = ANSCENTER::EngineType::CPU;
};
// Destructor: tears all detectors down via Destroy(). std::exception failures
// are logged rather than propagated, so normal errors cannot escape the
// destructor and terminate the process.
ANSALPR_OD::~ANSALPR_OD() {
    try {
        Destroy();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::~ANSALPR_OD", e.what(), __FILE__, __LINE__);
    }
};
// Release all three detectors (OCR, LPD, colour) and their GPU resources.
// Safe to call repeatedly and on a never-loaded instance. Returns false only
// if a detector destructor threw a std::exception (logged as fatal).
bool ANSALPR_OD::Destroy() {
    // Serialise teardown against concurrent LoadEngine()/inference callers.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // unique_ptr::reset() is a no-op on an empty pointer, so the former
        // `if (ptr) ptr.reset()` guards were redundant. Teardown order kept:
        // OCR first, then LPD, then colour.
        this->_ocrDetector.reset();
        this->_lpDetector.reset();
        this->_lpColourDetector.reset();
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
};
bool ANSALPR_OD::Initialize(const std::string& licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
_licenseKey = licenseKey;
_licenseValid = false;
_detectorThreshold = detectorThreshold;
_ocrThreshold = ocrThreshold;
_colorThreshold = colourThreshold;
_country = Country::VIETNAM;
CheckLicense();
if (!_licenseValid) {
this->_logger.LogError("ANSALPR_OD::Initialize.", "License is not valid.", __FILE__, __LINE__);
return false;
}
// Extract model folder
// 0. Check if the modelZipFilePath exist?
if (!FileExist(modelZipFilePath)) {
this->_logger.LogFatal("ANSALPR_OD::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
}
else {
this->_logger.LogInfo("ANSALPR_OD::Initialize. Model zip file found: ", modelZipFilePath, __FILE__, __LINE__);
}
// 1. Unzip model zip file to a special location with folder name as model file (and version)
std::string outputFolder;
std::vector<std::string> passwordArray;
if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
passwordArray.push_back("AnsDemoModels20@!");
passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
size_t vectorSize = passwordArray.size();
for (size_t i = 0; i < vectorSize; i++) {
if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
break; // Break the loop when the condition is met.
}
// 2. Check if the outputFolder exist
if (!FolderExist(_modelFolder)) {
this->_logger.LogError("ANSALPR_OD::Initialize. Output model folder is not exist", _modelFolder, __FILE__, __LINE__);
return false; // That means the model file is not exist or the password is not correct
}
// Check country
std::string countryFile = CreateFilePath(_modelFolder, "country.txt");
if (FileExist(countryFile)) {
std::ifstream infile(countryFile);
std::string countryStr;
std::getline(infile, countryStr);
infile.close();
if (countryStr == "0") {
_country = Country::VIETNAM;
_plateFormats.push_back("ddlddddd");
_plateFormats.push_back("ddldddd");
_plateFormats.push_back("ddldddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddMDdddddd");
_plateFormats.push_back("dddddNGdd");
_plateFormats.push_back("dddddQTdd");
_plateFormats.push_back("dddddCVdd");
_plateFormats.push_back("dddddNNdd");
_plateFormats.push_back("lldddd");
}
else if (countryStr == "1")
_country = Country::CHINA;
else if (countryStr == "2")
_country = Country::AUSTRALIA;
else if (countryStr == "3")
_country = Country::USA;
else if (countryStr == "4")
_country = Country::INDONESIA;
else {
_country = Country::VIETNAM;// Default
_plateFormats.push_back("ddlddddd");
_plateFormats.push_back("ddldddd");
_plateFormats.push_back("ddldddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddMDdddddd");
_plateFormats.push_back("dddddNGdd");
_plateFormats.push_back("dddddQTdd");
_plateFormats.push_back("dddddCVdd");
_plateFormats.push_back("dddddNNdd");
_plateFormats.push_back("lldddd");
}
}
else {
_country = Country::VIETNAM;// Default
_plateFormats.push_back("ddlddddd");
_plateFormats.push_back("ddldddd");
_plateFormats.push_back("ddldddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddllddddd");
_plateFormats.push_back("ddMDdddddd");
_plateFormats.push_back("dddddNGdd");
_plateFormats.push_back("dddddQTdd");
_plateFormats.push_back("dddddCVdd");
_plateFormats.push_back("dddddNNdd");
_plateFormats.push_back("lldddd");
}
// 3. Load LD and OCR models
alprChecker.Init(MAX_ALPR_FRAME);
_lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
_lpdmodelConfig.detectionScoreThreshold= _detectorThreshold;
_ocrModelConfig.detectionScoreThreshold= _ocrThreshold;
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSALPR_OD::Initialize", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSALPR_OD::LoadEngine() {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
WriteEventLog("ANSALPR_OD::LoadEngine: Step 1 - Starting engine load");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 1: Starting engine load", __FILE__, __LINE__);
// Check the hardware type
_lpdmodelConfig.detectionScoreThreshold = _detectorThreshold;
_ocrModelConfig.detectionScoreThreshold = _ocrThreshold;
_lpColourModelConfig.detectionScoreThreshold = _colorThreshold;
if (_lpdmodelConfig.detectionScoreThreshold < 0.25)_lpdmodelConfig.detectionScoreThreshold = 0.25;
if (_ocrModelConfig.detectionScoreThreshold < 0.25)_ocrModelConfig.detectionScoreThreshold = 0.25;
_lpdmodelConfig.modelConfThreshold = 0.5;
_lpdmodelConfig.modelMNSThreshold = 0.5;
_ocrModelConfig.modelConfThreshold = 0.5;
_ocrModelConfig.modelMNSThreshold = 0.5;
_lpColourModelConfig.modelConfThreshold = 0.5;
_lpColourModelConfig.modelMNSThreshold = 0.5;
_lpdmodelConfig.inpHeight = 640;
_lpdmodelConfig.inpWidth = 640;
_ocrModelConfig.inpHeight = 640;
_ocrModelConfig.inpWidth = 640;
// Max=4 chosen to fit typical plate counts per frame on 8 GB GPUs.
// Was opt=8/max=32 which sized TRT workspace for 32 concurrent plates
// (~1 GB for this model alone). Cap of 4 is still >= the usual 13
// plates visible per camera frame, amortized throughput unchanged.
_ocrModelConfig.gpuOptBatchSize = 4;
_ocrModelConfig.gpuMaxBatchSize = 4; // desired max; engine builder auto-caps by GPU VRAM
_ocrModelConfig.maxInputHeight = 640;
_ocrModelConfig.maxInputWidth = 640;
_ocrModelConfig.minInputHeight = 640;
_ocrModelConfig.minInputWidth = 640;
_ocrModelConfig.optInputHeight = 640;
_ocrModelConfig.optInputWidth = 640;
_lpColourModelConfig.inpHeight = 224;
_lpColourModelConfig.inpWidth = 224;
// See _ocrModelConfig above — matching batch cap for consistency.
_lpColourModelConfig.gpuOptBatchSize = 4;
_lpColourModelConfig.gpuMaxBatchSize = 4; // desired max; engine builder auto-caps by GPU VRAM
_lpColourModelConfig.maxInputHeight = 224;
_lpColourModelConfig.maxInputWidth = 224;
_lpColourModelConfig.minInputHeight = 224;
_lpColourModelConfig.minInputWidth = 224;
_lpColourModelConfig.optInputHeight = 224;
_lpColourModelConfig.optInputWidth = 224;
std::string lprModel = CreateFilePath(_modelFolder, "lpd.onnx");
std::string lprClassesFile = CreateFilePath(_modelFolder, "lpd.names");
std::string ocrModel = CreateFilePath(_modelFolder, "ocr.onnx");
std::string ocrClassesFile = CreateFilePath(_modelFolder, "ocr.names");
std::string colorModel = CreateFilePath(_modelFolder, "lpc.xml");
std::string colorClassesFile = CreateFilePath(_modelFolder, "lpc.names");
WriteEventLog("ANSALPR_OD::LoadEngine: Step 2 - Checking hardware information");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 2: Checking hardware information", __FILE__, __LINE__);
engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();//
const char* vendorTag =
isNvidiaEngine() ? "NVIDIA_GPU (TensorRT + NV12/CUDA fast path)" :
isAmdEngine() ? "AMD_GPU (DirectML via ONNX Runtime, NV12/CUDA DISABLED)" :
isIntelEngine() ? "OPENVINO_GPU (OpenVINO via ONNX Runtime, NV12/CUDA DISABLED)" :
"CPU (ONNX Runtime, NV12/CUDA DISABLED)";
WriteEventLog(("ANSALPR_OD::LoadEngine: Step 2 complete - Engine type = " +
std::to_string(static_cast<int>(engineType)) + " [" + vendorTag + "]").c_str());
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Step 2 complete: Engine type = " + std::to_string(static_cast<int>(engineType)) +
" [" + vendorTag + "]", __FILE__, __LINE__);
valid = false;
if (_lpDetector) _lpDetector.reset();
if (_ocrDetector) _ocrDetector.reset();
if (_lpColourDetector) _lpColourDetector.reset();
// ================================================================
// PRE-BUILD PASS: Build all TRT engine files before loading any.
//
// ANSALPR loads 3 models sequentially (LPD, OCR, LPC). When cached
// .engine files exist, each one is just deserialized (low VRAM).
// But after a driver/TRT update the caches are invalidated and every
// model must be built from ONNX — a process that requires 2-5x the
// final model size in temporary GPU workspace.
//
// Problem: if model #1 is built AND kept loaded, its VRAM footprint
// reduces the workspace available for building model #2, which can
// cause OOM crashes (LabVIEW error 1097) or very long hangs.
//
// Solution: use ANSRTYOLO::OptimizeModel() to build each .engine
// file in a throwaway instance (buildWithRetry only — no load).
// The instance is destroyed after saving, guaranteeing each build
// gets the full GPU VRAM. The subsequent loading pass finds the
// cached files and only needs the smaller deserialization memory.
// ================================================================
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
if (FileExist(lprModel) && FileExist(ocrModel)) {
// Collect ONNX models that need TRT engine builds.
// Each entry: { modelConfig, onnxModelName, classFileName, label }
struct PreBuildSpec {
ModelConfig config;
std::string modelName; // e.g. "lpd"
std::string className; // e.g. "lpd.names"
std::string label; // for logging
};
std::vector<PreBuildSpec> specs;
{
ModelConfig lpdCfg = _lpdmodelConfig;
lpdCfg.detectionType = DetectionType::DETECTION;
specs.push_back({ lpdCfg, "lpd", "lpd.names", "LPD" });
}
{
ModelConfig ocrCfg = _ocrModelConfig;
ocrCfg.detectionType = DetectionType::DETECTION;
specs.push_back({ ocrCfg, "ocr", "ocr.names", "OCR" });
}
// LPC is optional and may be .xml (OpenVINO), only include if ONNX exists.
// IMPORTANT: TRT 10.x crashes on opset 19+ ONNX models (access violation
// in the ONNX parser that corrupts the CUDA context). Skip TRT pre-build
// for high-opset models — they will fall through to ONNX Runtime at Step 5.
std::string lpcOnnx = CreateFilePath(_modelFolder, "lpc.onnx");
bool lpcSkipTrt = false;
if (FileExist(lpcOnnx) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
int lpcOpset = GetOnnxOpsetVersion(lpcOnnx);
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build: LPC ONNX opset detected = " + std::to_string(lpcOpset), __FILE__, __LINE__);
if (lpcOpset > 17 || lpcOpset == 0) {
// opset > 17: TRT crashes on these models
// opset == 0: detection failed, assume high opset (safer than crashing)
lpcSkipTrt = true;
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build: LPC opset " + std::to_string(lpcOpset) +
" > 17, skipping TRT (will use ONNX Runtime instead)", __FILE__, __LINE__);
WriteEventLog(("ANSALPR_OD::LoadEngine: LPC opset " +
std::to_string(lpcOpset) + " too high for TRT, using ORT").c_str());
} else {
ModelConfig lpcCfg = _lpColourModelConfig;
lpcCfg.detectionType = DetectionType::CLASSIFICATION;
specs.push_back({ lpcCfg, "lpc", "lpc.names", "LPC" });
}
}
// Quick check: do ANY engines need building?
// If all are cached, skip entirely for zero overhead on normal launches.
//
// IMPORTANT: Apply the same GPU-tier batch cap that buildLoadNetwork()
// applies internally. Without this, the probe looks for e.g.
// "ocr.engine...b32" but the actual build saved "ocr.engine...b16"
// (capped by VRAM), causing needless rebuilds every launch.
int gpuMaxBatch = 1;
{
auto gpus = Engine<float>::enumerateDevices();
if (!gpus.empty()) {
const size_t totalMiB = gpus[0].totalMemoryBytes / (1024ULL * 1024);
if (totalMiB >= 15800) gpuMaxBatch = 32; // ~16 GiB+
else if (totalMiB >= 11800) gpuMaxBatch = 16; // ~12 GiB
else if (totalMiB >= 7900) gpuMaxBatch = 8; // ~ 8 GiB (batch=16 OCR exec ctx ~987 MiB, too large for 4 concurrent tasks)
else if (totalMiB >= 3900) gpuMaxBatch = 4; // ~ 4 GiB
else if (totalMiB >= 1900) gpuMaxBatch = 2; // ~ 2 GiB
else gpuMaxBatch = 1;
}
}
bool anyNeedsBuild = false;
for (auto& spec : specs) {
ANSCENTER::Options o;
o.optBatchSize = spec.config.gpuOptBatchSize;
o.maxBatchSize = spec.config.gpuMaxBatchSize;
o.deviceIndex = spec.config.gpuDeviceIndex;
o.maxInputHeight = spec.config.maxInputHeight;
o.minInputHeight = spec.config.minInputHeight;
o.optInputHeight = spec.config.optInputHeight;
o.maxInputWidth = spec.config.maxInputWidth;
o.minInputWidth = spec.config.minInputWidth;
o.optInputWidth = spec.config.optInputWidth;
o.engineFileDir = _modelFolder;
o.precision = ANSCENTER::Precision::FP16;
// Apply GPU-tier batch cap (must match buildLoadNetwork behavior)
if (o.maxBatchSize > gpuMaxBatch) {
o.maxBatchSize = gpuMaxBatch;
o.optBatchSize = std::min(o.optBatchSize, o.maxBatchSize);
}
auto probe = std::make_unique<Engine<float>>(o);
std::string fp16File = probe->serializeEngineOptions(o, CreateFilePath(_modelFolder, spec.modelName + ".onnx"));
o.precision = ANSCENTER::Precision::FP32;
std::string fp32File = probe->serializeEngineOptions(o, CreateFilePath(_modelFolder, spec.modelName + ".onnx"));
probe.reset();
if (!FileExist(fp16File) && !FileExist(fp32File)) {
anyNeedsBuild = true;
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build: " + spec.label + " engine not cached, build required", __FILE__, __LINE__);
}
}
if (anyNeedsBuild) {
WriteEventLog("ANSALPR_OD::LoadEngine: Pre-build pass starting - optimizing engines one-at-a-time");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build pass: optimizing engine files one-at-a-time with full GPU VRAM", __FILE__, __LINE__);
for (auto& spec : specs) {
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build: Optimizing " + spec.label + " engine...", __FILE__, __LINE__);
WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: Optimizing " + spec.label + " engine...").c_str());
PreBuildParams pbp;
pbp.licenseKey = &_licenseKey;
pbp.modelFolder = &_modelFolder;
pbp.modelName = spec.modelName;
pbp.className = spec.className;
pbp.label = spec.label;
pbp.config = spec.config;
DWORD sehCode = 0;
bool built = PreBuildOneModel_SEH(pbp, &sehCode);
if (sehCode != 0) {
char buf[256];
snprintf(buf, sizeof(buf),
"ANSALPR_OD::LoadEngine: Pre-build: %s SEH exception 0x%08X - skipping",
spec.label.c_str(), sehCode);
WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Pre-build: " + spec.label + " SEH crash, skipping", __FILE__, __LINE__);
}
else if (built) {
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build: " + spec.label + " engine built and cached successfully", __FILE__, __LINE__);
WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: " + spec.label + " engine built OK").c_str());
}
else {
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Pre-build: " + spec.label + " engine build failed (will retry in load pass)", __FILE__, __LINE__);
WriteEventLog(("ANSALPR_OD::LoadEngine: Pre-build: " + spec.label + " build FAILED").c_str(),
EVENTLOG_WARNING_TYPE);
}
}
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Pre-build pass complete. Proceeding to load all engines.", __FILE__, __LINE__);
}
}
}
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
if (FileExist(lprModel) && (FileExist(ocrModel)))
{
WriteEventLog("ANSALPR_OD::LoadEngine: Step 3 - Loading LP detector with TensorRT");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 3: Loading LP detector with TensorRT", __FILE__, __LINE__);
_lpdmodelConfig.detectionType = DetectionType::DETECTION;
_lpdmodelConfig.modelType = ModelType::RTYOLO;
_lpDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();// TensorRT
bool lpSuccess = _lpDetector->LoadModelFromFolder(_licenseKey, _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels);
if (!lpSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load LP detector (TensorRT). GPU may not support this model.", __FILE__, __LINE__);
_lpDetector.reset();
}
else {
// Enable tracker on LP detector for stable bounding box tracking,
// but disable stabilization (no ghost plates — ALPRChecker handles text stabilization)
_lpDetector->SetTracker(TrackerType::BYTETRACK, true);
_lpDetector->SetStabilization(false);
}
WriteEventLog("ANSALPR_OD::LoadEngine: Step 4 - Loading OCR detector with TensorRT");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 4: Loading OCR detector with TensorRT", __FILE__, __LINE__);
_ocrModelConfig.detectionType = DetectionType::DETECTION;
_ocrModelConfig.modelType = ModelType::RTYOLO;
_ocrDetector = std::make_unique<ANSCENTER::ANSRTYOLO>();// TensorRT
bool ocrSuccess = _ocrDetector->LoadModelFromFolder(_licenseKey, _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels);
if (!ocrSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine", "Failed to load OCR detector (TensorRT). GPU may not support this model.", __FILE__, __LINE__);
_ocrDetector.reset();
}
else {
_ocrDetector->SetTracker(TrackerType::BYTETRACK, false);
}
// Check if we need to load the color model (optional — SEH-protected)
if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
// Route decision: use ONNX Runtime for high-opset models that crash TRT
int lpcOpsetCheck = GetOnnxOpsetVersion(CreateFilePath(_modelFolder, "lpc.onnx"));
// opset > 17: TRT crashes on these models
// opset == 0: detection failed, assume high opset (safer than crashing)
bool lpcSkipTrt = (lpcOpsetCheck > 17 || lpcOpsetCheck == 0);
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Step 5: LPC opset detected = " + std::to_string(lpcOpsetCheck) +
", skipTrt = " + (lpcSkipTrt ? std::string("true") : std::string("false")), __FILE__, __LINE__);
if (lpcSkipTrt) {
WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - Loading colour classifier with ONNX Runtime (opset > 17)");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Step 5: Loading colour classifier with ONNX Runtime (opset too high for TRT)", __FILE__, __LINE__);
try {
_lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
_lpColourModelConfig.modelType = ModelType::ONNXYOLO;
auto ortDetector = std::make_unique<ANSCENTER::ANSONNXYOLO>();
bool ok = ortDetector->LoadModelFromFolder(
_licenseKey, _lpColourModelConfig, "lpc", "lpc.names", _modelFolder, _lpColourLabels);
if (ok) {
ortDetector->SetTracker(TrackerType::BYTETRACK, false);
_lpColourDetector = std::move(ortDetector);
WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC loaded via ONNX Runtime");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine",
"Step 5: Colour classifier loaded via ONNX Runtime", __FILE__, __LINE__);
} else {
WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC ONNX Runtime load failed", EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Step 5: Colour classifier ONNX Runtime load failed. Colour detection disabled.", __FILE__, __LINE__);
}
}
catch (const std::exception& e) {
WriteEventLog(("ANSALPR_OD::LoadEngine: Step 5 - LPC exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Step 5: Colour classifier exception: " + std::string(e.what()), __FILE__, __LINE__);
_lpColourDetector.reset();
}
catch (...) {
WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - LPC unknown exception", EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Step 5: Colour classifier unknown exception. Colour detection disabled.", __FILE__, __LINE__);
_lpColourDetector.reset();
}
} else {
// Normal TRT path (opset ≤ 17)
WriteEventLog("ANSALPR_OD::LoadEngine: Step 5 - Loading colour classifier with TensorRT");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 5: Loading colour classifier with TensorRT", __FILE__, __LINE__);
LoadLpcParams lpc;
lpc.licenseKey = &_licenseKey;
lpc.config = &_lpColourModelConfig;
lpc.modelFolder = &_modelFolder;
lpc.labels = &_lpColourLabels;
lpc.detector = &_lpColourDetector;
DWORD sehCode = 0;
bool colourOk = LoadLpcModel_SEH(lpc, &sehCode);
if (sehCode != 0) {
char buf[256];
snprintf(buf, sizeof(buf),
"ANSALPR_OD::LoadEngine: Step 5 LPC SEH exception 0x%08X - colour detection disabled", sehCode);
WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Step 5: Colour classifier crashed (SEH). Colour detection disabled.", __FILE__, __LINE__);
_lpColourDetector.reset();
}
else if (!colourOk) {
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Failed to load colour detector (TensorRT). Colour detection disabled.", __FILE__, __LINE__);
}
}
}
// TensorRT failed for both critical models — fall back to ONNX Runtime
if (!lpSuccess || !ocrSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine", "TensorRT engine build failed. Falling back to ONNX Runtime...", __FILE__, __LINE__);
if (_lpDetector) _lpDetector.reset();
if (_ocrDetector) _ocrDetector.reset();
if (_lpColourDetector) _lpColourDetector.reset();
// Fall through to ONNX path below
engineType = ANSCENTER::EngineType::CPU;
}
else {
valid = true;
}
}
}
// ONNX Runtime fallback path (CPU / AMD / Intel — or NVIDIA when
// TensorRT build failed). Each detector is loaded through a
// dedicated SEH wrapper (LoadOnnxModel_SEH) so that an
// AV / STATUS_ACCESS_VIOLATION raised deep inside the ONNX
// Runtime session creator (e.g. from a misbehaving DirectML
// / OpenVINO / CUDA provider driver) does not tear down the
// whole LoadEngine call. The wrapper logs the exact detector
// that failed and zeros out the corresponding unique_ptr.
if (!valid) {
if (FileExist(lprModel) && (FileExist(ocrModel)))
{
bool lpSuccess = false, ocrSuccess = false;
// ── Step 6: LPD ─────────────────────────────────────
WriteEventLog("ANSALPR_OD::LoadEngine: Step 6 - Loading LP detector with ONNX Runtime");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 6: Loading LP detector with ONNX Runtime", __FILE__, __LINE__);
_lpdmodelConfig.detectionType = DetectionType::DETECTION;
_lpdmodelConfig.modelType = ModelType::ONNXYOLO;
std::string _lprClasses;
{
LoadOnnxParams p{};
p.licenseKey = &_licenseKey;
p.config = &_lpdmodelConfig;
p.modelFolder = &_modelFolder;
p.modelName = "lpd";
p.classFile = "lpd.names";
p.labels = &_lprClasses;
p.detector = &_lpDetector;
p.enableTracker = true;
p.disableStabilization = true;
DWORD sehCode = 0;
lpSuccess = LoadOnnxModel_SEH(p, &sehCode);
if (sehCode != 0) {
char buf[256];
snprintf(buf, sizeof(buf),
"ANSALPR_OD::LoadEngine: Step 6 LPD SEH exception 0x%08X — LP detector disabled", sehCode);
WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
this->_logger.LogFatal("ANSALPR_OD::LoadEngine",
"Step 6: LP detector crashed (SEH 0x" + std::to_string(sehCode) + "). LP detector disabled.",
__FILE__, __LINE__);
lpSuccess = false;
// Drop any half-initialised state outside SEH context.
if (_lpDetector) _lpDetector.reset();
}
else if (!lpSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Failed to load LP detector (ONNX Runtime).", __FILE__, __LINE__);
if (_lpDetector) _lpDetector.reset();
}
}
// ── Step 7: OCR ─────────────────────────────────────
WriteEventLog("ANSALPR_OD::LoadEngine: Step 7 - Loading OCR detector with ONNX Runtime");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 7: Loading OCR detector with ONNX Runtime", __FILE__, __LINE__);
_ocrModelConfig.detectionType = DetectionType::DETECTION;
_ocrModelConfig.modelType = ModelType::ONNXYOLO;
{
LoadOnnxParams p{};
p.licenseKey = &_licenseKey;
p.config = &_ocrModelConfig;
p.modelFolder = &_modelFolder;
p.modelName = "ocr";
p.classFile = "ocr.names";
p.labels = &_ocrLabels;
p.detector = &_ocrDetector;
p.enableTracker = false;
p.disableStabilization = false;
DWORD sehCode = 0;
ocrSuccess = LoadOnnxModel_SEH(p, &sehCode);
if (sehCode != 0) {
char buf[256];
snprintf(buf, sizeof(buf),
"ANSALPR_OD::LoadEngine: Step 7 OCR SEH exception 0x%08X — OCR detector disabled", sehCode);
WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
this->_logger.LogFatal("ANSALPR_OD::LoadEngine",
"Step 7: OCR detector crashed (SEH 0x" + std::to_string(sehCode) + "). OCR detector disabled.",
__FILE__, __LINE__);
ocrSuccess = false;
// Drop any half-initialised state outside SEH context.
if (_ocrDetector) _ocrDetector.reset();
}
else if (!ocrSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Failed to load OCR detector (ONNX Runtime).", __FILE__, __LINE__);
if (_ocrDetector) _ocrDetector.reset();
}
}
// ── Step 8: LPC (optional) ──────────────────────────
if (FileExist(colorModel) && (_lpColourModelConfig.detectionScoreThreshold > 0)) {
WriteEventLog("ANSALPR_OD::LoadEngine: Step 8 - Loading colour classifier with ONNX Runtime");
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 8: Loading colour classifier with ONNX Runtime", __FILE__, __LINE__);
_lpColourModelConfig.detectionType = DetectionType::CLASSIFICATION;
_lpColourModelConfig.modelType = ModelType::ONNXYOLO;
{
LoadOnnxParams p{};
p.licenseKey = &_licenseKey;
p.config = &_lpColourModelConfig;
p.modelFolder = &_modelFolder;
p.modelName = "lpc";
p.classFile = "lpc.names";
p.labels = &_lpColourLabels;
p.detector = &_lpColourDetector;
p.enableTracker = false;
p.disableStabilization = false;
DWORD sehCode = 0;
bool colourSuccess = LoadOnnxModel_SEH(p, &sehCode);
if (sehCode != 0) {
char buf[256];
snprintf(buf, sizeof(buf),
"ANSALPR_OD::LoadEngine: Step 8 LPC SEH exception 0x%08X — colour detection disabled", sehCode);
WriteEventLog(buf, EVENTLOG_ERROR_TYPE);
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Step 8: Colour classifier crashed (SEH 0x" + std::to_string(sehCode) + "). Colour detection disabled.",
__FILE__, __LINE__);
// Drop any half-initialised state outside SEH context.
if (_lpColourDetector) _lpColourDetector.reset();
}
else if (!colourSuccess) {
this->_logger.LogError("ANSALPR_OD::LoadEngine",
"Failed to load colour detector (ONNX Runtime). Colour detection disabled.", __FILE__, __LINE__);
if (_lpColourDetector) _lpColourDetector.reset();
}
}
}
if (lpSuccess && ocrSuccess) {
valid = true;
if (engineType == ANSCENTER::EngineType::CPU) {
this->_logger.LogDebug("ANSALPR_OD::LoadEngine", "Successfully loaded models with ONNX Runtime fallback.", __FILE__, __LINE__);
}
}
else {
this->_logger.LogFatal("ANSALPR_OD::LoadEngine", "Failed to load critical models with both TensorRT and ONNX Runtime.", __FILE__, __LINE__);
}
}
}
_isInitialized = valid;
WriteEventLog(("ANSALPR_OD::LoadEngine: Step 9 - Engine load complete. Valid = " + std::to_string(valid)).c_str());
this->_logger.LogInfo("ANSALPR_OD::LoadEngine", "Step 9: Engine load complete. Valid = " + std::to_string(valid), __FILE__, __LINE__);
return valid;
}
catch (std::exception& e) {
WriteEventLog(("ANSALPR_OD::LoadEngine: C++ exception: " + std::string(e.what())).c_str(), EVENTLOG_ERROR_TYPE);
this->_logger.LogFatal("ANSALPR_OD::LoadEngine", std::string("C++ exception: ") + e.what(), __FILE__, __LINE__);
return false;
}
}
/// Decide whether the alprChecker voting / dedup post-processing should run
/// for this frame. Requires both master switches on, then auto-detects
/// full-frame vs pipeline input per camera from resolution consistency.
bool ANSALPR_OD::shouldUseALPRChecker(const cv::Size& imageSize, const std::string& cameraId) {
    // Both switches must be enabled: _enableTrackerVoting (master switch,
    // flipped via SetTrackerVotingEnabled) and _enableALPRChecker. When
    // either is off, alprChecker voting and ensureUniquePlateText dedup
    // are bypassed in all Inference/RunInference paths.
    if (!_enableTrackerVoting || !_enableALPRChecker) {
        return false;
    }
    // Images narrower than MIN_FULLFRAME_WIDTH are always pipeline crops —
    // no auto-detection needed for those.
    if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) {
        return false;
    }
    // Auto-detect mode by exact resolution consistency:
    //   full-frame: identical size every frame (e.g. 3840x2160 always);
    //   pipeline crops: jitter by a few pixels, so exact match keeps failing.
    // NOTE(review): _imageSizeTrackers is read/written without a lock here —
    // assumes the caller-level serialization mentioned elsewhere in this
    // file ("LabVIEW semaphore"); confirm before invoking concurrently.
    auto& state = _imageSizeTrackers[cameraId];
    const bool previousMode = state.detectedFullFrame;
    if (imageSize == state.lastSize) {
        ++state.consistentCount;
        if (state.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
            state.detectedFullFrame = true;
        }
    }
    else {
        // Size changed — restart the consistency streak for this camera.
        state.lastSize = imageSize;
        state.consistentCount = 1;
        state.detectedFullFrame = false;
    }
    // Emit a debug line only when the detected mode flips.
    if (state.detectedFullFrame != previousMode) {
        ANS_DBG("ALPR_Checker", "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
            cameraId.c_str(),
            state.detectedFullFrame ? "FULL-FRAME (Layer2+3 ON)" : "PIPELINE (Layer2+3 OFF)",
            imageSize.width, imageSize.height, state.consistentCount);
    }
    return state.detectedFullFrame;
}
/// Run the full single-frame ALPR pipeline on one image:
/// LP detection -> per-plate OCR -> optional tracker voting -> colour lookup.
/// @param input    BGR or single-channel frame; must be at least 5x5.
/// @param cameraId caller-supplied camera identifier, used for per-camera
///                 tracker/voting state and for logging.
/// @return one Object per successfully read plate (className = plate text,
///         extraInfo = "color:<colour>" when colour detection succeeds);
///         empty vector on validation failure or any exception.
std::vector<Object> ANSALPR_OD::RunInferenceSingleFrame(const cv::Mat& input, const std::string& cameraId) {
    // No coarse _mutex here — sub-components (detectors, alprChecker) have their own locks.
    // LabVIEW semaphore controls concurrency at the caller level.
    // Early validation — each failure is logged and returns an empty result.
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Invalid license", __FILE__, __LINE__);
        return {};
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Invalid model", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::RunInference", "Input image size is too small", __FILE__, __LINE__);
        return {};
    }
    if (!this->_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "_lprDetector is null", __FILE__, __LINE__);
        return {};
    }
    if (!this->_ocrDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        // `frame` aliases either the converted buffer or the caller's input —
        // no copy is made for already-BGR frames.
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
#ifdef FNS_DEBUG
        cv::Mat draw = input.clone();
#endif
        // Use local variable instead of shared _detectedArea for thread safety.
        // The detection region is the whole frame; frames 50px or smaller in
        // either dimension are rejected as too small to contain a plate.
        cv::Rect detectedArea(0, 0, frameWidth, frameHeight);
        if (detectedArea.width <= 50 || detectedArea.height <= 50) {
            return {};
        }
#ifdef FNS_DEBUG
        cv::rectangle(draw, detectedArea, cv::Scalar(0, 0, 255), 2);
#endif
        // Run license plate detection on the active region (currently the
        // full frame; `activeFrame` is a zero-copy ROI view).
        cv::Mat activeFrame = frame(detectedArea);
        fprintf(stderr, "[ALPR] RunInference: calling lpd %dx%d cam=%s\n", activeFrame.cols, activeFrame.rows, cameraId.c_str());
        std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
        fprintf(stderr, "[ALPR] RunInference: lpd done, %zu detections cam=%s\n", lprOutput.size(), cameraId.c_str());
        // Trace every raw detection (trackId, box, confidence) for debugging.
        for (size_t _di = 0; _di < lprOutput.size(); ++_di) {
            ANS_DBG("ALPR_Track", "cam=%s det[%zu] tid=%d box=(%d,%d,%d,%d) conf=%.2f",
                cameraId.c_str(), _di, lprOutput[_di].trackId,
                lprOutput[_di].box.x, lprOutput[_di].box.y,
                lprOutput[_di].box.width, lprOutput[_di].box.height,
                lprOutput[_di].confidence);
        }
        if (lprOutput.empty()) {
#ifdef FNS_DEBUG
            cv::resize(draw, draw, cv::Size(1920, 1080));
            cv::imshow("Detected Areas", draw);
            cv::waitKey(1);
#endif
            return {};
        }
        std::vector<Object> output;
        output.reserve(lprOutput.size());
        for (auto& lprObject : lprOutput) {
            const cv::Rect& box = lprObject.box;
#ifdef FNS_DEBUG
            cv::rectangle(draw, box, cv::Scalar(0, 255, 255), 2);
#endif
            // Calculate cropped region (padding = 0), clamped to the frame.
            const int x1 = std::max(0, box.x);
            const int y1 = std::max(0, box.y);
            const int width = std::min(frameWidth - x1, box.width);
            const int height = std::min(frameHeight - y1, box.height);
            if (width <= 0 || height <= 0) {
                continue;
            }
            cv::Rect lprPos(x1, y1, width, height);
            // Zero-copy ROI view into the frame (clone intentionally disabled).
            cv::Mat alignedLPR = frame(lprPos);// .clone();
            // OCR inference — empty result means the plate was unreadable.
            fprintf(stderr, "[ALPR] RunInference: calling OCR on plate %dx%d cam=%s\n", alignedLPR.cols, alignedLPR.rows, cameraId.c_str());
            std::string ocrText = DetectLicensePlateString(alignedLPR, cameraId);
            fprintf(stderr, "[ALPR] RunInference: OCR done, text='%s' cam=%s\n", ocrText.c_str(), cameraId.c_str());
            if (ocrText.empty()) {
                continue;
            }
            lprObject.cameraId = cameraId;
            // Full-frame mode: route the raw OCR text through the per-track
            // voting checker; pipeline mode: take the OCR text as-is.
            if (shouldUseALPRChecker(cv::Size(frameWidth, frameHeight), cameraId)) {
                lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
            } else {
                lprObject.className = ocrText;
            }
            if (lprObject.className.empty()) {
                continue;
            }
            // Colour classification — cached per plate text; empty result
            // simply leaves extraInfo unset.
            std::string colour = DetectLPColourCached(alignedLPR, cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }
            output.push_back(std::move(lprObject));
        }
#ifdef FNS_DEBUG
        cv::resize(draw, draw, cv::Size(1920, 1080));
        cv::imshow("Detected Areas", draw);
        cv::waitKey(1);
#endif
        // Deduplicate: if two trackIds claim the same plate text, keep the one
        // with the higher accumulated score to prevent plate flickering
        if (shouldUseALPRChecker(cv::Size(frameWidth, frameHeight), cameraId)) {
            ensureUniquePlateText(output, cameraId);
        }
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
/// OCR a cropped licence-plate image and assemble the detected characters
/// into a plate string.
/// Pipeline: greyscale conversion -> per-character OCR detection ->
/// near-duplicate removal -> split characters into one or two rows using a
/// linear-regression line fit -> sort each row left-to-right -> concatenate
/// top row then bottom row -> country-specific post-processing
/// (AnalyseLicensePlateText).
/// @param lprROI   cropped plate region (BGR or greyscale).
/// @param cameraId camera identifier forwarded to the OCR detector.
/// @return processed plate text, or "" when nothing usable was read.
std::string ANSALPR_OD::DetectLicensePlateString(const cv::Mat& lprROI, const std::string& cameraId) {
    // No coarse _mutex — _ocrDetector has its own m_inferenceMutex
    try {
        // convert lprROI to greyscale if it is not already
        if (lprROI.empty()) {
            this->_logger.LogError("ANSALPR_OD::DetectLicensePlateString", "Input image is empty", __FILE__, __LINE__);
            return "";
        }
        cv::Mat grayLprROI;
        if (lprROI.channels() == 3) {
            cv::cvtColor(lprROI, grayLprROI, cv::COLOR_BGR2GRAY);
        }
        else {
            // Non-3-channel input is passed through unchanged (shares data,
            // no copy).
            grayLprROI = lprROI;
        }
        // One detection per character: className is the character, box its
        // position within the plate crop.
        std::vector<Object> ocrOutput = _ocrDetector->RunInference(grayLprROI, cameraId);
        std::string ocrText = "";
        if (ocrOutput.empty()) return ocrText;
        // Remove duplicates: two detections whose top-left corners are both
        // within DUPLICATE_DIST_THRESHOLD pixels of each other are treated as
        // the same character — the first one encountered wins. O(n^2) but n
        // is the character count of one plate, so tiny.
        std::vector<Object> uniqueOutput;
        for (const auto& obj : ocrOutput) {
            bool isDuplicate = false;
            for (const auto& unique : uniqueOutput) {
                if (std::abs(obj.box.x - unique.box.x) < DUPLICATE_DIST_THRESHOLD &&
                    std::abs(obj.box.y - unique.box.y) < DUPLICATE_DIST_THRESHOLD) {
                    isDuplicate = true;
                    break;
                }
            }
            if (!isDuplicate) {
                uniqueOutput.push_back(obj);
            }
        }
        if (uniqueOutput.empty()) return ocrText;
        // A single character needs no row/ordering analysis.
        if (uniqueOutput.size() == 1) return uniqueOutput[0].className;
        // Calculate average character height — used later as the scale for
        // deciding whether a gap is large enough to indicate a second row.
        float avgHeight = 0;
        for (const auto& obj : uniqueOutput) {
            avgHeight += obj.box.height;
        }
        avgHeight /= uniqueOutput.size();
        // Fit a straight line Y = mX + b through the character positions by
        // least squares. For a single-row plate all characters lie near this
        // line; for a two-row plate the rows sit on opposite sides of it.
        float sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0;
        int n = uniqueOutput.size();
        for (const auto& obj : uniqueOutput) {
            float x = obj.box.x;
            float y = obj.box.y;
            sumX += x;
            sumY += y;
            sumXY += x * y;
            sumX2 += x * x;
        }
        // Guard against a degenerate (vertical / single-x) distribution.
        float denominator = n * sumX2 - sumX * sumX;
        float slope = (std::abs(denominator) > 1e-6f) ? (n * sumXY - sumX * sumY) / denominator : 0.0f;
        float intercept = (n > 0) ? (sumY - slope * sumX) / n : 0.0f;
        // Signed perpendicular distance of each character from the fitted
        // line (line in implicit form a*x + b*y + c = 0). The sign is what
        // separates the two rows; magnitude orders points across the line.
        float a = slope;
        float b = -1.0f;
        float c = intercept;
        float normFactor = std::sqrt(a * a + b * b);
        std::vector<std::pair<float, size_t>> distances;
        for (size_t i = 0; i < uniqueOutput.size(); ++i) {
            float x = uniqueOutput[i].box.x;
            float y = uniqueOutput[i].box.y;
            float dist = (a * x + b * y + c) / normFactor;
            distances.push_back({ dist, i });
        }
        // Sort by perpendicular distance so a row boundary shows up as one
        // large gap between consecutive entries.
        std::sort(distances.begin(), distances.end(),
            [](const std::pair<float, size_t>& a, const std::pair<float, size_t>& b) {
                return a.first < b.first;
            });
        // Find the largest gap in perpendicular distances — candidate split
        // point between top and bottom row.
        float maxGap = 0;
        size_t splitIdx = distances.size() / 2;
        for (size_t i = 1; i < distances.size(); ++i) {
            float gap = distances[i].first - distances[i - 1].first;
            if (gap > maxGap) {
                maxGap = gap;
                splitIdx = i;
            }
        }
        // Check if this is actually a single row:
        // if the max gap is small relative to character height, all
        // characters lie on one line.
        std::vector<std::vector<Object>> rows;
        if (maxGap < avgHeight * ROW_SPLIT_MIN_GAP_FACTOR) {
            // Single row - all characters on one line
            rows.resize(1);
            rows[0] = uniqueOutput;
        }
        else {
            // Two rows
            rows.resize(2);
            // Split into two rows based on perpendicular distance: entries
            // before splitIdx on one side of the line, the rest on the other.
            for (size_t i = 0; i < distances.size(); ++i) {
                size_t objIdx = distances[i].second;
                if (i < splitIdx) {
                    rows[0].push_back(uniqueOutput[objIdx]);
                }
                else {
                    rows[1].push_back(uniqueOutput[objIdx]);
                }
            }
            // Determine which row is on top (lower average Y = top row)
            float avgY0 = 0, avgY1 = 0;
            for (const auto& obj : rows[0]) {
                avgY0 += obj.box.y;
            }
            for (const auto& obj : rows[1]) {
                avgY1 += obj.box.y;
            }
            // Both rows are non-empty here: splitIdx is only moved by a
            // strictly positive gap, so it lies in [1, size-1].
            avgY0 /= rows[0].size();
            avgY1 /= rows[1].size();
            // Swap if needed (top row should be row 0)
            if (avgY0 > avgY1) {
                std::swap(rows[0], rows[1]);
            }
        }
        // Sort each row by X so characters read left-to-right.
        for (auto& row : rows) {
            std::sort(row.begin(), row.end(),
                [](const Object& a, const Object& b) {
                    return a.box.x < b.box.x;
                });
        }
        // Concatenate: top row first, then bottom row.
        for (const auto& row : rows) {
            for (const auto& obj : row) {
                ocrText += obj.className;
            }
        }
        grayLprROI.release(); // Release the grayscale image to free memory
        // Country-specific cleanup + plate-format validation.
        std::string processedOcrText = AnalyseLicensePlateText(ocrText);
        return processedOcrText;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLicensePlateString", e.what(), __FILE__, __LINE__);
        return "";
    }
}
/// Normalise raw OCR output into a canonical plate string: strip
/// non-alphanumeric characters, upper-case, apply per-country rules, then
/// validate against the configured plate formats.
/// @param ocrText raw concatenated OCR characters.
/// @return cleaned plate text, or "" when the text is rejected (unsupported
///         country, no alphanumerics, or no configured format matches).
std::string ANSALPR_OD::AnalyseLicensePlateText(const std::string& ocrText) {
    std::string analysedLP = "";
    try {
        // Keep only alphanumeric characters, upper-cased in the same pass.
        // Cast to unsigned char first: passing a plain char with a value
        // >= 0x80 (negative on signed-char platforms) to std::isalnum /
        // std::toupper is undefined behaviour. MatchesPlateFormat below
        // already follows this convention.
        std::string cleanOCRText;
        cleanOCRText.reserve(ocrText.size());
        for (const char ch : ocrText) {
            const unsigned char uc = static_cast<unsigned char>(ch);
            if (std::isalnum(uc)) {
                cleanOCRText += static_cast<char>(std::toupper(uc));
            }
        }
        switch (_country) {
        case Country::VIETNAM:
        case Country::INDONESIA:
        case Country::AUSTRALIA:
            // No country-specific rewriting yet — use the cleaned text as-is.
            analysedLP = cleanOCRText;
            break;
        case Country::USA:
            // Not implemented: analysedLP stays empty, so USA plates are
            // rejected below.
            break;
        }
        // Format validation: reject plates that don't match any configured format
        if (!analysedLP.empty() && !_plateFormats.empty() && !MatchesPlateFormat(analysedLP)) {
            return "";
        }
        return analysedLP;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::AnalyseLicensePlateText", e.what(), __FILE__, __LINE__);
        return "";
    }
}
/// Check a cleaned plate string against the configured format patterns.
/// Pattern alphabet: 'd' matches any digit, 'l' matches any letter, every
/// other character (e.g. a fixed 'A'..'Z' prefix) must match literally.
/// @return true when no formats are configured (accept everything) or the
///         plate matches at least one format of identical length.
bool ANSALPR_OD::MatchesPlateFormat(const std::string& plate) const {
    // No formats configured - accept all.
    if (_plateFormats.empty()) {
        return true;
    }
    // Single-character comparison for one pattern position.
    const auto charMatches = [](char patternCh, char plateCh) {
        const unsigned char uc = static_cast<unsigned char>(plateCh);
        if (patternCh == 'd') {
            return std::isdigit(uc) != 0;
        }
        if (patternCh == 'l') {
            return std::isalpha(uc) != 0;
        }
        return plateCh == patternCh; // Fixed letter (A-Z) or other literal
    };
    // The plate is valid if any configured format of the same length
    // matches position-by-position.
    return std::any_of(_plateFormats.begin(), _plateFormats.end(),
        [&](const auto& format) {
            if (format.size() != plate.size()) {
                return false;
            }
            for (size_t i = 0; i < format.size(); ++i) {
                if (!charMatches(format[i], plate[i])) {
                    return false;
                }
            }
            return true;
        });
}
/// Classify the colour of a cropped plate region with the optional colour
/// detector.
/// @param lprROI   cropped plate image.
/// @param cameraId camera identifier forwarded to the detector.
/// @return class name of the highest-confidence colour detection, or ""
///         when colour detection is disabled, the input is empty, nothing
///         was detected, or the detector threw.
std::string ANSALPR_OD::DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId) {
    // Colour detection is optional: disabled via a non-positive score
    // threshold, or when no colour detector was loaded. No lock needed
    // for these checks.
    if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f || !_lpColourDetector) {
        return {};
    }
    if (lprROI.empty()) {
        this->_logger.LogError("ANSALPR_OD::DetectLPColourDetector", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    // No coarse _mutex — _lpColourDetector has its own m_inferenceMutex
    try {
        std::vector<Object> detections = _lpColourDetector->RunInference(lprROI, cameraId);
        // Track the highest-confidence detection; the first of any ties wins.
        const Object* best = nullptr;
        for (const auto& det : detections) {
            if (best == nullptr || best->confidence < det.confidence) {
                best = &det;
            }
        }
        if (best == nullptr) {
            return {};
        }
        return best->className;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLPColourDetector", e.what(), __FILE__, __LINE__);
        return {};
    }
}
/// Colour classification with a plate-text-keyed cache: the same plate text
/// is only classified once, later calls are answered from the cache.
/// @param lprROI    cropped plate image (only used on a cache miss).
/// @param cameraId  camera identifier forwarded to the detector.
/// @param plateText cache key; empty text bypasses the cache entirely.
/// @return colour name, or "" when classification failed.
std::string ANSALPR_OD::DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText) {
    // No cache key available — fall straight through to full inference.
    if (plateText.empty()) {
        return DetectLPColourDetector(lprROI, cameraId);
    }
    // Fast path: look the plate up under the fine-grained cache lock
    // (no GPU work happens while it is held).
    {
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        auto hit = _colourCache.find(plateText);
        if (hit != _colourCache.end()) {
            hit->second.hitCount++;
            return hit->second.colour; // Cache hit — 0ms
        }
    }
    // Slow path: run the classifier with no lock held during inference.
    std::string colour = DetectLPColourDetector(lprROI, cameraId);
    // Only successful classifications are cached.
    if (!colour.empty()) {
        std::lock_guard<std::mutex> guard(_colourCacheMutex);
        // Crude eviction: wipe the whole cache once the cap is reached.
        if (_colourCache.size() >= COLOUR_CACHE_MAX_SIZE) {
            _colourCache.clear();
        }
        _colourCache[plateText] = { colour, 0 };
    }
    return colour;
}
/// Convenience overload: run the camera-aware inference path with the
/// default camera id "CustomCam".
/// No coarse _mutex — the delegated overload is also lock-free.
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult) {
    // Reject empty or degenerate (smaller than 5x5) frames up front.
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        return false;
    }
    return Inference(input, lprResult, "CustomCam");
}
/// Full-frame inference entry point: LP detection (static, ROI-free) ->
/// per-plate crop (GPU NV12 fast path on NVIDIA) -> OCR -> optional voting
/// -> cached colour classification -> JSON serialization into lprResult.
/// @param input     BGR or single-channel frame; must be at least 5x5.
/// @param lprResult receives the JSON-serialized detections (also written
///                  on exception, with whatever was accumulated).
/// @param cameraId  per-camera identifier for tracking/voting state.
/// @return true when the pipeline ran to completion (even with zero
///         plates); false on validation failure or exception.
bool ANSALPR_OD::Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) {
    // No coarse _mutex — sub-components have their own fine-grained locks.
    // LabVIEW semaphore controls concurrency at the caller level.
    // Early validation — each failure is logged and returns false.
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid license", __FILE__, __LINE__);
        return false;
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid model", __FILE__, __LINE__);
        return false;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Model is not initialized", __FILE__, __LINE__);
        return false;
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image is empty", __FILE__, __LINE__);
        return false;
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image size is too small", __FILE__, __LINE__);
        return false;
    }
    if (!this->_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::Inference", "_lpDetector is null", __FILE__, __LINE__);
        return false;
    }
    std::vector<Object> output;
    try {
        // --- Debug timer helper (zero-cost when _debugFlag == false) ---
        // elapsed() returns the ms since the previous call and resets the
        // reference point, so each pipeline stage is timed independently.
        using Clock = std::chrono::steady_clock;
        const bool dbg = _debugFlag;
        auto t0 = dbg ? Clock::now() : Clock::time_point{};
        auto tPrev = t0;
        auto elapsed = [&]() -> double {
            auto now = Clock::now();
            double ms = std::chrono::duration<double, std::milli>(now - tPrev).count();
            tPrev = now;
            return ms;
        };
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        double msColorConvert = dbg ? elapsed() : 0;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        // --- Step 1: LP Detection ---
        // Empty roi means "whole frame" for RunStaticInference.
        cv::Rect roi(0, 0, 0, 0);
        std::vector<Object> lprOutput = this->_lpDetector->RunStaticInference(frame, roi, cameraId);
        double msLPDetect = dbg ? elapsed() : 0;
        int numPlates = (int)lprOutput.size();
        // Per-stage timing accumulators for the debug breakdown below.
        double totalOcrMs = 0, totalValidateMs = 0, totalColourMs = 0, totalCropMs = 0;
        int ocrCount = 0, validCount = 0, colourCount = 0;
        if (!lprOutput.empty()) {
            output.reserve(lprOutput.size());
            constexpr int padding = 10;
            // --- Compute display→full-res scale (once per frame, cheap) ---
            // NV12 GPU fast path is NVIDIA-only — cv::cuda::Stream/GpuMat
            // touch the CUDA runtime even when the helper would early-return,
            // which destabilises AMD/Intel hardware. Gate strictly on NVIDIA.
            float scaleX = 1.f, scaleY = 1.f;
            if (isNvidiaEngine()) {
                auto* gpuData = tl_currentGpuFrame();
                if (gpuData && gpuData->width > frame.cols && gpuData->height > frame.rows) {
                    scaleX = static_cast<float>(gpuData->width) / frame.cols;
                    scaleY = static_cast<float>(gpuData->height) / frame.rows;
                }
            }
            for (auto& lprObject : lprOutput) {
                const cv::Rect& box = lprObject.box;
                // --- Step 2: Crop LP region ---
                auto tCrop = dbg ? Clock::now() : Clock::time_point{};
                cv::Mat lprImage;
                // Try GPU NV12 crop (NVIDIA decode: NV12 still in GPU VRAM).
                // Skipped on AMD/Intel/CPU — see isNvidiaEngine() guard above.
                if (isNvidiaEngine() && scaleX > 1.f) {
                    auto cropResult = _nv12Helper.tryNV12CropToBGR(
                        frame, 0, box, padding, scaleX, scaleY,
                        this->_logger, "LPR");
                    if (cropResult.succeeded)
                        lprImage = cropResult.bgrCrop;
                }
                // Fallback: crop from display-res frame (padded, clamped;
                // deep copy so the crop survives frame reuse).
                if (lprImage.empty()) {
                    const int x1 = std::max(0, box.x - padding);
                    const int y1 = std::max(0, box.y - padding);
                    const int x2 = std::min(frameWidth, box.x + box.width + padding);
                    const int y2 = std::min(frameHeight, box.y + box.height + padding);
                    const int width = x2 - x1;
                    const int height = y2 - y1;
                    if (width <= padding || height <= padding) {
                        continue;
                    }
                    lprImage = frame(cv::Rect(x1, y1, width, height)).clone();
                }
                if (dbg) totalCropMs += std::chrono::duration<double, std::milli>(Clock::now() - tCrop).count();
                // --- Step 3: OCR inference ---
                auto tOcr = dbg ? Clock::now() : Clock::time_point{};
                std::string ocrText = DetectLicensePlateString(lprImage, cameraId);
                if (dbg) { totalOcrMs += std::chrono::duration<double, std::milli>(Clock::now() - tOcr).count(); ocrCount++; }
                if (ocrText.empty()) {
                    continue;
                }
                // --- Step 4: Plate validation ---
                auto tValidate = dbg ? Clock::now() : Clock::time_point{};
                lprObject.cameraId = cameraId;
                lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                // Full-frame mode routes OCR text through the per-track
                // voting checker; pipeline mode takes it as-is.
                if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
                    lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
                } else {
                    lprObject.className = ocrText;
                }
                if (dbg) { totalValidateMs += std::chrono::duration<double, std::milli>(Clock::now() - tValidate).count(); }
                if (lprObject.className.empty()) {
                    continue;
                }
                validCount++;
                // --- Step 5: Colour classification (cached) ---
                auto tColour = dbg ? Clock::now() : Clock::time_point{};
                std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                if (!colour.empty()) {
                    lprObject.extraInfo = "color:" + colour;
                }
                if (dbg) { totalColourMs += std::chrono::duration<double, std::milli>(Clock::now() - tColour).count(); colourCount++; }
                output.push_back(std::move(lprObject));
            }
        }
        // --- Step 6: Serialize results ---
        auto tJson = dbg ? Clock::now() : Clock::time_point{};
        lprResult = VectorDetectionToJsonString(output);
        double msJson = dbg ? std::chrono::duration<double, std::milli>(Clock::now() - tJson).count() : 0;
        // --- Log full pipeline breakdown ---
        if (dbg) {
            double msTotal = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
            char buf[1024];
            snprintf(buf, sizeof(buf),
                "[DEBUG] %s | ColorCvt=%.1fms LPDetect=%.1fms (plates=%d) "
                "Crop=%.1fms OCR=%.1fms (x%d) Validate=%.1fms Colour=%.1fms (x%d) "
                "JSON=%.1fms | TOTAL=%.1fms Output=%d",
                cameraId.c_str(), msColorConvert, msLPDetect, numPlates,
                totalCropMs, totalOcrMs, ocrCount, totalValidateMs,
                totalColourMs, colourCount, msJson, msTotal, (int)output.size());
            _logger.LogInfo("ANSALPR_OD::Inference", buf, __FILE__, __LINE__);
        }
        return true;
    }
    catch (const std::exception& e) {
        // Serialize whatever was accumulated before the exception so the
        // caller still receives partial results.
        lprResult = VectorDetectionToJsonString(output);
        this->_logger.LogFatal("ANSALPR_OD::Inference", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Convenience overload: run the camera-aware Bbox inference path with the
/// default camera id "CustomCam".
/// No coarse _mutex — the delegated overload is also lock-free.
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult) {
    // Reject empty or degenerate (smaller than 5x5) frames up front.
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        return false;
    }
    return Inference(input, Bbox, lprResult, "CustomCam");
}
/// Full ALPR pipeline for one frame: LP detection (optionally restricted to
/// caller-supplied vehicle boxes in frame coordinates), OCR, plate-text
/// stabilization via ALPRChecker, colour lookup, then JSON serialization of
/// the detections into lprResult.
/// @param input      BGR or grayscale frame (grayscale converted locally).
/// @param Bbox       Vehicle ROIs; empty means "run on the whole frame".
/// @param lprResult  Receives the JSON detection array (cleared on failure).
/// @param cameraId   Per-camera key for the tracker/checker state.
/// @return true on success (possibly with zero detections), false on invalid
///         state/input or an exception.
/// Thread-safety: no coarse _mutex — sub-components lock internally.
bool ANSALPR_OD::Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox,std::string& lprResult, const std::string& cameraId)
{
    // No coarse _mutex — sub-components have their own fine-grained locks.
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid license", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!valid) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Invalid model", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Model is not initialized", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (input.empty()) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image is empty", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (input.cols < 5 || input.rows < 5) {
        this->_logger.LogError("ANSALPR_OD::Inference", "Input image size is too small", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    if (!_lpDetector) {
        this->_logger.LogFatal("ANSALPR_OD::Inference", "_lpDetector is null", __FILE__, __LINE__);
        lprResult.clear();
        return false;
    }
    try {
        // Convert grayscale to BGR if necessary (use local buffer for thread safety)
        cv::Mat localFrame;
        if (input.channels() == 1) {
            cv::cvtColor(input, localFrame, cv::COLOR_GRAY2BGR);
        }
        // `frame` aliases `input` in the common BGR case — no copy made.
        const cv::Mat& frame = (input.channels() == 1) ? localFrame : input;
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        constexpr int padding = 10;
        // --- Compute display→full-res scale (once per frame, cheap) ---
        // NVIDIA-only NV12 fast path — see isNvidiaEngine() discussion
        // above. cv::cuda::* types touch CUDA even inside the "guarded"
        // helper, so we must not even read tl_currentGpuFrame() on AMD.
        float scaleX2 = 1.f, scaleY2 = 1.f;
        if (isNvidiaEngine()) {
            auto* gpuData = tl_currentGpuFrame();
            if (gpuData && gpuData->width > frame.cols && gpuData->height > frame.rows) {
                scaleX2 = static_cast<float>(gpuData->width) / frame.cols;
                scaleY2 = static_cast<float>(gpuData->height) / frame.rows;
            }
        }
        std::vector<Object> detectedObjects;
        if (!Bbox.empty()) {
            // Process each bounding box region
            detectedObjects.reserve(Bbox.size());
            for (const auto& bbox : Bbox) {
                // Clamp the vehicle ROI to the frame before cropping.
                const int x1c = std::max(0, bbox.x);
                const int y1c = std::max(0, bbox.y);
                const int cropWidth = std::min(frameWidth - x1c, bbox.width);
                const int cropHeight = std::min(frameHeight - y1c, bbox.height);
                if (cropWidth < 5 || cropHeight < 5) {
                    continue;
                }
                cv::Rect objectPos(x1c, y1c, cropWidth, cropHeight);
                cv::Mat croppedObject = frame(objectPos);
                std::vector<Object> lprOutput = _lpDetector->RunInference(croppedObject, cameraId);
                for (size_t _di = 0; _di < lprOutput.size(); ++_di) {
                    ANS_DBG("ALPR_Track", "cam=%s bbox det[%zu] tid=%d box=(%d,%d,%d,%d) conf=%.2f",
                        cameraId.c_str(), _di, lprOutput[_di].trackId,
                        lprOutput[_di].box.x, lprOutput[_di].box.y,
                        lprOutput[_di].box.width, lprOutput[_di].box.height,
                        lprOutput[_di].confidence);
                }
                for (auto& lprObject : lprOutput) {
                    const cv::Rect& box = lprObject.box;
                    // Calculate padded region within cropped image
                    const int x1 = std::max(0, box.x - padding);
                    const int y1 = std::max(0, box.y - padding);
                    const int x2 = std::min(cropWidth, box.x + box.width + padding);
                    const int y2 = std::min(cropHeight, box.y + box.height + padding);
                    // Adjust to original frame coordinates
                    lprObject.box.x = std::max(0, x1c + x1);
                    lprObject.box.y = std::max(0, y1c + y1);
                    lprObject.box.width = std::min(frameWidth - lprObject.box.x, x2 - x1);
                    lprObject.box.height = std::min(frameHeight - lprObject.box.y, y2 - y1);
                    if (lprObject.box.width <= padding || lprObject.box.height <= padding) {
                        continue;
                    }
                    lprObject.cameraId = cameraId;
                    lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                    // Crop from full-res NV12 on GPU if available, otherwise display-res.
                    // NV12 helper is NVIDIA-only — isNvidiaEngine() gate keeps
                    // CUDA runtime inactive on AMD/Intel/CPU hardware.
                    cv::Mat lprImage;
                    if (isNvidiaEngine() && scaleX2 > 1.f) {
                        auto cropResult = _nv12Helper.tryNV12CropToBGR(
                            frame, 0, lprObject.box, 0, scaleX2, scaleY2,
                            this->_logger, "LPR");
                        if (cropResult.succeeded)
                            lprImage = cropResult.bgrCrop;
                    }
                    if (lprImage.empty())
                        lprImage = frame(lprObject.box);
                    cv::Mat alignedLPR = enhanceForOCR(lprImage);
                    std::string ocrText = DetectLicensePlateString(alignedLPR, cameraId);
                    if (ocrText.empty()) {
                        continue;
                    }
                    // Stabilize the raw OCR text per track where the checker applies.
                    if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
                        lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
                    } else {
                        lprObject.className = ocrText;
                    }
                    if (lprObject.className.empty()) {
                        continue;
                    }
                    std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                    if (!colour.empty()) {
                        lprObject.extraInfo = "color:" + colour;
                    }
                    detectedObjects.push_back(std::move(lprObject));
                }
            }
        }
        else {
            // No bounding boxes - run on full frame
            std::vector<Object> lprOutput = _lpDetector->RunInference(frame, cameraId);
            for (size_t _di = 0; _di < lprOutput.size(); ++_di) {
                ANS_DBG("ALPR_Track", "cam=%s full det[%zu] tid=%d box=(%d,%d,%d,%d) conf=%.2f",
                    cameraId.c_str(), _di, lprOutput[_di].trackId,
                    lprOutput[_di].box.x, lprOutput[_di].box.y,
                    lprOutput[_di].box.width, lprOutput[_di].box.height,
                    lprOutput[_di].confidence);
            }
            detectedObjects.reserve(lprOutput.size());
            for (auto& lprObject : lprOutput) {
                const cv::Rect& box = lprObject.box;
                // Calculate padded region
                const int x1 = std::max(0, box.x - padding);
                const int y1 = std::max(0, box.y - padding);
                const int width = std::min(frameWidth - x1, box.width + 2 * padding);
                const int height = std::min(frameHeight - y1, box.height + 2 * padding);
                if (width <= padding || height <= padding) {
                    continue;
                }
                lprObject.cameraId = cameraId;
                lprObject.polygon = RectToNormalizedPolygon(lprObject.box, input.cols, input.rows);
                // Crop from full-res NV12 on GPU if available, otherwise display-res.
                // NV12 helper is NVIDIA-only — isNvidiaEngine() gate keeps
                // CUDA runtime inactive on AMD/Intel/CPU hardware.
                cv::Rect lprPos(x1, y1, width, height);
                cv::Mat lprImage;
                if (isNvidiaEngine() && scaleX2 > 1.f) {
                    auto cropResult = _nv12Helper.tryNV12CropToBGR(
                        frame, 0, lprPos, 0, scaleX2, scaleY2,
                        this->_logger, "LPR");
                    if (cropResult.succeeded)
                        lprImage = cropResult.bgrCrop;
                }
                if (lprImage.empty())
                    lprImage = frame(lprPos);
                cv::Mat alignedLPR = enhanceForOCR(lprImage);
                std::string rawText = DetectLicensePlateString(alignedLPR, cameraId);
                // NOTE(review): unlike the Bbox branch, empty rawText is still fed
                // to the checker here — presumably so an established track can keep
                // its stable plate text; confirm this asymmetry is intentional.
                if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
                    lprObject.className = alprChecker.checkPlateByTrackId(cameraId, rawText, lprObject.trackId);
                } else {
                    lprObject.className = rawText;
                }
                if (lprObject.className.empty()) {
                    continue;
                }
                std::string colour = DetectLPColourCached(lprImage, cameraId, lprObject.className);
                if (!colour.empty()) {
                    lprObject.extraInfo = "color:" + colour;
                }
                detectedObjects.push_back(std::move(lprObject));
            }
        }
        // Deduplicate: same plate text should not appear on multiple vehicles
        // Note: in Bbox mode, internal LP trackIds overlap across crops, so
        // dedup uses plate bounding box position (via Object::box) to distinguish.
        // The ensureUniquePlateText method handles this by plate text grouping.
        if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
            ensureUniquePlateText(detectedObjects, cameraId);
        }
        lprResult = VectorDetectionToJsonString(detectedObjects);
        return true;
    }
    catch (const std::exception& e) {
        lprResult.clear();
        this->_logger.LogFatal("ANSALPR_OD::Inference", e.what(), __FILE__, __LINE__);
        return false;
    }
}
int ANSALPR_OD::findSubstringIndex(const std::string& str) {
    // Probes the diplomatic-plate markers in fixed priority order and returns
    // the position of the first marker that occurs anywhere in `str`.
    // Returns -1 when none of the markers is present (or on exception).
    try {
        static const char* const kMarkers[] = { "NN", "CV", "NG", "QT" };
        for (const char* marker : kMarkers) {
            const std::size_t hit = str.find(marker);
            if (hit != std::string::npos) {
                return static_cast<int>(hit);
            }
        }
        return -1;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::findSubstringIndex", e.what(), __FILE__, __LINE__);
        return -1;
    }
}
char ANSALPR_OD::fixLPDigit(char c) {
    // Maps OCR-confusable lowercase letters onto the digit they visually
    // resemble; any other character passes through unchanged.
    try {
        if (c == 'b') return '6';
        if (c == 'c' || c == 'o') return '0';
        if (c == 'f' || c == 't') return '4';
        if (c == 'j' || c == 'i' || c == 'l') return '1';
        if (c == 's') return '5';
        if (c == 'g' || c == 'q' || c == 'y') return '9';
        return c; // Not a known confusion — keep as-is.
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::fixLPDigit", e.what(), __FILE__, __LINE__);
        return c;
    }
}
//only accept these letters: A, B, C, D, E, F, G, H, K, L, M, N, P, S, T, U, V, X, Y, Z
// I, J, O, Q, R, W
char ANSALPR_OD::convertDigitToLetter(char c) {
    // Maps digits (and a few confusable letters) to the uppercase letter they
    // most resemble on a Vietnamese plate. Characters outside the table pass
    // through unchanged. Only letters in the accepted set are produced.
    try {
        // Table form of the original switch: `from` lists every character
        // that collapses to the single letter `to`.
        static const struct { const char* from; char to; } kMap[] = {
            { "0oOQ",  'C' }, // zero / O-shapes → 'C'
            { "1IilJ", 'L' }, // vertical strokes → 'L'
            { "2z",    'Z' },
            { "3",     'E' },
            { "4",     'A' },
            { "5s",    'S' },
            { "6g",    'G' },
            { "7",     'T' },
            { "8b",    'B' },
            { "9R",    'P' },
            { "Ww",    'V' },
        };
        for (const auto& entry : kMap) {
            for (const char* p = entry.from; *p != '\0'; ++p) {
                if (*p == c) {
                    return entry.to;
                }
            }
        }
        return c; // Not a digit/confusable — keep as-is.
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertDigitToLetter", e.what(), __FILE__, __LINE__);
        return c;
    }
}
/// Maps letters to the digit they visually resemble. Unlike
/// convertDigitToLetter, there is no pass-through: any character not in the
/// table (including letters like 'A' or 'Z') collapses to '0' so the caller
/// (convertStringToDigits) always produces a purely numeric string.
char ANSALPR_OD::convertLetterToDigit(char c) {
    // std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        switch (c) {
            // Convert common letter confusions with digits
        case 'B':
        case 'b': // Adding lowercase 'b' to match common mistypes
            return '8';
        case 'I':
        case 'i':
        case 'J': // Capital 'J' can also resemble '1'
        case 'j':
        case 'L':
        case 'l':
            return '1';
        case 'S':
        case 's':
            return '5';
        case 'G':
        case 'g': // Adding lowercase 'g' for better matching
            return '6';
        case 'O':
        case 'o':
        case 'Q': // 'Q' can also be misread as '0'
        case 'U':
        case 'u': // Adding lowercase 'u' as it resembles '0'
            return '0';
        case 'T': // Capital 'T' sometimes looks like '7'
            return '7';
        case 'F':
        case 'f':
        case 't':
            return '4';
        case 'Y': // Capital 'Y' may resemble '9'
        case 'y':
        case 'q':
            return '9';
        default:
            return '0'; // Unmapped characters collapse to '0' (NOT passed through) so the result stays numeric.
        }
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertLetterToDigit", e.what(), __FILE__, __LINE__);
        return c;
    }
}
// Function to convert string to digits, skipping conversion if the character is already a digit
/// Converts a plate string to a purely numeric string: digits are kept,
/// every other character is mapped through convertLetterToDigit (which
/// collapses unmapped characters to '0').
/// @param input  Candidate plate text.
/// @return Numeric string of the same length; `input` unchanged on exception.
std::string ANSALPR_OD::convertStringToDigits(const std::string& input) {
    try {
        std::string result;
        result.reserve(input.size()); // Output length == input length; avoid reallocs.
        for (char c : input) {
            // Cast to unsigned char: passing a negative plain char to
            // std::isdigit is undefined behavior per the C standard.
            if (std::isdigit(static_cast<unsigned char>(c))) {
                result += c; // Already a digit — keep it.
            }
            else {
                result += convertLetterToDigit(c); // Map letter → digit.
            }
        }
        return result;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertStringToDigits", e.what(), __FILE__, __LINE__);
        return input;
    }
}
// Function to convert string to letters, skipping conversion if the character is already a letter
/// Converts a plate string to letters only: alphabetic characters are kept,
/// every other character is mapped through convertDigitToLetter (unmapped
/// characters pass through unchanged).
/// @param input  Candidate plate text.
/// @return Letter-converted string; `input` unchanged on exception.
std::string ANSALPR_OD::convertStringToLetters(const std::string& input) {
    try {
        std::string result;
        result.reserve(input.size()); // Output length == input length; avoid reallocs.
        for (char c : input) {
            // Cast to unsigned char: passing a negative plain char to
            // std::isalpha is undefined behavior per the C standard.
            if (std::isalpha(static_cast<unsigned char>(c))) {
                result += c; // Already a letter — keep it.
            }
            else {
                result += convertDigitToLetter(c); // Map digit → letter.
            }
        }
        return result;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::convertStringToLetters", e.what(), __FILE__, __LINE__);
        return input;
    }
}
/// Returns the position of the EARLIEST occurrence of any diplomatic-plate
/// marker ("NN", "NG", "CV", "QT") in `input`, or -1 if none is present.
/// Unlike findSubstringIndex (first marker in priority order), this scans all
/// markers and keeps the smallest position.
int ANSALPR_OD::searchDiplomacyLP(const std::string& input) {
    try {
        static const char* const kMarkers[] = { "NN", "NG", "CV", "QT" };
        // Track the best (smallest) hit as size_t to avoid the signed/unsigned
        // comparison the previous int-based accumulator performed.
        std::size_t best = std::string::npos;
        for (const char* marker : kMarkers) {
            const std::size_t pos = input.find(marker);
            if (pos != std::string::npos && pos < best) {
                best = pos;
            }
        }
        return (best == std::string::npos) ? -1 : static_cast<int>(best);
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::searchDiplomacyLP", e.what(), __FILE__, __LINE__);
        return -1;
    }
}
bool ANSALPR_OD::ValidateVNMotobikeLP(const std::string& input) {
    // Membership test against the whitelist of valid VN motorbike plate codes.
    const auto hit = std::find(ValidVNMotobikeList.begin(),
                               ValidVNMotobikeList.end(), input);
    return hit != ValidVNMotobikeList.end();
}
bool ANSALPR_OD::ValidateVNCarLP(const std::string& input) {
    // Membership test against the whitelist of valid VN car plate codes.
    try {
        const auto hit = std::find(ValidVNCarList.begin(),
                                   ValidVNCarList.end(), input);
        return hit != ValidVNCarList.end();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::ValidateVNCarLP", e.what(), __FILE__, __LINE__);
        return false;
    }
}
/// Extracts the plate region from `fullImage` and deskews it for OCR:
/// finds the dominant plate-like contour inside `bbox`, rotates the ROI so
/// the plate is horizontal, crops it with small padding, then applies the
/// enhanceForOCR pipeline. Falls back to the (enhanced) unrotated ROI when
/// no suitable contour is found, or to a raw clone on exception.
/// @param fullImage  Source frame (expected BGR — cvtColor BGR2GRAY is used).
/// @param bbox       Plate bounding box in frame coordinates.
cv::Mat ANSALPR_OD::alignPlateForOCR(const cv::Mat& fullImage, const cv::Rect& bbox) {
    try {
        // Clamp to image bounds; tiny boxes are returned unprocessed.
        const cv::Rect safeBox = bbox & cv::Rect(0, 0, fullImage.cols, fullImage.rows);
        if (safeBox.width < 10 || safeBox.height < 10) {
            return fullImage(safeBox).clone();
        }
        cv::Mat roi = fullImage(safeBox);
        // Convert to grayscale and create binary image
        cv::Mat gray;
        cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY);
        cv::Mat binary;
        cv::adaptiveThreshold(gray, binary, 255, cv::ADAPTIVE_THRESH_MEAN_C,
            cv::THRESH_BINARY_INV, 15, 10);
        std::vector<std::vector<cv::Point>> contours;
        cv::findContours(binary, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
        if (contours.empty()) {
            return enhanceAndDebug(roi);
        }
        // Find best contour closest to center
        const cv::Point2f roiCenter(static_cast<float>(roi.cols) / 2.0f,
            static_cast<float>(roi.rows) / 2.0f);
        // Candidate must span at least half the ROI in each dimension and be
        // reasonably "filled" (contour area >= 30% of its rotated rect).
        const float minWidth = roi.cols * 0.5f;
        const float minHeight = roi.rows * 0.5f;
        constexpr float minAreaRatio = 0.3f;
        float minDist = std::numeric_limits<float>::max();
        int bestIdx = -1;
        for (size_t i = 0; i < contours.size(); ++i) {
            cv::RotatedRect rect = cv::minAreaRect(contours[i]);
            const float width = rect.size.width;
            const float height = rect.size.height;
            if (width < minWidth || height < minHeight) {
                continue;
            }
            const float areaRect = width * height;
            const float areaContour = static_cast<float>(cv::contourArea(contours[i]));
            if (areaContour / areaRect < minAreaRatio) {
                continue;
            }
            const float dist = cv::norm(rect.center - roiCenter);
            if (dist < minDist) {
                minDist = dist;
                bestIdx = static_cast<int>(i);
            }
        }
        if (bestIdx == -1) {
            return enhanceAndDebug(roi);
        }
        // Align using best rotated rect
        cv::RotatedRect bestRect = cv::minAreaRect(contours[bestIdx]);
        float angle = bestRect.angle;
        // Normalize so width is the long side; limit correction to ±45°.
        if (bestRect.size.width < bestRect.size.height) {
            angle += 90.0f;
            std::swap(bestRect.size.width, bestRect.size.height);
        }
        angle = std::clamp(angle, -45.0f, 45.0f);
        // Rotate the image
        const cv::Point2f rotationCenter(roi.cols / 2.0f, roi.rows / 2.0f);
        cv::Mat rotationMatrix = cv::getRotationMatrix2D(rotationCenter, angle, 1.0);
        cv::Mat rotated;
        cv::warpAffine(roi, rotated, rotationMatrix, roi.size(), cv::INTER_LINEAR, cv::BORDER_REPLICATE);
        // Transform rect center after rotation (apply the 2x3 affine manually).
        const double* rotData = rotationMatrix.ptr<double>(0);
        const cv::Point2f newCenter(
            static_cast<float>(rotData[0] * bestRect.center.x + rotData[1] * bestRect.center.y + rotData[2]),
            static_cast<float>(rotData[3] * bestRect.center.x + rotData[4] * bestRect.center.y + rotData[5])
        );
        // Apply small padding and crop
        constexpr int padding = 2;
        const cv::Size paddedSize(
            std::min(rotated.cols, static_cast<int>(bestRect.size.width) + 2 * padding),
            std::min(rotated.rows, static_cast<int>(bestRect.size.height) + 2 * padding)
        );
        cv::Mat rawCropped;
        cv::getRectSubPix(rotated, paddedSize, newCenter, rawCropped);
        cv::Mat cropped = enhanceForOCR(rawCropped);
#ifdef FNS_DEBUG
        showDebugComparison(roi, cropped, contours, bestIdx, bestRect);
#endif
        return cropped;
    }
    catch (const std::exception& e) {
        this->_logger.LogError("ANSALPR_OD::alignPlateForOCR",
            std::string("Exception: ") + e.what(), __FILE__, __LINE__);
        return fullImage(bbox & cv::Rect(0, 0, fullImage.cols, fullImage.rows)).clone();
    }
}
#ifdef FNS_DEBUG
/// Debug-build-only visualization: shows the raw ROI (with the selected
/// contour and its rotated rect drawn, when bestIdx >= 0) side-by-side with
/// the processed/aligned result in an OpenCV window. Never throws to caller.
void ANSALPR_OD::showDebugComparison(const cv::Mat& roi, const cv::Mat& processed,
    const std::vector<std::vector<cv::Point>>& contours, int bestIdx,
    const cv::RotatedRect& bestRect)
{
    try {
        cv::Mat debugRoi = roi.clone();
        if (bestIdx >= 0) {
            cv::drawContours(debugRoi, contours, bestIdx, cv::Scalar(0, 255, 0), 1);
            cv::Point2f points[4];
            bestRect.points(points);
            for (int j = 0; j < 4; ++j) {
                cv::line(debugRoi, points[j], points[(j + 1) % 4], cv::Scalar(255, 0, 0), 1);
            }
        }
        // Resize both panes to a fixed 240x80 so the comparison is readable.
        cv::Mat debugLeft, debugRight;
        cv::resize(debugRoi, debugLeft, cv::Size(240, 80));
        cv::resize(processed, debugRight, cv::Size(240, 80));
        cv::Mat combined;
        cv::hconcat(debugLeft, debugRight, combined);
        cv::putText(combined, "Raw", cv::Point(10, 15),
            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
        cv::putText(combined, "Aligned", cv::Point(250, 15),
            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 1);
        cv::imshow("LPR Cropped + Rotated", combined);
        cv::waitKey(1);
    }
    catch (const std::exception& e) {
        std::cerr << "LPR Debug Error: " << e.what() << std::endl;
    }
}
#endif
cv::Mat ANSALPR_OD::enhanceAndDebug(const cv::Mat& roi) {
    // Fallback path of alignPlateForOCR: run the OCR enhancement pipeline on
    // the unrotated ROI; in debug builds also show the raw-vs-processed
    // comparison with no contour overlay.
    cv::Mat result = enhanceForOCR(roi);
#ifdef FNS_DEBUG
    showDebugComparison(roi, result, {}, -1, cv::RotatedRect());
#endif
    return result;
}
cv::Mat ANSALPR_OD::enhanceForOCR(const cv::Mat& plateROIOriginal) {
    // OCR pre-processing pipeline for small plate crops:
    // 2x Lanczos upscale -> grayscale -> bilateral denoise -> unsharp mask ->
    // CLAHE -> Laplacian sharpening -> back to 3-channel BGR.
    if (plateROIOriginal.empty()) {
        this->_logger.LogError("ANSALPR_OD::enhanceForOCR", "plateROI is empty", __FILE__, __LINE__);
        return cv::Mat();
    }
    // Upscale 2x so character strokes have enough pixels for the OCR model.
    cv::Mat upscaled;
    cv::resize(plateROIOriginal, upscaled, cv::Size(), 2.0, 2.0, cv::INTER_LANCZOS4);
    // Work on a single-channel image from here on.
    cv::Mat mono;
    if (upscaled.channels() == 3) {
        cv::cvtColor(upscaled, mono, cv::COLOR_BGR2GRAY);
    }
    else {
        mono = upscaled;
    }
    // Edge-preserving denoise (bilateral keeps stroke borders crisp).
    cv::Mat smooth;
    cv::bilateralFilter(mono, smooth, 7, 50, 50);
    // Unsharp mask: boost detail as denoised*1.8 - blurred*0.8.
    cv::Mat softened;
    cv::GaussianBlur(smooth, softened, cv::Size(0, 0), 1.5);
    cv::Mat masked;
    cv::addWeighted(smooth, 1.8, softened, -0.8, 0, masked);
    // Local contrast via CLAHE (thread_local instance for thread safety).
    thread_local cv::Ptr<cv::CLAHE> tl_clahe = cv::createCLAHE(4.0, cv::Size(8, 8));
    cv::Mat equalized;
    tl_clahe->apply(masked, equalized);
    // Laplacian-based stroke sharpening.
    cv::Mat edges;
    cv::Laplacian(equalized, edges, CV_16S, 3);
    cv::Mat edgesAbs;
    cv::convertScaleAbs(edges, edgesAbs);
    cv::Mat crisp;
    cv::addWeighted(equalized, 1.2, edgesAbs, -0.3, 0, crisp);
    // The OCR model expects a 3-channel input.
    cv::Mat ocrReady;
    cv::cvtColor(crisp, ocrReady, cv::COLOR_GRAY2BGR);
    return ocrReady;
}
// Batch Inference
/// Full-frame batch ALPR: detect plates on the whole frame, crop each,
/// batch-OCR all crops in one call, stabilize text via ALPRChecker, attach
/// cached colour, and deduplicate plate texts. Returns detections (possibly
/// empty); never throws — all exceptions are logged and swallowed.
std::vector<Object> ANSALPR_OD::RunInference(const cv::Mat& input, const std::string& cameraId) {
    // Read-only validation without lock (immutable after initialization)
    if (!_licenseValid || !valid || !_isInitialized) {
        this->_logger.LogWarn("ANSALPR_OD::RunInference",
            "Invalid state: license=" + std::to_string(_licenseValid) +
            " valid=" + std::to_string(valid) +
            " init=" + std::to_string(_isInitialized), __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) {
        this->_logger.LogWarn("ANSALPR_OD::RunInference",
            "Skipped: input too small (" + std::to_string(input.cols) + "x" + std::to_string(input.rows) + ")",
            __FILE__, __LINE__);
        return {};
    }
    // Pointer checks (these should be immutable after initialization)
    if (!this->_lpDetector || !this->_ocrDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference",
            "Detector instances are null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Use local buffer instead of shared _frameBuffer
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input; // No copy, just reference
        }
        const int frameWidth = frame.cols;
        const int frameHeight = frame.rows;
        // Use local variable instead of shared _detectedArea
        cv::Rect detectedArea(0, 0, frameWidth, frameHeight);
        if (detectedArea.width <= 50 || detectedArea.height <= 50) {
            return {};
        }
        // Run license plate detection (should be thread-safe internally)
        cv::Mat activeFrame = frame(detectedArea);
        std::vector<Object> lprOutput = _lpDetector->RunInference(activeFrame, cameraId);
        for (size_t _di = 0; _di < lprOutput.size(); ++_di) {
            ANS_DBG("ALPR_Track", "cam=%s batch det[%zu] tid=%d box=(%d,%d,%d,%d) conf=%.2f",
                cameraId.c_str(), _di, lprOutput[_di].trackId,
                lprOutput[_di].box.x, lprOutput[_di].box.y,
                lprOutput[_di].box.width, lprOutput[_di].box.height,
                lprOutput[_di].confidence);
        }
        if (lprOutput.empty()) {
            return {};
        }
        // Prepare batch - pre-allocate and use move semantics.
        // validIndices maps each crop back to its detection in lprOutput,
        // since degenerate boxes are skipped.
        std::vector<cv::Mat> alignedLPRBatch;
        std::vector<size_t> validIndices;
        alignedLPRBatch.reserve(lprOutput.size());
        validIndices.reserve(lprOutput.size());
        for (size_t i = 0; i < lprOutput.size(); ++i) {
            const cv::Rect& box = lprOutput[i].box;
            // Calculate cropped region with bounds checking
            const int x1 = std::max(0, box.x);
            const int y1 = std::max(0, box.y);
            const int x2 = std::min(frameWidth, box.x + box.width);
            const int y2 = std::min(frameHeight, box.y + box.height);
            const int width = x2 - x1;
            const int height = y2 - y1;
            if (width <= 0 || height <= 0) {
                continue;
            }
            cv::Rect lprPos(x1, y1, width, height);
            alignedLPRBatch.emplace_back(frame(lprPos)); // Use emplace_back
            validIndices.push_back(i);
        }
        if (alignedLPRBatch.empty()) {
            return {};
        }
        // Run OCR first, then use cached colour detection.
        // Colour caching by plate text eliminates ~95% of LPC inferences
        // (plate colour doesn't change frame-to-frame).
        std::vector<std::string> ocrTextBatch = DetectLicensePlateStringBatch(alignedLPRBatch, cameraId);
        // Build output — colour detection uses cache keyed by stabilized plate text
        std::vector<Object> output;
        output.reserve(validIndices.size());
        for (size_t i = 0; i < validIndices.size(); ++i) {
            const size_t origIdx = validIndices[i];
            const std::string& ocrText = ocrTextBatch[i];
            if (ocrText.empty()) {
                continue;
            }
            Object lprObject = lprOutput[origIdx];
            lprObject.cameraId = cameraId;
            // Stabilize OCR text through ALPRChecker (hybrid trackId + Levenshtein fallback)
            if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
                lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
            } else {
                lprObject.className = ocrText;
            }
            if (lprObject.className.empty()) {
                continue;
            }
            // Colour detection with cache — only runs inference on first
            // occurrence of each plate text, subsequent frames get 0ms cache hit
            std::string colour = DetectLPColourCached(alignedLPRBatch[i], cameraId, lprObject.className);
            if (!colour.empty()) {
                lprObject.extraInfo = "color:" + colour;
            }
            output.push_back(std::move(lprObject));
        }
        // Deduplicate: if two trackIds claim the same plate text, keep the one
        // with the higher accumulated score to prevent plate flickering
        if (shouldUseALPRChecker(cv::Size(input.cols, input.rows), cameraId)) {
            ensureUniquePlateText(output, cameraId);
        }
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference",
            std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference",
            e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OD::RunInference",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
// ── Stateless batched inference for pipeline mode ───────────────────
// Caller supplies a full frame + a list of vehicle ROIs in FRAME
// coordinates. We run ONE LP-detect call across all vehicle crops and
// ONE char-OCR batch across every resulting plate, with NO tracker,
// voting, spatial dedup, or per-camera accumulating state. This is the
// drop-in replacement for the per-bbox loop inside
// ANSALPR_RunInferencesComplete_LV (pipeline mode) and is exported as
// ANSALPR_RunInferencesBatch_LV / _V2 in dllmain.cpp.
/// Stateless pipeline-mode batch: one LP-detect call across all vehicle
/// crops, one char-OCR call across all plates. No tracker/voting/dedup —
/// raw OCR text is returned with boxes mapped back to FRAME coordinates.
/// @param input         Full frame (BGR expected).
/// @param vehicleBoxes  Vehicle ROIs in frame coordinates.
/// @param cameraId      Used for logging and the colour cache key space.
std::vector<Object> ANSALPR_OD::RunInferencesBatch(
    const cv::Mat& input,
    const std::vector<cv::Rect>& vehicleBoxes,
    const std::string& cameraId)
{
    if (!_licenseValid || !valid || !_isInitialized) {
        this->_logger.LogWarn("ANSALPR_OD::RunInferencesBatch",
            "Invalid state: license=" + std::to_string(_licenseValid) +
            " valid=" + std::to_string(valid) +
            " init=" + std::to_string(_isInitialized), __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 5 || input.rows < 5) return {};
    if (!this->_lpDetector || !this->_ocrDetector) {
        this->_logger.LogFatal("ANSALPR_OD::RunInferencesBatch",
            "Detector instances are null", __FILE__, __LINE__);
        return {};
    }
    if (vehicleBoxes.empty()) return {};
    try {
        // ── 1. Clamp and crop vehicle ROIs ────────────────────────
        // `clamped` keeps the frame-space rect for each surviving crop so
        // plate boxes can be translated back in step 5.
        const cv::Rect frameRect(0, 0, input.cols, input.rows);
        std::vector<cv::Mat> vehicleCrops;
        std::vector<cv::Rect> clamped;
        vehicleCrops.reserve(vehicleBoxes.size());
        clamped.reserve(vehicleBoxes.size());
        for (const auto& r : vehicleBoxes) {
            cv::Rect c = r & frameRect;
            if (c.width <= 5 || c.height <= 5) continue;
            vehicleCrops.emplace_back(input(c));
            clamped.push_back(c);
        }
        if (vehicleCrops.empty()) return {};
        // ── 2. ONE batched LP detection call across all vehicles ──
        // ANSODBase::RunInferencesBatch is fixed-shape YOLO, so this
        // is a single ORT/TRT call regardless of how many vehicles
        // the caller passed.
        std::vector<std::vector<Object>> lpBatch =
            _lpDetector->RunInferencesBatch(vehicleCrops, cameraId);
        // ── 3. Flatten detected plates, keeping back-reference ───
        struct PlateMeta {
            size_t vehIdx;   // index into vehicleCrops / clamped
            Object lpObj;    // LP detection in VEHICLE-local coords
        };
        std::vector<cv::Mat> alignedLPRBatch;
        std::vector<PlateMeta> metas;
        alignedLPRBatch.reserve(lpBatch.size() * 2);
        metas.reserve(lpBatch.size() * 2);
        for (size_t v = 0; v < lpBatch.size() && v < vehicleCrops.size(); ++v) {
            const cv::Mat& veh = vehicleCrops[v];
            const cv::Rect vehRect(0, 0, veh.cols, veh.rows);
            for (const auto& lp : lpBatch[v]) {
                cv::Rect lpBox = lp.box & vehRect;
                if (lpBox.width <= 0 || lpBox.height <= 0) continue;
                alignedLPRBatch.emplace_back(veh(lpBox));
                metas.push_back({ v, lp });
            }
        }
        if (alignedLPRBatch.empty()) return {};
        // ── 4. ONE batched char-OCR call across every plate ──────
        std::vector<std::string> ocrTextBatch =
            DetectLicensePlateStringBatch(alignedLPRBatch, cameraId);
        if (ocrTextBatch.size() != alignedLPRBatch.size()) {
            this->_logger.LogWarn("ANSALPR_OD::RunInferencesBatch",
                "Char OCR batch size mismatch", __FILE__, __LINE__);
            return {};
        }
        // ── 5. Assemble — NO tracker, NO voting, NO dedup ────────
        std::vector<Object> output;
        output.reserve(alignedLPRBatch.size());
        for (size_t i = 0; i < alignedLPRBatch.size(); ++i) {
            const std::string& text = ocrTextBatch[i];
            if (text.empty()) continue;
            Object out = metas[i].lpObj;
            out.className = text;      // raw OCR — no ALPRChecker
            out.cameraId = cameraId;
            // Translate the vehicle-local plate box into frame coordinates.
            out.box.x += clamped[metas[i].vehIdx].x;
            out.box.y += clamped[metas[i].vehIdx].y;
            // Colour lookup — uses text-keyed cache, bounded by
            // COLOUR_CACHE_MAX_SIZE, no per-frame growth.
            std::string colour = DetectLPColourCached(
                alignedLPRBatch[i], cameraId, out.className);
            if (!colour.empty()) out.extraInfo = "color:" + colour;
            output.push_back(std::move(out));
        }
        return output;
    }
    catch (const cv::Exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInferencesBatch",
            std::string("OpenCV Exception: ") + e.what(), __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::RunInferencesBatch",
            e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSALPR_OD::RunInferencesBatch",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
/// Batched plate-colour classification. Returns one colour string per input
/// ROI ("" for empty ROIs, no detection above threshold, or any failure).
/// Result extraction is parallelized with std::async for batches > 10.
std::vector<std::string> ANSALPR_OD::DetectLPColourDetectorBatch(const std::vector<cv::Mat>& lprROIs, const std::string& cameraId) {
    // Early validation - no lock needed for immutable config
    if (_lpColourModelConfig.detectionScoreThreshold <= 0.0f || !_lpColourDetector) {
        return std::vector<std::string>(lprROIs.size(), "");
    }
    try {
        if (lprROIs.empty()) {
            return {};
        }
        const size_t batchSize = lprROIs.size();
        // Filter out empty ROIs — just a cheap .empty() check, no need for threads
        std::vector<cv::Mat> validROIs;
        std::vector<size_t> validIndices;
        validROIs.reserve(batchSize);
        validIndices.reserve(batchSize);
        for (size_t i = 0; i < batchSize; ++i) {
            if (!lprROIs[i].empty()) {
                validROIs.push_back(lprROIs[i]);
                validIndices.push_back(i);
            }
        }
        if (validROIs.empty()) {
            return std::vector<std::string>(batchSize, "");
        }
        // Run batch colour detection (GPU-accelerated, already optimized)
        std::vector<std::vector<Object>> colourBatchOutputs =
            _lpColourDetector->RunInferencesBatch(validROIs, cameraId);
        if (colourBatchOutputs.size() != validROIs.size()) {
            this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch",
                "Colour detector batch size mismatch", __FILE__, __LINE__);
            return std::vector<std::string>(batchSize, "");
        }
        // Prepare results vector (initialize all to empty)
        std::vector<std::string> results(batchSize);
        // Process results in parallel for large batches.
        // Each worker writes disjoint slots of `results` (via validIndices),
        // so no synchronization is needed between chunks.
        const size_t validSize = colourBatchOutputs.size();
        if (validSize > 10) {
            const unsigned int hwThreads = std::thread::hardware_concurrency();
            const unsigned int numThreads = std::min(hwThreads > 0 ? hwThreads : 4,
                static_cast<unsigned int>(validSize));
            const size_t chunkSize = (validSize + numThreads - 1) / numThreads;
            std::vector<std::future<void>> futures;
            futures.reserve(numThreads);
            for (unsigned int t = 0; t < numThreads; ++t) {
                const size_t startIdx = t * chunkSize;
                const size_t endIdx = std::min(startIdx + chunkSize, validSize);
                if (startIdx >= validSize) break;
                futures.push_back(std::async(std::launch::async,
                    [&colourBatchOutputs, &validIndices, &results, startIdx, endIdx, this]() {
                        const float threshold = _lpColourModelConfig.detectionScoreThreshold;
                        for (size_t i = startIdx; i < endIdx; ++i) {
                            if (colourBatchOutputs[i].empty()) {
                                continue;
                            }
                            // Find detection with highest confidence above threshold
                            float maxConfidence = threshold;
                            std::string bestClassName;
                            for (const auto& detection : colourBatchOutputs[i]) {
                                if (detection.confidence > maxConfidence) {
                                    maxConfidence = detection.confidence;
                                    bestClassName = detection.className;
                                }
                            }
                            if (!bestClassName.empty()) {
                                results[validIndices[i]] = bestClassName;
                            }
                        }
                    }
                ));
            }
            // Wait for all processing to complete.
            // NOTE: on timeout we only log and skip get(); a std::async
            // future's destructor still blocks until the task finishes, so
            // no task outlives this scope (per the C++ standard's ~future
            // behavior for async-launched state).
            for (auto& future : futures) {
                try {
                    if (future.wait_for(std::chrono::seconds(ASYNC_TIMEOUT_SECONDS)) == std::future_status::timeout) {
                        this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch", "Async colour processing timed out (" + std::to_string(ASYNC_TIMEOUT_SECONDS) + "s)", __FILE__, __LINE__);
                        continue;
                    }
                    future.get();
                }
                catch (const std::exception& e) {
                    this->_logger.LogError("ANSALPR_OD::DetectLPColourDetectorBatch", std::string("Async colour processing failed: ") + e.what(), __FILE__, __LINE__);
                }
            }
        }
        else {
            // Sequential for small batches
            const float threshold = _lpColourModelConfig.detectionScoreThreshold;
            for (size_t i = 0; i < validSize; ++i) {
                if (colourBatchOutputs[i].empty()) {
                    continue;
                }
                // Find detection with highest confidence above threshold
                float maxConfidence = threshold;
                std::string bestClassName;
                for (const auto& detection : colourBatchOutputs[i]) {
                    if (detection.confidence > maxConfidence) {
                        maxConfidence = detection.confidence;
                        bestClassName = detection.className;
                    }
                }
                if (!bestClassName.empty()) {
                    results[validIndices[i]] = bestClassName;
                }
            }
        }
        return results;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLPColourDetectorBatch",
            e.what(), __FILE__, __LINE__);
        return std::vector<std::string>(lprROIs.size(), "");
    }
}
std::vector<std::string> ANSALPR_OD::DetectLicensePlateStringBatch(const std::vector<cv::Mat>& lprROIs, const std::string& cameraId) {
    // Runs character OCR over a batch of plate crops and decodes each result
    // into a plate string. Returns one entry per input ROI; an entry is ""
    // when the ROI was empty, nothing was recognized, or the batch failed.
    if (lprROIs.empty()) {
        return {};
    }
    const size_t total = lprROIs.size();
    std::vector<std::string> plateTexts(total);
    try {
        // Grayscale conversion on small LP crops is microseconds each —
        // sequential on purpose, thread spawn overhead would dominate.
        std::vector<cv::Mat> monoBatch(total);
        for (size_t idx = 0; idx < total; ++idx) {
            const cv::Mat& crop = lprROIs[idx];
            if (crop.empty()) continue;
            if (crop.channels() == 3) {
                cv::cvtColor(crop, monoBatch[idx], cv::COLOR_BGR2GRAY);
            }
            else if (crop.channels() == 1) {
                monoBatch[idx] = crop;
            }
        }
        // Single batched OCR inference over every crop.
        std::vector<std::vector<Object>> ocrOutputs = _ocrDetector->RunInferencesBatch(monoBatch, cameraId);
        if (ocrOutputs.size() != total) {
            this->_logger.LogWarn("ANSALPR_OD::DetectLicensePlateStringBatch",
                "Skipped: OCR batch size mismatch", __FILE__, __LINE__);
            return std::vector<std::string>(total, "");
        }
        // Decode each per-plate detection list into text — pure CPU math on
        // a handful of characters, so no parallelism needed here either.
        for (size_t idx = 0; idx < total; ++idx) {
            plateTexts[idx] = ProcessSingleOCRResult(ocrOutputs[idx]);
        }
        return plateTexts;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSALPR_OD::DetectLicensePlateStringBatch",
            e.what(), __FILE__, __LINE__);
        return std::vector<std::string>(total, "");
    }
}
// New helper function - thread-safe, no shared state
/// Convert raw per-character OCR detections into a single plate string.
///
/// Pipeline:
///   1. Deduplicate near-identical character boxes via a spatial hash
///      (O(n) instead of the naive O(n^2) pairwise comparison).
///   2. Estimate the plate's reading direction with a 2-point PCA over
///      the character centers (robust to tilted plates).
///   3. Detect a one-row vs two-row layout by looking for a large gap in
///      the projections perpendicular to the reading direction.
///   4. Sort characters along the reading direction (top row first for
///      two-row plates), concatenate, and normalize via
///      AnalyseLicensePlateText.
///
/// @param ocrOutput  Character detections for ONE plate crop; each Object
///                   carries a box and a className (the character glyph).
/// @return Normalized plate text, or "" if no characters survive dedup.
std::string ANSALPR_OD::ProcessSingleOCRResult(const std::vector<Object>& ocrOutput) {
    if (ocrOutput.empty()) {
        return "";
    }
    // Remove duplicates using spatial hashing for O(n) instead of O(n^2)
    // Key = coarse grid cell of the box's top-left corner; only the cell
    // and its 8 neighbors are checked for an existing near-duplicate.
    // NOTE(review): this only catches all duplicates if
    // DUPLICATE_GRID_SIZE >= DUPLICATE_DIST_THRESHOLD — constants are
    // defined elsewhere; confirm they keep that relationship.
    std::unordered_map<int64_t, std::vector<size_t>> spatialHash;
    spatialHash.reserve(ocrOutput.size());
    std::vector<Object> uniqueOutput;
    uniqueOutput.reserve(ocrOutput.size());
    for (size_t i = 0; i < ocrOutput.size(); ++i) {
        const auto& obj = ocrOutput[i];
        const int gridX = static_cast<int>(obj.box.x / DUPLICATE_GRID_SIZE);
        const int gridY = static_cast<int>(obj.box.y / DUPLICATE_GRID_SIZE);
        // Pack (gridY, gridX) into one 64-bit key; 100000 columns per row
        // keeps distinct cells from colliding for any realistic image size.
        const int64_t hashKey = static_cast<int64_t>(gridY) * 100000LL + gridX;
        bool isDuplicate = false;
        // Check only nearby cells (current + 8 neighbors)
        for (int dy = -1; dy <= 1 && !isDuplicate; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                const int64_t neighborKey = static_cast<int64_t>(gridY + dy) * 100000LL + (gridX + dx);
                auto it = spatialHash.find(neighborKey);
                if (it != spatialHash.end()) {
                    for (size_t idx : it->second) {
                        const auto& unique = uniqueOutput[idx];
                        // Two boxes whose top-left corners are within the
                        // threshold on both axes are treated as the same glyph.
                        if (std::abs(obj.box.x - unique.box.x) < DUPLICATE_DIST_THRESHOLD &&
                            std::abs(obj.box.y - unique.box.y) < DUPLICATE_DIST_THRESHOLD) {
                            isDuplicate = true;
                            break;
                        }
                    }
                    if (isDuplicate) break;
                }
            }
        }
        if (!isDuplicate) {
            // First occurrence wins; later overlapping detections are dropped.
            spatialHash[hashKey].push_back(uniqueOutput.size());
            uniqueOutput.push_back(obj);
        }
    }
    if (uniqueOutput.empty()) {
        return "";
    }
    if (uniqueOutput.size() == 1) {
        // Single character: no ordering needed, just normalize.
        return AnalyseLicensePlateText(uniqueOutput[0].className);
    }
    // ---- Character centers and average height ----
    // avgHeight later scales the row-split thresholds so the logic is
    // resolution-independent.
    float avgHeight = 0.0f;
    int n = static_cast<int>(uniqueOutput.size());
    std::vector<float> cxs(n), cys(n);
    for (int i = 0; i < n; ++i) {
        cxs[i] = uniqueOutput[i].box.x + uniqueOutput[i].box.width * 0.5f;
        cys[i] = uniqueOutput[i].box.y + uniqueOutput[i].box.height * 0.5f;
        avgHeight += uniqueOutput[i].box.height;
    }
    avgHeight /= static_cast<float>(n);
    // ---- Mean center ----
    float meanX = 0.0f, meanY = 0.0f;
    for (int i = 0; i < n; ++i) {
        meanX += cxs[i];
        meanY += cys[i];
    }
    meanX /= n;
    meanY /= n;
    // PCA to find the plate's reading direction.
    // Robust to large tilt angles unlike OLS regression.
    // theta below is the closed-form principal-axis angle of the 2x2
    // covariance matrix of the character centers.
    float cov_xx = 0.0f, cov_xy = 0.0f, cov_yy = 0.0f;
    for (int i = 0; i < n; ++i) {
        float dx = cxs[i] - meanX;
        float dy = cys[i] - meanY;
        cov_xx += dx * dx;
        cov_xy += dx * dy;
        cov_yy += dy * dy;
    }
    float theta = 0.5f * std::atan2(2.0f * cov_xy, cov_xx - cov_yy);
    float dirX = std::cos(theta);
    float dirY = std::sin(theta);
    // Force the reading direction to point left-to-right so sorting by
    // projection yields natural character order.
    if (dirX < 0) { dirX = -dirX; dirY = -dirY; }
    // Perpendicular axis (used to separate rows on two-line plates).
    float perpX = -dirY;
    float perpY = dirX;
    // Project centers onto both axes
    std::vector<float> projAlong(n), projPerp(n);
    for (int i = 0; i < n; ++i) {
        float dx = cxs[i] - meanX;
        float dy = cys[i] - meanY;
        projAlong[i] = dx * dirX + dy * dirY;
        projPerp[i] = dx * perpX + dy * perpY;
    }
    // Row splitting using perpendicular projections
    std::vector<std::pair<float, size_t>> perpSorted;
    perpSorted.reserve(n);
    for (int i = 0; i < n; ++i)
        perpSorted.push_back({ projPerp[i], static_cast<size_t>(i) });
    std::sort(perpSorted.begin(), perpSorted.end());
    // Find largest gap with validation:
    // 1. Both groups must have >= 2 chars
    // 2. Groups must be vertically separated (avgY check)
    float maxGap = 0.0f;
    size_t splitIdx = perpSorted.size() / 2;
    const size_t minGroupSize = ROW_SPLIT_MIN_GROUP_SIZE;
    for (size_t i = 1; i < perpSorted.size(); ++i) {
        float gap = perpSorted[i].first - perpSorted[i - 1].first;
        // Reject splits that would leave either group too small.
        if (i < minGroupSize || (perpSorted.size() - i) < minGroupSize)
            continue;
        if (gap > maxGap) {
            // Extra guard: the two candidate groups must also differ in
            // average center-Y by a fraction of the character height,
            // otherwise a wide kerning gap could be mistaken for a row break.
            float avgY_g1 = 0.0f, avgY_g2 = 0.0f;
            for (size_t j = 0; j < i; ++j)
                avgY_g1 += cys[perpSorted[j].second];
            for (size_t j = i; j < perpSorted.size(); ++j)
                avgY_g2 += cys[perpSorted[j].second];
            avgY_g1 /= static_cast<float>(i);
            avgY_g2 /= static_cast<float>(perpSorted.size() - i);
            if (std::abs(avgY_g2 - avgY_g1) > avgHeight * ROW_SPLIT_AVGY_FACTOR) {
                maxGap = gap;
                splitIdx = i;
            }
        }
    }
    if (maxGap < avgHeight * ROW_SPLIT_MIN_GAP_FACTOR) {
        // Single row - sort by projection along reading direction
        std::vector<std::pair<float, size_t>> allProj;
        for (int i = 0; i < n; ++i)
            allProj.push_back({ projAlong[i], static_cast<size_t>(i) });
        std::sort(allProj.begin(), allProj.end());
        std::string ocrText;
        ocrText.reserve(n);
        for (const auto& p : allProj)
            ocrText += uniqueOutput[p.second].className;
        return AnalyseLicensePlateText(ocrText);
    }
    else {
        // Two rows
        std::vector<std::vector<size_t>> rowIndices(2);
        for (size_t i = 0; i < perpSorted.size(); ++i) {
            size_t objIdx = perpSorted[i].second;
            if (i < splitIdx)
                rowIndices[0].push_back(objIdx);
            else
                rowIndices[1].push_back(objIdx);
        }
        // Ensure row 0 is top row (lower average center-Y)
        float avgY0 = 0.0f, avgY1 = 0.0f;
        for (auto idx : rowIndices[0]) avgY0 += cys[idx];
        for (auto idx : rowIndices[1]) avgY1 += cys[idx];
        avgY0 /= static_cast<float>(rowIndices[0].size());
        avgY1 /= static_cast<float>(rowIndices[1].size());
        if (avgY0 > avgY1)
            std::swap(rowIndices[0], rowIndices[1]);
        // Sort each row by projection along reading direction
        // Rows are concatenated top-first, which matches how two-line
        // plates are read.
        std::string ocrText;
        ocrText.reserve(n);
        for (const auto& row : rowIndices) {
            std::vector<std::pair<float, size_t>> rowProj;
            for (auto idx : row)
                rowProj.push_back({ projAlong[idx], idx });
            std::sort(rowProj.begin(), rowProj.end());
            for (const auto& p : rowProj)
                ocrText += uniqueOutput[p.second].className;
        }
        return AnalyseLicensePlateText(ocrText);
    }
}
/// Deduplicate identical plate texts detected at different locations in
/// the same frame, using per-camera spatial identities with accumulated
/// confidence scores. Thread-safe: all access to _plateIdentities happens
/// under _plateIdentitiesMutex.
///
/// @param results   [in/out] Detections for one frame; duplicate-text
///                  losers are removed from the vector.
/// @param cameraId  Keys the per-camera identity map.
void ANSALPR_OD::ensureUniquePlateText(std::vector<Object>& results, const std::string& cameraId)
{
    std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
    auto& identities = _plateIdentities[cameraId];
    // Option B: Auto-detect mode by counting detections.
    // 1 detection → crop/pipeline mode → return instant result, no accumulated scoring
    // 2+ detections → full-frame mode → use accumulated scoring for dedup
    if (results.size() <= 1) {
        // Still prune stale spatial identities from previous full-frame calls
        // (no score decay here — identities only age out by frame count).
        if (!identities.empty()) {
            constexpr int MAX_UNSEEN_FRAMES = 30;
            for (auto& id : identities) {
                id.framesSinceLastSeen++;
            }
            for (auto it = identities.begin(); it != identities.end(); ) {
                if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
                    it = identities.erase(it);
                } else {
                    ++it;
                }
            }
        }
        return;
    }
    // --- Full-frame mode: 2+ detections, apply accumulated-score dedup ---
    // Helper: compute IoU between two rects
    auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
        int x1 = std::max(a.x, b.x);
        int y1 = std::max(a.y, b.y);
        int x2 = std::min(a.x + a.width, b.x + b.width);
        int y2 = std::min(a.y + a.height, b.y + b.height);
        if (x2 <= x1 || y2 <= y1) return 0.0f;
        float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
        float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
        return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
    };
    // Helper: find matching spatial identity by bounding box overlap.
    // NOTE: returns a pointer into `identities`; callers must not hold it
    // across a push_back (vector reallocation would invalidate it) — the
    // loops below re-query it each iteration for exactly that reason.
    auto findSpatialMatch = [&](const cv::Rect& box, const std::string& plateText) -> SpatialPlateIdentity* {
        for (auto& id : identities) {
            if (id.plateText == plateText) {
                // Reconstruct approximate rect from stored center.
                // The QUERY box's width/height are reused here — assumes
                // plate size is stable between frames; TODO confirm.
                cv::Rect storedRect(
                    static_cast<int>(id.center.x - box.width * 0.5f),
                    static_cast<int>(id.center.y - box.height * 0.5f),
                    box.width, box.height);
                if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
                    return &id;
                }
            }
        }
        return nullptr;
    };
    // Step 1: Build map of plateText → candidate indices
    std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
    for (size_t i = 0; i < results.size(); ++i) {
        if (results[i].className.empty()) continue;
        plateCandidates[results[i].className].push_back(i);
    }
    // Step 2: Resolve duplicates using spatial accumulated scores
    for (auto& [plateText, indices] : plateCandidates) {
        if (indices.size() <= 1) continue;
        // Find which candidate has the best accumulated score at its location.
        // A candidate backed by an existing identity gets its history added
        // on top of this frame's confidence, so persistent locations win.
        size_t winner = indices[0];
        float bestScore = 0.0f;
        for (size_t idx : indices) {
            float score = results[idx].confidence;
            auto* match = findSpatialMatch(results[idx].box, plateText);
            if (match) {
                score = match->accumulatedScore + results[idx].confidence;
            }
            if (score > bestScore) {
                bestScore = score;
                winner = idx;
            }
        }
        // Clear plate text from non-winners (removed in Step 4).
        for (size_t idx : indices) {
            if (idx != winner) {
                results[idx].className.clear();
            }
        }
    }
    // Step 3: Update spatial identities — winners accumulate, losers decay
    constexpr float DECAY_FACTOR = 0.8f;
    constexpr float MIN_SCORE = 0.1f;
    constexpr int MAX_UNSEEN_FRAMES = 30;
    // Age all existing identities; matched ones are reset to 0 below.
    for (auto& id : identities) {
        id.framesSinceLastSeen++;
    }
    for (auto& r : results) {
        if (r.className.empty()) continue;
        cv::Point2f center(
            r.box.x + r.box.width * 0.5f,
            r.box.y + r.box.height * 0.5f);
        auto* match = findSpatialMatch(r.box, r.className);
        if (match) {
            // Same plate at same location — accumulate
            match->accumulatedScore += r.confidence;
            match->center = center; // update position
            match->framesSinceLastSeen = 0;
        } else {
            // New plate location — add entry
            // (aggregate init: {center, plateText, accumulatedScore,
            // framesSinceLastSeen} — presumably matches the declared field
            // order of SpatialPlateIdentity; verify against its definition)
            identities.push_back({ center, r.className, r.confidence, 0 });
        }
    }
    // Decay unseen identities and remove stale ones
    for (auto it = identities.begin(); it != identities.end(); ) {
        if (it->framesSinceLastSeen > 0) {
            it->accumulatedScore *= DECAY_FACTOR;
        }
        if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
            it = identities.erase(it);
        } else {
            ++it;
        }
    }
    // Step 4: Remove entries with cleared plate text (losers from Step 2)
    results.erase(
        std::remove_if(results.begin(), results.end(),
            [](const Object& o) { return o.className.empty(); }),
        results.end());
}
};