Improve ALPR_OCR performance

This commit is contained in:
2026-04-14 20:30:21 +10:00
parent 3349b45ade
commit f9a0af8949
18 changed files with 991 additions and 77 deletions

View File

@@ -363,10 +363,14 @@ namespace ANSCENTER
ocrModelConfig.ocrLanguage = ocrLang;
ocrModelConfig.useDetector = true;
ocrModelConfig.useRecognizer = true;
ocrModelConfig.useCLS = true;
// Skip the angle classifier for ALPR. License-plate boxes
// from the YOLO detector are already axis-aligned, so the
// 180° classifier is dead weight (one extra ORT call per
// plate for no recall gain).
ocrModelConfig.useCLS = false;
ocrModelConfig.useLayout = false;
ocrModelConfig.useTable = false;
ocrModelConfig.useTensorRT = false;
ocrModelConfig.useTensorRT = true;
ocrModelConfig.enableMKLDNN = false;
ocrModelConfig.useDilation = true;
ocrModelConfig.useAngleCLS = false;
@@ -375,7 +379,7 @@ namespace ANSCENTER
ocrModelConfig.detectionBoxThreshold = 0.3;
ocrModelConfig.detectionDBUnclipRatio = 1.2;
ocrModelConfig.clsThreshold = 0.9;
ocrModelConfig.limitSideLen = 2560;
ocrModelConfig.limitSideLen = 480;
// Pass the original ALPR model zip path — ANSOCRBase::Initialize
// will extract it to the same folder (already done, so extraction
@@ -638,41 +642,104 @@ namespace ANSCENTER
return {};
}
std::vector<Object> output;
output.reserve(lprOutput.size());
// Step 2: Collect crops from every valid plate. Wide plates
// (aspect >= 2.0) are treated as a single text line; narrow
// plates (2-row layouts like Japanese) are split horizontally
// at H/2 into top and bottom rows. All crops go through a
// single batched recognizer call, bypassing the OCR text-line
// detector entirely — for ALPR the LP YOLO box already bounds
// the text region precisely.
struct PlateInfo {
size_t origIndex; // into lprOutput
std::vector<size_t> cropIndices; // into allCrops
cv::Mat plateROI; // full (unsplit) ROI, kept for colour
};
std::vector<cv::Mat> allCrops;
std::vector<PlateInfo> plateInfos;
allCrops.reserve(lprOutput.size() * 2);
plateInfos.reserve(lprOutput.size());
for (auto& lprObject : lprOutput) {
const cv::Rect& box = lprObject.box;
for (size_t i = 0; i < lprOutput.size(); ++i) {
const cv::Rect& box = lprOutput[i].box;
// Calculate safe cropped region
const int x1 = std::max(0, box.x);
const int y1 = std::max(0, box.y);
const int width = std::min(frameWidth - x1, box.width);
const int width = std::min(frameWidth - x1, box.width);
const int height = std::min(frameHeight - y1, box.height);
if (width <= 0 || height <= 0) continue;
cv::Rect lprPos(x1, y1, width, height);
cv::Mat plateROI = frame(lprPos);
cv::Mat plateROI = frame(cv::Rect(x1, y1, width, height));
// Step 2: Run OCR on the detected plate
std::string ocrText = RunOCROnPlate(plateROI, cameraId);
PlateInfo info;
info.origIndex = i;
info.plateROI = plateROI;
if (ocrText.empty()) continue;
const float aspect = static_cast<float>(width) /
std::max(1, height);
// 2-row heuristic: aspect < 2.0 → split top/bottom.
// Threshold tuned to catch Japanese square plates
// (~1.5–1.9) while leaving wide EU/VN plates (3.0+)
// untouched.
if (aspect < 2.0f && height >= 24) {
const int halfH = height / 2;
info.cropIndices.push_back(allCrops.size());
allCrops.push_back(plateROI(cv::Rect(0, 0, width, halfH)));
info.cropIndices.push_back(allCrops.size());
allCrops.push_back(plateROI(cv::Rect(0, halfH, width, height - halfH)));
}
else {
info.cropIndices.push_back(allCrops.size());
allCrops.push_back(plateROI);
}
plateInfos.push_back(std::move(info));
}
if (allCrops.empty()) {
return {};
}
// Step 3: Single batched recognizer call for every crop.
// ONNXOCRRecognizer groups crops by bucket width and issues
// one ORT Run per bucket — typically 1–2 GPU calls for an
// entire frame regardless of plate count.
auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);
// Step 4: Assemble per-plate output
std::vector<Object> output;
output.reserve(plateInfos.size());
for (const auto& info : plateInfos) {
std::string combinedText;
for (size_t cropIdx : info.cropIndices) {
if (cropIdx >= ocrResults.size()) continue;
const std::string& lineText = ocrResults[cropIdx].first;
if (lineText.empty()) continue;
if (!combinedText.empty()) combinedText += " ";
combinedText += lineText;
}
if (combinedText.empty()) continue;
Object lprObject = lprOutput[info.origIndex];
lprObject.cameraId = cameraId;
// Use ALPRChecker for text stabilization if enabled
// Cross-frame stabilization (unchanged)
if (_enableALPRChecker) {
lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
} else {
lprObject.className = ocrText;
lprObject.className = alprChecker.checkPlateByTrackId(
cameraId, combinedText, lprObject.trackId);
}
else {
lprObject.className = combinedText;
}
if (lprObject.className.empty()) continue;
// Step 3: Colour detection (optional)
std::string colour = DetectLPColourCached(plateROI, cameraId, lprObject.className);
// Optional colour detection on the full plate ROI
std::string colour = DetectLPColourCached(
info.plateROI, cameraId, lprObject.className);
if (!colour.empty()) {
lprObject.extraInfo = "color:" + colour;
}