Support tracker to improve ALPR_OCR

This commit is contained in:
2026-04-14 21:18:10 +10:00
parent f9a0af8949
commit 5706615ed5
4 changed files with 435 additions and 62 deletions

View File

@@ -547,6 +547,181 @@ namespace ANSCENTER
return colour;
}
// ── Full-frame vs pipeline auto-detection ────────────────────────────
// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
// watches whether consecutive frames from a given camera have the exact
// same (width, height). Pre-cropped pipeline inputs vary by a few
// pixels per crop, so the exact-match check fails and we return false.
// Real video frames keep identical dimensions from one frame to the
// next, so after a few consistent frames we flip into FULL-FRAME mode
// and start running the ALPRChecker voting + ensureUniquePlateText dedup.
/// @param imageSize  Dimensions of the image passed to the current call.
/// @param cameraId   Key selecting the per-camera size-tracking state.
/// @return true when the camera is currently classified as full-frame
///         (checker layer should run), false otherwise.
// NOTE(review): _imageSizeTrackers is read and written here without any
// lock being taken in this function — confirm callers serialize access.
bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
                                       const std::string& cameraId) {
    // Two unconditional "no" answers, neither of which touches tracker
    // state: the checker was disabled via SetALPRCheckerEnabled(false),
    // or the image is too narrow to be anything but a pipeline crop.
    const bool eligible = _enableALPRChecker &&
        !(imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH);
    if (!eligible) {
        return false;
    }

    ImageSizeTracker& state = _imageSizeTrackers[cameraId];
    const bool previousMode = state.detectedFullFrame;

    if (!(imageSize == state.lastSize)) {
        // Dimensions changed: this frame starts a new streak and the
        // camera falls back to pipeline mode until the streak is confirmed.
        state.lastSize = imageSize;
        state.consistentCount = 1;
        state.detectedFullFrame = false;
    } else if (++state.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
        // Streak is long enough: treat the input as full-frame video.
        state.detectedFullFrame = true;
    }

    // Log only on a mode transition, not on every frame.
    if (previousMode != state.detectedFullFrame) {
        ANS_DBG("ALPR_OCR_Checker",
            "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
            cameraId.c_str(),
            state.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
            imageSize.width, imageSize.height, state.consistentCount);
    }

    return state.detectedFullFrame;
}
// ── Spatial plate dedup with accumulated scoring ─────────────────────
// Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one
// detection in the same frame ends up with the same plate text (e.g.
// tracker occlusion or two cars in a single frame reading the same
// string), we resolve the ambiguity by accumulating confidence per
// spatial location across frames. The location with the higher running
// score keeps the plate text; the loser has its className cleared and
// is dropped from the output.
void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
const std::string& cameraId) {
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
auto& identities = _plateIdentities[cameraId];
// Auto-detect mode by detection count.
// 1 detection → pipeline/single-crop mode → no dedup needed.
// 2+ detections → full-frame mode → apply accumulated scoring.
if (results.size() <= 1) {
// Still age out stale spatial identities from previous full-frame calls
if (!identities.empty()) {
constexpr int MAX_UNSEEN_FRAMES = 30;
for (auto& id : identities) id.framesSinceLastSeen++;
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
}
return;
}
// Helper: IoU between two rects.
auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
int x1 = std::max(a.x, b.x);
int y1 = std::max(a.y, b.y);
int x2 = std::min(a.x + a.width, b.x + b.width);
int y2 = std::min(a.y + a.height, b.y + b.height);
if (x2 <= x1 || y2 <= y1) return 0.0f;
float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
};
// Helper: find matching spatial identity by bounding-box overlap.
auto findSpatialMatch = [&](const cv::Rect& box,
const std::string& plateText) -> SpatialPlateIdentity* {
for (auto& id : identities) {
if (id.plateText == plateText) {
cv::Rect storedRect(
static_cast<int>(id.center.x - box.width * 0.5f),
static_cast<int>(id.center.y - box.height * 0.5f),
box.width, box.height);
if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
return &id;
}
}
}
return nullptr;
};
// Step 1: Build map of plateText → candidate indices
std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
for (size_t i = 0; i < results.size(); ++i) {
if (results[i].className.empty()) continue;
plateCandidates[results[i].className].push_back(i);
}
// Step 2: Resolve duplicates using spatial accumulated scores
for (auto& [plateText, indices] : plateCandidates) {
if (indices.size() <= 1) continue;
size_t winner = indices[0];
float bestScore = 0.0f;
for (size_t idx : indices) {
float score = results[idx].confidence;
auto* match = findSpatialMatch(results[idx].box, plateText);
if (match) {
score = match->accumulatedScore + results[idx].confidence;
}
if (score > bestScore) {
bestScore = score;
winner = idx;
}
}
for (size_t idx : indices) {
if (idx != winner) {
results[idx].className.clear();
}
}
}
// Step 3: Update spatial identities — winners accumulate, losers decay
constexpr float DECAY_FACTOR = 0.8f;
constexpr float MIN_SCORE = 0.1f;
constexpr int MAX_UNSEEN_FRAMES = 30;
for (auto& id : identities) id.framesSinceLastSeen++;
for (auto& r : results) {
if (r.className.empty()) continue;
cv::Point2f center(
r.box.x + r.box.width * 0.5f,
r.box.y + r.box.height * 0.5f);
auto* match = findSpatialMatch(r.box, r.className);
if (match) {
match->accumulatedScore += r.confidence;
match->center = center;
match->framesSinceLastSeen = 0;
} else {
identities.push_back({ center, r.className, r.confidence, 0 });
}
}
// Decay unseen identities and remove stale ones
for (auto it = identities.begin(); it != identities.end(); ) {
if (it->framesSinceLastSeen > 0) {
it->accumulatedScore *= DECAY_FACTOR;
}
if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
it = identities.erase(it);
} else {
++it;
}
}
// Step 4: Remove entries with cleared plate text
results.erase(
std::remove_if(results.begin(), results.end(),
[](const Object& o) { return o.className.empty(); }),
results.end());
}
// ── OCR on a single plate ROI ────────────────────────────────────────
// Returns the plate text via the out-parameter and populates alprExtraInfo
// with the structured ALPR JSON (zone parts) when ALPR mode is active.
@@ -712,6 +887,13 @@ namespace ANSCENTER
std::vector<Object> output;
output.reserve(plateInfos.size());
// Decide once per frame whether the tracker-based correction
// layer should run. We auto-detect full-frame vs pipeline mode
// by watching for pixel-identical consecutive frames, exactly
// the same way ANSALPR_OD does it.
const bool useChecker = shouldUseALPRChecker(
cv::Size(frameWidth, frameHeight), cameraId);
for (const auto& info : plateInfos) {
std::string combinedText;
for (size_t cropIdx : info.cropIndices) {
@@ -726,8 +908,9 @@ namespace ANSCENTER
Object lprObject = lprOutput[info.origIndex];
lprObject.cameraId = cameraId;
// Cross-frame stabilization (unchanged)
if (_enableALPRChecker) {
// Cross-frame stabilization: per-track majority vote in
// full-frame mode, raw OCR text in pipeline mode.
if (useChecker) {
lprObject.className = alprChecker.checkPlateByTrackId(
cameraId, combinedText, lprObject.trackId);
}
@@ -747,6 +930,14 @@ namespace ANSCENTER
output.push_back(std::move(lprObject));
}
// Spatial dedup: if two detections in the same frame ended up
// with the same plate text, keep only the one whose spatial
// history has the higher accumulated confidence. Skip this in
// pipeline mode because there's only ever one plate per call.
if (useChecker) {
ensureUniquePlateText(output, cameraId);
}
return output;
}
catch (const cv::Exception& e) {

View File

@@ -6,6 +6,7 @@
#include <map>
#include <string>
#include <mutex>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -45,6 +46,66 @@ namespace ANSCENTER
ALPRChecker alprChecker;
// ----------------------------------------------------------------
// Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
//
// When the caller feeds ANSALPR_OCR pre-cropped vehicle ROIs (each
// frame is a different small image), the tracker can't work — the
// LP detector sees a totally new image every call so trackIds mean
// nothing. In that "pipeline" mode we must skip the ALPRChecker
// voting layer entirely and return raw OCR results.
//
// When the caller feeds full-frame video (same resolution every
// frame, plates moving through the scene), the tracker works
// normally and we run plate text through ALPRChecker majority
// voting + spatial dedup to stabilise readings.
//
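// Mode is auto-detected by watching whether consecutive frames
// share the exact same (width, height) for at least
// CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels
// from call to call; full-frame video keeps the same dimensions
// on every frame. Images narrower than MIN_FULLFRAME_WIDTH are
// always treated as pipeline crops.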
// ----------------------------------------------------------------
// Per-camera bookkeeping for the frame-size consistency check.
struct ImageSizeTracker {
    // Length of the same-size streak required to switch to full-frame mode.
    static constexpr int CONFIRM_THRESHOLD = 5;
    // Images narrower than this are never considered full-frame.
    static constexpr int MIN_FULLFRAME_WIDTH = 1000;

    cv::Size lastSize{ 0, 0 };          // dimensions that started the current streak
    int consistentCount{ 0 };           // length of the current same-size streak
    bool detectedFullFrame{ false };    // current verdict for this camera
};
// Size-tracking state, one entry per cameraId (entries are created on
// first use by shouldUseALPRChecker).
// NOTE(review): no mutex is declared next to this map in the visible
// code — confirm that calls for the same instance are serialized.
std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers;
// Updates the size tracker of `cameraId` with `imageSize` and returns
// true when that camera is currently classified as full-frame input.
// Always returns false when the checker is disabled or the image is
// narrower than ImageSizeTracker::MIN_FULLFRAME_WIDTH.
[[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,
const std::string& cameraId);
// ----------------------------------------------------------------
// Spatial plate identity persistence (ported from ANSALPR_OD)
//
// Prevents the same plate string from appearing on two different
// vehicles in the same frame. The LP tracker may briefly assign
// the same trackId to two different plates when vehicles pass
// each other, or two different trackIds to the same plate when
// occlusion breaks a track. In either case, OCR can produce the
// same text for two spatial locations for a frame or two — which
// looks like "plate flicker" in the UI.
//
// ensureUniquePlateText() resolves the ambiguity by accumulating
// confidence per spatial location. When two detections share a
// plate text, the one whose spatial history has the higher score
// wins and the other has its className cleared.
// ----------------------------------------------------------------
// One remembered (location, text) pairing with its running score.
// Member order matters: the struct is aggregate-initialised positionally
// as { center, plateText, accumulatedScore, framesSinceLastSeen }.
struct SpatialPlateIdentity {
    cv::Point2f center;                 // plate center in frame coords
    std::string plateText;              // text associated with this location
    float accumulatedScore{ 0.0f };     // running sum of confidences, decayed while unseen
    int framesSinceLastSeen{ 0 };       // calls since this identity was last matched
};
// Held by ensureUniquePlateText() for its whole duration; guards
// _plateIdentities.
std::mutex _plateIdentitiesMutex;
// Spatial identity history, one vector per cameraId.
std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
// A detection matches a stored identity when the texts are equal and
// the boxes overlap by more than this ratio.
static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold
// Resolves duplicate plate texts within one frame's `results` using the
// per-camera identity history: for each duplicated text only the
// best-scoring detection is kept, and detections left without text are
// erased from `results`. Also updates and prunes the history.
void ensureUniquePlateText(std::vector<Object>& results,
const std::string& cameraId);
// --- Original model zip path (reused for ANSONNXOCR initialization) ---
std::string _modelZipFilePath;