Support tracker to improve ALPR_OCR

2026-04-14 21:18:10 +10:00
parent f9a0af8949
commit 5706615ed5
4 changed files with 435 additions and 62 deletions
--- a/modules/ANSLPR/ANSLPR_OCR.cpp
+++ b/modules/ANSLPR/ANSLPR_OCR.cpp
@@ -547,6 +547,181 @@ namespace ANSCENTER
 		return colour;
 	}

+	// ── Full-frame vs pipeline auto-detection ────────────────────────────
+	// Mirror of ANSALPR_OD::shouldUseALPRChecker. Returns true once
+	// CONFIRM_THRESHOLD consecutive inputs from `cameraId` (each at least
+	// MIN_FULLFRAME_WIDTH wide) had the exact same (width, height).
+	// Pre-cropped pipeline inputs vary by a few pixels per crop, so the
+	// streak keeps resetting and we return false. Real video keeps its
+	// frame dimensions, so it flips into FULL-FRAME mode and starts running
+	// the ALPRChecker voting + ensureUniquePlateText dedup.
+	bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
+	                                        const std::string& cameraId) {
+		// Force disabled via SetALPRCheckerEnabled(false) → never use.
+		if (!_enableALPRChecker) return false;
+
+		// Small images are treated as pipeline crops; tracker state is left untouched.
+		if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) return false;
+
+		// NOTE(review): _imageSizeTrackers is accessed without a lock here — confirm callers serialise.
+		auto& tracker = _imageSizeTrackers[cameraId];
+		const bool wasFullFrame = tracker.detectedFullFrame;
+		if (imageSize == tracker.lastSize) {
+			// Saturate at the threshold: an unbounded per-frame ++ would overflow the int on long streams.
+			if (tracker.consistentCount < ImageSizeTracker::CONFIRM_THRESHOLD) tracker.consistentCount++;
+			if (tracker.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) tracker.detectedFullFrame = true;
+		} else {
+			tracker.lastSize          = imageSize;
+			tracker.consistentCount   = 1;
+			tracker.detectedFullFrame = false;
+		}
+		if (tracker.detectedFullFrame != wasFullFrame) {
+			ANS_DBG("ALPR_OCR_Checker",
+			        "cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
+			        cameraId.c_str(),
+			        tracker.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
+			        imageSize.width, imageSize.height, tracker.consistentCount);
+		}
+		return tracker.detectedFullFrame;
+	}
+
+	// ── Spatial plate dedup with accumulated scoring ─────────────────────
+	// Mirror of ANSALPR_OD::ensureUniquePlateText. When two or more entries
+	// of `results` carry the same plate text (className), keep only the one
+	// whose location has the highest running score (stored per-camera
+	// accumulated confidence + this frame's confidence) and clear the rest.
+	// `results` is edited in place: when it holds 2+ entries, every entry
+	// whose className is empty at the end — cleared losers AND entries that
+	// arrived empty — is erased. Runs under _plateIdentitiesMutex.
+	void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
+	                                        const std::string& cameraId) {
+		std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex); // held for the whole call
+		auto& identities = _plateIdentities[cameraId]; // operator[] creates an empty list for a new camera
+
+		// Auto-detect mode by detection count.
+		//   0-1 detections → pipeline/single-crop mode → no dedup needed.
+		//   2+ detections → full-frame mode → apply accumulated scoring.
+		if (results.size() <= 1) {
+			// Still age out stale spatial identities (frame counter only: no score decay, no refresh)
+			if (!identities.empty()) {
+				constexpr int MAX_UNSEEN_FRAMES = 30; // same value is declared again in Step 3 below
+				for (auto& id : identities) id.framesSinceLastSeen++;
+				for (auto it = identities.begin(); it != identities.end(); ) {
+					if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
+						it = identities.erase(it);
+					} else {
+						++it;
+					}
+				}
+			}
+			return;
+		}
+
+		// Helper: intersection-over-union of two rects; 0 when they do not overlap.
+		auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
+			int x1 = std::max(a.x, b.x);
+			int y1 = std::max(a.y, b.y);
+			int x2 = std::min(a.x + a.width,  b.x + b.width);
+			int y2 = std::min(a.y + a.height, b.y + b.height);
+			if (x2 <= x1 || y2 <= y1) return 0.0f;
+			float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
+			float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
+			return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
+		};
+
+		// Helper: first stored identity with the same text that overlaps `box`, or nullptr.
+		auto findSpatialMatch = [&](const cv::Rect& box,
+		                            const std::string& plateText) -> SpatialPlateIdentity* {
+			for (auto& id : identities) {
+				if (id.plateText == plateText) {
+					cv::Rect storedRect( // identities keep only a center, so box's own size is reused
+						static_cast<int>(id.center.x - box.width  * 0.5f),
+						static_cast<int>(id.center.y - box.height * 0.5f),
+						box.width, box.height);
+					if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
+						return &id;
+					}
+				}
+			}
+			return nullptr;
+		};
+
+		// Step 1: Build map of plateText → candidate indices
+		std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
+		for (size_t i = 0; i < results.size(); ++i) {
+			if (results[i].className.empty()) continue; // empty text never takes part in dedup
+			plateCandidates[results[i].className].push_back(i);
+		}
+
+		// Step 2: Resolve duplicates using spatial accumulated scores
+		for (auto& [plateText, indices] : plateCandidates) {
+			if (indices.size() <= 1) continue;
+
+			size_t winner = indices[0]; // fallback when no candidate scores above 0
+			float  bestScore = 0.0f;
+
+			for (size_t idx : indices) {
+				float score = results[idx].confidence;
+				auto* match = findSpatialMatch(results[idx].box, plateText);
+				if (match) {
+					score = match->accumulatedScore + results[idx].confidence;
+				}
+				if (score > bestScore) {
+					bestScore = score;
+					winner    = idx;
+				}
+			}
+
+			for (size_t idx : indices) {
+				if (idx != winner) {
+					results[idx].className.clear();
+				}
+			}
+		}
+
+		// Step 3: Update spatial identities — winners accumulate, losers decay
+		constexpr float DECAY_FACTOR     = 0.8f;
+		constexpr float MIN_SCORE        = 0.1f;
+		constexpr int   MAX_UNSEEN_FRAMES = 30; // also declared in the early-return path above
+
+		for (auto& id : identities) id.framesSinceLastSeen++; // age all; matches are reset to 0 below
+
+		for (auto& r : results) {
+			if (r.className.empty()) continue;
+
+			cv::Point2f center(
+				r.box.x + r.box.width  * 0.5f,
+				r.box.y + r.box.height * 0.5f);
+
+			auto* match = findSpatialMatch(r.box, r.className); // points into identities; not used after push_back
+			if (match) {
+				match->accumulatedScore += r.confidence;
+				match->center            = center;
+				match->framesSinceLastSeen = 0;
+			} else {
+				identities.push_back({ center, r.className, r.confidence, 0 }); // {center, plateText, accumulatedScore, framesSinceLastSeen}
+			}
+		}
+
+		// Decay identities not matched this call, then drop weak or long-unseen ones
+		for (auto it = identities.begin(); it != identities.end(); ) {
+			if (it->framesSinceLastSeen > 0) {
+				it->accumulatedScore *= DECAY_FACTOR;
+			}
+			if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
+				it = identities.erase(it);
+			} else {
+				++it;
+			}
+		}
+
+		// Step 4: Remove entries with empty plate text (cleared in Step 2 or empty on entry)
+		results.erase(
+			std::remove_if(results.begin(), results.end(),
+				[](const Object& o) { return o.className.empty(); }),
+			results.end());
+	}
+
 	// ── OCR on a single plate ROI ────────────────────────────────────────
 	// Returns the plate text via the out-parameter and populates alprExtraInfo
 	// with the structured ALPR JSON (zone parts) when ALPR mode is active.
@@ -712,6 +887,13 @@ namespace ANSCENTER
 			std::vector<Object> output;
 			output.reserve(plateInfos.size());

+			// Decide once per frame whether the tracker-based correction
+			// layer should run. We auto-detect full-frame vs pipeline mode
+			// by watching for consecutive frames with identical dimensions,
+			// exactly the same way ANSALPR_OD does it.
+			const bool useChecker = shouldUseALPRChecker(
+				cv::Size(frameWidth, frameHeight), cameraId);
+
 			for (const auto& info : plateInfos) {
 				std::string combinedText;
 				for (size_t cropIdx : info.cropIndices) {
@@ -726,8 +908,9 @@ namespace ANSCENTER
 				Object lprObject = lprOutput[info.origIndex];
 				lprObject.cameraId = cameraId;

-				// Cross-frame stabilization (unchanged)
-				if (_enableALPRChecker) {
+				// Cross-frame stabilization: per-track majority vote in
+				// full-frame mode, raw OCR text in pipeline mode.
+				if (useChecker) {
 					lprObject.className = alprChecker.checkPlateByTrackId(
 						cameraId, combinedText, lprObject.trackId);
 				}
@@ -747,6 +930,14 @@ namespace ANSCENTER
 				output.push_back(std::move(lprObject));
 			}

+			// Spatial dedup: if two detections in the same frame ended up
+			// with the same plate text, keep only the one whose spatial
+			// history has the higher accumulated confidence. Skip this in
+			// pipeline mode because there's only ever one plate per call.
+			if (useChecker) {
+				ensureUniquePlateText(output, cameraId);
+			}
+
 			return output;
 		}
 		catch (const cv::Exception& e) {
--- a/modules/ANSLPR/ANSLPR_OCR.h
+++ b/modules/ANSLPR/ANSLPR_OCR.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <string>
 #include <mutex>
+#include <unordered_map>
 #include <utility>
 #include <vector>

@@ -45,6 +46,66 @@ namespace ANSCENTER

        ALPRChecker     alprChecker;

+        // ----------------------------------------------------------------
+        //  Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
+        //
+        //  When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each
+        //  frame is a different small image), the tracker can't work — the
+        //  LP detector sees a totally new image every call so trackIds mean
+        //  nothing. In that "pipeline" mode we must skip the ALPRChecker
+        //  voting layer entirely and return raw OCR results.
+        //
+        //  When the caller feeds full-frame video (same resolution every
+        //  frame, plates moving through the scene), the tracker works
+        //  normally and we run plate text through ALPRChecker majority
+        //  voting + spatial dedup to stabilise readings.
+        //
+        //  Mode is auto-detected by watching whether consecutive frames
+        //  share the exact same (width, height) for at least
+        //  CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels;
+        //  full-frame video keeps the same dimensions on every frame.
+        // ----------------------------------------------------------------
+        struct ImageSizeTracker {                              // per-camera state for shouldUseALPRChecker()
+            cv::Size lastSize{ 0, 0 };                         // size of the most recent input that reached the comparison
+            int      consistentCount = 0;                      // consecutive inputs matching lastSize (reset to 1 on a size change)
+            bool     detectedFullFrame = false;                // set once consistentCount reaches CONFIRM_THRESHOLD; cleared on a size change
+            static constexpr int CONFIRM_THRESHOLD   = 5;      // equal-size inputs in a row needed to confirm full-frame mode
+            static constexpr int MIN_FULLFRAME_WIDTH = 1000;   // narrower inputs are treated as pipeline crops
+        };
+        std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers; // keyed by cameraId
+
+        [[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,    // true → full-frame mode: run ALPRChecker voting + dedup
+                                                const std::string& cameraId); // false → checker disabled, small input, or size not yet stable
+
+        // ----------------------------------------------------------------
+        //  Spatial plate identity persistence (ported from ANSALPR_OD)
+        //
+        //  Prevents the same plate string from appearing on two different
+        //  vehicles in the same frame. The LP tracker may briefly assign
+        //  the same trackId to two different plates when vehicles pass
+        //  each other, or two different trackIds to the same plate when
+        //  occlusion breaks a track. In either case, OCR can produce the
+        //  same text for two spatial locations for a frame or two — which
+        //  looks like "plate flicker" in the UI.
+        //
+        //  ensureUniquePlateText() resolves the ambiguity by accumulating
+        //  confidence per spatial location. When two detections share a
+        //  plate text, the one whose spatial history has the higher score
+        //  wins and the other has its className cleared.
+        // ----------------------------------------------------------------
+        struct SpatialPlateIdentity {             // one remembered plate location for a camera; member order is used by aggregate init
+            cv::Point2f center;                   // plate center in frame coords; moved to the latest matching box
+            std::string plateText;                // plate text (Object::className) this identity was created with
+            float       accumulatedScore    = 0.0f; // += confidence when matched; scaled down on calls where it is not matched
+            int         framesSinceLastSeen = 0;    // ensureUniquePlateText() calls since the last match; 0 when just matched
+        };
+        std::mutex _plateIdentitiesMutex; // locked for the whole of ensureUniquePlateText()
+        std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities; // cameraId → remembered plate locations
+        static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU must exceed this for a box to match a stored identity
+
+        void ensureUniquePlateText(std::vector<Object>& results,      // edited in place; duplicate-text losers are removed
+                                   const std::string& cameraId);      // selects which camera's identity list is used
+
        // --- Original model zip path (reused for ANSONNXOCR initialization) ---
        std::string     _modelZipFilePath;