// ANSLPROCR.h — declaration of ANSCENTER::ANSALPR_OCR
// (YOLO-based license plate detection + PaddleOCR v5 text recognition).
#ifndef ANSLPROCR_H
#define ANSLPROCR_H
#pragma once

#include "ANSLPR.h"

#include <list>
#include <map>
#include <memory>        // std::unique_ptr members below
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Forward-declare ANSONNXOCR to avoid pulling in the full ANSOCR header chain
namespace ANSCENTER { class ANSONNXOCR; struct OCRModelConfig; }
namespace ANSCENTER
|
|
|
|
|
{
|
2026-04-13 19:48:32 +10:00
|
|
|
/// ANSALPR_OCR — License plate recognition using YOLO for LP detection
|
2026-04-12 17:16:16 +10:00
|
|
|
/// and ANSONNXOCR (PaddleOCR v5) for text recognition.
|
|
|
|
|
///
|
|
|
|
|
/// Pipeline:
|
2026-04-13 19:48:32 +10:00
|
|
|
/// 1. Detect license plates using _lpDetector (ANSRTYOLO on NVIDIA GPU, ANSONNXYOLO otherwise)
|
2026-04-12 17:16:16 +10:00
|
|
|
/// 2. For each detected plate, run OCR using _ocrEngine (ANSONNXOCR)
|
2026-04-13 19:48:32 +10:00
|
|
|
/// 3. Optionally classify plate colour using _lpColourDetector (ANSRTYOLO on NVIDIA GPU, ANSONNXYOLO otherwise)
|
2026-04-12 17:16:16 +10:00
|
|
|
///
|
|
|
|
|
/// Supports multiple countries via the Country enum and ALPR post-processing
|
|
|
|
|
/// from ANSOCR's ANSOCRBase infrastructure.
|
|
|
|
|
class ANSLPR_API ANSALPR_OCR : public ANSALPR {
|
|
|
|
|
private:
|
|
|
|
|
ANSCENTER::EngineType engineType;
|
|
|
|
|
|
|
|
|
|
// --- Detectors ---
|
|
|
|
|
std::unique_ptr<ANSCENTER::ANSODBase> _lpDetector = nullptr; // License plate detector
|
|
|
|
|
std::unique_ptr<ANSCENTER::ANSODBase> _lpColourDetector = nullptr; // License plate colour classifier
|
|
|
|
|
std::unique_ptr<ANSCENTER::ANSONNXOCR> _ocrEngine = nullptr; // OCR text recognizer
|
|
|
|
|
|
|
|
|
|
// --- Model configs ---
|
|
|
|
|
ANSCENTER::ModelConfig _lpdmodelConfig;
|
|
|
|
|
ANSCENTER::ModelConfig _lpColourModelConfig;
|
|
|
|
|
|
|
|
|
|
std::string _lpdLabels;
|
|
|
|
|
std::string _lpColourLabels;
|
|
|
|
|
cv::Mat _frameBuffer; // Reusable buffer for color conversion
|
|
|
|
|
|
|
|
|
|
std::vector<std::string> _lprModelClass;
|
|
|
|
|
|
|
|
|
|
ALPRChecker alprChecker;
|
|
|
|
|
|
2026-04-14 21:18:10 +10:00
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
// Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
|
|
|
|
|
//
|
|
|
|
|
// When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each
|
|
|
|
|
// frame is a different small image), the tracker can't work — the
|
|
|
|
|
// LP detector sees a totally new image every call so trackIds mean
|
|
|
|
|
// nothing. In that "pipeline" mode we must skip the ALPRChecker
|
|
|
|
|
// voting layer entirely and return raw OCR results.
|
|
|
|
|
//
|
|
|
|
|
// When the caller feeds full-frame video (same resolution every
|
|
|
|
|
// frame, plates moving through the scene), the tracker works
|
|
|
|
|
// normally and we run plate text through ALPRChecker majority
|
|
|
|
|
// voting + spatial dedup to stabilise readings.
|
|
|
|
|
//
|
|
|
|
|
// Mode is auto-detected by watching whether consecutive frames
|
|
|
|
|
// share the exact same (width, height) for at least
|
|
|
|
|
// CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels;
|
|
|
|
|
// full-frame video is pixel-identical.
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
struct ImageSizeTracker {
|
|
|
|
|
cv::Size lastSize{ 0, 0 };
|
|
|
|
|
int consistentCount = 0;
|
|
|
|
|
bool detectedFullFrame = false;
|
|
|
|
|
static constexpr int CONFIRM_THRESHOLD = 5;
|
|
|
|
|
static constexpr int MIN_FULLFRAME_WIDTH = 1000;
|
|
|
|
|
};
|
|
|
|
|
std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers;
|
|
|
|
|
|
|
|
|
|
[[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,
|
|
|
|
|
const std::string& cameraId);
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
// Spatial plate identity persistence (ported from ANSALPR_OD)
|
|
|
|
|
//
|
|
|
|
|
// Prevents the same plate string from appearing on two different
|
|
|
|
|
// vehicles in the same frame. The LP tracker may briefly assign
|
|
|
|
|
// the same trackId to two different plates when vehicles pass
|
|
|
|
|
// each other, or two different trackIds to the same plate when
|
|
|
|
|
// occlusion breaks a track. In either case, OCR can produce the
|
|
|
|
|
// same text for two spatial locations for a frame or two — which
|
|
|
|
|
// looks like "plate flicker" in the UI.
|
|
|
|
|
//
|
|
|
|
|
// ensureUniquePlateText() resolves the ambiguity by accumulating
|
|
|
|
|
// confidence per spatial location. When two detections share a
|
|
|
|
|
// plate text, the one whose spatial history has the higher score
|
|
|
|
|
// wins and the other has its className cleared.
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
struct SpatialPlateIdentity {
|
|
|
|
|
cv::Point2f center; // plate center in frame coords
|
|
|
|
|
std::string plateText;
|
|
|
|
|
float accumulatedScore = 0.0f;
|
|
|
|
|
int framesSinceLastSeen = 0;
|
|
|
|
|
};
|
|
|
|
|
std::mutex _plateIdentitiesMutex;
|
|
|
|
|
std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
|
|
|
|
|
static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold
|
|
|
|
|
|
|
|
|
|
void ensureUniquePlateText(std::vector<Object>& results,
|
|
|
|
|
const std::string& cameraId);
|
|
|
|
|
|
2026-04-12 17:16:16 +10:00
|
|
|
// --- Original model zip path (reused for ANSONNXOCR initialization) ---
|
|
|
|
|
std::string _modelZipFilePath;
|
|
|
|
|
|
|
|
|
|
// --- Colour detection helpers ---
|
|
|
|
|
[[nodiscard]] std::string DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId);
|
|
|
|
|
[[nodiscard]] std::string DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText);
|
|
|
|
|
|
|
|
|
|
// LPC colour cache
|
|
|
|
|
struct ColourCacheEntry {
|
|
|
|
|
std::string colour;
|
|
|
|
|
int hitCount = 0;
|
|
|
|
|
};
|
|
|
|
|
std::mutex _colourCacheMutex;
|
|
|
|
|
std::unordered_map<std::string, ColourCacheEntry> _colourCache;
|
|
|
|
|
static constexpr size_t COLOUR_CACHE_MAX_SIZE = 200;
|
|
|
|
|
|
|
|
|
|
// --- OCR helper ---
|
|
|
|
|
[[nodiscard]] std::string RunOCROnPlate(const cv::Mat& plateROI, const std::string& cameraId);
|
|
|
|
|
|
2026-04-15 23:00:19 +10:00
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
// Plate preprocessing: classical perspective rectification
|
|
|
|
|
//
|
|
|
|
|
// Takes an LP YOLO bounding box and tries to find the plate's
|
|
|
|
|
// actual 4 corners via Canny + findContours + approxPolyDP. When
|
|
|
|
|
// that succeeds, the plate is warped to a rectangle whose height
|
|
|
|
|
// is fixed (kRectifiedHeight) and whose width preserves the
|
|
|
|
|
// detected plate's aspect ratio. This produces a tight,
|
|
|
|
|
// perspective-corrected crop that the recognizer handles more
|
|
|
|
|
// reliably than the tilted / skewed axis-aligned bbox.
|
|
|
|
|
//
|
|
|
|
|
// Falls back to minAreaRect on the largest contour if no 4-point
|
|
|
|
|
// polygon is found, and returns false outright if nothing
|
|
|
|
|
// plausible can be isolated. Callers must handle the false case
|
|
|
|
|
// by using the (padded) axis-aligned crop instead.
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
static constexpr int kRectifiedHeight = 220;
|
|
|
|
|
static constexpr float kMinPlateAspect = 1.3f;
|
|
|
|
|
static constexpr float kMaxPlateAspect = 6.0f;
|
|
|
|
|
static constexpr float kRectifyAreaFraction = 0.30f;
|
|
|
|
|
|
|
|
|
|
[[nodiscard]] bool RectifyPlateROI(
|
|
|
|
|
const cv::Mat& source,
|
|
|
|
|
const cv::Rect& bbox,
|
|
|
|
|
cv::Mat& outRectified) const;
|
|
|
|
|
|
|
|
|
|
// Order an arbitrary quadrilateral as
|
|
|
|
|
// [top-left, top-right, bottom-right, bottom-left] (in that order)
|
|
|
|
|
// using the x+y / y-x extreme trick so perspective transforms land
|
|
|
|
|
// right-side-up regardless of input winding.
|
|
|
|
|
[[nodiscard]] static std::vector<cv::Point2f>
|
|
|
|
|
OrderQuadCorners(const std::vector<cv::Point>& pts);
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
// Japan-only: targeted kana recovery
|
|
|
|
|
//
|
|
|
|
|
// The PaddleOCR v5 recognizer's CTC decoder silently drops a
|
|
|
|
|
// character when it sits next to a large blank region in the
|
|
|
|
|
// input image — which is exactly the layout of the bottom row
|
|
|
|
|
// of a Japanese plate (single small hiragana on the left, big
|
|
|
|
|
// gap, then 4 digits on the right). We detect this failure
|
|
|
|
|
// mode by counting UTF-8 codepoint classes in the fast-path
|
|
|
|
|
// output, and if hiragana/katakana is missing we re-run the
|
|
|
|
|
// recognizer on a tight crop of the kana region only. The
|
|
|
|
|
// recognizer handles that tight crop correctly because the
|
|
|
|
|
// input matches what it was trained on (a dense text-line
|
|
|
|
|
// image with no large blank stretches).
|
|
|
|
|
// ----------------------------------------------------------------
|
|
|
|
|
struct CodepointClassCounts {
|
|
|
|
|
int digit = 0;
|
|
|
|
|
int kanji = 0;
|
|
|
|
|
int hiragana = 0;
|
|
|
|
|
int katakana = 0;
|
|
|
|
|
};
|
|
|
|
|
[[nodiscard]] static CodepointClassCounts CountCodepointClasses(const std::string& text);
|
|
|
|
|
[[nodiscard]] static bool IsJapaneseIncomplete(const std::string& text);
|
|
|
|
|
|
|
|
|
|
// Strip non-text artifacts (screws, rivets, dirt, stickers) that
|
|
|
|
|
// the OCR recognizer occasionally picks up from plate surface
|
|
|
|
|
// features. These glyphs (degree sign, ring above, circles,
|
|
|
|
|
// ideographic punctuation, etc.) are never legitimate plate
|
|
|
|
|
// characters in any supported country, so we can drop them
|
|
|
|
|
// unconditionally. Runs of spaces resulting from stripped
|
|
|
|
|
// characters are collapsed and leading/trailing spaces trimmed.
|
|
|
|
|
[[nodiscard]] static std::string StripPlateArtifacts(const std::string& text);
|
|
|
|
|
|
|
|
|
|
// Run recognizer-only on a tight crop of the left portion of the
|
|
|
|
|
// bottom half, trying three vertical offsets to absorb row-split
|
|
|
|
|
// inaccuracies. Returns the first non-empty result that contains
|
|
|
|
|
// a hiragana or katakana codepoint, or empty string on failure.
|
|
|
|
|
[[nodiscard]] std::string RecoverKanaFromBottomHalf(
|
|
|
|
|
const cv::Mat& plateROI, int halfH) const;
|
|
|
|
|
|
2026-04-12 17:16:16 +10:00
|
|
|
public:
|
|
|
|
|
ANSALPR_OCR();
|
|
|
|
|
~ANSALPR_OCR();
|
|
|
|
|
[[nodiscard]] bool Initialize(const std::string& licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) override;
|
|
|
|
|
[[nodiscard]] bool LoadEngine() override;
|
|
|
|
|
[[nodiscard]] bool Inference(const cv::Mat& input, std::string& lprResult) override;
|
|
|
|
|
[[nodiscard]] bool Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) override;
|
|
|
|
|
[[nodiscard]] bool Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult) override;
|
|
|
|
|
[[nodiscard]] bool Inference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, std::string& lprResult, const std::string& cameraId) override;
|
|
|
|
|
[[nodiscard]] std::vector<Object> RunInference(const cv::Mat& input, const std::string& cameraId) override;
|
2026-04-15 09:23:05 +10:00
|
|
|
[[nodiscard]] std::vector<Object> RunInferencesBatch(
|
|
|
|
|
const cv::Mat& input,
|
|
|
|
|
const std::vector<cv::Rect>& vehicleBoxes,
|
|
|
|
|
const std::string& cameraId) override;
|
2026-04-12 17:16:16 +10:00
|
|
|
[[nodiscard]] bool Destroy() override;
|
|
|
|
|
|
|
|
|
|
/// Propagate country to inner OCR engine so ALPR post-processing
|
|
|
|
|
/// uses the correct plate formats and character corrections.
|
|
|
|
|
void SetCountry(Country country) override;
|
|
|
|
|
|
|
|
|
|
/// Propagate debug flag to all sub-detectors
|
|
|
|
|
void ActivateDebugger(bool debugFlag) override {
|
|
|
|
|
_debugFlag = debugFlag;
|
|
|
|
|
if (_lpDetector) _lpDetector->ActivateDebugger(debugFlag);
|
|
|
|
|
if (_lpColourDetector) _lpColourDetector->ActivateDebugger(debugFlag);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
#endif
|