#ifndef ANSLPROCR_H #define ANSLPROCR_H #pragma once #include "ANSLPR.h" #include #include #include #include #include #include #include // Forward-declare ANSONNXOCR to avoid pulling in the full ANSOCR header chain namespace ANSCENTER { class ANSONNXOCR; struct OCRModelConfig; } namespace ANSCENTER { /// ANSALPR_OCR — License plate recognition using YOLO for LP detection /// and ANSONNXOCR (PaddleOCR v5) for text recognition. /// /// Pipeline: /// 1. Detect license plates using _lpDetector (ANSRTYOLO on NVIDIA GPU, ANSONNXYOLO otherwise) /// 2. For each detected plate, run OCR using _ocrEngine (ANSONNXOCR) /// 3. Optionally classify plate colour using _lpColourDetector (ANSRTYOLO on NVIDIA GPU, ANSONNXYOLO otherwise) /// /// Supports multiple countries via the Country enum and ALPR post-processing /// from ANSOCR's ANSOCRBase infrastructure. class ANSLPR_API ANSALPR_OCR : public ANSALPR { private: ANSCENTER::EngineType engineType; // --- Detectors --- std::unique_ptr _lpDetector = nullptr; // License plate detector std::unique_ptr _lpColourDetector = nullptr; // License plate colour classifier std::unique_ptr _ocrEngine = nullptr; // OCR text recognizer // --- Model configs --- ANSCENTER::ModelConfig _lpdmodelConfig; ANSCENTER::ModelConfig _lpColourModelConfig; std::string _lpdLabels; std::string _lpColourLabels; cv::Mat _frameBuffer; // Reusable buffer for color conversion std::vector _lprModelClass; ALPRChecker alprChecker; // ---------------------------------------------------------------- // Full-frame vs pipeline auto-detection (ported from ANSALPR_OD) // // When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each // frame is a different small image), the tracker can't work — the // LP detector sees a totally new image every call so trackIds mean // nothing. In that "pipeline" mode we must skip the ALPRChecker // voting layer entirely and return raw OCR results. // // When the caller feeds full-frame video (same resolution every // frame, plates moving through the scene), the tracker works // normally and we run plate text through ALPRChecker majority // voting + spatial dedup to stabilise readings. // // Mode is auto-detected by watching whether consecutive frames // share the exact same (width, height) for at least // CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels; // full-frame video is pixel-identical. // ---------------------------------------------------------------- struct ImageSizeTracker { cv::Size lastSize{ 0, 0 }; int consistentCount = 0; bool detectedFullFrame = false; static constexpr int CONFIRM_THRESHOLD = 5; static constexpr int MIN_FULLFRAME_WIDTH = 1000; }; std::unordered_map _imageSizeTrackers; [[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize, const std::string& cameraId); // ---------------------------------------------------------------- // Spatial plate identity persistence (ported from ANSALPR_OD) // // Prevents the same plate string from appearing on two different // vehicles in the same frame. The LP tracker may briefly assign // the same trackId to two different plates when vehicles pass // each other, or two different trackIds to the same plate when // occlusion breaks a track. In either case, OCR can produce the // same text for two spatial locations for a frame or two — which // looks like "plate flicker" in the UI. // // ensureUniquePlateText() resolves the ambiguity by accumulating // confidence per spatial location. When two detections share a // plate text, the one whose spatial history has the higher score // wins and the other has its className cleared. // ---------------------------------------------------------------- struct SpatialPlateIdentity { cv::Point2f center; // plate center in frame coords std::string plateText; float accumulatedScore = 0.0f; int framesSinceLastSeen = 0; }; std::mutex _plateIdentitiesMutex; std::unordered_map> _plateIdentities; static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold void ensureUniquePlateText(std::vector& results, const std::string& cameraId); // --- Original model zip path (reused for ANSONNXOCR initialization) --- std::string _modelZipFilePath; // --- Colour detection helpers --- [[nodiscard]] std::string DetectLPColourDetector(const cv::Mat& lprROI, const std::string& cameraId); [[nodiscard]] std::string DetectLPColourCached(const cv::Mat& lprROI, const std::string& cameraId, const std::string& plateText); // LPC colour cache struct ColourCacheEntry { std::string colour; int hitCount = 0; }; std::mutex _colourCacheMutex; std::unordered_map _colourCache; static constexpr size_t COLOUR_CACHE_MAX_SIZE = 200; // --- OCR helper --- [[nodiscard]] std::string RunOCROnPlate(const cv::Mat& plateROI, const std::string& cameraId); // ---------------------------------------------------------------- // Plate preprocessing: classical perspective rectification // // Takes an LP YOLO bounding box and tries to find the plate's // actual 4 corners via Canny + findContours + approxPolyDP. When // that succeeds, the plate is warped to a rectangle whose height // is fixed (kRectifiedHeight) and whose width preserves the // detected plate's aspect ratio. This produces a tight, // perspective-corrected crop that the recognizer handles more // reliably than the tilted / skewed axis-aligned bbox. // // Falls back to minAreaRect on the largest contour if no 4-point // polygon is found, and returns false outright if nothing // plausible can be isolated. Callers must handle the false case // by using the (padded) axis-aligned crop instead. // ---------------------------------------------------------------- static constexpr int kRectifiedHeight = 220; static constexpr float kMinPlateAspect = 1.3f; static constexpr float kMaxPlateAspect = 6.0f; static constexpr float kRectifyAreaFraction = 0.30f; [[nodiscard]] bool RectifyPlateROI( const cv::Mat& source, const cv::Rect& bbox, cv::Mat& outRectified) const; // Order an arbitrary quadrilateral as // [top-left, top-right, bottom-right, bottom-left] (in that order) // using the x+y / y-x extreme trick so perspective transforms land // right-side-up regardless of input winding. [[nodiscard]] static std::vector OrderQuadCorners(const std::vector& pts); // ---------------------------------------------------------------- // Japan-only: targeted kana recovery // // The PaddleOCR v5 recognizer's CTC decoder silently drops a // character when it sits next to a large blank region in the // input image — which is exactly the layout of the bottom row // of a Japanese plate (single small hiragana on the left, big // gap, then 4 digits on the right). We detect this failure // mode by counting UTF-8 codepoint classes in the fast-path // output, and if hiragana/katakana is missing we re-run the // recognizer on a tight crop of the kana region only. The // recognizer handles that tight crop correctly because the // input matches what it was trained on (a dense text-line // image with no large blank stretches). // ---------------------------------------------------------------- struct CodepointClassCounts { int digit = 0; int kanji = 0; int hiragana = 0; int katakana = 0; }; [[nodiscard]] static CodepointClassCounts CountCodepointClasses(const std::string& text); [[nodiscard]] static bool IsJapaneseIncomplete(const std::string& text); // Strip non-text artifacts (screws, rivets, dirt, stickers) that // the OCR recognizer occasionally picks up from plate surface // features. These glyphs (degree sign, ring above, circles, // ideographic punctuation, etc.) are never legitimate plate // characters in any supported country, so we can drop them // unconditionally. Runs of spaces resulting from stripped // characters are collapsed and leading/trailing spaces trimmed. [[nodiscard]] static std::string StripPlateArtifacts(const std::string& text); // Run recognizer-only on a tight crop of the left portion of the // bottom half, trying three vertical offsets to absorb row-split // inaccuracies. Returns the first non-empty result that contains // a hiragana or katakana codepoint, or empty string on failure. [[nodiscard]] std::string RecoverKanaFromBottomHalf( const cv::Mat& plateROI, int halfH) const; public: ANSALPR_OCR(); ~ANSALPR_OCR(); [[nodiscard]] bool Initialize(const std::string& licenseKey, const std::string& modelZipFilePath, const std::string& modelZipPassword, double detectorThreshold, double ocrThreshold, double colourThreshold) override; [[nodiscard]] bool LoadEngine() override; [[nodiscard]] bool Inference(const cv::Mat& input, std::string& lprResult) override; [[nodiscard]] bool Inference(const cv::Mat& input, std::string& lprResult, const std::string& cameraId) override; [[nodiscard]] bool Inference(const cv::Mat& input, const std::vector& Bbox, std::string& lprResult) override; [[nodiscard]] bool Inference(const cv::Mat& input, const std::vector& Bbox, std::string& lprResult, const std::string& cameraId) override; [[nodiscard]] std::vector RunInference(const cv::Mat& input, const std::string& cameraId) override; [[nodiscard]] std::vector RunInferencesBatch( const cv::Mat& input, const std::vector& vehicleBoxes, const std::string& cameraId) override; [[nodiscard]] bool Destroy() override; /// Propagate country to inner OCR engine so ALPR post-processing /// uses the correct plate formats and character corrections. void SetCountry(Country country) override; /// Propagate debug flag to all sub-detectors void ActivateDebugger(bool debugFlag) override { _debugFlag = debugFlag; if (_lpDetector) _lpDetector->ActivateDebugger(debugFlag); if (_lpColourDetector) _lpColourDetector->ActivateDebugger(debugFlag); } }; } #endif