Improve ALPR_OCR performance

2026-04-14 20:30:21 +10:00
parent 3349b45ade
commit f9a0af8949
18 changed files with 991 additions and 77 deletions
--- a/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.h
+++ b/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.h
@@ -12,6 +12,9 @@ namespace onnxocr {
 class ONNXOCRRecognizer : public BasicOrtHandler {
 public:
    explicit ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads = 1);
+    explicit ONNXOCRRecognizer(const std::string& onnx_path,
+                               const OrtHandlerOptions& options,
+                               unsigned int num_threads = 1);
    ~ONNXOCRRecognizer() override = default;

    // Load character dictionary (must be called before Recognize)
@@ -20,13 +23,31 @@ public:
    // Recognize text from a single cropped text image
    TextLine Recognize(const cv::Mat& croppedImage);

-    // Batch recognition for multiple cropped images
+    // Batch recognition for multiple cropped images.
+    // Crops are grouped into a small set of fixed width buckets and
+    // submitted to ORT as [N,3,imgH_,bucketW] tensors so cuDNN sees
+    // shape-stable inputs and can reuse algorithms across calls.
    std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);

+    // Pre-warm cuDNN/TRT for every bucket width by running dummy
+    // inferences. Idempotent — no-op if already warmed up.
+    void Warmup();
+
 private:
    Ort::Value transform(const cv::Mat& mat) override;
    Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;

+    // Round resizedW up to the next bucket width (capped at imgMaxW_).
+    // Used by both Recognize() and RecognizeBatch() so cuDNN only ever
+    // sees a small finite set of input shapes.
+    int RoundUpToBucket(int resizedW) const;
+
+    // Run a single [N,3,imgH_,bucketW] inference and CTC-decode each row.
+    void RunBatchAtWidth(const std::vector<cv::Mat>& crops,
+                         const std::vector<size_t>& origIndices,
+                         int bucketW,
+                         std::vector<TextLine>& out);
+
    // CTC greedy decode
    TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);

@@ -34,6 +55,7 @@ private:
    int imgH_    = kRecImgH;
    int imgMaxW_ = kRecImgMaxW;
    std::mutex _mutex;
+    bool _warmedUp = false;
 };

 } // namespace onnxocr