Improve ALPR_OCR performance

This commit is contained in:
2026-04-14 20:30:21 +10:00
parent 3349b45ade
commit f9a0af8949
18 changed files with 991 additions and 77 deletions

View File

@@ -12,6 +12,9 @@ namespace onnxocr {
class ONNXOCRRecognizer : public BasicOrtHandler {
public:
explicit ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads = 1);
explicit ONNXOCRRecognizer(const std::string& onnx_path,
const OrtHandlerOptions& options,
unsigned int num_threads = 1);
~ONNXOCRRecognizer() override = default;
// Load character dictionary (must be called before Recognize)
@@ -20,13 +23,31 @@ public:
// Recognize text from a single cropped text image
TextLine Recognize(const cv::Mat& croppedImage);
// Batch recognition for multiple cropped images
// Batch recognition for multiple cropped images.
// Crops are grouped into a small set of fixed width buckets and
// submitted to ORT as [N,3,imgH_,bucketW] tensors so cuDNN sees
// shape-stable inputs and can reuse algorithms across calls.
std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);
// Pre-warm cuDNN/TRT for every bucket width by running dummy
// inferences. Idempotent — no-op if already warmed up.
void Warmup();
private:
Ort::Value transform(const cv::Mat& mat) override;
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
// Round resizedW up to the next bucket width (capped at imgMaxW_).
// Used by both Recognize() and RecognizeBatch() so cuDNN only ever
// sees a small finite set of input shapes.
int RoundUpToBucket(int resizedW) const;
// Run a single [N,3,imgH_,bucketW] inference and CTC-decode each row.
void RunBatchAtWidth(const std::vector<cv::Mat>& crops,
const std::vector<size_t>& origIndices,
int bucketW,
std::vector<TextLine>& out);
// CTC greedy decode
TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);
@@ -34,6 +55,7 @@ private:
int imgH_ = kRecImgH;
int imgMaxW_ = kRecImgMaxW;
std::mutex _mutex;
bool _warmedUp = false;
};
} // namespace onnxocr