// File-viewer metadata captured with this header: 63 lines, 2.2 KiB, C++
#pragma once

// Project headers.
// NOTE(review): presumably these declare TextLine, kRecImgH / kRecImgMaxW,
// BasicOrtHandler and OrtHandlerOptions used below, and pull in the OpenCV
// and ONNX Runtime headers for cv::Mat / Ort::Value — confirm.
#include "ONNXOCRTypes.h"
#include "ONNXEngine.h"

// Standard library.
#include <vector>
#include <string>
#include <mutex>

namespace ANSCENTER {
|
|
namespace onnxocr {
|
|
|
|
class ONNXOCRRecognizer : public BasicOrtHandler {
|
|
public:
|
|
explicit ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads = 1);
|
|
explicit ONNXOCRRecognizer(const std::string& onnx_path,
|
|
const OrtHandlerOptions& options,
|
|
unsigned int num_threads = 1);
|
|
~ONNXOCRRecognizer() override = default;
|
|
|
|
// Load character dictionary (must be called before Recognize)
|
|
bool LoadDictionary(const std::string& dictPath);
|
|
|
|
// Recognize text from a single cropped text image
|
|
TextLine Recognize(const cv::Mat& croppedImage);
|
|
|
|
// Batch recognition for multiple cropped images.
|
|
// Crops are grouped into a small set of fixed width buckets and
|
|
// submitted to ORT as [N,3,imgH_,bucketW] tensors so cuDNN sees
|
|
// shape-stable inputs and can reuse algorithms across calls.
|
|
std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);
|
|
|
|
// Pre-warm cuDNN/TRT for every bucket width by running dummy
|
|
// inferences. Idempotent — no-op if already warmed up.
|
|
void Warmup();
|
|
|
|
private:
|
|
Ort::Value transform(const cv::Mat& mat) override;
|
|
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
|
|
|
// Round resizedW up to the next bucket width (capped at imgMaxW_).
|
|
// Used by both Recognize() and RecognizeBatch() so cuDNN only ever
|
|
// sees a small finite set of input shapes.
|
|
int RoundUpToBucket(int resizedW) const;
|
|
|
|
// Run a single [N,3,imgH_,bucketW] inference and CTC-decode each row.
|
|
void RunBatchAtWidth(const std::vector<cv::Mat>& crops,
|
|
const std::vector<size_t>& origIndices,
|
|
int bucketW,
|
|
std::vector<TextLine>& out);
|
|
|
|
// CTC greedy decode
|
|
TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);
|
|
|
|
std::vector<std::string> keys_;
|
|
int imgH_ = kRecImgH;
|
|
int imgMaxW_ = kRecImgMaxW;
|
|
std::mutex _mutex;
|
|
bool _warmedUp = false;
|
|
};
|
|
|
|
} // namespace onnxocr
|
|
} // namespace ANSCENTER
|