Improve ALPR_OCR performance
This commit is contained in:
@@ -363,10 +363,14 @@ namespace ANSCENTER
|
||||
ocrModelConfig.ocrLanguage = ocrLang;
|
||||
ocrModelConfig.useDetector = true;
|
||||
ocrModelConfig.useRecognizer = true;
|
||||
ocrModelConfig.useCLS = true;
|
||||
// Skip the angle classifier for ALPR. License-plate boxes
|
||||
// from the YOLO detector are already axis-aligned, so the
|
||||
// 180° classifier is dead weight (one extra ORT call per
|
||||
// plate for no recall gain).
|
||||
ocrModelConfig.useCLS = false;
|
||||
ocrModelConfig.useLayout = false;
|
||||
ocrModelConfig.useTable = false;
|
||||
ocrModelConfig.useTensorRT = false;
|
||||
ocrModelConfig.useTensorRT = true;
|
||||
ocrModelConfig.enableMKLDNN = false;
|
||||
ocrModelConfig.useDilation = true;
|
||||
ocrModelConfig.useAngleCLS = false;
|
||||
@@ -375,7 +379,7 @@ namespace ANSCENTER
|
||||
ocrModelConfig.detectionBoxThreshold = 0.3;
|
||||
ocrModelConfig.detectionDBUnclipRatio = 1.2;
|
||||
ocrModelConfig.clsThreshold = 0.9;
|
||||
ocrModelConfig.limitSideLen = 2560;
|
||||
ocrModelConfig.limitSideLen = 480;
|
||||
|
||||
// Pass the original ALPR model zip path — ANSOCRBase::Initialize
|
||||
// will extract it to the same folder (already done, so extraction
|
||||
@@ -638,41 +642,104 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<Object> output;
|
||||
output.reserve(lprOutput.size());
|
||||
// Step 2: Collect crops from every valid plate. Wide plates
|
||||
// (aspect >= 2.0) are treated as a single text line; narrow
|
||||
// plates (2-row layouts like Japanese) are split horizontally
|
||||
// at H/2 into top and bottom rows. All crops go through a
|
||||
// single batched recognizer call, bypassing the OCR text-line
|
||||
// detector entirely — for ALPR the LP YOLO box already bounds
|
||||
// the text region precisely.
|
||||
struct PlateInfo {
|
||||
size_t origIndex; // into lprOutput
|
||||
std::vector<size_t> cropIndices; // into allCrops
|
||||
cv::Mat plateROI; // full (unsplit) ROI, kept for colour
|
||||
};
|
||||
std::vector<cv::Mat> allCrops;
|
||||
std::vector<PlateInfo> plateInfos;
|
||||
allCrops.reserve(lprOutput.size() * 2);
|
||||
plateInfos.reserve(lprOutput.size());
|
||||
|
||||
for (auto& lprObject : lprOutput) {
|
||||
const cv::Rect& box = lprObject.box;
|
||||
for (size_t i = 0; i < lprOutput.size(); ++i) {
|
||||
const cv::Rect& box = lprOutput[i].box;
|
||||
|
||||
// Calculate safe cropped region
|
||||
const int x1 = std::max(0, box.x);
|
||||
const int y1 = std::max(0, box.y);
|
||||
const int width = std::min(frameWidth - x1, box.width);
|
||||
const int width = std::min(frameWidth - x1, box.width);
|
||||
const int height = std::min(frameHeight - y1, box.height);
|
||||
|
||||
if (width <= 0 || height <= 0) continue;
|
||||
|
||||
cv::Rect lprPos(x1, y1, width, height);
|
||||
cv::Mat plateROI = frame(lprPos);
|
||||
cv::Mat plateROI = frame(cv::Rect(x1, y1, width, height));
|
||||
|
||||
// Step 2: Run OCR on the detected plate
|
||||
std::string ocrText = RunOCROnPlate(plateROI, cameraId);
|
||||
PlateInfo info;
|
||||
info.origIndex = i;
|
||||
info.plateROI = plateROI;
|
||||
|
||||
if (ocrText.empty()) continue;
|
||||
const float aspect = static_cast<float>(width) /
|
||||
std::max(1, height);
|
||||
|
||||
// 2-row heuristic: aspect < 2.0 → split top/bottom.
|
||||
// Threshold tuned to catch Japanese square plates
|
||||
// (~1.5–1.9) while leaving wide EU/VN plates (3.0+)
|
||||
// untouched.
|
||||
if (aspect < 2.0f && height >= 24) {
|
||||
const int halfH = height / 2;
|
||||
info.cropIndices.push_back(allCrops.size());
|
||||
allCrops.push_back(plateROI(cv::Rect(0, 0, width, halfH)));
|
||||
info.cropIndices.push_back(allCrops.size());
|
||||
allCrops.push_back(plateROI(cv::Rect(0, halfH, width, height - halfH)));
|
||||
}
|
||||
else {
|
||||
info.cropIndices.push_back(allCrops.size());
|
||||
allCrops.push_back(plateROI);
|
||||
}
|
||||
|
||||
plateInfos.push_back(std::move(info));
|
||||
}
|
||||
|
||||
if (allCrops.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Step 3: Single batched recognizer call for every crop.
|
||||
// ONNXOCRRecognizer groups crops by bucket width and issues
|
||||
// one ORT Run per bucket — typically 1–2 GPU calls for an
|
||||
// entire frame regardless of plate count.
|
||||
auto ocrResults = _ocrEngine->RecognizeTextBatch(allCrops);
|
||||
|
||||
// Step 4: Assemble per-plate output
|
||||
std::vector<Object> output;
|
||||
output.reserve(plateInfos.size());
|
||||
|
||||
for (const auto& info : plateInfos) {
|
||||
std::string combinedText;
|
||||
for (size_t cropIdx : info.cropIndices) {
|
||||
if (cropIdx >= ocrResults.size()) continue;
|
||||
const std::string& lineText = ocrResults[cropIdx].first;
|
||||
if (lineText.empty()) continue;
|
||||
if (!combinedText.empty()) combinedText += " ";
|
||||
combinedText += lineText;
|
||||
}
|
||||
if (combinedText.empty()) continue;
|
||||
|
||||
Object lprObject = lprOutput[info.origIndex];
|
||||
lprObject.cameraId = cameraId;
|
||||
|
||||
// Use ALPRChecker for text stabilization if enabled
|
||||
// Cross-frame stabilization (unchanged)
|
||||
if (_enableALPRChecker) {
|
||||
lprObject.className = alprChecker.checkPlateByTrackId(cameraId, ocrText, lprObject.trackId);
|
||||
} else {
|
||||
lprObject.className = ocrText;
|
||||
lprObject.className = alprChecker.checkPlateByTrackId(
|
||||
cameraId, combinedText, lprObject.trackId);
|
||||
}
|
||||
else {
|
||||
lprObject.className = combinedText;
|
||||
}
|
||||
|
||||
if (lprObject.className.empty()) continue;
|
||||
|
||||
// Step 3: Colour detection (optional)
|
||||
std::string colour = DetectLPColourCached(plateROI, cameraId, lprObject.className);
|
||||
// Optional colour detection on the full plate ROI
|
||||
std::string colour = DetectLPColourCached(
|
||||
info.plateROI, cameraId, lprObject.className);
|
||||
if (!colour.empty()) {
|
||||
lprObject.extraInfo = "color:" + colour;
|
||||
}
|
||||
|
||||
@@ -159,6 +159,18 @@ namespace ANSCENTER {
|
||||
// Returns recognized text and confidence. Default returns empty.
|
||||
virtual std::pair<std::string, float> RecognizeText(const cv::Mat& croppedImage) { return {"", 0.0f}; }
|
||||
|
||||
// Batch recognition — skips the text-line detector entirely and
|
||||
// runs the whole batch through a single ORT call when possible.
|
||||
// Default implementation falls back to per-image RecognizeText
|
||||
// so existing subclasses keep working without changes.
|
||||
virtual std::vector<std::pair<std::string, float>> RecognizeTextBatch(
|
||||
const std::vector<cv::Mat>& croppedImages) {
|
||||
std::vector<std::pair<std::string, float>> out;
|
||||
out.reserve(croppedImages.size());
|
||||
for (const auto& m : croppedImages) out.push_back(RecognizeText(m));
|
||||
return out;
|
||||
}
|
||||
|
||||
// ALPR configuration methods
|
||||
void SetOCRMode(OCRMode mode);
|
||||
OCRMode GetOCRMode() const;
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <chrono>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
@@ -12,6 +13,12 @@ ONNXOCRClassifier::ONNXOCRClassifier(const std::string& onnx_path, unsigned int
|
||||
: BasicOrtHandler(onnx_path, num_threads) {
|
||||
}
|
||||
|
||||
ONNXOCRClassifier::ONNXOCRClassifier(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads)
|
||||
: BasicOrtHandler(onnx_path, options, num_threads) {
|
||||
}
|
||||
|
||||
Ort::Value ONNXOCRClassifier::transform(const cv::Mat& mat) {
|
||||
cv::Mat resized;
|
||||
// Direct resize to 80x160 (PP-LCNet_x1_0_textline_ori)
|
||||
@@ -103,5 +110,38 @@ void ONNXOCRClassifier::Classify(std::vector<cv::Mat>& img_list,
|
||||
}
|
||||
}
|
||||
|
||||
void ONNXOCRClassifier::Warmup() {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
if (_warmedUp || !ort_session) return;
|
||||
|
||||
try {
|
||||
cv::Mat dummy(kClsImageH * 2, kClsImageW * 2, CV_8UC3, cv::Scalar(128, 128, 128));
|
||||
cv::Mat resized;
|
||||
cv::resize(dummy, resized, cv::Size(kClsImageW, kClsImageH));
|
||||
resized.convertTo(resized, CV_32FC3);
|
||||
auto inputData = NormalizeAndPermute(resized);
|
||||
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, kClsImageH, kClsImageW };
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, inputData.data(), inputData.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
(void)ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||
std::cout << "[ONNXOCRClassifier] Warmup [1,3,"
|
||||
<< kClsImageH << "," << kClsImageW << "] "
|
||||
<< ms << " ms" << std::endl;
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
std::cerr << "[ONNXOCRClassifier] Warmup failed: " << e.what() << std::endl;
|
||||
}
|
||||
_warmedUp = true;
|
||||
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
|
||||
@@ -11,6 +11,9 @@ namespace onnxocr {
|
||||
class ONNXOCRClassifier : public BasicOrtHandler {
|
||||
public:
|
||||
explicit ONNXOCRClassifier(const std::string& onnx_path, unsigned int num_threads = 1);
|
||||
explicit ONNXOCRClassifier(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads = 1);
|
||||
~ONNXOCRClassifier() override = default;
|
||||
|
||||
// Classify text orientation for a list of cropped images
|
||||
@@ -21,7 +24,12 @@ public:
|
||||
std::vector<float>& cls_scores,
|
||||
float cls_thresh = kClsThresh);
|
||||
|
||||
// Pre-warm cuDNN/TRT for the classifier's fixed [1,3,80,160] shape.
|
||||
// Idempotent — no-op after the first call.
|
||||
void Warmup();
|
||||
|
||||
private:
|
||||
bool _warmedUp = false;
|
||||
Ort::Value transform(const cv::Mat& mat) override;
|
||||
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <chrono>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
@@ -15,6 +16,12 @@ ONNXOCRDetector::ONNXOCRDetector(const std::string& onnx_path, unsigned int num_
|
||||
: BasicOrtHandler(onnx_path, num_threads) {
|
||||
}
|
||||
|
||||
ONNXOCRDetector::ONNXOCRDetector(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads)
|
||||
: BasicOrtHandler(onnx_path, options, num_threads) {
|
||||
}
|
||||
|
||||
Ort::Value ONNXOCRDetector::transform(const cv::Mat& mat) {
|
||||
// Not used directly - detection uses custom Preprocess + manual tensor creation
|
||||
// Provided to satisfy BasicOrtHandler pure virtual
|
||||
@@ -308,5 +315,41 @@ std::vector<cv::Point2f> ONNXOCRDetector::UnclipPolygon(const std::array<cv::Poi
|
||||
return result;
|
||||
}
|
||||
|
||||
void ONNXOCRDetector::Warmup() {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
if (_warmedUp || !ort_session) return;
|
||||
|
||||
// 320x320 covers the typical license-plate ROI after LPD crop +
|
||||
// multiple-of-32 rounding. cuDNN caches the algorithm for this
|
||||
// shape so the first real inference doesn't pay the picker cost.
|
||||
constexpr int kWarmupSide = 320;
|
||||
try {
|
||||
cv::Mat dummy(kWarmupSide, kWarmupSide, CV_8UC3, cv::Scalar(128, 128, 128));
|
||||
cv::Mat dummyF;
|
||||
dummy.convertTo(dummyF, CV_32FC3);
|
||||
auto inputData = NormalizeAndPermute(dummyF);
|
||||
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, kWarmupSide, kWarmupSide };
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, inputData.data(), inputData.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
(void)ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||
std::cout << "[ONNXOCRDetector] Warmup [1,3,"
|
||||
<< kWarmupSide << "," << kWarmupSide << "] "
|
||||
<< ms << " ms" << std::endl;
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
std::cerr << "[ONNXOCRDetector] Warmup failed: " << e.what() << std::endl;
|
||||
}
|
||||
_warmedUp = true;
|
||||
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
|
||||
@@ -11,6 +11,9 @@ namespace onnxocr {
|
||||
class ONNXOCRDetector : public BasicOrtHandler {
|
||||
public:
|
||||
explicit ONNXOCRDetector(const std::string& onnx_path, unsigned int num_threads = 1);
|
||||
explicit ONNXOCRDetector(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads = 1);
|
||||
~ONNXOCRDetector() override = default;
|
||||
|
||||
// Run text detection on an image
|
||||
@@ -21,7 +24,12 @@ public:
|
||||
float unclipRatio = kDetUnclipRatio,
|
||||
bool useDilation = false);
|
||||
|
||||
// Pre-warm cuDNN/TRT at a canonical 320x320 input so the first real
|
||||
// call doesn't pay the algorithm-selection tax. Idempotent.
|
||||
void Warmup();
|
||||
|
||||
private:
|
||||
bool _warmedUp = false;
|
||||
Ort::Value transform(const cv::Mat& mat) override;
|
||||
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <cmath>
|
||||
#include <cfloat>
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
@@ -15,6 +16,12 @@ ONNXOCRRecognizer::ONNXOCRRecognizer(const std::string& onnx_path, unsigned int
|
||||
: BasicOrtHandler(onnx_path, num_threads) {
|
||||
}
|
||||
|
||||
ONNXOCRRecognizer::ONNXOCRRecognizer(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads)
|
||||
: BasicOrtHandler(onnx_path, options, num_threads) {
|
||||
}
|
||||
|
||||
bool ONNXOCRRecognizer::LoadDictionary(const std::string& dictPath) {
|
||||
keys_ = LoadDict(dictPath);
|
||||
if (keys_.size() < 2) {
|
||||
@@ -46,6 +53,54 @@ Ort::Value ONNXOCRRecognizer::transformBatch(const std::vector<cv::Mat>& images)
|
||||
return Ort::Value(nullptr);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Width buckets — every recognizer input is padded up to one of these widths
|
||||
// before reaching ORT. This bounds the number of distinct shapes cuDNN ever
|
||||
// sees to four, so its HEURISTIC algorithm cache hits on every subsequent
|
||||
// call instead of re-tuning per plate. Buckets cover the realistic range:
|
||||
// 320 px → short Latin/Japanese plates (most common)
|
||||
// 480 px → wider Latin plates with two rows of text
|
||||
// 640 px → long single-row plates / multi-line stacked text
|
||||
// 960 px → safety upper bound (== kRecImgMaxW)
|
||||
// ----------------------------------------------------------------------------
|
||||
static constexpr int kRecBucketWidths[] = { 320, 480, 640, 960 };
|
||||
static constexpr int kRecNumBuckets = sizeof(kRecBucketWidths) / sizeof(kRecBucketWidths[0]);
|
||||
|
||||
int ONNXOCRRecognizer::RoundUpToBucket(int resizedW) const {
|
||||
const int capped = std::min(resizedW, imgMaxW_);
|
||||
for (int b = 0; b < kRecNumBuckets; ++b) {
|
||||
if (kRecBucketWidths[b] >= capped) return kRecBucketWidths[b];
|
||||
}
|
||||
return imgMaxW_;
|
||||
}
|
||||
|
||||
// Resize + normalize a single crop into a CHW float vector at width
|
||||
// `bucketW`, padding with zeros on the right when needed. The returned
|
||||
// vector has exactly 3*imgH_*bucketW elements.
|
||||
// Resize + normalize a single crop to height `imgH` and exactly `bucketW`
// columns in CHW float layout. When the aspect-preserving resize produces
// fewer columns than bucketW, each row is zero-padded on the right so every
// crop in a bucket shares an identical tensor shape. The returned vector
// always holds 3 * imgH * bucketW floats.
static std::vector<float> PreprocessCropToBucket(const cv::Mat& crop,
                                                 int imgH, int bucketW) {
    cv::Mat scaled = ResizeRecImage(crop, imgH, bucketW);
    const int actualW = scaled.cols;
    scaled.convertTo(scaled, CV_32FC3);
    auto chw = NormalizeAndPermuteCls(scaled);

    // Exact fit — no padding pass needed.
    if (actualW == bucketW) {
        return chw;
    }

    // Right-pad with zeros, channel by channel, row by row (CHW layout).
    std::vector<float> out(static_cast<size_t>(3) * imgH * bucketW, 0.0f);
    for (int ch = 0; ch < 3; ++ch) {
        const float* src = chw.data() + static_cast<size_t>(ch) * imgH * actualW;
        float* dst = out.data() + static_cast<size_t>(ch) * imgH * bucketW;
        for (int row = 0; row < imgH; ++row) {
            std::memcpy(dst + static_cast<size_t>(row) * bucketW,
                        src + static_cast<size_t>(row) * actualW,
                        actualW * sizeof(float));
        }
    }
    return out;
}
|
||||
|
||||
TextLine ONNXOCRRecognizer::Recognize(const cv::Mat& croppedImage) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
|
||||
@@ -54,52 +109,27 @@ TextLine ONNXOCRRecognizer::Recognize(const cv::Mat& croppedImage) {
|
||||
}
|
||||
|
||||
try {
|
||||
// Preprocess: resize to fixed height, proportional width
|
||||
// Step 1: aspect-preserving resize to height=imgH_, width capped
|
||||
// at imgMaxW_. Then round resized width up to the next bucket.
|
||||
cv::Mat resized = ResizeRecImage(croppedImage, imgH_, imgMaxW_);
|
||||
int resizedW = resized.cols;
|
||||
const int bucketW = RoundUpToBucket(resized.cols);
|
||||
|
||||
resized.convertTo(resized, CV_32FC3);
|
||||
// Recognition uses (pixel/255 - 0.5) / 0.5 normalization (same as classifier)
|
||||
auto normalizedData = NormalizeAndPermuteCls(resized);
|
||||
std::vector<float> inputData = PreprocessCropToBucket(croppedImage, imgH_, bucketW);
|
||||
|
||||
// Pad to at least kRecImgW width (matching official PaddleOCR behavior)
|
||||
// Official PaddleOCR: padding_im = np.zeros((C, H, W)), then copies normalized
|
||||
// image into left portion. Padding value = 0.0 in normalized space.
|
||||
int imgW = std::max(resizedW, kRecImgW);
|
||||
|
||||
std::vector<float> inputData;
|
||||
if (imgW > resizedW) {
|
||||
// Zero-pad on the right (CHW layout)
|
||||
inputData.resize(3 * imgH_ * imgW, 0.0f);
|
||||
for (int c = 0; c < 3; c++) {
|
||||
for (int y = 0; y < imgH_; y++) {
|
||||
std::memcpy(
|
||||
&inputData[c * imgH_ * imgW + y * imgW],
|
||||
&normalizedData[c * imgH_ * resizedW + y * resizedW],
|
||||
resizedW * sizeof(float));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
inputData = std::move(normalizedData);
|
||||
}
|
||||
|
||||
// Create input tensor with (possibly padded) width
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, imgH_, imgW };
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, imgH_, bucketW };
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, inputData.data(), inputData.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
|
||||
// Run inference
|
||||
auto outputTensors = ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
|
||||
// Get output
|
||||
float* outputData = outputTensors[0].GetTensorMutableData<float>();
|
||||
auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||
|
||||
int seqLen = static_cast<int>(outputShape[1]);
|
||||
int seqLen = static_cast<int>(outputShape[1]);
|
||||
int numClasses = static_cast<int>(outputShape[2]);
|
||||
|
||||
return CTCDecode(outputData, seqLen, numClasses);
|
||||
@@ -110,18 +140,162 @@ TextLine ONNXOCRRecognizer::Recognize(const cv::Mat& croppedImage) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TextLine> ONNXOCRRecognizer::RecognizeBatch(const std::vector<cv::Mat>& croppedImages) {
|
||||
std::vector<TextLine> results;
|
||||
results.reserve(croppedImages.size());
|
||||
void ONNXOCRRecognizer::RunBatchAtWidth(const std::vector<cv::Mat>& crops,
|
||||
const std::vector<size_t>& origIndices,
|
||||
int bucketW,
|
||||
std::vector<TextLine>& out) {
|
||||
if (crops.empty()) return;
|
||||
|
||||
// Process one at a time (dynamic width per image)
|
||||
for (size_t i = 0; i < croppedImages.size(); i++) {
|
||||
results.push_back(Recognize(croppedImages[i]));
|
||||
try {
|
||||
const size_t batchN = crops.size();
|
||||
const size_t perImage = static_cast<size_t>(3) * imgH_ * bucketW;
|
||||
|
||||
// Stack N preprocessed crops into one [N,3,H,W] buffer
|
||||
std::vector<float> batchInput(batchN * perImage, 0.0f);
|
||||
for (size_t i = 0; i < batchN; ++i) {
|
||||
auto img = PreprocessCropToBucket(crops[i], imgH_, bucketW);
|
||||
std::memcpy(&batchInput[i * perImage], img.data(),
|
||||
perImage * sizeof(float));
|
||||
}
|
||||
|
||||
std::array<int64_t, 4> inputShape = {
|
||||
static_cast<int64_t>(batchN), 3,
|
||||
static_cast<int64_t>(imgH_),
|
||||
static_cast<int64_t>(bucketW)
|
||||
};
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, batchInput.data(), batchInput.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
|
||||
auto outputTensors = ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
|
||||
float* outputData = outputTensors[0].GetTensorMutableData<float>();
|
||||
auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||
|
||||
// Expected output: [N, seqLen, numClasses]
|
||||
if (outputShape.size() < 3) {
|
||||
std::cerr << "[ONNXOCRRecognizer] Unexpected batch output rank: "
|
||||
<< outputShape.size() << std::endl;
|
||||
return;
|
||||
}
|
||||
const int outBatch = static_cast<int>(outputShape[0]);
|
||||
const int seqLen = static_cast<int>(outputShape[1]);
|
||||
const int numClasses = static_cast<int>(outputShape[2]);
|
||||
const size_t perRow = static_cast<size_t>(seqLen) * numClasses;
|
||||
|
||||
for (int i = 0; i < outBatch && i < static_cast<int>(batchN); ++i) {
|
||||
TextLine tl = CTCDecode(outputData + i * perRow, seqLen, numClasses);
|
||||
out[origIndices[i]] = std::move(tl);
|
||||
}
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
// ORT will throw if the model doesn't support a batch dimension > 1.
|
||||
// Fall back to per-image inference for this group.
|
||||
std::cerr << "[ONNXOCRRecognizer] Batch inference failed at bucketW="
|
||||
<< bucketW << " (" << e.what()
|
||||
<< ") — falling back to single-image path." << std::endl;
|
||||
for (size_t i = 0; i < crops.size(); ++i) {
|
||||
// Direct call (we already hold _mutex via the public RecognizeBatch
|
||||
// wrapper). Replicate the single-image preprocessing here to avoid
|
||||
// re-entering Recognize() and double-locking the mutex.
|
||||
try {
|
||||
cv::Mat resized = ResizeRecImage(crops[i], imgH_, imgMaxW_);
|
||||
int singleBucket = RoundUpToBucket(resized.cols);
|
||||
auto inputData = PreprocessCropToBucket(crops[i], imgH_, singleBucket);
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, imgH_, singleBucket };
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, inputData.data(), inputData.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
auto outputTensors = ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
float* outData = outputTensors[0].GetTensorMutableData<float>();
|
||||
auto outShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||
int seqLen = static_cast<int>(outShape[1]);
|
||||
int numClasses = static_cast<int>(outShape[2]);
|
||||
out[origIndices[i]] = CTCDecode(outData, seqLen, numClasses);
|
||||
} catch (const Ort::Exception& e2) {
|
||||
std::cerr << "[ONNXOCRRecognizer] Single-image fallback also failed: "
|
||||
<< e2.what() << std::endl;
|
||||
out[origIndices[i]] = {};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Batch recognition over multiple cropped text images. Crops are grouped
// by their bucket width and each group is submitted as one shape-stable
// [N,3,imgH_,bucketW] inference, so cuDNN/TRT can reuse cached algorithms
// across calls. Returns one TextLine per input crop, in input order;
// empty crops (and failed rows) yield default-constructed TextLines.
std::vector<TextLine> ONNXOCRRecognizer::RecognizeBatch(const std::vector<cv::Mat>& croppedImages) {
    std::lock_guard<std::mutex> lock(_mutex);

    std::vector<TextLine> results(croppedImages.size());
    if (!ort_session || croppedImages.empty() || keys_.empty()) {
        return results;
    }

    // Partition crops by target bucket width, remembering each crop's
    // original position so results can be scattered back in order.
    std::vector<std::vector<cv::Mat>> bucketCrops(kRecNumBuckets);
    std::vector<std::vector<size_t>> bucketOrig(kRecNumBuckets);

    for (size_t idx = 0; idx < croppedImages.size(); ++idx) {
        const cv::Mat& crop = croppedImages[idx];
        if (crop.empty()) continue;

        // Probe resize only to learn the crop's natural width.
        cv::Mat probe = ResizeRecImage(crop, imgH_, imgMaxW_);
        const int targetW = RoundUpToBucket(probe.cols);

        // Locate the bucket slot; widest bucket is the fallback.
        int slot = kRecNumBuckets - 1;
        for (int b = 0; b < kRecNumBuckets; ++b) {
            if (kRecBucketWidths[b] == targetW) { slot = b; break; }
        }
        bucketCrops[slot].push_back(crop);
        bucketOrig[slot].push_back(idx);
    }

    // One batched inference per non-empty bucket.
    for (int b = 0; b < kRecNumBuckets; ++b) {
        if (!bucketCrops[b].empty()) {
            RunBatchAtWidth(bucketCrops[b], bucketOrig[b], kRecBucketWidths[b], results);
        }
    }

    return results;
}
|
||||
|
||||
void ONNXOCRRecognizer::Warmup() {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
if (_warmedUp || !ort_session) return;
|
||||
|
||||
// Dummy 3-channel image, mid-grey, large enough to resize to imgH_
|
||||
cv::Mat dummy(imgH_ * 2, kRecBucketWidths[kRecNumBuckets - 1] * 2,
|
||||
CV_8UC3, cv::Scalar(128, 128, 128));
|
||||
|
||||
for (int b = 0; b < kRecNumBuckets; ++b) {
|
||||
const int bucketW = kRecBucketWidths[b];
|
||||
try {
|
||||
auto inputData = PreprocessCropToBucket(dummy, imgH_, bucketW);
|
||||
std::array<int64_t, 4> inputShape = { 1, 3, imgH_, bucketW };
|
||||
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
|
||||
*memory_info_handler, inputData.data(), inputData.size(),
|
||||
inputShape.data(), inputShape.size());
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
(void)ort_session->Run(
|
||||
Ort::RunOptions{ nullptr },
|
||||
input_node_names.data(), &inputTensor, 1,
|
||||
output_node_names.data(), num_outputs);
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||
std::cout << "[ONNXOCRRecognizer] Warmup bucketW=" << bucketW
|
||||
<< " " << ms << " ms" << std::endl;
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
std::cerr << "[ONNXOCRRecognizer] Warmup failed at bucketW="
|
||||
<< bucketW << ": " << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
_warmedUp = true;
|
||||
}
|
||||
|
||||
TextLine ONNXOCRRecognizer::CTCDecode(const float* outputData, int seqLen, int numClasses) {
|
||||
TextLine result;
|
||||
std::string text;
|
||||
|
||||
@@ -12,6 +12,9 @@ namespace onnxocr {
|
||||
class ONNXOCRRecognizer : public BasicOrtHandler {
|
||||
public:
|
||||
explicit ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads = 1);
|
||||
explicit ONNXOCRRecognizer(const std::string& onnx_path,
|
||||
const OrtHandlerOptions& options,
|
||||
unsigned int num_threads = 1);
|
||||
~ONNXOCRRecognizer() override = default;
|
||||
|
||||
// Load character dictionary (must be called before Recognize)
|
||||
@@ -20,13 +23,31 @@ public:
|
||||
// Recognize text from a single cropped text image
|
||||
TextLine Recognize(const cv::Mat& croppedImage);
|
||||
|
||||
// Batch recognition for multiple cropped images
|
||||
// Batch recognition for multiple cropped images.
|
||||
// Crops are grouped into a small set of fixed width buckets and
|
||||
// submitted to ORT as [N,3,imgH_,bucketW] tensors so cuDNN sees
|
||||
// shape-stable inputs and can reuse algorithms across calls.
|
||||
std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);
|
||||
|
||||
// Pre-warm cuDNN/TRT for every bucket width by running dummy
|
||||
// inferences. Idempotent — no-op if already warmed up.
|
||||
void Warmup();
|
||||
|
||||
private:
|
||||
Ort::Value transform(const cv::Mat& mat) override;
|
||||
Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;
|
||||
|
||||
// Round resizedW up to the next bucket width (capped at imgMaxW_).
|
||||
// Used by both Recognize() and RecognizeBatch() so cuDNN only ever
|
||||
// sees a small finite set of input shapes.
|
||||
int RoundUpToBucket(int resizedW) const;
|
||||
|
||||
// Run a single [N,3,imgH_,bucketW] inference and CTC-decode each row.
|
||||
void RunBatchAtWidth(const std::vector<cv::Mat>& crops,
|
||||
const std::vector<size_t>& origIndices,
|
||||
int bucketW,
|
||||
std::vector<TextLine>& out);
|
||||
|
||||
// CTC greedy decode
|
||||
TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);
|
||||
|
||||
@@ -34,6 +55,7 @@ private:
|
||||
int imgH_ = kRecImgH;
|
||||
int imgMaxW_ = kRecImgMaxW;
|
||||
std::mutex _mutex;
|
||||
bool _warmedUp = false;
|
||||
};
|
||||
|
||||
} // namespace onnxocr
|
||||
|
||||
@@ -88,11 +88,22 @@ inline std::vector<std::string> LoadDict(const std::string& dictPath) {
|
||||
return keys;
|
||||
}
|
||||
|
||||
// Compute resize dimensions for detection model (multiples of 32)
|
||||
// Compute resize dimensions for detection model.
|
||||
// limit_type='max': scale down if max side > maxSideLen (PP-OCRv5 server default)
|
||||
// maxSideLimit: safety cap on final max dimension (default 4000)
|
||||
//
|
||||
// Each dimension is rounded UP to a multiple of kDetSizeBucket (96). The
|
||||
// coarse granularity is deliberate: cuDNN HEURISTIC has to re-select
|
||||
// convolution algorithms every time it sees a new input shape, and that
|
||||
// selection costs ~100 ms per shape. With multiples of 32, a typical ALPR
|
||||
// run produces 30+ unique detector shapes; with multiples of 96 that drops
|
||||
// to 5–10, which cuDNN can cache and reuse for the rest of the video.
|
||||
// 96 is divisible by the DBNet down-stride of 32, so feature-map sizes
|
||||
// stay integer.
|
||||
inline cv::Size ComputeDetResizeShape(int srcH, int srcW, int maxSideLen,
|
||||
int maxSideLimit = kDetMaxSideLimit) {
|
||||
constexpr int kDetSizeBucket = 96;
|
||||
|
||||
float ratio = 1.0f;
|
||||
int maxSide = std::max(srcH, srcW);
|
||||
if (maxSide > maxSideLen) {
|
||||
@@ -108,8 +119,12 @@ inline cv::Size ComputeDetResizeShape(int srcH, int srcW, int maxSideLen,
|
||||
newW = static_cast<int>(newW * clampRatio);
|
||||
}
|
||||
|
||||
newH = std::max(32, static_cast<int>(std::round(newH / 32.0) * 32));
|
||||
newW = std::max(32, static_cast<int>(std::round(newW / 32.0) * 32));
|
||||
auto roundUpToBucket = [](int x) {
|
||||
return std::max(kDetSizeBucket,
|
||||
((x + kDetSizeBucket - 1) / kDetSizeBucket) * kDetSizeBucket);
|
||||
};
|
||||
newH = roundUpToBucket(newH);
|
||||
newW = roundUpToBucket(newW);
|
||||
return cv::Size(newW, newH);
|
||||
}
|
||||
|
||||
|
||||
@@ -11,13 +11,75 @@ namespace onnxocr {
|
||||
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
const std::string& clsModelPath,
|
||||
const std::string& recModelPath,
|
||||
const std::string& dictPath) {
|
||||
const std::string& dictPath,
|
||||
bool preferTensorRT) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
|
||||
// High-perf options. The OCR sub-models split into two groups:
|
||||
//
|
||||
// 1. Detector — its input shape varies continuously with every
|
||||
// plate-ROI aspect ratio. TRT EP is a poor fit because it
|
||||
// builds a fresh engine for each unique shape (minutes each).
|
||||
// We keep it on CUDA EP with the largest cuDNN workspace and
|
||||
// let cuDNN HEURISTIC handle the per-shape algo selection.
|
||||
//
|
||||
// 2. Classifier + Recognizer — fixed-bucket shapes (cls is
|
||||
// [1,3,80,160], rec is [1,3,48,{320,480,640,960}]). These
|
||||
// benefit massively from TRT EP because the engine is built
|
||||
// once per shape and reused forever.
|
||||
OrtHandlerOptions detectorOpts;
|
||||
// Detector uses CUDA EP with *conservative* cuDNN workspace.
|
||||
// Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT
|
||||
// engine + ORT arena in play) the max-workspace mode causes cuDNN
|
||||
// to pick Winograd/implicit-precomp-GEMM variants that silently
|
||||
// fall back to slow NO-WORKSPACE algorithms when the big workspace
|
||||
// can't be allocated. With "0" cuDNN picks algorithms that are
|
||||
// known to fit and runs ~10x faster in practice.
|
||||
detectorOpts.useMaxCudnnWorkspace = false;
|
||||
detectorOpts.preferTensorRT = false; // never TRT for the detector
|
||||
|
||||
// Classifier (fixed [1,3,80,160]): TRT with no profile is fine.
|
||||
OrtHandlerOptions classifierOpts;
|
||||
classifierOpts.useMaxCudnnWorkspace = true;
|
||||
classifierOpts.preferTensorRT = preferTensorRT;
|
||||
classifierOpts.trtFP16 = true;
|
||||
|
||||
// Recognizer: needs a DYNAMIC profile so one TRT engine covers every
|
||||
// (batch, bucket_width) pair we generate at runtime. Without this,
|
||||
// each new shape triggers a ~80s engine rebuild mid-stream when a
|
||||
// new plate appears or the plate count changes.
|
||||
//
|
||||
// Profile range:
|
||||
// batch : 1 .. 16 (16 plates worth of crops is generous)
|
||||
// H : 48 (fixed)
|
||||
// W : 320 .. 960 (covers all 4 recognizer buckets)
|
||||
//
|
||||
// Query the actual input name from the .onnx file instead of
|
||||
// hardcoding — PaddleOCR usually exports it as "x" but the name can
|
||||
// vary across model versions.
|
||||
OrtHandlerOptions recognizerOpts;
|
||||
recognizerOpts.useMaxCudnnWorkspace = true;
|
||||
recognizerOpts.preferTensorRT = preferTensorRT;
|
||||
recognizerOpts.trtFP16 = true;
|
||||
if (preferTensorRT) {
|
||||
std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);
|
||||
if (recInputName.empty()) {
|
||||
std::cerr << "[PaddleOCRV5Engine] Could not query recognizer "
|
||||
"input name — defaulting to 'x'" << std::endl;
|
||||
recInputName = "x";
|
||||
}
|
||||
std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"
|
||||
<< recInputName << "' — building TRT dynamic profile "
|
||||
<< "[batch=1..16, W=320..960]" << std::endl;
|
||||
recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
|
||||
recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
|
||||
recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
|
||||
}
|
||||
|
||||
try {
|
||||
// Initialize detector (also triggers EPLoader init in BasicOrtHandler)
|
||||
detector_ = std::make_unique<ONNXOCRDetector>(detModelPath);
|
||||
detector_ = std::make_unique<ONNXOCRDetector>(detModelPath, detectorOpts);
|
||||
std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;
|
||||
|
||||
// Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
|
||||
@@ -29,7 +91,7 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
|
||||
// Initialize classifier (optional)
|
||||
if (!clsModelPath.empty()) {
|
||||
classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath);
|
||||
classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath, classifierOpts);
|
||||
std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
|
||||
}
|
||||
else {
|
||||
@@ -38,13 +100,26 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
}
|
||||
|
||||
// Initialize recognizer
|
||||
recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath);
|
||||
recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath, recognizerOpts);
|
||||
if (!recognizer_->LoadDictionary(dictPath)) {
|
||||
std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
|
||||
return false;
|
||||
}
|
||||
std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;
|
||||
|
||||
// Pre-warm classifier (fixed [1,3,80,160]) and recognizer (4
|
||||
// bucket widths) so the first frame doesn't pay the cuDNN/TRT
|
||||
// algorithm-selection tax. The detector is intentionally NOT
|
||||
// warmed up: its input shape varies continuously with each
|
||||
// plate-ROI aspect ratio, so a warmup at any single canonical
|
||||
// shape would cost minutes (TRT) or be useless (CUDA cache miss
|
||||
// on the real frame anyway). Real frames will pay the per-shape
|
||||
// cuDNN HEURISTIC cost on first use.
|
||||
std::cout << "[PaddleOCRV5Engine] Warming up OCR pipeline..." << std::endl;
|
||||
if (classifier_) classifier_->Warmup();
|
||||
if (recognizer_) recognizer_->Warmup();
|
||||
std::cout << "[PaddleOCRV5Engine] Warmup complete" << std::endl;
|
||||
|
||||
_initialized = true;
|
||||
std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
|
||||
return true;
|
||||
@@ -140,5 +215,18 @@ TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) {
|
||||
return recognizer_->Recognize(croppedImage);
|
||||
}
|
||||
|
||||
// Run the recognizer only, on a batch of pre-cropped text images, in
// a single bucketed/batched ORT inference. Skips the detector — the
// caller supplies crops that already bound single-line text (e.g.
// ALPR plate ROIs, optionally pre-split into rows).
//
// Returns one TextLine per input crop, in input order. On any failure
// (model reloading, lock timeout, not initialized) it returns a
// vector of default-constructed TextLines of the same size so callers
// can always index it 1:1 against their crops.
//
// FIX: the lock must be held *through* the RecognizeBatch call, not
// just for the flag checks. The previous inner-scope lock released
// _mutex before using recognizer_, so a concurrent re-Initialize or
// Destroy could reset recognizer_ between the check and the use
// (classic TOCTOU race on guarded state).
std::vector<TextLine> PaddleOCRV5Engine::recognizeMany(const std::vector<cv::Mat>& croppedImages) {
    // Cheap, lock-free fast-out while a model swap is in flight.
    if (_modelLoading.load()) return std::vector<TextLine>(croppedImages.size());

    auto lk = TryLockWithTimeout("PaddleOCRV5Engine::recognizeMany");
    if (!lk.owns_lock()) return std::vector<TextLine>(croppedImages.size());

    if (!_initialized || !recognizer_ || croppedImages.empty()) {
        return std::vector<TextLine>(croppedImages.size());
    }

    // Delegates to the bucketed, batched path in ONNXOCRRecognizer,
    // with the lock still held so recognizer_ cannot be torn down
    // underneath the inference call.
    return recognizer_->RecognizeBatch(croppedImages);
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
|
||||
@@ -25,10 +25,13 @@ public:
|
||||
|
||||
// Initialize the OCR pipeline
|
||||
// clsModelPath can be empty to skip classification
|
||||
// preferTensorRT: try the TensorRT EP first for the classifier and
// recognizer sub-models only; the detector always stays on the CUDA
// EP because its input shape varies per plate ROI.
|
||||
bool Initialize(const std::string& detModelPath,
|
||||
const std::string& clsModelPath,
|
||||
const std::string& recModelPath,
|
||||
const std::string& dictPath);
|
||||
const std::string& dictPath,
|
||||
bool preferTensorRT = false);
|
||||
|
||||
// Run full OCR pipeline on an image
|
||||
// Returns results matching PaddleOCR::OCRPredictResult format
|
||||
@@ -37,6 +40,14 @@ public:
|
||||
// Run recognizer only on a pre-cropped text image (no detection step)
|
||||
TextLine recognizeOnly(const cv::Mat& croppedImage);
|
||||
|
||||
// Run recognizer only on a batch of pre-cropped text images in a
|
||||
// single batched ORT inference. Skips the detector entirely — the
|
||||
// caller is expected to supply crops that are already roughly
|
||||
// axis-aligned single-line text (e.g. ALPR plate ROIs, optionally
|
||||
// pre-split into rows). Crops are grouped by bucket width, so a
|
||||
// single call to this function typically issues 1–2 ORT Runs total.
|
||||
std::vector<TextLine> recognizeMany(const std::vector<cv::Mat>& croppedImages);
|
||||
|
||||
// Configuration setters (matching OCRModelConfig parameters)
|
||||
// Sets the detector's maximum long-side length (pixels); images whose
// longer side exceeds this are scaled down before detection.
void SetDetMaxSideLen(int val) { _maxSideLen = val; }
|
||||
// Sets the DB detector's binarization threshold for the probability map.
void SetDetDbThresh(float val) { _detDbThresh = val; }
|
||||
|
||||
@@ -50,7 +50,8 @@ bool ANSONNXOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelC
|
||||
_modelConfig.detectionModelFile,
|
||||
clsModelPath,
|
||||
_modelConfig.recognizerModelFile,
|
||||
_modelConfig.recogizerCharDictionaryPath);
|
||||
_modelConfig.recogizerCharDictionaryPath,
|
||||
_modelConfig.useTensorRT);
|
||||
|
||||
return _isInitialized;
|
||||
}
|
||||
@@ -391,4 +392,16 @@ std::pair<std::string, float> ANSONNXOCR::RecognizeText(const cv::Mat& croppedIm
|
||||
return {result.text, result.score};
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, float>> ANSONNXOCR::RecognizeTextBatch(
|
||||
const std::vector<cv::Mat>& croppedImages) {
|
||||
std::vector<std::pair<std::string, float>> out(croppedImages.size(), {"", 0.0f});
|
||||
if (!_isInitialized || !_engine || croppedImages.empty()) return out;
|
||||
|
||||
auto lines = _engine->recognizeMany(croppedImages);
|
||||
for (size_t i = 0; i < lines.size() && i < out.size(); ++i) {
|
||||
out[i] = { lines[i].text, lines[i].score };
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace ANSCENTER
|
||||
|
||||
@@ -24,6 +24,8 @@ namespace ANSCENTER {
|
||||
std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) override;
|
||||
|
||||
std::pair<std::string, float> RecognizeText(const cv::Mat& croppedImage) override;
|
||||
std::vector<std::pair<std::string, float>> RecognizeTextBatch(
|
||||
const std::vector<cv::Mat>& croppedImages) override;
|
||||
~ANSONNXOCR();
|
||||
bool Destroy() override;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user