diff --git a/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp b/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp index 190a30a..52879bf 100644 --- a/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp +++ b/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp @@ -252,10 +252,29 @@ std::vector ONNXOCRRecognizer::RecognizeBatch(const std::vector(kRecMaxBatch), total); + std::vector chunkCrops(bucketCrops.begin() + start, + bucketCrops.begin() + end); + std::vector chunkIdx(bucketIndices.begin() + start, + bucketIndices.begin() + end); + RunBatchAtWidth(chunkCrops, chunkIdx, bucketW, results); + } } return results; diff --git a/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h b/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h index 5f07f2c..c12e02d 100644 --- a/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h +++ b/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h @@ -48,6 +48,20 @@ constexpr int kRecImgW = 320; // Default rec width (PP-OCRv5 rec_image_shap constexpr int kRecImgMaxW = 960; // Allow wide recognition input for long text lines constexpr int kRecBatchSize = 6; +// Maximum crops submitted to the recognizer in a single ORT Run call. +// Two things must stay in sync with this value: +// 1. The TRT dynamic profile in PaddleOCRV5Engine::BuildNvidiaOcrOptions — +// the profile's max-batch dimension is set from kRecMaxBatch so TRT +// builds a single engine that handles everything up to this size. +// 2. The bucket-chunking loop in ONNXOCRRecognizer::RecognizeBatch — +// bucket groups larger than this get sliced into multiple Run() calls +// so we never exceed the profile and fall off the fast batched path. +// +// Raising it increases peak runtime VRAM (the TRT execution context +// allocates worst-case activation buffers), so keep it as low as is +// reasonable for your expected plate count per frame. +constexpr int kRecMaxBatch = 24; + // A detected text box: 4 corner points (top-left, top-right, bottom-right, bottom-left) struct TextBox { std::array points; diff --git a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp index 71406d4..04c082b 100644 --- a/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp +++ b/modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp @@ -60,7 +60,11 @@ static PerModelOcrOptions BuildNvidiaOcrOptions( opts.classifierOpts.preferTensorRT = preferTensorRT; opts.classifierOpts.trtFP16 = true; - // Recognizer: TRT EP with dynamic shape profile. + // Recognizer: TRT EP with dynamic shape profile. The max-batch + // dimension is kRecMaxBatch (defined in ONNXOCRTypes.h) — the same + // constant that ONNXOCRRecognizer::RecognizeBatch uses to chunk + // oversized bucket groups. Keeping them in lockstep ensures the + // recognizer never submits a shape that falls outside the TRT profile. opts.recognizerOpts.useMaxCudnnWorkspace = true; opts.recognizerOpts.preferTensorRT = preferTensorRT; opts.recognizerOpts.trtFP16 = true; @@ -71,12 +75,13 @@ static PerModelOcrOptions BuildNvidiaOcrOptions( "input name — defaulting to 'x'" << std::endl; recInputName = "x"; } + const std::string maxB = std::to_string(kRecMaxBatch); std::cout << "[PaddleOCRV5Engine] Recognizer input name: '" << recInputName << "' — building TRT dynamic profile " - << "[batch=1..16, W=320..960]" << std::endl; + << "[batch=1.." << maxB << ", W=320..960]" << std::endl; opts.recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320"; opts.recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480"; - opts.recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960"; + opts.recognizerOpts.trtProfileMaxShapes = recInputName + ":" + maxB + "x3x48x960"; } return opts; }