Two-part fix
Fix 1 — Chunk oversized bucket groups (the correctness fix): ONNXOCRRecognizer::RecognizeBatch now slices each bucket group into chunks of ≤ kRecMaxBatch before submitting to TRT. A frame with 30 crops in bucket 320 produces two back-to-back batched calls (24 + 6), both within the profile, both on the fast path.
Fix 2 — Raise the profile max from 16 to 24 (the performance fix): The old profile max was 16; your real scenes routinely hit 24. Raising the profile max to 24 means the common 12-plate scene (24 crops) fits in a single batched call with no chunking needed. Scenes with > 24 crops now use chunking, but that's rare.
This commit is contained in:
@@ -252,10 +252,29 @@ std::vector<TextLine> ONNXOCRRecognizer::RecognizeBatch(const std::vector<cv::Ma
|
||||
groupIdx[bucketIdx].push_back(i);
|
||||
}
|
||||
|
||||
// Run one batched inference per non-empty bucket
|
||||
// Run batched inference per non-empty bucket, slicing each bucket
|
||||
// group into chunks of at most kRecMaxBatch crops so we never exceed
|
||||
// the TRT dynamic profile's max-batch dimension. On a busy scene with
|
||||
// (say) 30 crops all falling in bucket 320, we issue two back-to-back
|
||||
// batched calls of 24 + 6 instead of one oversized call that would
|
||||
// throw "does not satisfy any optimization profiles" and fall off
|
||||
// the fast path to the per-image fallback.
|
||||
for (int b = 0; b < kRecNumBuckets; ++b) {
|
||||
if (groupCrops[b].empty()) continue;
|
||||
RunBatchAtWidth(groupCrops[b], groupIdx[b], kRecBucketWidths[b], results);
|
||||
const auto& bucketCrops = groupCrops[b];
|
||||
const auto& bucketIndices = groupIdx[b];
|
||||
if (bucketCrops.empty()) continue;
|
||||
|
||||
const int bucketW = kRecBucketWidths[b];
|
||||
const size_t total = bucketCrops.size();
|
||||
|
||||
for (size_t start = 0; start < total; start += kRecMaxBatch) {
|
||||
const size_t end = std::min(start + static_cast<size_t>(kRecMaxBatch), total);
|
||||
std::vector<cv::Mat> chunkCrops(bucketCrops.begin() + start,
|
||||
bucketCrops.begin() + end);
|
||||
std::vector<size_t> chunkIdx(bucketIndices.begin() + start,
|
||||
bucketIndices.begin() + end);
|
||||
RunBatchAtWidth(chunkCrops, chunkIdx, bucketW, results);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
Reference in New Issue
Block a user