Support tracker to improve ALPR_OCR
This commit is contained in:
@@ -547,6 +547,181 @@ namespace ANSCENTER
|
||||
return colour;
|
||||
}
|
||||
|
||||
// ── Full-frame vs pipeline auto-detection ────────────────────────────
|
||||
// Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic
|
||||
// watches whether consecutive frames from a given camera have the exact
|
||||
// same (width, height). Pre-cropped pipeline inputs vary by a few
|
||||
// pixels per crop, so the exact-match check fails and we return false.
|
||||
// Real video frames are pixel-identical across frames, so after a few
|
||||
// consistent frames we flip into FULL-FRAME mode and start running the
|
||||
// ALPRChecker voting + ensureUniquePlateText dedup.
|
||||
bool ANSALPR_OCR::shouldUseALPRChecker(const cv::Size& imageSize,
|
||||
const std::string& cameraId) {
|
||||
// Force disabled via SetALPRCheckerEnabled(false) → never use.
|
||||
if (!_enableALPRChecker) return false;
|
||||
|
||||
// Small images are always pipeline crops — skip auto-detection.
|
||||
if (imageSize.width < ImageSizeTracker::MIN_FULLFRAME_WIDTH) return false;
|
||||
|
||||
auto& tracker = _imageSizeTrackers[cameraId];
|
||||
bool wasFullFrame = tracker.detectedFullFrame;
|
||||
if (imageSize == tracker.lastSize) {
|
||||
tracker.consistentCount++;
|
||||
if (tracker.consistentCount >= ImageSizeTracker::CONFIRM_THRESHOLD) {
|
||||
tracker.detectedFullFrame = true;
|
||||
}
|
||||
} else {
|
||||
tracker.lastSize = imageSize;
|
||||
tracker.consistentCount = 1;
|
||||
tracker.detectedFullFrame = false;
|
||||
}
|
||||
if (tracker.detectedFullFrame != wasFullFrame) {
|
||||
ANS_DBG("ALPR_OCR_Checker",
|
||||
"cam=%s mode auto-detected: %s (img=%dx%d consistent=%d)",
|
||||
cameraId.c_str(),
|
||||
tracker.detectedFullFrame ? "FULL-FRAME (tracker ON)" : "PIPELINE (tracker OFF)",
|
||||
imageSize.width, imageSize.height, tracker.consistentCount);
|
||||
}
|
||||
return tracker.detectedFullFrame;
|
||||
}
|
||||
|
||||
// ── Spatial plate dedup with accumulated scoring ─────────────────────
|
||||
// Mirror of ANSALPR_OD::ensureUniquePlateText. When more than one
|
||||
// detection in the same frame ends up with the same plate text (e.g.
|
||||
// tracker occlusion or two cars in a single frame reading the same
|
||||
// string), we resolve the ambiguity by accumulating confidence per
|
||||
// spatial location across frames. The location with the higher running
|
||||
// score keeps the plate text; the loser has its className cleared and
|
||||
// is dropped from the output.
|
||||
void ANSALPR_OCR::ensureUniquePlateText(std::vector<Object>& results,
|
||||
const std::string& cameraId) {
|
||||
std::lock_guard<std::mutex> plateLock(_plateIdentitiesMutex);
|
||||
auto& identities = _plateIdentities[cameraId];
|
||||
|
||||
// Auto-detect mode by detection count.
|
||||
// 1 detection → pipeline/single-crop mode → no dedup needed.
|
||||
// 2+ detections → full-frame mode → apply accumulated scoring.
|
||||
if (results.size() <= 1) {
|
||||
// Still age out stale spatial identities from previous full-frame calls
|
||||
if (!identities.empty()) {
|
||||
constexpr int MAX_UNSEEN_FRAMES = 30;
|
||||
for (auto& id : identities) id.framesSinceLastSeen++;
|
||||
for (auto it = identities.begin(); it != identities.end(); ) {
|
||||
if (it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
|
||||
it = identities.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Helper: IoU between two rects.
|
||||
auto computeIoU = [](const cv::Rect& a, const cv::Rect& b) -> float {
|
||||
int x1 = std::max(a.x, b.x);
|
||||
int y1 = std::max(a.y, b.y);
|
||||
int x2 = std::min(a.x + a.width, b.x + b.width);
|
||||
int y2 = std::min(a.y + a.height, b.y + b.height);
|
||||
if (x2 <= x1 || y2 <= y1) return 0.0f;
|
||||
float intersection = static_cast<float>((x2 - x1) * (y2 - y1));
|
||||
float unionArea = static_cast<float>(a.area() + b.area()) - intersection;
|
||||
return (unionArea > 0.0f) ? intersection / unionArea : 0.0f;
|
||||
};
|
||||
|
||||
// Helper: find matching spatial identity by bounding-box overlap.
|
||||
auto findSpatialMatch = [&](const cv::Rect& box,
|
||||
const std::string& plateText) -> SpatialPlateIdentity* {
|
||||
for (auto& id : identities) {
|
||||
if (id.plateText == plateText) {
|
||||
cv::Rect storedRect(
|
||||
static_cast<int>(id.center.x - box.width * 0.5f),
|
||||
static_cast<int>(id.center.y - box.height * 0.5f),
|
||||
box.width, box.height);
|
||||
if (computeIoU(box, storedRect) > PLATE_SPATIAL_MATCH_THRESHOLD) {
|
||||
return &id;
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
// Step 1: Build map of plateText → candidate indices
|
||||
std::unordered_map<std::string, std::vector<size_t>> plateCandidates;
|
||||
for (size_t i = 0; i < results.size(); ++i) {
|
||||
if (results[i].className.empty()) continue;
|
||||
plateCandidates[results[i].className].push_back(i);
|
||||
}
|
||||
|
||||
// Step 2: Resolve duplicates using spatial accumulated scores
|
||||
for (auto& [plateText, indices] : plateCandidates) {
|
||||
if (indices.size() <= 1) continue;
|
||||
|
||||
size_t winner = indices[0];
|
||||
float bestScore = 0.0f;
|
||||
|
||||
for (size_t idx : indices) {
|
||||
float score = results[idx].confidence;
|
||||
auto* match = findSpatialMatch(results[idx].box, plateText);
|
||||
if (match) {
|
||||
score = match->accumulatedScore + results[idx].confidence;
|
||||
}
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
winner = idx;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t idx : indices) {
|
||||
if (idx != winner) {
|
||||
results[idx].className.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Update spatial identities — winners accumulate, losers decay
|
||||
constexpr float DECAY_FACTOR = 0.8f;
|
||||
constexpr float MIN_SCORE = 0.1f;
|
||||
constexpr int MAX_UNSEEN_FRAMES = 30;
|
||||
|
||||
for (auto& id : identities) id.framesSinceLastSeen++;
|
||||
|
||||
for (auto& r : results) {
|
||||
if (r.className.empty()) continue;
|
||||
|
||||
cv::Point2f center(
|
||||
r.box.x + r.box.width * 0.5f,
|
||||
r.box.y + r.box.height * 0.5f);
|
||||
|
||||
auto* match = findSpatialMatch(r.box, r.className);
|
||||
if (match) {
|
||||
match->accumulatedScore += r.confidence;
|
||||
match->center = center;
|
||||
match->framesSinceLastSeen = 0;
|
||||
} else {
|
||||
identities.push_back({ center, r.className, r.confidence, 0 });
|
||||
}
|
||||
}
|
||||
|
||||
// Decay unseen identities and remove stale ones
|
||||
for (auto it = identities.begin(); it != identities.end(); ) {
|
||||
if (it->framesSinceLastSeen > 0) {
|
||||
it->accumulatedScore *= DECAY_FACTOR;
|
||||
}
|
||||
if (it->accumulatedScore < MIN_SCORE || it->framesSinceLastSeen > MAX_UNSEEN_FRAMES) {
|
||||
it = identities.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Remove entries with cleared plate text
|
||||
results.erase(
|
||||
std::remove_if(results.begin(), results.end(),
|
||||
[](const Object& o) { return o.className.empty(); }),
|
||||
results.end());
|
||||
}
|
||||
|
||||
// ── OCR on a single plate ROI ────────────────────────────────────────
|
||||
// Returns the plate text via the out-parameter and populates alprExtraInfo
|
||||
// with the structured ALPR JSON (zone parts) when ALPR mode is active.
|
||||
@@ -712,6 +887,13 @@ namespace ANSCENTER
|
||||
std::vector<Object> output;
|
||||
output.reserve(plateInfos.size());
|
||||
|
||||
// Decide once per frame whether the tracker-based correction
|
||||
// layer should run. We auto-detect full-frame vs pipeline mode
|
||||
// by watching for consecutive frames of identical size, exactly
|
||||
// the same way ANSALPR_OD does it.
|
||||
const bool useChecker = shouldUseALPRChecker(
|
||||
cv::Size(frameWidth, frameHeight), cameraId);
|
||||
|
||||
for (const auto& info : plateInfos) {
|
||||
std::string combinedText;
|
||||
for (size_t cropIdx : info.cropIndices) {
|
||||
@@ -726,8 +908,9 @@ namespace ANSCENTER
|
||||
Object lprObject = lprOutput[info.origIndex];
|
||||
lprObject.cameraId = cameraId;
|
||||
|
||||
// Cross-frame stabilization (unchanged)
|
||||
if (_enableALPRChecker) {
|
||||
// Cross-frame stabilization: per-track majority vote in
|
||||
// full-frame mode, raw OCR text in pipeline mode.
|
||||
if (useChecker) {
|
||||
lprObject.className = alprChecker.checkPlateByTrackId(
|
||||
cameraId, combinedText, lprObject.trackId);
|
||||
}
|
||||
@@ -747,6 +930,14 @@ namespace ANSCENTER
|
||||
output.push_back(std::move(lprObject));
|
||||
}
|
||||
|
||||
// Spatial dedup: if two detections in the same frame ended up
|
||||
// with the same plate text, keep only the one whose spatial
|
||||
// history has the higher accumulated confidence. Skip this in
|
||||
// pipeline mode because there's only ever one plate per call.
|
||||
if (useChecker) {
|
||||
ensureUniquePlateText(output, cameraId);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
catch (const cv::Exception& e) {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -45,6 +46,66 @@ namespace ANSCENTER
|
||||
|
||||
ALPRChecker alprChecker;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Full-frame vs pipeline auto-detection (ported from ANSALPR_OD)
|
||||
//
|
||||
// When the caller feeds ANSLPR_OCR pre-cropped vehicle ROIs (each
|
||||
// frame is a different small image), the tracker can't work — the
|
||||
// LP detector sees a totally new image every call so trackIds mean
|
||||
// nothing. In that "pipeline" mode we must skip the ALPRChecker
|
||||
// voting layer entirely and return raw OCR results.
|
||||
//
|
||||
// When the caller feeds full-frame video (same resolution every
|
||||
// frame, plates moving through the scene), the tracker works
|
||||
// normally and we run plate text through ALPRChecker majority
|
||||
// voting + spatial dedup to stabilise readings.
|
||||
//
|
||||
// Mode is auto-detected by watching whether consecutive frames
|
||||
// share the exact same (width, height) for at least
|
||||
// CONFIRM_THRESHOLD frames. Pipeline crops vary by a few pixels;
|
||||
// full-frame video keeps the same size every frame.
|
||||
// ----------------------------------------------------------------
|
||||
struct ImageSizeTracker {
|
||||
cv::Size lastSize{ 0, 0 };
|
||||
int consistentCount = 0;
|
||||
bool detectedFullFrame = false;
|
||||
static constexpr int CONFIRM_THRESHOLD = 5;
|
||||
static constexpr int MIN_FULLFRAME_WIDTH = 1000;
|
||||
};
|
||||
std::unordered_map<std::string, ImageSizeTracker> _imageSizeTrackers;
|
||||
|
||||
[[nodiscard]] bool shouldUseALPRChecker(const cv::Size& imageSize,
|
||||
const std::string& cameraId);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Spatial plate identity persistence (ported from ANSALPR_OD)
|
||||
//
|
||||
// Prevents the same plate string from appearing on two different
|
||||
// vehicles in the same frame. The LP tracker may briefly assign
|
||||
// the same trackId to two different plates when vehicles pass
|
||||
// each other, or two different trackIds to the same plate when
|
||||
// occlusion breaks a track. In either case, OCR can produce the
|
||||
// same text for two spatial locations for a frame or two — which
|
||||
// looks like "plate flicker" in the UI.
|
||||
//
|
||||
// ensureUniquePlateText() resolves the ambiguity by accumulating
|
||||
// confidence per spatial location. When two detections share a
|
||||
// plate text, the one whose spatial history has the higher score
|
||||
// wins and the other has its className cleared.
|
||||
// ----------------------------------------------------------------
|
||||
struct SpatialPlateIdentity {
|
||||
cv::Point2f center; // plate center in frame coords
|
||||
std::string plateText;
|
||||
float accumulatedScore = 0.0f;
|
||||
int framesSinceLastSeen = 0;
|
||||
};
|
||||
std::mutex _plateIdentitiesMutex;
|
||||
std::unordered_map<std::string, std::vector<SpatialPlateIdentity>> _plateIdentities;
|
||||
static constexpr float PLATE_SPATIAL_MATCH_THRESHOLD = 0.3f; // IoU threshold
|
||||
|
||||
void ensureUniquePlateText(std::vector<Object>& results,
|
||||
const std::string& cameraId);
|
||||
|
||||
// --- Original model zip path (reused for ANSONNXOCR initialization) ---
|
||||
std::string _modelZipFilePath;
|
||||
|
||||
|
||||
@@ -8,60 +8,62 @@
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
const std::string& clsModelPath,
|
||||
const std::string& recModelPath,
|
||||
const std::string& dictPath,
|
||||
bool preferTensorRT) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
// ============================================================================
|
||||
// Per-backend OCR option builders
|
||||
//
|
||||
// Each backend (NVIDIA / AMD / Intel / CPU) has its own helper that returns
|
||||
// a fully-populated set of OrtHandlerOptions for the detector, classifier,
|
||||
// and recognizer sub-models. PaddleOCRV5Engine::Initialize dispatches to the
|
||||
// correct helper based on the engine type that EPLoader resolved at startup.
|
||||
//
|
||||
// Adding a new backend optimization is a strictly contained change: touch
|
||||
// only that backend's builder. The others — especially NVIDIA, which is
|
||||
// hand-tuned and should not regress — stay untouched.
|
||||
// ============================================================================
|
||||
|
||||
// High-perf options. The OCR sub-models split into two groups:
|
||||
//
|
||||
// 1. Detector — its input shape varies continuously with every
|
||||
// plate-ROI aspect ratio. TRT EP is a poor fit because it
|
||||
// builds a fresh engine for each unique shape (minutes each).
|
||||
// We keep it on CUDA EP with the largest cuDNN workspace and
|
||||
// let cuDNN HEURISTIC handle the per-shape algo selection.
|
||||
//
|
||||
// 2. Classifier + Recognizer — fixed-bucket shapes (cls is
|
||||
// [1,3,80,160], rec is [1,3,48,{320,480,640,960}]). These
|
||||
// benefit massively from TRT EP because the engine is built
|
||||
// once per shape and reused forever.
|
||||
namespace {
|
||||
|
||||
struct PerModelOcrOptions {
|
||||
OrtHandlerOptions detectorOpts;
|
||||
// Detector uses CUDA EP with *conservative* cuDNN workspace.
|
||||
// Empirical: on VRAM-constrained GPUs (LPD TRT engine + rec TRT
|
||||
// engine + ORT arena in play) the max-workspace mode causes cuDNN
|
||||
// to pick Winograd/implicit-precomp-GEMM variants that silently
|
||||
// fall back to slow NO-WORKSPACE algorithms when the big workspace
|
||||
// can't be allocated. With "0" cuDNN picks algorithms that are
|
||||
// known to fit and runs ~10x faster in practice.
|
||||
detectorOpts.useMaxCudnnWorkspace = false;
|
||||
detectorOpts.preferTensorRT = false; // never TRT for the detector
|
||||
|
||||
// Classifier (fixed [1,3,80,160]): TRT with no profile is fine.
|
||||
OrtHandlerOptions classifierOpts;
|
||||
classifierOpts.useMaxCudnnWorkspace = true;
|
||||
classifierOpts.preferTensorRT = preferTensorRT;
|
||||
classifierOpts.trtFP16 = true;
|
||||
|
||||
// Recognizer: needs a DYNAMIC profile so one TRT engine covers every
|
||||
// (batch, bucket_width) pair we generate at runtime. Without this,
|
||||
// each new shape triggers a ~80s engine rebuild mid-stream when a
|
||||
// new plate appears or the plate count changes.
|
||||
//
|
||||
// Profile range:
|
||||
// batch : 1 .. 16 (16 plates worth of crops is generous)
|
||||
// H : 48 (fixed)
|
||||
// W : 320 .. 960 (covers all 4 recognizer buckets)
|
||||
//
|
||||
// Query the actual input name from the .onnx file instead of
|
||||
// hardcoding — PaddleOCR usually exports it as "x" but the name can
|
||||
// vary across model versions.
|
||||
OrtHandlerOptions recognizerOpts;
|
||||
recognizerOpts.useMaxCudnnWorkspace = true;
|
||||
recognizerOpts.preferTensorRT = preferTensorRT;
|
||||
recognizerOpts.trtFP16 = true;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// NVIDIA — LOCKED. Do NOT modify this helper unless fixing a specific
|
||||
// NVIDIA-observable regression.
|
||||
//
|
||||
// The OCR sub-models split into two groups:
|
||||
// 1. Detector — variable input shape per plate-ROI aspect. TRT EP is a
|
||||
// poor fit (one engine build per unique shape, minutes each). Runs on
|
||||
// CUDA EP with *conservative* cuDNN workspace: empirical measurements
|
||||
// showed that max-workspace mode forces cuDNN to pick Winograd/
|
||||
// implicit-precomp-GEMM variants that silently fall back to slow
|
||||
// NO-WORKSPACE algorithms when the big workspace can't be allocated
|
||||
// under VRAM pressure (LPD TRT engine + rec TRT engine + ORT arena).
|
||||
// 2. Classifier + Recognizer — TRT EP. Classifier has fixed shape so no
|
||||
// profile is needed. Recognizer gets a dynamic profile
|
||||
// [batch=1..16, W=320..960] so a single pre-built engine handles every
|
||||
// runtime shape without mid-stream rebuilds (fixes 60–90 s hangs).
|
||||
// ----------------------------------------------------------------------------
|
||||
static PerModelOcrOptions BuildNvidiaOcrOptions(
|
||||
const std::string& recModelPath,
|
||||
bool preferTensorRT) {
|
||||
PerModelOcrOptions opts;
|
||||
|
||||
// Detector: CUDA EP, conservative workspace, never TRT.
|
||||
opts.detectorOpts.useMaxCudnnWorkspace = false;
|
||||
opts.detectorOpts.preferTensorRT = false;
|
||||
|
||||
// Classifier: TRT EP, no profile (fixed [1,3,80,160]).
|
||||
opts.classifierOpts.useMaxCudnnWorkspace = true;
|
||||
opts.classifierOpts.preferTensorRT = preferTensorRT;
|
||||
opts.classifierOpts.trtFP16 = true;
|
||||
|
||||
// Recognizer: TRT EP with dynamic shape profile.
|
||||
opts.recognizerOpts.useMaxCudnnWorkspace = true;
|
||||
opts.recognizerOpts.preferTensorRT = preferTensorRT;
|
||||
opts.recognizerOpts.trtFP16 = true;
|
||||
if (preferTensorRT) {
|
||||
std::string recInputName = BasicOrtHandler::QueryModelInputName(recModelPath);
|
||||
if (recInputName.empty()) {
|
||||
@@ -72,10 +74,80 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
std::cout << "[PaddleOCRV5Engine] Recognizer input name: '"
|
||||
<< recInputName << "' — building TRT dynamic profile "
|
||||
<< "[batch=1..16, W=320..960]" << std::endl;
|
||||
recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
|
||||
recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
|
||||
recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
|
||||
opts.recognizerOpts.trtProfileMinShapes = recInputName + ":1x3x48x320";
|
||||
opts.recognizerOpts.trtProfileOptShapes = recInputName + ":4x3x48x480";
|
||||
opts.recognizerOpts.trtProfileMaxShapes = recInputName + ":16x3x48x960";
|
||||
}
|
||||
return opts;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Intel (OpenVINO EP) — placeholder.
|
||||
//
|
||||
// Returns default-constructed options: no backend-specific tuning applied
|
||||
// yet. When adding Intel optimizations (OpenVINO cache_dir, explicit device
|
||||
// selection, INT8 paths, etc.), add the corresponding fields to the Intel
|
||||
// section of OrtHandlerOptions and populate them here.
|
||||
// ----------------------------------------------------------------------------
|
||||
static PerModelOcrOptions BuildIntelOcrOptions() {
    // No Intel-specific tuning yet: hand back value-initialised options.
    PerModelOcrOptions defaults{};
    return defaults;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// AMD (DirectML EP / MIGraphX EP) — placeholder.
|
||||
//
|
||||
// Returns default-constructed options: no backend-specific tuning applied
|
||||
// yet. When adding AMD optimizations (graph opt gate for RDNA3+ desktop
|
||||
// cards, MIGraphX cache on Linux, etc.), add the corresponding fields to
|
||||
// the AMD section of OrtHandlerOptions and populate them here.
|
||||
// ----------------------------------------------------------------------------
|
||||
static PerModelOcrOptions BuildAmdOcrOptions() {
    // No AMD-specific tuning yet: hand back value-initialised options.
    PerModelOcrOptions defaults{};
    return defaults;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// CPU / unknown hardware — no tuning.
|
||||
// ----------------------------------------------------------------------------
|
||||
static PerModelOcrOptions BuildDefaultOcrOptions() {
    // CPU / unknown hardware: value-initialised options, no tuning.
    PerModelOcrOptions defaults{};
    return defaults;
}
|
||||
|
||||
// Dispatch entry point used by Initialize().
|
||||
static PerModelOcrOptions BuildOcrOptionsForBackend(
|
||||
const std::string& recModelPath,
|
||||
bool preferTensorRT) {
|
||||
const EngineType backend = EPLoader::Current().type;
|
||||
switch (backend) {
|
||||
case EngineType::NVIDIA_GPU:
|
||||
return BuildNvidiaOcrOptions(recModelPath, preferTensorRT);
|
||||
case EngineType::AMD_GPU:
|
||||
return BuildAmdOcrOptions();
|
||||
case EngineType::OPENVINO_GPU:
|
||||
return BuildIntelOcrOptions();
|
||||
case EngineType::CPU:
|
||||
default:
|
||||
return BuildDefaultOcrOptions();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace (anonymous)
|
||||
|
||||
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
const std::string& clsModelPath,
|
||||
const std::string& recModelPath,
|
||||
const std::string& dictPath,
|
||||
bool preferTensorRT) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
|
||||
// Dispatch to the correct per-backend option builder. The NVIDIA path
|
||||
// is fully locked-in; AMD/Intel/CPU paths currently return defaults
|
||||
// and are the place to add future backend-specific tuning.
|
||||
const PerModelOcrOptions opts =
|
||||
BuildOcrOptionsForBackend(recModelPath, preferTensorRT);
|
||||
const OrtHandlerOptions& detectorOpts = opts.detectorOpts;
|
||||
const OrtHandlerOptions& classifierOpts = opts.classifierOpts;
|
||||
const OrtHandlerOptions& recognizerOpts = opts.recognizerOpts;
|
||||
|
||||
try {
|
||||
// Initialize detector (also triggers EPLoader init in BasicOrtHandler)
|
||||
|
||||
Reference in New Issue
Block a user