Improve ALPR_OCR performance

This commit is contained in:
2026-04-14 20:30:21 +10:00
parent 3349b45ade
commit f9a0af8949
18 changed files with 991 additions and 77 deletions

View File

@@ -248,6 +248,46 @@ namespace ANSCENTER {
return std::string(ort_session->GetOutputNameAllocated(index, allocator).get());
}
// ====================================================================
// Per-session performance knobs for OCR sub-models that want the
// TensorRT EP and/or a full cuDNN workspace. A default-constructed
// instance reproduces the legacy behavior exactly (CUDA EP only,
// minimal cuDNN workspace).
// ====================================================================
struct OrtHandlerOptions {
    // Attempt to register the TensorRT EP ahead of the CUDA EP
    // (NVIDIA hardware only). If TRT EP or session creation fails,
    // the handler silently falls back to the CUDA EP. Built engines
    // are cached on disk so subsequent loads are fast.
    bool preferTensorRT{false};
    // Allow cuDNN its largest convolution workspace so it can select
    // faster algorithms (e.g. Winograd, implicit-precomp GEMM, which
    // need large workspaces). Off by default: deployments that share
    // VRAM with TRT engines rely on the minimal-workspace mode to
    // avoid out-of-memory failures.
    bool useMaxCudnnWorkspace{false};
    // Directory for cached TRT engines. When empty the default
    // %TEMP%/ANSCENTER/TRTEngineCache is used. Only consulted when
    // preferTensorRT is true.
    std::string trtEngineCacheDir;
    // Build TRT engines in FP16. Recommended for inference; has no
    // effect unless preferTensorRT is true.
    bool trtFP16{true};
    // Dynamic-shape profile for the TRT EP. When all three strings
    // are set, TRT builds a SINGLE engine covering every input shape
    // in the [min..max] range rather than rebuilding one engine per
    // unique shape — critical for models that see many
    // (batch_size, spatial) combinations at runtime.
    //
    // Format: "input_name:d0xd1xd2xd3[,input2:...]"
    // e.g. "x:1x3x48x320" for batch=1, C=3, H=48, W=320
    //
    // The three fields must be provided together. Leaving the min
    // string empty disables the profile (falls back to the
    // static-shape-per-unique-input mode).
    std::string trtProfileMinShapes;
    std::string trtProfileOptShapes;
    std::string trtProfileMaxShapes;
};
// ====================================================================
// BasicOrtHandler
// ====================================================================
@@ -280,6 +320,9 @@ namespace ANSCENTER {
const unsigned int num_threads;
EngineType m_engineType;
// Per-session high-perf options. Default = legacy behavior.
OrtHandlerOptions m_handlerOptions;
protected:
// Default: hardware auto-detection via ANSLicenseHelper through EPLoader
explicit BasicOrtHandler(const std::string& _onnx_path,
@@ -290,6 +333,19 @@ namespace ANSCENTER {
EngineType engineType,
unsigned int _num_threads = 1);
// Engine override + per-session high-perf options (TRT EP, max
// cuDNN workspace, etc.). Used by OCR sub-models that need
// shape-stable, high-throughput inference.
explicit BasicOrtHandler(const std::string& _onnx_path,
EngineType engineType,
const OrtHandlerOptions& options,
unsigned int _num_threads = 1);
// Auto-detect engine via EPLoader, but with high-perf options.
explicit BasicOrtHandler(const std::string& _onnx_path,
const OrtHandlerOptions& options,
unsigned int _num_threads = 1);
virtual ~BasicOrtHandler();
BasicOrtHandler(const BasicOrtHandler&) = delete;
@@ -298,6 +354,13 @@ namespace ANSCENTER {
// Returns the EP type actually in use for this session — i.e. the
// value resolved after any EPLoader fallback, which may differ from
// the engine type requested at construction. Subclasses read this to
// branch on the concrete EP at inference time. Cheap const accessor;
// no side effects.
EngineType getEngineType() const { return m_engineType; }
// Spin up a tiny CPU-only ORT session just long enough to read
// the name of the model's first input, then tear it down. Used
// by callers that need to build TRT profile-shape strings
// (which require the input name) BEFORE the real session is
// created. Returns an empty string on failure.
static std::string QueryModelInputName(const std::string& onnxPath);
private:
void initialize_handler();
protected:
@@ -306,6 +369,7 @@ namespace ANSCENTER {
// EP-specific session option builders
bool TryAppendCUDA(Ort::SessionOptions& opts);
bool TryAppendTensorRT(Ort::SessionOptions& opts);
bool TryAppendDirectML(Ort::SessionOptions& opts);
bool TryAppendOpenVINO(Ort::SessionOptions& opts);
};