Fix setting GPU behaviour:
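| Condition                | maxSlotsPerGpu | Behavior                            |
|--------------------------|----------------|-------------------------------------|
| OptimizeModelStr         | 0              | Bypass: non-shared temporary engine |
| 1 GPU                    | 1              | Single slot, no round-robin         |
| >1 GPU, VRAM < 24 GB     | 1              | Round-robin: 1 slot per GPU         |
| >1 GPU, VRAM >= 24 GB    | -1             | Elastic: on-demand slot growth      |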
This commit is contained in:
@@ -40,7 +40,7 @@ bool RTOCRClassifier::Initialize(const std::string& onnxPath, int gpuId,
|
||||
options.maxBatchSize };
|
||||
m_engine = EnginePoolManager<float>::instance().acquire(
|
||||
m_poolKey, options, onnxPath,
|
||||
kClsSubVals, kClsDivVals, true, -1);
|
||||
kClsSubVals, kClsDivVals, true, getPoolMaxSlotsPerGpu());
|
||||
m_usingSharedPool = (m_engine != nullptr);
|
||||
|
||||
if (!m_engine) {
|
||||
|
||||
@@ -57,7 +57,7 @@ bool RTOCRDetector::Initialize(const std::string& onnxPath, int gpuId,
|
||||
options.maxBatchSize };
|
||||
m_engine = EnginePoolManager<float>::instance().acquire(
|
||||
m_poolKey, options, onnxPath,
|
||||
kDetSubVals, kDetDivVals, true, -1);
|
||||
kDetSubVals, kDetDivVals, true, getPoolMaxSlotsPerGpu());
|
||||
m_usingSharedPool = (m_engine != nullptr);
|
||||
|
||||
if (!m_engine) {
|
||||
|
||||
@@ -52,7 +52,7 @@ bool RTOCRRecognizer::Initialize(const std::string& onnxPath, const std::string&
|
||||
options.maxBatchSize };
|
||||
m_engine = EnginePoolManager<float>::instance().acquire(
|
||||
m_poolKey, options, onnxPath,
|
||||
kRecSubVals, kRecDivVals, true, -1);
|
||||
kRecSubVals, kRecDivVals, true, getPoolMaxSlotsPerGpu());
|
||||
m_usingSharedPool = (m_engine != nullptr);
|
||||
|
||||
if (!m_engine) {
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
#include <climits>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include <cuda_runtime.h>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
@@ -95,6 +99,38 @@ struct OCRPredictResult {
|
||||
// Utility functions
|
||||
// ============================================================================
|
||||
|
||||
// Determine maxSlotsPerGpu based on GPU topology:
|
||||
// 1 GPU → 1 (single slot, no round-robin needed)
|
||||
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
|
||||
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
|
||||
// Result is cached after the first query.
|
||||
inline int getPoolMaxSlotsPerGpu() {
|
||||
static int s_result = INT_MIN;
|
||||
static std::mutex s_mutex;
|
||||
std::lock_guard<std::mutex> lk(s_mutex);
|
||||
if (s_result != INT_MIN) return s_result;
|
||||
int gpuCount = 0;
|
||||
cudaGetDeviceCount(&gpuCount);
|
||||
if (gpuCount <= 1) {
|
||||
s_result = 1;
|
||||
std::cout << "Info [OCR GPU]: Single GPU — pool mode: 1 slot, no round-robin" << std::endl;
|
||||
return s_result;
|
||||
}
|
||||
// Multiple GPUs — check VRAM (GPUs are assumed same spec)
|
||||
constexpr size_t kLargeVramBytes = 24ULL * 1024 * 1024 * 1024; // 24 GB
|
||||
size_t totalMem = 0, freeMem = 0;
|
||||
cudaSetDevice(0);
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
if (totalMem >= kLargeVramBytes) {
|
||||
s_result = -1;
|
||||
std::cout << "Info [OCR GPU]: " << gpuCount << " GPUs, VRAM >= 24 GB — pool mode: elastic" << std::endl;
|
||||
} else {
|
||||
s_result = 1;
|
||||
std::cout << "Info [OCR GPU]: " << gpuCount << " GPUs, VRAM < 24 GB — pool mode: round-robin" << std::endl;
|
||||
}
|
||||
return s_result;
|
||||
}
|
||||
|
||||
// Load character dictionary from file
|
||||
inline std::vector<std::string> LoadDict(const std::string& dictPath) {
|
||||
std::vector<std::string> keys;
|
||||
|
||||
Reference in New Issue
Block a user