Fix setting GPU behaviour:
| Condition | maxSlotsPerGpu | Behavior |
|---|---|---|
| OptimizeModelStr | 0 | Bypass: non-shared temporary engine |
| 1 GPU | 1 | Single slot, no round-robin |
| >1 GPU, VRAM < 24 GB | 1 | Round-robin: 1 slot per GPU |
| >1 GPU, VRAM >= 24 GB | -1 | Elastic: on-demand slot growth |
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
#include "ANSGpuFrameRegistry.h" // gpu_frame_lookup(cv::Mat*)
|
||||
#include "engine/TRTEngineCache.h" // clearAll() on DLL_PROCESS_DETACH
|
||||
#include "engine/EnginePoolManager.h" // clearAll() on DLL_PROCESS_DETACH
|
||||
#include <climits> // INT_MIN
|
||||
|
||||
// Process-wide flag: when true, all engines force single-GPU path (no pool, no idle timers).
|
||||
// Defined here, declared extern in EngineBuildLoadNetwork.inl.
|
||||
@@ -96,6 +97,37 @@ static int GetNumGPUs() {
|
||||
return g_numGPUs;
|
||||
}
|
||||
|
||||
// Determine maxSlotsPerGpu based on GPU topology:
|
||||
// 1 GPU → 1 (single slot, no round-robin needed)
|
||||
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
|
||||
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
|
||||
// Result is cached after the first query.
|
||||
static int GetPoolMaxSlotsPerGpu() {
|
||||
static int s_result = INT_MIN;
|
||||
static std::mutex s_mutex;
|
||||
std::lock_guard<std::mutex> lk(s_mutex);
|
||||
if (s_result != INT_MIN) return s_result;
|
||||
const int n = GetNumGPUs();
|
||||
if (n <= 1) {
|
||||
s_result = 1;
|
||||
std::cout << "Info [GPU]: Single GPU — pool mode: 1 slot, no round-robin" << std::endl;
|
||||
return s_result;
|
||||
}
|
||||
// Multiple GPUs — check VRAM (GPUs are assumed same spec)
|
||||
constexpr size_t kLargeVramBytes = 24ULL * 1024 * 1024 * 1024; // 24 GB
|
||||
size_t totalMem = 0, freeMem = 0;
|
||||
cudaSetDevice(0);
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
if (totalMem >= kLargeVramBytes) {
|
||||
s_result = -1;
|
||||
std::cout << "Info [GPU]: " << n << " GPUs, VRAM >= 24 GB — pool mode: elastic" << std::endl;
|
||||
} else {
|
||||
s_result = 1;
|
||||
std::cout << "Info [GPU]: " << n << " GPUs, VRAM < 24 GB — pool mode: round-robin" << std::endl;
|
||||
}
|
||||
return s_result;
|
||||
}
|
||||
|
||||
// Returns the next GPU index in round-robin order.
|
||||
// Thread-safe: uses atomic fetch_add.
|
||||
static int AssignNextGPU() {
|
||||
@@ -588,6 +620,7 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase**
|
||||
CheckGPUVRAM(assignedGPU);
|
||||
|
||||
RegisterODHandle(*Handle);
|
||||
(*Handle)->SetMaxSlotsPerGpu(GetPoolMaxSlotsPerGpu());
|
||||
(*Handle)->SetLoadEngineOnCreation(_loadEngineOnCreation); //Set force to load the engine immediately
|
||||
bool loadResult = (*Handle)->Initialize(licenseKey, modelConfig, modelFilePath, modelFileZipPassword, labelMap);
|
||||
return labelMap;
|
||||
@@ -894,6 +927,7 @@ extern "C" __declspec(dllexport) int LoadModelFromFolder(ANSCENTER::ANSODBase**
|
||||
CheckGPUVRAM(assignedGPU);
|
||||
|
||||
RegisterODHandle(*Handle);
|
||||
(*Handle)->SetMaxSlotsPerGpu(GetPoolMaxSlotsPerGpu());
|
||||
(*Handle)->SetLoadEngineOnCreation(_loadEngineOnCreation); //Set force to load the engine immediately
|
||||
bool result = (*Handle)->LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
|
||||
if (result) return 1;
|
||||
|
||||
Reference in New Issue
Block a user