Fix setting GPU behaviour:

| Condition | maxSlotsPerGpu | Behavior |
|---|---|---|
| OptimizeModelStr | 0 | Bypass: non-shared temporary engine |
| 1 GPU | 1 | Single slot, no round-robin |
| >1 GPU, VRAM < 24 GB | 1 | Round-robin: 1 slot per GPU |
| >1 GPU, VRAM >= 24 GB | -1 | Elastic: on-demand slot growth |
2026-03-30 09:59:09 +11:00
parent 01eabf76bd
commit c1b919ec47
9 changed files with 123 additions and 6 deletions
--- a/modules/ANSODEngine/dllmain.cpp
+++ b/modules/ANSODEngine/dllmain.cpp
@@ -5,6 +5,7 @@
 #include "ANSGpuFrameRegistry.h"    // gpu_frame_lookup(cv::Mat*)
 #include "engine/TRTEngineCache.h"   // clearAll() on DLL_PROCESS_DETACH
 #include "engine/EnginePoolManager.h" // clearAll() on DLL_PROCESS_DETACH
+#include <climits>                    // INT_MIN

 // Process-wide flag: when true, all engines force single-GPU path (no pool, no idle timers).
 // Defined here, declared extern in EngineBuildLoadNetwork.inl.
@@ -96,6 +97,37 @@ static int GetNumGPUs() {
 	return g_numGPUs;
 }

+// Chooses maxSlotsPerGpu from the GPU topology; the result is cached after the first query:
+//   1 GPU            → 1  (single slot, no round-robin needed)
+//   >1 GPU, VRAM<24GB → 1  (round-robin: 1 slot per GPU)
+//   >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
+// VRAM is read from device 0 (GPUs are assumed same spec); the caller's current CUDA device is restored.
+// NOTE(review): cards sold as "24 GB" may report slightly under 24 GiB total — confirm the threshold.
+static int GetPoolMaxSlotsPerGpu() {
+	static int s_result = INT_MIN;  // INT_MIN marks "not computed yet"
+	static std::mutex s_mutex;
+	std::lock_guard<std::mutex> lk(s_mutex);
+	if (s_result != INT_MIN) return s_result;
+	const int n = GetNumGPUs();
+	if (n <= 1) {
+		s_result = 1;
+		std::cout << "Info [GPU]: Single GPU — pool mode: 1 slot, no round-robin" << std::endl;
+		return s_result;
+	}
+	constexpr size_t kLargeVramBytes = 24ULL * 1024 * 1024 * 1024;  // 24 GiB
+	size_t totalMem = 0, freeMem = 0;
+	int prevDevice = 0;
+	const bool restore = (cudaGetDevice(&prevDevice) == cudaSuccess);
+	if (cudaSetDevice(0) != cudaSuccess || cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess) {
+		totalMem = 0;  // query failed: fall back to the conservative round-robin mode
+		std::cout << "Warning [GPU]: VRAM query on device 0 failed — assuming VRAM < 24 GB" << std::endl;
+	}
+	if (restore) cudaSetDevice(prevDevice);  // undo the switch so the caller keeps its assigned GPU
+	s_result = (totalMem >= kLargeVramBytes) ? -1 : 1;
+	std::cout << "Info [GPU]: " << n << " GPUs, VRAM " << (s_result < 0 ? ">= 24 GB — pool mode: elastic" : "< 24 GB — pool mode: round-robin") << std::endl;
+	return s_result;
+}
+
 // Returns the next GPU index in round-robin order.
 // Thread-safe: uses atomic fetch_add.
 static int AssignNextGPU() {
@@ -588,6 +620,7 @@ extern "C" ANSODENGINE_API std::string  CreateANSODHandle(ANSCENTER::ANSODBase**
 		CheckGPUVRAM(assignedGPU);

 		RegisterODHandle(*Handle);
+		(*Handle)->SetMaxSlotsPerGpu(GetPoolMaxSlotsPerGpu());
 		(*Handle)->SetLoadEngineOnCreation(_loadEngineOnCreation); //Set force to load the engine immediately
 		bool loadResult = (*Handle)->Initialize(licenseKey, modelConfig, modelFilePath, modelFileZipPassword, labelMap);
 		return labelMap;
@@ -894,6 +927,7 @@ extern "C" __declspec(dllexport) int LoadModelFromFolder(ANSCENTER::ANSODBase**
 			CheckGPUVRAM(assignedGPU);

 			RegisterODHandle(*Handle);
+			(*Handle)->SetMaxSlotsPerGpu(GetPoolMaxSlotsPerGpu());
 			(*Handle)->SetLoadEngineOnCreation(_loadEngineOnCreation); //Set force to load the engine immediately
 			bool result = (*Handle)->LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
 			if (result) return 1;