Fix how maxSlotsPerGpu is set based on GPU topology:

| Condition | maxSlotsPerGpu | Behavior |
|---|---|---|
| OptimizeModelStr | 0 | Bypass: non-shared temporary engine |
| 1 GPU | 1 | Single slot, no round-robin |
| >1 GPU, VRAM < 24 GB | 1 | Round-robin: 1 slot per GPU |
| >1 GPU, VRAM >= 24 GB | -1 | Elastic: on-demand slot growth |
This commit is contained in:
2026-03-30 09:59:09 +11:00
parent 01eabf76bd
commit c1b919ec47
9 changed files with 123 additions and 6 deletions

View File

@@ -120,7 +120,7 @@ namespace ANSCENTER {
std::shared_ptr<Engine<float>> m_trtEngine = nullptr; // NVIDIA TensorRT
EnginePoolManager<float>::PoolKey m_poolKey;
bool m_usingSharedPool = false;
-int m_maxSlotsPerGpu{ -1 }; // -1 = elastic mode (on-demand slots, auto-cleanup)
+int m_maxSlotsPerGpu{ 1 }; // 1 = single slot (default); set by dllmain based on GPU topology
// Replaces this object's per-GPU slot limit with `n`.
// Per the m_maxSlotsPerGpu member comments: 1 = single slot, -1 = elastic
// mode (on-demand slots). NOTE(review): the member comment says the value is
// chosen by dllmain from the GPU topology -- presumably via this setter; confirm.
void SetMaxSlotsPerGpu(int n) override
{
    m_maxSlotsPerGpu = n;
}
std::shared_ptr<faiss::IndexIDMap> faiss_index;
std::shared_ptr<faiss::gpu::StandardGpuResources> m_gpuResources;