Fix GPU slot-setting (maxSlotsPerGpu) behaviour:
| Condition | maxSlotsPerGpu | Behavior |
|---|---|---|
| OptimizeModelStr | 0 | Bypass: non-shared temporary engine |
| 1 GPU | 1 | Single slot, no round-robin |
| >1 GPU, VRAM < 24 GB | 1 | Round-robin: 1 slot per GPU |
| >1 GPU, VRAM >= 24 GB | -1 | Elastic: on-demand slot growth |
This commit is contained in:
@@ -120,7 +120,7 @@ namespace ANSCENTER {
|
||||
std::shared_ptr<Engine<float>> m_trtEngine = nullptr; // NVIDIA TensorRT
|
||||
EnginePoolManager<float>::PoolKey m_poolKey;
|
||||
bool m_usingSharedPool = false;
|
||||
int m_maxSlotsPerGpu{ -1 }; // -1 = elastic mode (on-demand slots, auto-cleanup)
|
||||
int m_maxSlotsPerGpu{ 1 }; // 1 = single slot (default); set by dllmain based on GPU topology
|
||||
void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }
|
||||
std::shared_ptr<faiss::IndexIDMap> faiss_index;
|
||||
std::shared_ptr<faiss::gpu::StandardGpuResources> m_gpuResources;
|
||||
|
||||
Reference in New Issue
Block a user