Fix setting GPU behaviour:
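| Condition              | maxSlotsPerGpu | Behavior                            |
|------------------------|----------------|-------------------------------------|
| OptimizeModelStr       | 0              | Bypass: non-shared temporary engine |
| 1 GPU                  | 1              | Single slot, no round-robin         |
| >1 GPU, VRAM < 24 GB   | 1              | Round-robin: 1 slot per GPU         |
| >1 GPU, VRAM >= 24 GB  | -1             | Elastic: on-demand slot growth      |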
This commit is contained in:
@@ -9,7 +9,10 @@
|
||||
#include "FaceNet.h"
|
||||
#include "ANSFaceRecognizer.h"
|
||||
#include "ANSLibsLoader.h"
|
||||
#include "engine/TRTEngineCache.h"
|
||||
#include "engine/EnginePoolManager.h"
|
||||
#include <memory>
|
||||
#include <climits>
|
||||
#include <unordered_map>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
@@ -93,6 +96,36 @@ public:
|
||||
FRHandleGuard& operator=(const FRHandleGuard&) = delete;
|
||||
};
|
||||
|
||||
// Determine maxSlotsPerGpu based on GPU topology:
|
||||
// 1 GPU → 1 (single slot, no round-robin needed)
|
||||
// >1 GPU, VRAM<24GB → 1 (round-robin: 1 slot per GPU)
|
||||
// >1 GPU, VRAM≥24GB → -1 (elastic: on-demand slot growth)
|
||||
static int GetPoolMaxSlotsPerGpu() {
|
||||
static int s_result = INT_MIN;
|
||||
static std::mutex s_mutex;
|
||||
std::lock_guard<std::mutex> lk(s_mutex);
|
||||
if (s_result != INT_MIN) return s_result;
|
||||
int gpuCount = 0;
|
||||
cudaGetDeviceCount(&gpuCount);
|
||||
if (gpuCount <= 1) {
|
||||
s_result = 1;
|
||||
std::cout << "Info [FR GPU]: Single GPU — pool mode: 1 slot, no round-robin" << std::endl;
|
||||
return s_result;
|
||||
}
|
||||
constexpr size_t kLargeVramBytes = 24ULL * 1024 * 1024 * 1024; // 24 GB
|
||||
size_t totalMem = 0, freeMem = 0;
|
||||
cudaSetDevice(0);
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
if (totalMem >= kLargeVramBytes) {
|
||||
s_result = -1;
|
||||
std::cout << "Info [FR GPU]: " << gpuCount << " GPUs, VRAM >= 24 GB — pool mode: elastic" << std::endl;
|
||||
} else {
|
||||
s_result = 1;
|
||||
std::cout << "Info [FR GPU]: " << gpuCount << " GPUs, VRAM < 24 GB — pool mode: round-robin" << std::endl;
|
||||
}
|
||||
return s_result;
|
||||
}
|
||||
|
||||
BOOL APIENTRY DllMain( HMODULE hModule,
|
||||
DWORD ul_reason_for_call,
|
||||
LPVOID lpReserved
|
||||
@@ -117,8 +150,14 @@ BOOL APIENTRY DllMain( HMODULE hModule,
|
||||
case DLL_THREAD_DETACH:
|
||||
break;
|
||||
case DLL_PROCESS_DETACH:
|
||||
// Clean up any handles that LabVIEW didn't release before closing.
|
||||
// Without this, idle-timer threads keep the process alive indefinitely.
|
||||
// ExitProcess: OS killed worker threads, CUDA context is dead.
|
||||
// Set flag so Engine/Pool destructors skip CUDA cleanup.
|
||||
if (lpReserved != nullptr) {
|
||||
g_processExiting().store(true, std::memory_order_relaxed);
|
||||
break;
|
||||
}
|
||||
|
||||
// Dynamic FreeLibrary — threads are still alive, safe to clean up.
|
||||
try {
|
||||
std::vector<ANSCENTER::ANSFacialRecognition*> leakedHandles;
|
||||
{
|
||||
@@ -130,6 +169,8 @@ BOOL APIENTRY DllMain( HMODULE hModule,
|
||||
for (auto* h : leakedHandles) {
|
||||
try { h->Destroy(); delete h; } catch (...) {}
|
||||
}
|
||||
try { EnginePoolManager<float>::instance().clearAll(); } catch (...) {}
|
||||
try { TRTEngineCache::instance().clearAll(); } catch (...) {}
|
||||
} catch (...) {}
|
||||
break;
|
||||
}
|
||||
@@ -185,6 +226,8 @@ extern "C" ANSFR_API int CreateANSRFHandle(ANSCENTER::ANSFacialRecognition**
|
||||
const bool _enableFaceLiveness = (enableFaceLiveness == 1);
|
||||
const bool _enableAntiSpoofing = (enableAntiSpoofing == 1);
|
||||
|
||||
ptr->SetMaxSlotsPerGpu(GetPoolMaxSlotsPerGpu());
|
||||
|
||||
int result = ptr->Initialize(licenseKey,
|
||||
configFilePath,
|
||||
databaseFilePath,
|
||||
|
||||
Reference in New Issue
Block a user