676 lines
38 KiB
C++
676 lines
38 KiB
C++
|
|
// ============================================================================
|
|||
|
|
// NvDynLoader.cpp
|
|||
|
|
//
|
|||
|
|
// Two responsibilities:
|
|||
|
|
//
|
|||
|
|
// 1. Provide extern "C" stub DEFINITIONS for every symbol that TRT / ONNX
|
|||
|
|
// parser inline wrappers reference. This satisfies the linker without
|
|||
|
|
// linking nvinfer_XX.lib / nvonnxparser_XX.lib. Each stub forwards the
|
|||
|
|
// call through a function pointer that Initialize() populates at runtime.
|
|||
|
|
//
|
|||
|
|
// 2. Implement NvDynLoader::Initialize() which searches for installed NVIDIA
|
|||
|
|
// DLLs (trying multiple major versions, newest first) and binds all
|
|||
|
|
// required function pointers.
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
#include "include/engine/NvDynLoader.h"
|
|||
|
|
#include <iostream>
|
|||
|
|
#include <cassert>
|
|||
|
|
#include <mutex>
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Static member definitions
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
bool NvDynLoader::s_initialized = false;
|
|||
|
|
int NvDynLoader::s_trtMajor = 0;
|
|||
|
|
std::string NvDynLoader::s_trtPath;
|
|||
|
|
std::string NvDynLoader::s_onnxPath;
|
|||
|
|
std::string NvDynLoader::s_cudaPath;
|
|||
|
|
NvLibHandle NvDynLoader::s_hTrt = nullptr;
|
|||
|
|
NvLibHandle NvDynLoader::s_hOnnx = nullptr;
|
|||
|
|
NvLibHandle NvDynLoader::s_hCuda = nullptr;
|
|||
|
|
NvLibHandle NvDynLoader::s_hCudnn = nullptr;
|
|||
|
|
std::string NvDynLoader::s_cudnnPath;
|
|||
|
|
|
|||
|
|
NvDynLoader::PfnBuilder* NvDynLoader::pfn_createInferBuilder_INTERNAL = nullptr;
|
|||
|
|
NvDynLoader::PfnRuntime* NvDynLoader::pfn_createInferRuntime_INTERNAL = nullptr;
|
|||
|
|
NvDynLoader::PfnRefitter* NvDynLoader::pfn_createInferRefitter_INTERNAL = nullptr;
|
|||
|
|
NvDynLoader::PfnParser* NvDynLoader::pfn_createNvOnnxParser_INTERNAL = nullptr;
|
|||
|
|
NvDynLoader::PfnParserRefitter* NvDynLoader::pfn_createNvOnnxParserRefitter_INTERNAL = nullptr;
|
|||
|
|
NvDynLoader::PfnGetParserVersion* NvDynLoader::pfn_getNvOnnxParserVersion = nullptr;
|
|||
|
|
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
// CUDA runtime entry points. Each pointer starts out null and is bound by
// Initialize(); the matching extern "C" stub forwards through it.

// -- Device management -------------------------------------------------------
cudaError_t (*NvDynLoader::pfn_cudaGetDeviceCount)(int*) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaSetDevice)(int) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaGetDeviceProperties)(cudaDeviceProp*, int) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaDeviceSetLimit)(cudaLimit, size_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaDeviceSynchronize)() = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaDeviceGetStreamPriorityRange)(int*, int*) = nullptr;

// -- Memory ------------------------------------------------------------------
cudaError_t (*NvDynLoader::pfn_cudaMalloc)(void**, size_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaFree)(void*) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaMemset)(void*, int, size_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaMemGetInfo)(size_t*, size_t*) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaMemcpy)(void*, const void*, size_t, cudaMemcpyKind) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaMemcpyAsync)(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t) = nullptr;

// -- Streams -----------------------------------------------------------------
cudaError_t (*NvDynLoader::pfn_cudaStreamCreate)(cudaStream_t*) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaStreamCreateWithPriority)(cudaStream_t*, unsigned int, int) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaStreamDestroy)(cudaStream_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaStreamSynchronize)(cudaStream_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaStreamWaitEvent)(cudaStream_t, cudaEvent_t, unsigned int) = nullptr;

// -- Events ------------------------------------------------------------------
cudaError_t (*NvDynLoader::pfn_cudaEventCreate)(cudaEvent_t*) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaEventCreateWithFlags)(cudaEvent_t*, unsigned int) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaEventRecord)(cudaEvent_t, cudaStream_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaEventDestroy)(cudaEvent_t) = nullptr;

// -- Error reporting ---------------------------------------------------------
const char* (*NvDynLoader::pfn_cudaGetErrorString)(cudaError_t) = nullptr;
cudaError_t (*NvDynLoader::pfn_cudaGetLastError)() = nullptr;
#endif // NV_DYNAMIC_CUDA
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// extern "C" stubs
|
|||
|
|
//
|
|||
|
|
// These provide the linker symbols that TRT / ONNX-parser inline wrappers
|
|||
|
|
// reference. With TENSORRTAPI / NVONNXPARSER_API overridden to empty (done
|
|||
|
|
// at the top of engine.h, before TRT headers are first included), the inline
|
|||
|
|
// wrappers compile to direct calls — not indirect import-table calls — so the
|
|||
|
|
// linker resolves them to these definitions rather than to a .lib file.
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
extern "C" {
|
|||
|
|
|
|||
|
|
void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept
|
|||
|
|
{
|
|||
|
|
assert(NvDynLoader::pfn_createInferBuilder_INTERNAL &&
|
|||
|
|
"NvDynLoader::Initialize() has not been called before the first TRT use.");
|
|||
|
|
return NvDynLoader::pfn_createInferBuilder_INTERNAL(logger, version);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept
|
|||
|
|
{
|
|||
|
|
assert(NvDynLoader::pfn_createInferRuntime_INTERNAL &&
|
|||
|
|
"NvDynLoader::Initialize() has not been called before the first TRT use.");
|
|||
|
|
return NvDynLoader::pfn_createInferRuntime_INTERNAL(logger, version);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// createInferRefitter_INTERNAL — only needed if engine refitting is used;
|
|||
|
|
// guarded so a missing symbol is a no-op rather than an assert failure.
|
|||
|
|
void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept
|
|||
|
|
{
|
|||
|
|
if (!NvDynLoader::pfn_createInferRefitter_INTERNAL) return nullptr;
|
|||
|
|
return NvDynLoader::pfn_createInferRefitter_INTERNAL(engine, logger, version);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void* createNvOnnxParser_INTERNAL(void* network, void* logger, int32_t version) noexcept
|
|||
|
|
{
|
|||
|
|
assert(NvDynLoader::pfn_createNvOnnxParser_INTERNAL &&
|
|||
|
|
"NvDynLoader::Initialize() has not been called before the first ONNX-parser use.");
|
|||
|
|
return NvDynLoader::pfn_createNvOnnxParser_INTERNAL(network, logger, version);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// createNvOnnxParserRefitter_INTERNAL — TRT 10+ symbol; absent on TRT 8/9.
|
|||
|
|
// Guarded rather than asserted so older TRT versions continue to work.
|
|||
|
|
void* createNvOnnxParserRefitter_INTERNAL(void* refitter, void* logger, int32_t version) noexcept
|
|||
|
|
{
|
|||
|
|
if (!NvDynLoader::pfn_createNvOnnxParserRefitter_INTERNAL) return nullptr;
|
|||
|
|
return NvDynLoader::pfn_createNvOnnxParserRefitter_INTERNAL(refitter, logger, version);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// getNvOnnxParserVersion — optional version query; returns 0 if not available.
|
|||
|
|
int getNvOnnxParserVersion() noexcept
|
|||
|
|
{
|
|||
|
|
if (!NvDynLoader::pfn_getNvOnnxParserVersion) return 0;
|
|||
|
|
return NvDynLoader::pfn_getNvOnnxParserVersion();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
} // extern "C"
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// CUDA stubs (only when NV_DYNAMIC_CUDA is defined)
|
|||
|
|
// Removes the cudart_static.lib dependency; CUDA RT is loaded dynamically.
|
|||
|
|
//
|
|||
|
|
// WHY extern "C":
|
|||
|
|
// cuda_runtime.h wraps every public API in `extern "C"` when compiled as C++,
|
|||
|
|
// giving each function C linkage (no name mangling). Our stub DEFINITIONS
|
|||
|
|
// must use the same linkage — otherwise the linker sees a C-linkage declaration
|
|||
|
|
// (from the header) paired with a C++-mangled definition (from our .cpp), which
|
|||
|
|
// is an unresolved-symbol error on both MSVC and GCC/Clang.
|
|||
|
|
//
|
|||
|
|
// WHY cudaDeviceSynchronize / cudaGetLastError are NOT in the macro list:
|
|||
|
|
// CUDA_STUB_N macros require at least one parameter. These two functions take
|
|||
|
|
// zero arguments; they are defined manually in the block below. Do NOT add
|
|||
|
|
// them back to the macro invocation list — it would create duplicate definitions.
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA

extern "C" {   // linkage must match the declarations in cuda_runtime.h

// Each CUDA_STUB_<N> defines `RET NAME(<N parameters>)`: it asserts that the
// matching NvDynLoader::pfn_NAME pointer is bound, then forwards every argument.
//
// NOTE(review): keep `pfn_##NAME` pasted directly inside each macro. Routing
// NAME through a nested helper macro would macro-expand it before pasting,
// which matters if a CUDA header defines an API name as a macro — confirm
// against the CUDA headers in use before refactoring.

#define CUDA_STUB_1(RET, NAME, T0, P0)                                        \
    RET NAME(T0 P0)                                                           \
    {                                                                         \
        assert(NvDynLoader::pfn_##NAME);                                      \
        return NvDynLoader::pfn_##NAME(P0);                                   \
    }

#define CUDA_STUB_2(RET, NAME, T0, P0, T1, P1)                                \
    RET NAME(T0 P0, T1 P1)                                                    \
    {                                                                         \
        assert(NvDynLoader::pfn_##NAME);                                      \
        return NvDynLoader::pfn_##NAME(P0, P1);                               \
    }

#define CUDA_STUB_3(RET, NAME, T0, P0, T1, P1, T2, P2)                        \
    RET NAME(T0 P0, T1 P1, T2 P2)                                             \
    {                                                                         \
        assert(NvDynLoader::pfn_##NAME);                                      \
        return NvDynLoader::pfn_##NAME(P0, P1, P2);                           \
    }

#define CUDA_STUB_4(RET, NAME, T0, P0, T1, P1, T2, P2, T3, P3)                \
    RET NAME(T0 P0, T1 P1, T2 P2, T3 P3)                                      \
    {                                                                         \
        assert(NvDynLoader::pfn_##NAME);                                      \
        return NvDynLoader::pfn_##NAME(P0, P1, P2, P3);                       \
    }

#define CUDA_STUB_5(RET, NAME, T0, P0, T1, P1, T2, P2, T3, P3, T4, P4)        \
    RET NAME(T0 P0, T1 P1, T2 P2, T3 P3, T4 P4)                               \
    {                                                                         \
        assert(NvDynLoader::pfn_##NAME);                                      \
        return NvDynLoader::pfn_##NAME(P0, P1, P2, P3, P4);                   \
    }

// -- Device management -------------------------------------------------------
CUDA_STUB_1(cudaError_t, cudaGetDeviceCount, int*, count)
CUDA_STUB_1(cudaError_t, cudaSetDevice, int, dev)
CUDA_STUB_2(cudaError_t, cudaGetDeviceProperties, cudaDeviceProp*, prop, int, dev)
CUDA_STUB_2(cudaError_t, cudaDeviceSetLimit, cudaLimit, limit, size_t, value)
CUDA_STUB_2(cudaError_t, cudaDeviceGetStreamPriorityRange, int*, leastPriority, int*, greatestPriority)

// -- Memory ------------------------------------------------------------------
CUDA_STUB_2(cudaError_t, cudaMalloc, void**, devPtr, size_t, size)
CUDA_STUB_1(cudaError_t, cudaFree, void*, devPtr)
CUDA_STUB_3(cudaError_t, cudaMemset, void*, devPtr, int, value, size_t, count)
CUDA_STUB_2(cudaError_t, cudaMemGetInfo, size_t*, free, size_t*, total)
CUDA_STUB_4(cudaError_t, cudaMemcpy, void*, dst, const void*, src, size_t, count, cudaMemcpyKind, kind)
CUDA_STUB_5(cudaError_t, cudaMemcpyAsync, void*, dst, const void*, src, size_t, count, cudaMemcpyKind, kind, cudaStream_t, stream)

// -- Streams -----------------------------------------------------------------
CUDA_STUB_1(cudaError_t, cudaStreamCreate, cudaStream_t*, stream)
CUDA_STUB_3(cudaError_t, cudaStreamCreateWithPriority, cudaStream_t*, stream, unsigned int, flags, int, priority)
CUDA_STUB_1(cudaError_t, cudaStreamDestroy, cudaStream_t, stream)
CUDA_STUB_1(cudaError_t, cudaStreamSynchronize, cudaStream_t, stream)
CUDA_STUB_3(cudaError_t, cudaStreamWaitEvent, cudaStream_t, stream, cudaEvent_t, event, unsigned int, flags)

// -- Events ------------------------------------------------------------------
CUDA_STUB_1(cudaError_t, cudaEventCreate, cudaEvent_t*, event)
CUDA_STUB_2(cudaError_t, cudaEventCreateWithFlags, cudaEvent_t*, event, unsigned int, flags)
CUDA_STUB_2(cudaError_t, cudaEventRecord, cudaEvent_t, event, cudaStream_t, stream)
CUDA_STUB_1(cudaError_t, cudaEventDestroy, cudaEvent_t, event)

// -- Error reporting ---------------------------------------------------------
CUDA_STUB_1(const char*, cudaGetErrorString, cudaError_t, error)

// Zero-argument stubs are written out by hand because every CUDA_STUB_<N>
// macro needs at least one parameter. Do NOT also add them to the macro list
// above — that would define them twice.
cudaError_t cudaDeviceSynchronize()
{
    assert(NvDynLoader::pfn_cudaDeviceSynchronize);
    return NvDynLoader::pfn_cudaDeviceSynchronize();
}

cudaError_t cudaGetLastError()
{
    assert(NvDynLoader::pfn_cudaGetLastError);
    return NvDynLoader::pfn_cudaGetLastError();
}

#undef CUDA_STUB_1
#undef CUDA_STUB_2
#undef CUDA_STUB_3
#undef CUDA_STUB_4
#undef CUDA_STUB_5

} // extern "C"

#endif // NV_DYNAMIC_CUDA
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Template helper — type-safe GetProcAddress
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
template<typename Fn>
|
|||
|
|
bool NvDynLoader::Bind(NvLibHandle h, const char* sym, Fn*& pfn)
|
|||
|
|
{
|
|||
|
|
pfn = reinterpret_cast<Fn*>(NV_GET_PROC(h, sym));
|
|||
|
|
return pfn != nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// ANSCENTER shared directory — always checked first, ahead of system PATH.
|
|||
|
|
//
|
|||
|
|
// Windows : C:\ProgramData\ANSCENTER\Shared\
|
|||
|
|
// Linux : /usr/share/ANSCENTER/Shared/
|
|||
|
|
//
|
|||
|
|
// Drop any versioned NVIDIA DLL into this directory and it will be discovered
|
|||
|
|
// automatically on the next application launch — no project rebuild required.
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Platform-specific ANSCENTER shared directory, including the trailing path
// separator so file names can be appended directly.
static const std::string kAnsDir{
#ifdef _WIN32
    "C:\\ProgramData\\ANSCENTER\\Shared\\"
#else
    "/usr/share/ANSCENTER/Shared/"
#endif
};
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// DLL candidate lists
|
|||
|
|
// Priority 1 — ANSCENTER shared directory (full absolute path, newest first)
|
|||
|
|
// Priority 2 — System PATH / LD_LIBRARY_PATH fallback (bare name, newest first)
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
std::vector<std::string> NvDynLoader::TrtCandidates()
|
|||
|
|
{
|
|||
|
|
std::vector<std::string> v;
|
|||
|
|
// ── Priority 1: ANSCENTER shared directory ────────────────────────────────
|
|||
|
|
for (int major = 20; major >= 8; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back(kAnsDir + "nvinfer_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back(kAnsDir + "libnvinfer.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
// ── Priority 2: System PATH / LD_LIBRARY_PATH ─────────────────────────────
|
|||
|
|
for (int major = 20; major >= 8; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back("nvinfer_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back("libnvinfer.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
return v;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::vector<std::string> NvDynLoader::OnnxCandidates()
|
|||
|
|
{
|
|||
|
|
std::vector<std::string> v;
|
|||
|
|
// ── Priority 1: ANSCENTER shared directory ────────────────────────────────
|
|||
|
|
for (int major = 20; major >= 8; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back(kAnsDir + "nvonnxparser_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back(kAnsDir + "libnvonnxparser.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
// ── Priority 2: System PATH / LD_LIBRARY_PATH ─────────────────────────────
|
|||
|
|
for (int major = 20; major >= 8; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back("nvonnxparser_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back("libnvonnxparser.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
return v;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::vector<std::string> NvDynLoader::CudnnCandidates()
|
|||
|
|
{
|
|||
|
|
std::vector<std::string> v;
|
|||
|
|
// ── cuDNN DLL/SO naming conventions ───────────────────────────────────────
|
|||
|
|
// Windows: cudnn64_<major>.dll (e.g. cudnn64_8.dll, cudnn64_9.dll)
|
|||
|
|
// Linux: libcudnn.so.<major> (e.g. libcudnn.so.8, libcudnn.so.9)
|
|||
|
|
//
|
|||
|
|
// Note: cuDNN 9 on Windows ships as multiple split DLLs
|
|||
|
|
// (cudnn_ops.dll, cudnn_cnn.dll, etc.) but cudnn64_9.dll remains the
|
|||
|
|
// main entry-point / compatibility shim. The split DLLs are resolved
|
|||
|
|
// automatically once the ANSCENTER directory is added to the search path
|
|||
|
|
// via LOAD_WITH_ALTERED_SEARCH_PATH (Windows) or RTLD_GLOBAL (Linux).
|
|||
|
|
//
|
|||
|
|
// ── Priority 1: ANSCENTER shared directory ────────────────────────────────
|
|||
|
|
for (int major = 12; major >= 7; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back(kAnsDir + "cudnn64_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back(kAnsDir + "libcudnn.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
// ── Priority 2: System PATH / LD_LIBRARY_PATH ─────────────────────────────
|
|||
|
|
for (int major = 12; major >= 7; --major) {
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
v.push_back("cudnn64_" + std::to_string(major) + ".dll");
|
|||
|
|
#else
|
|||
|
|
v.push_back("libcudnn.so." + std::to_string(major));
|
|||
|
|
#endif
|
|||
|
|
}
|
|||
|
|
return v;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
// Builds the ordered list of CUDA runtime library names to try.
//
// File naming used here:
//   Windows : cudart64_<major>.dll for CUDA 12..15 (major only),
//             cudart64_11<minor>.dll for CUDA 11.x (major + minor digit),
//             cudart64.dll as a non-versioned alias.
//   Linux   : libcudart.so.<major> for CUDA 11..15,
//             libcudart.so.11.<minor> as a CUDA 11.x fallback.
//
// Within each search root the newest version is listed first.
std::vector<std::string> NvDynLoader::CudaRtCandidates()
{
    // Search roots in priority order: the ANSCENTER shared directory first,
    // then an empty prefix (bare name, resolved via PATH / LD_LIBRARY_PATH).
    const std::string roots[] = { kAnsDir, std::string() };

    std::vector<std::string> names;
    for (const std::string& root : roots) {
#ifdef _WIN32
        for (int major = 15; major >= 12; --major) {
            names.push_back(root + "cudart64_" + std::to_string(major) + ".dll");
        }
        for (int minor = 9; minor >= 0; --minor) {
            names.push_back(root + "cudart64_11" + std::to_string(minor) + ".dll");
        }
        names.push_back(root + "cudart64.dll");
#else
        for (int major = 15; major >= 11; --major) {
            names.push_back(root + "libcudart.so." + std::to_string(major));
        }
        for (int minor = 9; minor >= 0; --minor) {
            names.push_back(root + "libcudart.so.11." + std::to_string(minor));
        }
#endif
    }
    return names;
}
#endif // NV_DYNAMIC_CUDA
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// FindAndLoad — iterate candidates, return first successful handle
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Tries each candidate in order and returns the handle of the first library
// that loads.
//
//   candidates : library paths / names, highest priority first.
//   loadedPath : set to the winning candidate; left untouched when none loads.
//   verbose    : when true, prints the loaded name to std::cout.
//   globalLoad : Linux only — selects RTLD_GLOBAL (symbols visible to libraries
//                loaded later) instead of RTLD_LOCAL (symbols kept private).
//                Unused on Windows.
//   returns nullptr when no candidate could be loaded.
NvLibHandle NvDynLoader::FindAndLoad(const std::vector<std::string>& candidates,
                                     std::string& loadedPath, bool verbose,
                                     bool globalLoad)
{
#ifdef _WIN32
    // Windows loads through NV_LOAD_LIB (defined in the header), which takes
    // no visibility flag, so globalLoad has nothing to select.
    (void)globalLoad;
#else
    const int openFlags = RTLD_LAZY | (globalLoad ? RTLD_GLOBAL : RTLD_LOCAL);
#endif

    for (auto it = candidates.begin(); it != candidates.end(); ++it) {
        const std::string& candidate = *it;
#ifdef _WIN32
        NvLibHandle lib = NV_LOAD_LIB(candidate.c_str());
#else
        NvLibHandle lib = ::dlopen(candidate.c_str(), openFlags);
#endif
        if (!lib) {
            continue;   // not present / not loadable — try the next candidate
        }

        loadedPath = candidate;
        if (verbose) {
            std::cout << "NvDynLoader: loaded " << candidate << std::endl;
        }
        return lib;
    }
    return nullptr;
}
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Initialize
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Held for the full duration of Initialize() and Shutdown(), so the two never
// run concurrently with each other or with themselves.
static std::mutex g_initMutex;
|
|||
|
|
|
|||
|
|
bool NvDynLoader::Initialize(bool verbose)
|
|||
|
|
{
|
|||
|
|
std::lock_guard<std::mutex> lock(g_initMutex);
|
|||
|
|
if (s_initialized) return true;
|
|||
|
|
|
|||
|
|
// If a previous Initialize() call partially loaded some libraries before
|
|||
|
|
// failing (s_initialized stays false but handles may be non-null), release
|
|||
|
|
// them now so we can retry cleanly without leaking handles.
|
|||
|
|
// The if(h) guards make this a safe no-op on the very first call.
|
|||
|
|
if (s_hOnnx) { NV_FREE_LIB(s_hOnnx); s_hOnnx = nullptr; }
|
|||
|
|
if (s_hTrt) { NV_FREE_LIB(s_hTrt); s_hTrt = nullptr; }
|
|||
|
|
if (s_hCudnn) { NV_FREE_LIB(s_hCudnn); s_hCudnn = nullptr; }
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
|
|||
|
|
if (s_hCuda) { NV_FREE_LIB(s_hCuda); s_hCuda = nullptr; }
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
bool ok = true;
|
|||
|
|
|
|||
|
|
if (verbose) {
|
|||
|
|
std::cout << "==================================================" << std::endl;
|
|||
|
|
std::cout << "NvDynLoader: discovering NVIDIA libraries..." << std::endl;
|
|||
|
|
std::cout << " Shared dir : " << kAnsDir << std::endl;
|
|||
|
|
std::cout << "==================================================" << std::endl;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ── STEP 1: CUDA Runtime (NV_DYNAMIC_CUDA only) ───────────────────────────
|
|||
|
|
// Loaded first so cuDNN and TRT find CUDA symbols already resident in the
|
|||
|
|
// process image when they are opened.
|
|||
|
|
// Linux: globalLoad=true (RTLD_GLOBAL) makes cudart symbols process-wide.
|
|||
|
|
// Windows: LoadLibraryExA is always process-global; globalLoad is a no-op.
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
|
|||
|
|
s_hCuda = FindAndLoad(CudaRtCandidates(), s_cudaPath, verbose, /*globalLoad=*/true);
|
|||
|
|
if (!s_hCuda) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: No CUDA runtime DLL found. "
|
|||
|
|
"Searched " << kAnsDir << " and system PATH." << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else {
|
|||
|
|
bool b = true;
|
|||
|
|
b &= Bind(s_hCuda, "cudaGetDeviceCount", pfn_cudaGetDeviceCount);
|
|||
|
|
b &= Bind(s_hCuda, "cudaSetDevice", pfn_cudaSetDevice);
|
|||
|
|
// cudaGetDeviceProperties: CUDA 12 renamed the export to '_v2'; CUDA 11
|
|||
|
|
// uses the plain name. Try the CUDA 12+ symbol first, fall back to
|
|||
|
|
// the CUDA 11 name. IMPORTANT: do NOT `b &=` the first attempt before
|
|||
|
|
// the fallback — that would set b=false on CUDA 11 even though the
|
|||
|
|
// plain-name fallback succeeds, causing a spurious init failure.
|
|||
|
|
Bind(s_hCuda, "cudaGetDeviceProperties_v2", pfn_cudaGetDeviceProperties); // CUDA 12+
|
|||
|
|
if (!pfn_cudaGetDeviceProperties)
|
|||
|
|
Bind(s_hCuda, "cudaGetDeviceProperties", pfn_cudaGetDeviceProperties); // CUDA 11
|
|||
|
|
b &= (pfn_cudaGetDeviceProperties != nullptr);
|
|||
|
|
b &= Bind(s_hCuda, "cudaDeviceSetLimit", pfn_cudaDeviceSetLimit);
|
|||
|
|
b &= Bind(s_hCuda, "cudaDeviceSynchronize", pfn_cudaDeviceSynchronize);
|
|||
|
|
b &= Bind(s_hCuda, "cudaDeviceGetStreamPriorityRange", pfn_cudaDeviceGetStreamPriorityRange);
|
|||
|
|
b &= Bind(s_hCuda, "cudaMalloc", pfn_cudaMalloc);
|
|||
|
|
b &= Bind(s_hCuda, "cudaFree", pfn_cudaFree);
|
|||
|
|
b &= Bind(s_hCuda, "cudaMemset", pfn_cudaMemset);
|
|||
|
|
b &= Bind(s_hCuda, "cudaMemGetInfo", pfn_cudaMemGetInfo);
|
|||
|
|
b &= Bind(s_hCuda, "cudaMemcpy", pfn_cudaMemcpy);
|
|||
|
|
b &= Bind(s_hCuda, "cudaMemcpyAsync", pfn_cudaMemcpyAsync);
|
|||
|
|
b &= Bind(s_hCuda, "cudaStreamCreate", pfn_cudaStreamCreate);
|
|||
|
|
b &= Bind(s_hCuda, "cudaStreamCreateWithPriority", pfn_cudaStreamCreateWithPriority);
|
|||
|
|
b &= Bind(s_hCuda, "cudaStreamDestroy", pfn_cudaStreamDestroy);
|
|||
|
|
b &= Bind(s_hCuda, "cudaStreamSynchronize", pfn_cudaStreamSynchronize);
|
|||
|
|
b &= Bind(s_hCuda, "cudaStreamWaitEvent", pfn_cudaStreamWaitEvent);
|
|||
|
|
b &= Bind(s_hCuda, "cudaEventCreate", pfn_cudaEventCreate);
|
|||
|
|
b &= Bind(s_hCuda, "cudaEventCreateWithFlags", pfn_cudaEventCreateWithFlags);
|
|||
|
|
b &= Bind(s_hCuda, "cudaEventRecord", pfn_cudaEventRecord);
|
|||
|
|
b &= Bind(s_hCuda, "cudaEventDestroy", pfn_cudaEventDestroy);
|
|||
|
|
b &= Bind(s_hCuda, "cudaGetErrorString", pfn_cudaGetErrorString);
|
|||
|
|
b &= Bind(s_hCuda, "cudaGetLastError", pfn_cudaGetLastError);
|
|||
|
|
if (!b) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: One or more CUDA entry points not found in "
|
|||
|
|
<< s_cudaPath << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
#endif // NV_DYNAMIC_CUDA
|
|||
|
|
|
|||
|
|
// ── STEP 2: cuDNN pre-load ────────────────────────────────────────────────
|
|||
|
|
// Must be loaded BEFORE TRT so the OS can resolve TRT's cuDNN dependency:
|
|||
|
|
//
|
|||
|
|
// Windows – LoadLibraryExA + LOAD_WITH_ALTERED_SEARCH_PATH causes Windows
|
|||
|
|
// to use the DLL's own directory when resolving its transitive
|
|||
|
|
// dependencies. Pre-loading cuDNN from ANSCENTER dir also puts
|
|||
|
|
// it in the process DLL table so TRT's import loader finds it.
|
|||
|
|
//
|
|||
|
|
// Linux – RTLD_GLOBAL exports cuDNN symbols process-wide so TRT's
|
|||
|
|
// DT_NEEDED resolution succeeds without LD_LIBRARY_PATH changes.
|
|||
|
|
// cuDNN 9 also ships split DLLs (libcudnn_ops.so.9, etc.); those
|
|||
|
|
// are found automatically once libcudnn.so.9 is globally loaded.
|
|||
|
|
//
|
|||
|
|
// Not finding cuDNN is a WARNING, not a fatal error — TRT may still locate
|
|||
|
|
// it via LD_LIBRARY_PATH / system PATH at load time.
|
|||
|
|
s_hCudnn = FindAndLoad(CudnnCandidates(), s_cudnnPath, verbose, /*globalLoad=*/true);
|
|||
|
|
if (!s_hCudnn && verbose) {
|
|||
|
|
std::cout << "NvDynLoader: WARNING — cuDNN not found in " << kAnsDir
|
|||
|
|
<< "; TRT will search system PATH / LD_LIBRARY_PATH." << std::endl;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ── STEP 3: TensorRT core ─────────────────────────────────────────────────
|
|||
|
|
s_hTrt = FindAndLoad(TrtCandidates(), s_trtPath, verbose);
|
|||
|
|
if (!s_hTrt) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: No TensorRT DLL found. "
|
|||
|
|
"Searched " << kAnsDir << " and system PATH." << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else {
|
|||
|
|
// Extract major version from the loaded library path.
|
|||
|
|
// Windows : "C:\...\nvinfer_10.dll" → digits between last '_' and last '.'
|
|||
|
|
// Linux : ".../libnvinfer.so.10" → digits after last '.'
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
{
|
|||
|
|
const size_t under = s_trtPath.find_last_of('_');
|
|||
|
|
const size_t dot = s_trtPath.find_last_of('.');
|
|||
|
|
if (under != std::string::npos && dot != std::string::npos && dot > under) {
|
|||
|
|
try { s_trtMajor = std::stoi(s_trtPath.substr(under + 1, dot - under - 1)); }
|
|||
|
|
catch (...) { /* non-numeric — leave s_trtMajor at 0 */ }
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
#else
|
|||
|
|
{
|
|||
|
|
const size_t dot = s_trtPath.find_last_of('.');
|
|||
|
|
if (dot != std::string::npos && dot + 1 < s_trtPath.size()) {
|
|||
|
|
try { s_trtMajor = std::stoi(s_trtPath.substr(dot + 1)); }
|
|||
|
|
catch (...) { /* non-numeric — leave s_trtMajor at 0 */ }
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
bool b = true;
|
|||
|
|
b &= Bind(s_hTrt, "createInferBuilder_INTERNAL", pfn_createInferBuilder_INTERNAL);
|
|||
|
|
b &= Bind(s_hTrt, "createInferRuntime_INTERNAL", pfn_createInferRuntime_INTERNAL);
|
|||
|
|
Bind(s_hTrt, "createInferRefitter_INTERNAL", pfn_createInferRefitter_INTERNAL); // optional
|
|||
|
|
if (!b) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: Required TRT entry points not found in "
|
|||
|
|
<< s_trtPath << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else if (verbose) {
|
|||
|
|
std::cout << "NvDynLoader: TRT major version = " << s_trtMajor << std::endl;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ── STEP 4: ONNX parser ───────────────────────────────────────────────────
|
|||
|
|
s_hOnnx = FindAndLoad(OnnxCandidates(), s_onnxPath, verbose);
|
|||
|
|
if (!s_hOnnx) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: No nvonnxparser DLL found. "
|
|||
|
|
"Searched " << kAnsDir << " and system PATH." << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else {
|
|||
|
|
if (!Bind(s_hOnnx, "createNvOnnxParser_INTERNAL", pfn_createNvOnnxParser_INTERNAL)) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: createNvOnnxParser_INTERNAL not found in "
|
|||
|
|
<< s_onnxPath << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else {
|
|||
|
|
// TRT 10+ optional symbols — absent on TRT 8/9; missing is not an error.
|
|||
|
|
Bind(s_hOnnx, "createNvOnnxParserRefitter_INTERNAL", pfn_createNvOnnxParserRefitter_INTERNAL);
|
|||
|
|
Bind(s_hOnnx, "getNvOnnxParserVersion", pfn_getNvOnnxParserVersion);
|
|||
|
|
|
|||
|
|
// Cross-check: TRT and ONNX-parser must share the same major version.
|
|||
|
|
// Mismatched majors (e.g. nvinfer_10 + nvonnxparser_11) compile fine
|
|||
|
|
// but crash at runtime when the ONNX parser tries to populate a TRT
|
|||
|
|
// network object built by a different major version.
|
|||
|
|
int onnxMajor = 0;
|
|||
|
|
#ifdef _WIN32
|
|||
|
|
{
|
|||
|
|
const size_t under = s_onnxPath.find_last_of('_');
|
|||
|
|
const size_t dot = s_onnxPath.find_last_of('.');
|
|||
|
|
if (under != std::string::npos && dot != std::string::npos && dot > under)
|
|||
|
|
try { onnxMajor = std::stoi(s_onnxPath.substr(under + 1, dot - under - 1)); }
|
|||
|
|
catch (...) {}
|
|||
|
|
}
|
|||
|
|
#else
|
|||
|
|
{
|
|||
|
|
const size_t dot = s_onnxPath.find_last_of('.');
|
|||
|
|
if (dot != std::string::npos && dot + 1 < s_onnxPath.size())
|
|||
|
|
try { onnxMajor = std::stoi(s_onnxPath.substr(dot + 1)); }
|
|||
|
|
catch (...) {}
|
|||
|
|
}
|
|||
|
|
#endif
|
|||
|
|
if (onnxMajor > 0 && s_trtMajor > 0 && onnxMajor != s_trtMajor) {
|
|||
|
|
std::cout << "NvDynLoader ERROR: TRT major version (" << s_trtMajor
|
|||
|
|
<< ") != ONNX-parser major version (" << onnxMajor << ").\n"
|
|||
|
|
<< " TRT : " << s_trtPath << "\n"
|
|||
|
|
<< " ONNX : " << s_onnxPath << "\n"
|
|||
|
|
<< " Replace both DLLs with matching versions in "
|
|||
|
|
<< kAnsDir << std::endl;
|
|||
|
|
ok = false;
|
|||
|
|
} else if (verbose && onnxMajor > 0) {
|
|||
|
|
std::cout << "NvDynLoader: ONNX-parser major version = " << onnxMajor << std::endl;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
s_initialized = ok;
|
|||
|
|
|
|||
|
|
if (verbose) {
|
|||
|
|
std::cout << "NvDynLoader: "
|
|||
|
|
<< (ok ? "initialised successfully." : "FAILED — check errors above.")
|
|||
|
|
<< std::endl;
|
|||
|
|
std::cout << "==================================================" << std::endl;
|
|||
|
|
}
|
|||
|
|
return ok;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
// Shutdown
|
|||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
void NvDynLoader::Shutdown()
|
|||
|
|
{
|
|||
|
|
std::lock_guard<std::mutex> lock(g_initMutex);
|
|||
|
|
|
|||
|
|
// NOTE: No early-return guard on s_initialized here.
|
|||
|
|
//
|
|||
|
|
// Shutdown() must always release whatever handles are currently open,
|
|||
|
|
// including handles left behind by a *partially*-successful Initialize()
|
|||
|
|
// call (where s_initialized == false but some DLLs were already opened).
|
|||
|
|
// Every FreeLibrary / dlclose call is guarded by if(h), so this function
|
|||
|
|
// is safe to call on a fully- or partially-initialised instance, and also
|
|||
|
|
// safe to call multiple times (idempotent).
|
|||
|
|
|
|||
|
|
// ── Null all entry-point pointers first ───────────────────────────────────
|
|||
|
|
// Prevents stubs from jumping into freed library memory in the window
|
|||
|
|
// between Shutdown() and a subsequent Initialize() call.
|
|||
|
|
pfn_createInferBuilder_INTERNAL = nullptr;
|
|||
|
|
pfn_createInferRuntime_INTERNAL = nullptr;
|
|||
|
|
pfn_createInferRefitter_INTERNAL = nullptr;
|
|||
|
|
pfn_createNvOnnxParser_INTERNAL = nullptr;
|
|||
|
|
pfn_createNvOnnxParserRefitter_INTERNAL = nullptr;
|
|||
|
|
pfn_getNvOnnxParserVersion = nullptr;
|
|||
|
|
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
|
|||
|
|
// Null every CUDA function pointer so the assert() in each stub fires
|
|||
|
|
// immediately if a CUDA call is made after Shutdown() without a new
|
|||
|
|
// Initialize(), rather than silently calling into freed DLL memory.
|
|||
|
|
pfn_cudaGetDeviceCount = nullptr;
|
|||
|
|
pfn_cudaSetDevice = nullptr;
|
|||
|
|
pfn_cudaGetDeviceProperties = nullptr;
|
|||
|
|
pfn_cudaDeviceSetLimit = nullptr;
|
|||
|
|
pfn_cudaDeviceSynchronize = nullptr;
|
|||
|
|
pfn_cudaDeviceGetStreamPriorityRange = nullptr;
|
|||
|
|
pfn_cudaMalloc = nullptr;
|
|||
|
|
pfn_cudaFree = nullptr;
|
|||
|
|
pfn_cudaMemset = nullptr;
|
|||
|
|
pfn_cudaMemGetInfo = nullptr;
|
|||
|
|
pfn_cudaMemcpy = nullptr;
|
|||
|
|
pfn_cudaMemcpyAsync = nullptr;
|
|||
|
|
pfn_cudaStreamCreate = nullptr;
|
|||
|
|
pfn_cudaStreamCreateWithPriority = nullptr;
|
|||
|
|
pfn_cudaStreamDestroy = nullptr;
|
|||
|
|
pfn_cudaStreamSynchronize = nullptr;
|
|||
|
|
pfn_cudaStreamWaitEvent = nullptr;
|
|||
|
|
pfn_cudaEventCreate = nullptr;
|
|||
|
|
pfn_cudaEventCreateWithFlags = nullptr;
|
|||
|
|
pfn_cudaEventRecord = nullptr;
|
|||
|
|
pfn_cudaEventDestroy = nullptr;
|
|||
|
|
pfn_cudaGetErrorString = nullptr;
|
|||
|
|
pfn_cudaGetLastError = nullptr;
|
|||
|
|
#endif // NV_DYNAMIC_CUDA
|
|||
|
|
|
|||
|
|
// ── Release handles in reverse load order ─────────────────────────────────
|
|||
|
|
// ONNX parser → depends on TRT
|
|||
|
|
// TRT → depends on cuDNN + CUDA RT
|
|||
|
|
// cuDNN → depends on CUDA RT
|
|||
|
|
// CUDA RT → base dependency
|
|||
|
|
if (s_hOnnx) { NV_FREE_LIB(s_hOnnx); s_hOnnx = nullptr; }
|
|||
|
|
if (s_hTrt) { NV_FREE_LIB(s_hTrt); s_hTrt = nullptr; }
|
|||
|
|
if (s_hCudnn) { NV_FREE_LIB(s_hCudnn); s_hCudnn = nullptr; }
|
|||
|
|
#ifdef NV_DYNAMIC_CUDA
|
|||
|
|
if (s_hCuda) { NV_FREE_LIB(s_hCuda); s_hCuda = nullptr; }
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
// ── Reset version / path state ────────────────────────────────────────────
|
|||
|
|
// A subsequent Initialize() call will re-discover fresh paths and versions.
|
|||
|
|
s_trtMajor = 0;
|
|||
|
|
s_trtPath.clear();
|
|||
|
|
s_onnxPath.clear();
|
|||
|
|
s_cudaPath.clear();
|
|||
|
|
s_cudnnPath.clear();
|
|||
|
|
s_initialized = false;
|
|||
|
|
}
|