#pragma once
// ============================================================================
// NvDynLoader -- Runtime discovery and loading of NVIDIA TensorRT / CUDA DLLs
//
// Moved from TensorRTAPI/ to ANSLibsLoader/ for centralized library management.
// Now exported via ANSLIBS_API from ANSLibsLoader.dll.
//
// PROBLEM SOLVED
// --------------
// Linking against nvinfer_10.lib / nvonnxparser_10.lib hard-codes the major
// version into the binary's import table. Upgrading TRT 10 -> 11 then forces
// every project to update its .lib references and relink.
//
// SOLUTION
// --------
// NvDynLoader provides the extern "C" symbols that TRT / ONNX-parser
// inline wrappers call (createInferBuilder_INTERNAL, etc.) as thin stubs
// compiled directly into ANSLibsLoader.dll. At runtime the stubs call through
// function pointers into whichever DLL version is actually installed.
//
// All C++ vtable dispatch (IBuilder, IRuntime, IParser methods) continues to
// work correctly because the objects are created by -- and owned by the vtable
// of -- the DLL that was dynamically loaded.
//
// REQUIRED PROJECT CHANGES
// ------------------------
// Consuming projects:
//   REMOVE: nvinfer_10.lib / nvonnxparser_10.lib from linker input
//   ADD:    ANSLibsLoader.lib
//   KEEP:   cudart_static.lib (or use NV_DYNAMIC_CUDA for dynamic CUDA RT)
// ============================================================================
#include "ANSLibsLoader.h"  // ANSLIBS_API
#include "DynLibUtils.h"    // LibHandle

// -- TRT / ONNX-parser API decoration override --------------------------------
// Must appear BEFORE including NvInfer.h / NvOnnxParser.h.
// By default these macros expand to __declspec(dllimport), which would conflict
// with our extern "C" stub definitions in NvDynLoader.cpp. Setting them to
// empty makes all TRT inline-wrapper calls direct, so the linker resolves them
// against our stubs rather than against nvinfer_XX.lib.
//
// The stubs are exported from ANSLibsLoader.dll via the .def file
// (ANSLibsLoader.def), NOT via __declspec(dllexport), to avoid C2375
// linkage conflicts between the NvInfer.h declarations and our definitions.
#ifndef TENSORRTAPI
#  define TENSORRTAPI
#endif
#ifndef NVONNXPARSER_API
#  define NVONNXPARSER_API
#endif

#include <cuda_runtime.h>   // CUDA types (cudaStream_t, cudaDeviceProp, ...)
#include <NvInfer.h>        // TRT types (IBuilder, IRuntime, ...)
#include <NvOnnxParser.h>   // nvonnxparser types

#include <string>
#include <vector>

// ============================================================================
class ANSLIBS_API NvDynLoader
|
|
{
|
|
public:
|
|
// -- Lifecycle -------------------------------------------------------------
|
|
/// Discover and load NVIDIA DLLs at runtime.
|
|
/// Safe to call multiple times -- subsequent calls are no-ops.
|
|
/// @param verbose Print discovery results to stdout.
|
|
/// @returns false if a critical library (TRT or CUDA) could not be loaded.
|
|
[[nodiscard]] static bool Initialize(bool verbose = true);
|
|
|
|
/// Release all loaded library handles. Call at application exit.
|
|
static void Shutdown();
|
|
|
|
[[nodiscard]] static bool IsInitialized() noexcept { return s_initialized; }
|
|
|
|
// -- Informational ---------------------------------------------------------
|
|
[[nodiscard]] static const std::string& TrtDllPath() noexcept { return s_trtPath; }
|
|
[[nodiscard]] static const std::string& OnnxDllPath() noexcept { return s_onnxPath; }
|
|
[[nodiscard]] static const std::string& CudaDllPath() noexcept { return s_cudaPath; }
|
|
[[nodiscard]] static const std::string& CudnnDllPath()noexcept { return s_cudnnPath; }
|
|
[[nodiscard]] static int TrtMajor() noexcept { return s_trtMajor; }
|
|
|
|
// -- TRT factory pointers ---------------------------------------------------
|
|
using PfnBuilder = void*(void* logger, int32_t version) noexcept;
|
|
using PfnRuntime = void*(void* logger, int32_t version) noexcept;
|
|
using PfnRefitter = void*(void* engine, void* logger, int32_t version) noexcept;
|
|
using PfnParser = void*(void* network, void* logger, int32_t version) noexcept;
|
|
using PfnParserRefitter = void*(void* refitter, void* logger, int32_t version) noexcept;
|
|
using PfnGetParserVersion = int() noexcept;
|
|
|
|
static PfnBuilder* pfn_createInferBuilder_INTERNAL;
|
|
static PfnRuntime* pfn_createInferRuntime_INTERNAL;
|
|
static PfnRefitter* pfn_createInferRefitter_INTERNAL;
|
|
static PfnParser* pfn_createNvOnnxParser_INTERNAL;
|
|
static PfnParserRefitter* pfn_createNvOnnxParserRefitter_INTERNAL;
|
|
static PfnGetParserVersion* pfn_getNvOnnxParserVersion;
|
|
|
|
// -- CUDA function pointers (populated only with NV_DYNAMIC_CUDA) -----------
|
|
#ifdef NV_DYNAMIC_CUDA
|
|
static cudaError_t (*pfn_cudaGetDeviceCount) (int*);
|
|
static cudaError_t (*pfn_cudaSetDevice) (int);
|
|
static cudaError_t (*pfn_cudaGetDeviceProperties) (cudaDeviceProp*, int);
|
|
static cudaError_t (*pfn_cudaDeviceSetLimit) (cudaLimit, size_t);
|
|
static cudaError_t (*pfn_cudaDeviceSynchronize) ();
|
|
static cudaError_t (*pfn_cudaDeviceGetStreamPriorityRange)(int*, int*);
|
|
static cudaError_t (*pfn_cudaMalloc) (void**, size_t);
|
|
static cudaError_t (*pfn_cudaFree) (void*);
|
|
static cudaError_t (*pfn_cudaMemset) (void*, int, size_t);
|
|
static cudaError_t (*pfn_cudaMemGetInfo) (size_t*, size_t*);
|
|
static cudaError_t (*pfn_cudaMemcpy) (void*, const void*, size_t, cudaMemcpyKind);
|
|
static cudaError_t (*pfn_cudaMemcpyAsync) (void*, const void*, size_t, cudaMemcpyKind, cudaStream_t);
|
|
static cudaError_t (*pfn_cudaStreamCreate) (cudaStream_t*);
|
|
static cudaError_t (*pfn_cudaStreamCreateWithPriority) (cudaStream_t*, unsigned int, int);
|
|
static cudaError_t (*pfn_cudaStreamDestroy) (cudaStream_t);
|
|
static cudaError_t (*pfn_cudaStreamSynchronize) (cudaStream_t);
|
|
static cudaError_t (*pfn_cudaStreamWaitEvent) (cudaStream_t, cudaEvent_t, unsigned int);
|
|
static cudaError_t (*pfn_cudaEventCreate) (cudaEvent_t*);
|
|
static cudaError_t (*pfn_cudaEventCreateWithFlags) (cudaEvent_t*, unsigned int);
|
|
static cudaError_t (*pfn_cudaEventRecord) (cudaEvent_t, cudaStream_t);
|
|
static cudaError_t (*pfn_cudaEventDestroy) (cudaEvent_t);
|
|
static const char* (*pfn_cudaGetErrorString) (cudaError_t);
|
|
static cudaError_t (*pfn_cudaGetLastError) ();
|
|
#endif // NV_DYNAMIC_CUDA
|
|
|
|
private:
|
|
static bool s_initialized;
|
|
static int s_trtMajor;
|
|
static std::string s_trtPath;
|
|
static std::string s_onnxPath;
|
|
static std::string s_cudaPath;
|
|
static std::string s_cudnnPath;
|
|
static LibHandle s_hTrt;
|
|
static LibHandle s_hOnnx;
|
|
static LibHandle s_hCuda;
|
|
static LibHandle s_hCudnn;
|
|
|
|
// Candidate DLL / SO name lists.
|
|
static std::vector<std::string> TrtCandidates();
|
|
static std::vector<std::string> OnnxCandidates();
|
|
static std::vector<std::string> CudnnCandidates();
|
|
static std::vector<std::string> CudaRtCandidates();
|
|
};