core/ANSLibsLoader/include/NvDynLoader.h

#pragma once
// ============================================================================
// NvDynLoader -- Runtime discovery and loading of NVIDIA TensorRT / CUDA DLLs
//
// Moved from TensorRTAPI/ to ANSLibsLoader/ for centralized library management.
// Now exported via ANSLIBS_API from ANSLibsLoader.dll.
//
//  PROBLEM SOLVED
//  --------------
//  Linking against nvinfer_10.lib / nvonnxparser_10.lib hard-codes the major
//  version into the binary's import table.  Upgrading TRT 10 -> 11 then forces
//  every project to update its .lib references and relink.
//
//  SOLUTION
//  --------
//  NvDynLoader provides the extern "C" entry points that the TRT / ONNX-parser
//  inline wrappers call (createInferBuilder_INTERNAL, etc.) as thin stubs
//  compiled directly into ANSLibsLoader.dll.  At runtime the stubs call through
//  function pointers into whichever DLL version is actually installed.
//
//  All C++ vtable dispatch (IBuilder, IRuntime, IParser methods) continues to
//  work correctly because the objects are created by -- and owned by the vtable
//  of -- the DLL that was dynamically loaded.
//
//  REQUIRED PROJECT CHANGES
//  ------------------------
//  Consuming projects:
//    REMOVE:  nvinfer_10.lib / nvonnxparser_10.lib from linker input
//    ADD:     ANSLibsLoader.lib
//    KEEP:    cudart_static.lib  (or use NV_DYNAMIC_CUDA for dynamic CUDA RT)
// ============================================================================

#include "ANSLibsLoader.h"   // ANSLIBS_API
#include "DynLibUtils.h"     // LibHandle

// -- TRT / ONNX-parser API decoration override --------------------------------
// Must appear BEFORE including NvInfer.h / NvOnnxParser.h.
// By default these macros expand to __declspec(dllimport), which would conflict
// with our extern "C" stub definitions in NvDynLoader.cpp.  Defining them as
// empty makes all TRT inline-wrapper calls direct, so the linker resolves them
// against our stubs rather than against nvinfer_XX.lib.
//
// The stubs are exported from ANSLibsLoader.dll via the .def file
// (ANSLibsLoader.def), NOT via __declspec(dllexport), to avoid C2375
// linkage conflicts between the NvInfer.h declarations and our definitions.
//
// NOTE: each macro is defined here only when it is not already defined, so a
// definition supplied earlier in the translation unit (or on the compiler
// command line) is left untouched by this header.
#ifndef TENSORRTAPI
#  define TENSORRTAPI
#endif
#ifndef NVONNXPARSER_API
#  define NVONNXPARSER_API
#endif

#include <cuda_runtime.h>   // CUDA types (cudaStream_t, cudaDeviceProp, ...)
#include <NvInfer.h>        // TRT types (IBuilder, IRuntime, ...)
#include <NvOnnxParser.h>   // nvonnxparser types

#include <string>
#include <vector>

// ============================================================================
class ANSLIBS_API NvDynLoader
{
public:
    // ========================================================================
    // Lifecycle
    // ========================================================================

    /// Locates the NVIDIA runtime libraries and loads them.
    /// May be called repeatedly; calls after the first are no-ops.
    /// @param verbose  When true, discovery results are printed to stdout.
    /// @returns false when a critical library (TRT or CUDA) failed to load.
    [[nodiscard]] static bool Initialize(bool verbose = true);

    /// Releases every library handle held by the loader.
    /// Intended to be called at application exit.
    static void Shutdown();

    /// Returns the current value of the initialisation flag.
    [[nodiscard]] static bool IsInitialized() noexcept
    {
        return s_initialized;
    }

    // ========================================================================
    // Informational accessors (read-only views of the private static state)
    // ========================================================================

    [[nodiscard]] static const std::string& TrtDllPath() noexcept
    {
        return s_trtPath;
    }

    [[nodiscard]] static const std::string& OnnxDllPath() noexcept
    {
        return s_onnxPath;
    }

    [[nodiscard]] static const std::string& CudaDllPath() noexcept
    {
        return s_cudaPath;
    }

    [[nodiscard]] static const std::string& CudnnDllPath() noexcept
    {
        return s_cudnnPath;
    }

    [[nodiscard]] static int TrtMajor() noexcept
    {
        return s_trtMajor;
    }

    // ========================================================================
    // TRT / ONNX-parser factory entry points
    // ========================================================================

    // Function types of the factory entry points.
    using PfnBuilder          = void*(void* logger, int32_t version) noexcept;
    using PfnRuntime          = void*(void* logger, int32_t version) noexcept;
    using PfnRefitter         = void*(void* engine, void* logger, int32_t version) noexcept;
    using PfnParser           = void*(void* network, void* logger, int32_t version) noexcept;
    using PfnParserRefitter   = void*(void* refitter, void* logger, int32_t version) noexcept;
    using PfnGetParserVersion = int() noexcept;

    // One pointer per entry point, typed with the aliases above.
    static PfnBuilder*          pfn_createInferBuilder_INTERNAL;
    static PfnRuntime*          pfn_createInferRuntime_INTERNAL;
    static PfnRefitter*         pfn_createInferRefitter_INTERNAL;
    static PfnParser*           pfn_createNvOnnxParser_INTERNAL;
    static PfnParserRefitter*   pfn_createNvOnnxParserRefitter_INTERNAL;
    static PfnGetParserVersion* pfn_getNvOnnxParserVersion;

    // ========================================================================
    // CUDA runtime function pointers
    // Declared (and populated) only when NV_DYNAMIC_CUDA is defined.
    // ========================================================================
#ifdef NV_DYNAMIC_CUDA
    // Device
    static cudaError_t (*pfn_cudaGetDeviceCount)(int*);
    static cudaError_t (*pfn_cudaSetDevice)(int);
    static cudaError_t (*pfn_cudaGetDeviceProperties)(cudaDeviceProp*, int);
    static cudaError_t (*pfn_cudaDeviceSetLimit)(cudaLimit, size_t);
    static cudaError_t (*pfn_cudaDeviceSynchronize)();
    static cudaError_t (*pfn_cudaDeviceGetStreamPriorityRange)(int*, int*);

    // Memory
    static cudaError_t (*pfn_cudaMalloc)(void**, size_t);
    static cudaError_t (*pfn_cudaFree)(void*);
    static cudaError_t (*pfn_cudaMemset)(void*, int, size_t);
    static cudaError_t (*pfn_cudaMemGetInfo)(size_t*, size_t*);
    static cudaError_t (*pfn_cudaMemcpy)(void*, const void*, size_t, cudaMemcpyKind);
    static cudaError_t (*pfn_cudaMemcpyAsync)(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t);

    // Streams
    static cudaError_t (*pfn_cudaStreamCreate)(cudaStream_t*);
    static cudaError_t (*pfn_cudaStreamCreateWithPriority)(cudaStream_t*, unsigned int, int);
    static cudaError_t (*pfn_cudaStreamDestroy)(cudaStream_t);
    static cudaError_t (*pfn_cudaStreamSynchronize)(cudaStream_t);
    static cudaError_t (*pfn_cudaStreamWaitEvent)(cudaStream_t, cudaEvent_t, unsigned int);

    // Events
    static cudaError_t (*pfn_cudaEventCreate)(cudaEvent_t*);
    static cudaError_t (*pfn_cudaEventCreateWithFlags)(cudaEvent_t*, unsigned int);
    static cudaError_t (*pfn_cudaEventRecord)(cudaEvent_t, cudaStream_t);
    static cudaError_t (*pfn_cudaEventDestroy)(cudaEvent_t);

    // Error reporting
    static const char* (*pfn_cudaGetErrorString)(cudaError_t);
    static cudaError_t (*pfn_cudaGetLastError)();
#endif // NV_DYNAMIC_CUDA

private:
    // Flag returned by IsInitialized().
    static bool s_initialized;

    // TensorRT: library handle, path string and major version.
    static LibHandle   s_hTrt;
    static std::string s_trtPath;
    static int         s_trtMajor;

    // ONNX parser: library handle and path string.
    static LibHandle   s_hOnnx;
    static std::string s_onnxPath;

    // CUDA runtime: library handle and path string.
    static LibHandle   s_hCuda;
    static std::string s_cudaPath;

    // cuDNN: library handle and path string.
    static LibHandle   s_hCudnn;
    static std::string s_cudnnPath;

    // Builders of the candidate DLL / SO name lists.
    static std::vector<std::string> TrtCandidates();
    static std::vector<std::string> OnnxCandidates();
    static std::vector<std::string> CudnnCandidates();
    static std::vector<std::string> CudaRtCandidates();
};
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`#pragma once`
			`// ============================================================================`
			`// NvDynLoader -- Runtime discovery and loading of NVIDIA TensorRT / CUDA DLLs`
			`//`
			`// Moved from TensorRTAPI/ to ANSLibsLoader/ for centralized library management.`
			`// Now exported via ANSLIBS_API from ANSLibsLoader.dll.`
			`//`
			`// PROBLEM SOLVED`
			`// --------------`
			`// Linking against nvinfer_10.lib / nvonnxparser_10.lib hard-codes the major`
			`// version into the binary's import table. Upgrading TRT 10 -> 11 then forces`
			`// every project to update its .lib references and relink.`
			`//`
			`// SOLUTION`
			`// --------`
			`// NvDynLoader provides the three extern "C" symbols that TRT / ONNX-parser`
			`// inline wrappers call (createInferBuilder_INTERNAL, etc.) as thin stubs`
			`// compiled directly into ANSLibsLoader.dll. At runtime the stubs call through`
			`// function pointers into whichever DLL version is actually installed.`
			`//`
			`// All C++ vtable dispatch (IBuilder, IRuntime, IParser methods) continues to`
			`// work correctly because the objects are created by -- and owned by the vtable`
			`// of -- the DLL that was dynamically loaded.`
			`//`
			`// REQUIRED PROJECT CHANGES`
			`// ------------------------`
			`// Consuming projects:`
			`// REMOVE: nvinfer_10.lib / nvonnxparser_10.lib from linker input`
			`// ADD: ANSLibsLoader.lib`
			`// KEEP: cudart_static.lib (or use NV_DYNAMIC_CUDA for dynamic CUDA RT)`
			`// ============================================================================`

			`#include "ANSLibsLoader.h" // ANSLIBS_API`
			`#include "DynLibUtils.h" // LibHandle`

			`// -- TRT / ONNX-parser API decoration override --------------------------------`
			`// Must appear BEFORE including NvInfer.h / NvOnnxParser.h.`
			`// By default these macros expand to __declspec(dllimport), which would conflict`
			`// with our extern "C" stub definitions in NvDynLoader.cpp. Setting them to`
			`// empty makes all TRT inline-wrapper calls direct, so the linker resolves them`
			`// against our stubs rather than against nvinfer_XX.lib.`
			`//`
			`// The stubs are exported from ANSLibsLoader.dll via the .def file`
			`// (ANSLibsLoader.def), NOT via __declspec(dllexport), to avoid C2375`
			`// linkage conflicts between the NvInfer.h declarations and our definitions.`
			`#ifndef TENSORRTAPI`
			`# define TENSORRTAPI`
			`#endif`
			`#ifndef NVONNXPARSER_API`
			`# define NVONNXPARSER_API`
			`#endif`

			`#include <cuda_runtime.h> // CUDA types (cudaStream_t, cudaDeviceProp, ...)`
			`#include <NvInfer.h> // TRT types (IBuilder, IRuntime, ...)`
			`#include <NvOnnxParser.h> // nvonnxparser types`

			`#include <string>`
			`#include <vector>`

			`// ============================================================================`
			`class ANSLIBS_API NvDynLoader`
			`{`
			`public:`
			`// -- Lifecycle -------------------------------------------------------------`
			`/// Discover and load NVIDIA DLLs at runtime.`
			`/// Safe to call multiple times -- subsequent calls are no-ops.`
			`/// @param verbose Print discovery results to stdout.`
			`/// @returns false if a critical library (TRT or CUDA) could not be loaded.`
			`[[nodiscard]] static bool Initialize(bool verbose = true);`

			`/// Release all loaded library handles. Call at application exit.`
			`static void Shutdown();`

			`[[nodiscard]] static bool IsInitialized() noexcept { return s_initialized; }`

			`// -- Informational ---------------------------------------------------------`
			`[[nodiscard]] static const std::string& TrtDllPath() noexcept { return s_trtPath; }`
			`[[nodiscard]] static const std::string& OnnxDllPath() noexcept { return s_onnxPath; }`
			`[[nodiscard]] static const std::string& CudaDllPath() noexcept { return s_cudaPath; }`
			`[[nodiscard]] static const std::string& CudnnDllPath()noexcept { return s_cudnnPath; }`
			`[[nodiscard]] static int TrtMajor() noexcept { return s_trtMajor; }`

			`// -- TRT factory pointers ---------------------------------------------------`
			`using PfnBuilder = void*(void* logger, int32_t version) noexcept;`
			`using PfnRuntime = void*(void* logger, int32_t version) noexcept;`
			`using PfnRefitter = void*(void* engine, void* logger, int32_t version) noexcept;`
			`using PfnParser = void*(void* network, void* logger, int32_t version) noexcept;`
			`using PfnParserRefitter = void*(void* refitter, void* logger, int32_t version) noexcept;`
			`using PfnGetParserVersion = int() noexcept;`

			`static PfnBuilder* pfn_createInferBuilder_INTERNAL;`
			`static PfnRuntime* pfn_createInferRuntime_INTERNAL;`
			`static PfnRefitter* pfn_createInferRefitter_INTERNAL;`
			`static PfnParser* pfn_createNvOnnxParser_INTERNAL;`
			`static PfnParserRefitter* pfn_createNvOnnxParserRefitter_INTERNAL;`
			`static PfnGetParserVersion* pfn_getNvOnnxParserVersion;`

			`// -- CUDA function pointers (populated only with NV_DYNAMIC_CUDA) -----------`
			`#ifdef NV_DYNAMIC_CUDA`
			`static cudaError_t (*pfn_cudaGetDeviceCount) (int*);`
			`static cudaError_t (*pfn_cudaSetDevice) (int);`
			`static cudaError_t (*pfn_cudaGetDeviceProperties) (cudaDeviceProp*, int);`
			`static cudaError_t (*pfn_cudaDeviceSetLimit) (cudaLimit, size_t);`
			`static cudaError_t (*pfn_cudaDeviceSynchronize) ();`
			`static cudaError_t (*pfn_cudaDeviceGetStreamPriorityRange)(int*, int*);`
			`static cudaError_t (*pfn_cudaMalloc) (void**, size_t);`
			`static cudaError_t (*pfn_cudaFree) (void*);`
			`static cudaError_t (*pfn_cudaMemset) (void*, int, size_t);`
			`static cudaError_t (*pfn_cudaMemGetInfo) (size_t*, size_t*);`
			`static cudaError_t (*pfn_cudaMemcpy) (void*, const void*, size_t, cudaMemcpyKind);`
			`static cudaError_t (*pfn_cudaMemcpyAsync) (void*, const void*, size_t, cudaMemcpyKind, cudaStream_t);`
			`static cudaError_t (*pfn_cudaStreamCreate) (cudaStream_t*);`
			`static cudaError_t (*pfn_cudaStreamCreateWithPriority) (cudaStream_t*, unsigned int, int);`
			`static cudaError_t (*pfn_cudaStreamDestroy) (cudaStream_t);`
			`static cudaError_t (*pfn_cudaStreamSynchronize) (cudaStream_t);`
			`static cudaError_t (*pfn_cudaStreamWaitEvent) (cudaStream_t, cudaEvent_t, unsigned int);`
			`static cudaError_t (*pfn_cudaEventCreate) (cudaEvent_t*);`
			`static cudaError_t (*pfn_cudaEventCreateWithFlags) (cudaEvent_t*, unsigned int);`
			`static cudaError_t (*pfn_cudaEventRecord) (cudaEvent_t, cudaStream_t);`
			`static cudaError_t (*pfn_cudaEventDestroy) (cudaEvent_t);`
			`static const char* (*pfn_cudaGetErrorString) (cudaError_t);`
			`static cudaError_t (*pfn_cudaGetLastError) ();`
			`#endif // NV_DYNAMIC_CUDA`

			`private:`
			`static bool s_initialized;`
			`static int s_trtMajor;`
			`static std::string s_trtPath;`
			`static std::string s_onnxPath;`
			`static std::string s_cudaPath;`
			`static std::string s_cudnnPath;`
			`static LibHandle s_hTrt;`
			`static LibHandle s_hOnnx;`
			`static LibHandle s_hCuda;`
			`static LibHandle s_hCudnn;`

			`// Candidate DLL / SO name lists.`
			`static std::vector<std::string> TrtCandidates();`
			`static std::vector<std::string> OnnxCandidates();`
			`static std::vector<std::string> CudnnCandidates();`
			`static std::vector<std::string> CudaRtCandidates();`
			`};`