5769 lines
229 KiB
C
5769 lines
229 KiB
C
|
|
/*
|
|||
|
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|||
|
|
* SPDX-License-Identifier: Apache-2.0
|
|||
|
|
*
|
|||
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|||
|
|
* you may not use this file except in compliance with the License.
|
|||
|
|
* You may obtain a copy of the License at
|
|||
|
|
*
|
|||
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|||
|
|
*
|
|||
|
|
* Unless required by applicable law or agreed to in writing, software
|
|||
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|||
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
|
|
* See the License for the specific language governing permissions and
|
|||
|
|
* limitations under the License.
|
|||
|
|
*/
|
|||
|
|
|
|||
|
|
#ifndef NV_INFER_RUNTIME_H
|
|||
|
|
#define NV_INFER_RUNTIME_H
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \file NvInferRuntime.h
|
|||
|
|
//!
|
|||
|
|
//! This is the top-level API file for TensorRT extended runtime library.
|
|||
|
|
//!
|
|||
|
|
|
|||
|
|
#include "NvInferImpl.h" // IWYU pragma: export
|
|||
|
|
#define NV_INFER_INTERNAL_INCLUDE 1
|
|||
|
|
#include "NvInferPluginBase.h" // IWYU pragma: export
|
|||
|
|
#undef NV_INFER_INTERNAL_INCLUDE
|
|||
|
|
#include "NvInferRuntimeCommon.h" // IWYU pragma: export
|
|||
|
|
|
|||
|
|
namespace nvinfer1
|
|||
|
|
{
|
|||
|
|
|
|||
|
|
class IExecutionContext; //!< Forward declaration of IExecutionContext for use by other interfaces.
class ICudaEngine;       //!< Forward declaration of ICudaEngine for use by other interfaces.
class IPluginFactory;    //!< Forward declaration of IPluginFactory for use by other interfaces.
class IEngineInspector;  //!< Forward declaration of IEngineInspector for use by other interfaces.
|
|||
|
|
|
|||
|
|
//!
//! \class INoCopy
//!
//! \brief Base class for all TensorRT interfaces that are implemented by the TensorRT libraries
//!
//! Objects of such classes are not movable or copyable, and should only be manipulated
//! via pointers.
//!

class INoCopy
{
protected:
    INoCopy() = default;
    virtual ~INoCopy() = default;
    INoCopy(INoCopy const& other) = delete;            //!< Not copy-constructible.
    INoCopy& operator=(INoCopy const& other) = delete; //!< Not copy-assignable.
    INoCopy(INoCopy&& other) = delete;                 //!< Not move-constructible.
    INoCopy& operator=(INoCopy&& other) = delete;      //!< Not move-assignable.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum EngineCapability
//!
//! \brief List of supported engine capability flows.
//!
//! \details The EngineCapability determines the restrictions of a network during build time and what runtime
//! it targets. When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide
//! any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard
//! runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network
//! operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe
//! runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of
//! network operations that are DLA compatible and the resulting serialized engine can be executed using standalone
//! DLA runtime APIs. See sampleCudla for an example of integrating cuDLA APIs with TensorRT APIs.
//!
enum class EngineCapability : int32_t
{
    //!
    //! Standard: TensorRT flow without targeting the safety runtime.
    //! This flow supports both DeviceType::kGPU and DeviceType::kDLA.
    //!
    kSTANDARD = 0,

    //!
    //! Safety: TensorRT flow with restrictions targeting the safety runtime.
    //! See safety documentation for list of supported layers and formats.
    //! This flow supports only DeviceType::kGPU.
    //!
    //! This flag is only supported in NVIDIA Drive(R) products.
    kSAFETY = 1,

    //!
    //! DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes.
    //! See DLA documentation for list of supported layers and formats.
    //! This flow supports only DeviceType::kDLA.
    //!
    kDLA_STANDALONE = 2,
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in EngineCapability enum. \see EngineCapability
template <>
struct EnumMaxImpl<EngineCapability>
{
    //! Number of enumerators in EngineCapability (kSTANDARD, kSAFETY, kDLA_STANDALONE).
    static constexpr int32_t kVALUE = 3;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \class Weights
//!
//! \brief An array of weights used as a layer parameter.
//!
//! When using the DLA, the cumulative size of all Weights used in a network
//! must be less than 512MB in size. If the build option kGPU_FALLBACK is specified,
//! then multiple DLA sub-networks may be generated from the single original network.
//!
//! The weights are held by reference until the engine has been built. Therefore the data referenced
//! by \p values field should be preserved until the build is complete.
//!
//! The term "empty weights" refers to Weights with no weight coefficients ( \p count == 0 and \p values == nullptr).
//!
class Weights
{
public:
    DataType type;      //!< The type of the weights.
    void const* values; //!< The weight values, in a contiguous array.
    int64_t count;      //!< The number of weights in the array.
};
|
|||
|
|
|
|||
|
|
//!
//! \class IHostMemory
//!
//! \brief Class to handle library allocated memory that is accessible to the user.
//!
//! The memory allocated via the host memory object is owned by the library and will
//! be de-allocated when the destroy method is called.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IHostMemory : public INoCopy
{
public:
    virtual ~IHostMemory() noexcept = default;

    //! A pointer to the raw data that is owned by the library.
    void* data() const noexcept
    {
        return mImpl->data();
    }

    //! The size in bytes of the data that was allocated.
    std::size_t size() const noexcept
    {
        return mImpl->size();
    }

    //! The type of the memory that was allocated.
    DataType type() const noexcept
    {
        return mImpl->type();
    }

protected:
    apiv::VHostMemory* mImpl; //!< Library-provided implementation that the methods above delegate to.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum DimensionOperation
//!
//! \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations.
//!
//! For example, given two IDimensionExpr x and y and an IExprBuilder& eb,
//! eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y.
//!
//! \see IDimensionExpr, IExprBuilder
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Subtract the second operand from the first.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
    kCEIL_DIV = 8   //!< Division of the first element by the second, rounding up.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in DimensionOperation enum. \see DimensionOperation
template <>
constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
{
    // Nine enumerators: kSUM (0) through kCEIL_DIV (8).
    return 9;
}
|
|||
|
|
|
|||
|
|
//!
//! \enum TensorLocation
//!
//! \brief The location for tensor data storage, device or host.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data stored on device.
    kHOST = 1,   //!< Data stored on host.
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in TensorLocation enum. \see TensorLocation
template <>
struct EnumMaxImpl<TensorLocation>
{
    //! Number of enumerators in TensorLocation (kDEVICE, kHOST).
    static constexpr int32_t kVALUE = 2;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \class IDimensionExpr
//!
//! \brief An IDimensionExpr represents an integer expression constructed from constants,
//! input dimensions, and binary operations. These expressions can be used
//! in overrides of IPluginV2DynamicExt::getOutputDimensions or IPluginV3OneBuild::getOutputShapes() to define output
//! dimensions in terms of input dimensions.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see DimensionOperation, IPluginV2DynamicExt::getOutputDimensions, IPluginV3OneBuild::getOutputShapes()
//!
class IDimensionExpr : public INoCopy
{
public:
    //!
    //! \brief Return true if expression is a build-time constant.
    //!
    bool isConstant() const noexcept
    {
        return mImpl->isConstant();
    }

    //!
    //! \brief Get the value of the constant.
    //!
    //! If isConstant(), returns value of the constant.
    //! If !isConstant(), return std::numeric_limits<int64_t>::min().
    //!
    int64_t getConstantValue() const noexcept
    {
        return mImpl->getConstantValue();
    }

protected:
    apiv::VDimensionExpr* mImpl; //!< Library-provided implementation that the methods of this class delegate to.
    virtual ~IDimensionExpr() noexcept = default;

public:
    //!
    //! \brief Return true if this denotes the value of a size tensor.
    //!
    //! \return True if this was created with method IExprBuilder::declareSizeTensor, false otherwise
    //!
    bool isSizeTensor() const noexcept
    {
        return mImpl->isSizeTensor();
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \class IExprBuilder
//!
//! \brief Object for constructing IDimensionExpr.
//!
//! There is no public way to construct an IExprBuilder. It appears as an argument to
//! method IPluginV2DynamicExt::getOutputDimensions() and IPluginV3OneBuild::getOutputShapes(). Overrides of that
//! method can use that IExprBuilder argument to construct expressions that define output dimensions in terms of input
//! dimensions.
//!
//! Clients should assume that any values constructed by the IExprBuilder are destroyed
//! after IPluginV2DynamicExt::getOutputDimensions() or IPluginV3OneBuild::getOutputShapes() returns.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see IDimensionExpr
//!
class IExprBuilder : public INoCopy
{
public:
    //!
    //! \brief Return pointer to IDimensionExpr for given value.
    //!
    IDimensionExpr const* constant(int64_t value) noexcept
    {
        return mImpl->constant(value);
    }

    //!
    //! \brief Get the operation.
    //!
    //! Return pointer to IDimensionExpr that represents the given operation applied to first and second.
    //! Returns nullptr if op is not a valid DimensionOperation.
    //!
    IDimensionExpr const* operation(
        DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
    {
        return mImpl->operation(op, first, second);
    }

protected:
    apiv::VExprBuilder* mImpl; //!< Library-provided implementation that the methods of this class delegate to.
    virtual ~IExprBuilder() noexcept = default;

public:
    //!
    //! \brief Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bound.
    //!
    //! A size tensor allows a plugin to have output dimensions that cannot be computed solely from input dimensions.
    //! For example, suppose a plugin implements the equivalent of INonZeroLayer for 2D input. The plugin can
    //! have one output for the indices of non-zero elements, and a second output containing the number of non-zero
    //! elements. Suppose the input has size [M,N] and has K non-zero elements. The plugin can write K to the second
    //! output. When telling TensorRT that the first output has shape [2,K], plugin uses IExprBuilder::constant() and
    //! IExprBuilder::declareSizeTensor(1,...) to create the IDimensionExpr that respectively denote 2 and K.
    //!
    //! TensorRT also needs to know the value of K to use for auto-tuning and an upper bound on K so that it can
    //! allocate memory for the output tensor. In the example, supposed typically half of the plugin's input elements
    //! are non-zero, and all the elements might be nonzero. then using M*N/2 might be a good expression for the opt
    //! parameter, and M*N for the upper bound. IDimensionsExpr for these expressions can be constructed from
    //! IDimensionsExpr for the input dimensions.
    //!
    //! \param outputIndex index of a plugin output that is a size tensor.
    //! \param opt formula for computing auto-tuning value. Must not depend on a size tensor.
    //! \param upper Upper bound on the size tensor.
    //!
    //! \return IDimensionExpr denoting the value of the size tensor.
    //!
    //! \see IPluginV3OneBuild::getOutputShapes()
    //!
    IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
    {
        return mImpl->declareSizeTensor(outputIndex, opt, upper);
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \class DimsExprs
//!
//! \brief Analog of class Dims with expressions instead of constants for the dimensions.
//!
class DimsExprs
{
public:
    int32_t nbDims;                          //!< The number of dimensions.
    IDimensionExpr const* d[Dims::MAX_DIMS]; //!< The extent of each dimension.
};
|
|||
|
|
|
|||
|
|
//!
//! \struct DynamicPluginTensorDesc
//!
//! \brief Summarizes tensors that a plugin might see for an input or output.
//!
struct DynamicPluginTensorDesc
{
    //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension.
    PluginTensorDesc desc;

    //! Lower bounds on tensor's dimensions
    Dims min;

    //! Upper bounds on tensor's dimensions
    Dims max;

    //! Optimum value of tensor's dimensions specified for auto-tuning
    Dims opt;
};
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV2DynamicExt
//!
//! \brief Similar to IPluginV2Ext, but with support for dynamic shapes.
//!
//! Clients should override the public methods, including the following inherited methods:
//!
//! * virtual int32_t getNbOutputs() const noexcept = 0;
//!
//! * virtual DataType getOutputDataType(int32_t index, DataType const* inputTypes,
//!   int32_t nbInputs) const noexcept = 0;
//!
//! * virtual size_t getSerializationSize() const noexcept = 0;
//!
//! * virtual void serialize(void* buffer) const noexcept = 0;
//!
//! * virtual void destroy() noexcept = 0;
//!
//! * virtual void setPluginNamespace(char const* pluginNamespace) noexcept = 0;
//!
//! * virtual char const* getPluginNamespace() const noexcept = 0;
//!
//! For weakly typed networks, the inputTypes will always be DataType::kFLOAT or DataType::kINT32,
//! and the returned type is canonicalized to DataType::kFLOAT if it is DataType::kHALF or DataType::kINT8.
//! For strongly typed networks, inputTypes are inferred from previous operations, and getOutputDataType
//! specifies the returned type based on the inputTypes.
//! Details about the floating-point precision are elicited later by method supportsFormatCombination.
//!
//! \deprecated Deprecated in TensorRT 10.0. Please implement IPluginV3 instead.
//!
class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext
{
public:
    //!
    //! \brief Clone the plugin object. Covariant return narrows the inherited clone() to IPluginV2DynamicExt.
    //!
    IPluginV2DynamicExt* clone() const noexcept override = 0;

    //!
    //! \brief Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
    //!
    //! \param outputIndex The index of the output tensor
    //! \param inputs Expressions for dimensions of the input tensors
    //! \param nbInputs The number of input tensors
    //! \param exprBuilder Object for generating new expressions
    //!
    //! This function is called by the implementations of IBuilder during analysis of the network.
    //!
    //! Example #1: A plugin has a single output that transposes the last two dimensions of the plugin's single input.
    //! The body of the override of getOutputDimensions can be:
    //!
    //!     DimsExprs output(inputs[0]);
    //!     std::swap(output.d[output.nbDims-1], output.d[output.nbDims-2]);
    //!     return output;
    //!
    //! Example #2: A plugin concatenates its two inputs along the first dimension.
    //! The body of the override of getOutputDimensions can be:
    //!
    //!     DimsExprs output(inputs[0]);
    //!     output.d[0] = exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[0], *inputs[1].d[0]);
    //!     return output;
    //!
    virtual DimsExprs getOutputDimensions(
        int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;

    //!
    //! \brief Limit on number of format combinations accepted.
    //!
    static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;

    //!
    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
    //!
    //! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
    //! Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs.
    //!
    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
    //! by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos]
    //! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
    //! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
    //! that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1],
    //! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
    //!
    //! Some examples:
    //!
    //! * A definition for a plugin that supports only FP16 NCHW:
    //!
    //!     return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
    //!
    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
    //!   and FP32 NCHW for its single output:
    //!
    //!     return inOut[pos].format == TensorFormat::kLINEAR && (inOut[pos].type == (pos < 2 ? DataType::kHALF :
    //!     DataType::kFLOAT));
    //!
    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
    //!   any format or type, but the inputs and output must have the same format and type:
    //!
    //!     return pos == 0 || (inOut[pos].format == inOut.format[0] && inOut[pos].type == inOut[0].type);
    //!
    //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations.
    //!
    virtual bool supportsFormatCombination(
        int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Configure the plugin.
    //!
    //! configurePlugin() can be called multiple times in both the build and execution phases. The build phase happens
    //! before initialize() is called and only occurs during creation of an engine by IBuilder. The execution phase
    //! happens after initialize() is called and occurs during both creation of an engine by IBuilder and execution
    //! of an engine by IExecutionContext.
    //!
    //! Build phase:
    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for profiling but not for any
    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
    //! input and output formats, along with the bound of possible dimensions. The min and max value of the
    //! DynamicPluginTensorDesc correspond to the kMIN and kMAX value of the current profile that the plugin is being
    //! profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network creation.
    //! Wildcard dimensions will exist during this phase in the desc.dims field.
    //!
    //! Execution phase:
    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for executing the plugin for a
    //! specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the
    //! explicit input dimensions stored in desc.dims field.
    //! * IBuilder will call this function once per profile, with desc.dims resolved to the values specified by the
    //!   kOPT
    //!   field of the current profile. Wildcard dimensions will not exist during this phase.
    //! * IExecutionContext will call this during the next subsequent instance enqueue[V2]() or execute[V2]() if:
    //!   - The batch size is changed from previous call of execute()/enqueue() if hasImplicitBatchDimension() returns
    //!     true.
    //!   - The optimization profile is changed via setOptimizationProfileAsync().
    //!   - An input execution binding is changed via setInputShape().
    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
    //! called from IBuilder. Performance bottlenecks of configurePlugin won't show up during engine building but will
    //! be visible during execution after calling functions that trigger layer resource updates.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Find the workspace size required by the layer.
    //!
    //! This function is called after the plugin is configured, and possibly during execution.
    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
    //! or any smaller problem.
    //!
    //! \return The workspace size.
    //!
    virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
        int32_t nbOutputs) const noexcept = 0;

    //!
    //! \brief Execute the layer.
    //!
    //! \param inputDesc how to interpret the memory for the input tensors.
    //! \param outputDesc how to interpret the memory for the output tensors.
    //! \param inputs The memory for the input tensors.
    //! \param outputs The memory for the output tensors.
    //! \param workspace Workspace for execution.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination).
    //!
    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;

protected:
    //!
    //! \brief Return the API version with which this plugin was built. The
    //! upper byte reserved by TensorRT and is used to differentiate this from IPluginV2.
    //!
    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
    //! plugins.
    //!
    int32_t getTensorRTVersion() const noexcept override
    {
        // Upper byte encodes the plugin kind; lower three bytes carry the TensorRT version.
        return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
    }

    virtual ~IPluginV2DynamicExt() noexcept {}

private:
    // Following are obsolete base class methods, and must not be implemented or used.

    //!
    //! \brief Set plugin configuration
    //!
    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
        bool const*, PluginFormat, int32_t) noexcept override final
    {
    }

    //!
    //! \brief Check if provided data type is supported
    //!
    bool supportsFormat(DataType, PluginFormat) const noexcept override final
    {
        return false;
    }

    //!
    //! \brief Get output dimensions.
    //!
    Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
    {
        return Dims{-1, {}};
    }

    //!
    //! \brief Is output broadcasted across batch.
    //!
    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
    //!
    TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
    {
        return false;
    }

    //!
    //! \brief Can output broadcasted across batch.
    //!
    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
    //!
    TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
    {
        return true;
    }

    //!
    //! \brief Get required workspace size in bytes.
    //!
    size_t getWorkspaceSize(int32_t) const noexcept override final
    {
        return 0;
    }

    //!
    //! \brief Run inference.
    //!
    int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
    {
        return 1;
    }
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
class IStreamReader : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamReader defined by the
    //! application.
    //!
    ~IStreamReader() override = default;
    IStreamReader() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"IStreamReader", 1, 0};
    }

    //!
    //! \brief Read the next number of bytes in the stream.
    //!
    //! \param destination The memory to write to
    //! \param nbBytes The number of bytes to read
    //!
    //! \returns The number of bytes read. Negative values will be considered an automatic error.
    //!
    virtual int64_t read(void* destination, int64_t nbBytes) = 0;

protected:
    // Copy/move operations are protected: only derived classes may use them, preventing slicing through the base.
    IStreamReader(IStreamReader const&) = default;
    IStreamReader(IStreamReader&&) = default;
    IStreamReader& operator=(IStreamReader const&) & = default;
    IStreamReader& operator=(IStreamReader&&) & = default;
};
|
|||
|
|
|
|||
|
|
class IStreamWriter : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamWriter defined by the
    //! application.
    //!
    ~IStreamWriter() override = default;
    IStreamWriter() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept final
    {
        return InterfaceInfo{"IStreamWriter", 1, 0};
    }

    //!
    //! \brief write nbBytes of data into the stream.
    //!
    //! \param data The data to be written to stream
    //! \param nbBytes The number of bytes to write
    //!
    //! \returns The number of bytes written. A value that is negative or less than nbBytes indicates that an error
    //! occurred and TensorRT will give up on writing to the stream.
    //!
    virtual int64_t write(void const* data, int64_t nbBytes) = 0;

protected:
    // Copy/move operations are protected: only derived classes may use them, preventing slicing through the base.
    IStreamWriter(IStreamWriter const&) = default;
    IStreamWriter(IStreamWriter&&) = default;
    IStreamWriter& operator=(IStreamWriter const&) & = default;
    IStreamWriter& operator=(IStreamWriter&&) & = default;
};
|
|||
|
|
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamReader
//!
//! \brief Application-implemented class for reading data in a stream-based manner.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReader, not
//! v_1_0::IStreamReader
//!
using IStreamReader = v_1_0::IStreamReader;
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamWriter
//!
//! \brief Application-implemented class for writing data in a stream-based manner.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamWriter, not
//! v_1_0::IStreamWriter
//!
using IStreamWriter = v_1_0::IStreamWriter;
|
|||
|
|
|
|||
|
|
//!
//! \enum SeekPosition
//! \brief Controls the seek mode of IStreamReaderV2.
//!
enum class SeekPosition : int32_t
{
    //! From the beginning of the file.
    kSET = 0,

    //! From the current position of the file.
    kCUR = 1,

    //! From the tail of the file.
    kEND = 2,
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
{
//!
//! \brief Application-implemented class for reading data in a stream-based manner asynchronously,
//! with support for seeking. Refer to this class through the IStreamReaderV2 alias.
//!
class IStreamReaderV2 : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamReaderV2 defined by the
    //! application.
    //!
    ~IStreamReaderV2() override = default;
    IStreamReaderV2() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"IStreamReaderV2", 1, 0};
    }

    //!
    //! \brief Read the next number of bytes in the stream asynchronously.
    //!
    //! \param destination The memory to write to, call cudaPointerGetAttributes to get the memory location
    //! \param nbBytes The number of bytes to read
    //! \param stream The CUDA stream used to do the copy
    //!
    //! \returns The number of bytes read. Negative values indicate an unrecoverable error.
    //! A zero indicates that the end of the stream has been reached.
    //!
    virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;

    //!
    //! \brief Sets the position of the stream to the given offset.
    //!
    //! \param offset The number of bytes to offset from where.
    //! \param where The position from where the offset is added. \see SeekPosition
    //!
    //! \returns True if the position is updated successfully.
    //!
    virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;

protected:
    // Copy and move operations are protected: the abstract interface cannot be
    // copied or moved directly, only by derived classes.
    IStreamReaderV2(IStreamReaderV2 const&) = default;
    IStreamReaderV2(IStreamReaderV2&&) = default;
    IStreamReaderV2& operator=(IStreamReaderV2 const&) & = default;
    IStreamReaderV2& operator=(IStreamReaderV2&&) & = default;
};
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamReaderV2
//!
//! \brief Application-implemented class for reading data in a stream-based manner asynchronously. Intended for use with
//! the GDS API for optimizing load times.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReaderV2, not
//! v_1_0::IStreamReaderV2
//!
using IStreamReaderV2 = v_1_0::IStreamReaderV2;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginResourceContext
//!
//! \brief Interface for plugins to access per context resources provided by TensorRT
//!
//! There is no public way to construct an IPluginResourceContext. It appears as an argument to
//! IPluginV3OneRuntime::attachToContext(). Overrides of that method can use the IPluginResourceContext object to access
//! any available per context resources.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see IPluginV3OneRuntime::attachToContext()
//!
class IPluginResourceContext
{
public:
    //! \brief Get the GPU allocator associated with the resource context
    //!
    //! \see IPluginV3OneRuntime::attachToContext()
    //!
    virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;

    //! \brief Get the error recorder associated with the resource context
    //!
    //! \see IPluginV3OneRuntime::attachToContext()
    //!
    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;

    virtual ~IPluginResourceContext() noexcept = default;

protected:
    // Construction and copy/move are restricted to TensorRT-internal implementations.
    IPluginResourceContext() = default;
    IPluginResourceContext(IPluginResourceContext const&) = default;
    IPluginResourceContext(IPluginResourceContext&&) = default;
    IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
    IPluginResourceContext& operator=(IPluginResourceContext&&) & = default;
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
//!
//! \brief Capability interface exposing the core identity (name, version, namespace) of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneCore alias.
//!
class IPluginV3OneCore : public IPluginCapability
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
    }

    //!
    //! \brief Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginName()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginName() const noexcept = 0;

    //!
    //! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginVersion()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;

    //!
    //! \brief Return the namespace of the plugin object. Should match the plugin namespace returned by the
    //! corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginNamespace()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Capability interface expressing the build-time properties and behavior of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneBuild alias.
//!
class IPluginV3OneBuild : public IPluginCapability
{
public:
    //!
    //! \brief The default maximum number of format combinations that will be timed by TensorRT during the build phase
    //!
    //! \see getFormatCombinationLimit
    //!
    static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
    }

    //!
    //! \brief Configure the plugin.
    //!
    //! configurePlugin() can be called multiple times in the build phase during creation of an engine by IBuilder.
    //!
    //! configurePlugin() is called when a plugin is being prepared for profiling but not for any
    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
    //! input and output formats, along with the bound of possible dimensions. The min, opt and max value of the
    //! DynamicPluginTensorDesc correspond to the kMIN, kOPT and kMAX value of the current profile that the plugin is
    //! being profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network
    //! creation. Wildcard dimensions may exist during this phase in the desc.dims field.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT).
    //!
    virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided.
    //!
    //! \param outputTypes Pre-allocated array to which the output data types should be written.
    //! \param nbOutputs The number of output tensors. This matches the value returned from getNbOutputs().
    //! \param inputTypes The input data types.
    //! \param nbInputs The number of input tensors.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    //! \note Provide `DataType::kFLOAT`s if the layer has no inputs. The data type for any size tensor outputs must be
    //! `DataType::kINT32`. The returned data types must each have a format that is supported by the plugin.
    //!
    //! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
    //!
    virtual int32_t getOutputDataTypes(
        DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;

    //!
    //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors.
    //!
    //! \param inputs Expressions for dimensions of the input tensors
    //! \param nbInputs The number of input tensors
    //! \param shapeInputs Expressions for values of the shape tensor inputs
    //! \param nbShapeInputs The number of shape tensor inputs
    //! \param outputs Pre-allocated array to which the output dimensions must be written
    //! \param nbOutputs Number of outputs.
    //! \param exprBuilder Object for generating new dimension expressions
    //!
    //! \note Any size tensor outputs must be declared to be 0D.
    //!
    //! \note The declaration of shapeInputs as DimsExprs is slightly abusive, because the "dimensions"
    //! are actually the values of the shape tensor. For example, if the input shape tensor
    //! is a 2x3 matrix, the DimsExprs will have six "dimensions": the three values from the first
    //! row of the matrix followed by the three values from the second row of the matrix.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). Returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
        int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;

    //!
    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
    //!
    //! For this method inputs are numbered 0.. (nbInputs - 1) and outputs are numbered nbInputs.. (nbInputs + nbOutputs
    //! - 1). Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs + nbOutputs - 1.
    //!
    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
    //! by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos]
    //! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
    //! make its result conditional on the formats/datatypes in inOut[0.. pos - 1], which will be set to values
    //! that the plugin supports. The override should not inspect inOut[pos1.. nbInputs + nbOutputs - 1],
    //! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
    //!
    //! Some examples:
    //!
    //! * A definition for a plugin that supports only FP16 NCHW:
    //!
    //!         return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF;
    //!
    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
    //!   and FP32 NCHW for its single output:
    //!
    //!         return inOut.format[pos] == TensorFormat::kLINEAR && (inOut.type[pos] == pos < 2 ? DataType::kHALF :
    //!         DataType::kFLOAT);
    //!
    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
    //!   any format or type, but the inputs and output must have the same format and type:
    //!
    //!         return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]);
    //!
    //! \warning TensorRT will stop querying once it finds getFormatCombinationLimit() of combinations.
    //!
    //! \see getFormatCombinationLimit
    //!
    virtual bool supportsFormatCombination(
        int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Get the number of outputs from the plugin.
    //!
    //! \return The number of outputs, which must be a positive integer.
    //!
    virtual int32_t getNbOutputs() const noexcept = 0;

    //!
    //! \brief Find the workspace size required by the layer.
    //!
    //! This function is called after the plugin is configured, and possibly during execution.
    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
    //! or any smaller problem.
    //!
    //! \return The workspace size.
    //!
    virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
        DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
    {
        // Default: the plugin requires no scratch workspace.
        return 0;
    }

    //!
    //! \brief Query for any custom tactics that the plugin intends to use
    //!
    //! This method queries for the set of tactics T(f) supported by the plugin for the format combination f indicated
    //! by the immediately preceding call to configurePlugin(). It is guaranteed to be called after configurePlugin().
    //!
    //! For each format combination provided through configurePlugin(), up to a maximum of getFormatCombinationLimit(),
    //! the plugin will be timed for each tactic advertised through this method for that format combination. i.e. The
    //! plugin will be timed \f$N = \sum_{i=0}^{i<getFormatCombinationLimit()} (T(f[i]))\f$ times. If \f$N = 1\f$, the
    //! plugin may not be timed. In pseudocode, the timing protocol appears as the following:
    //!
    //!     counter = 0
    //!     for each supported format combination
    //!         ++counter
    //!         if counter > getFormatCombinationLimit()
    //!             goto done
    //!         configurePlugin(...)
    //!         for each tactic in getValidTactics(...)
    //!             time tactic
    //!     done:
    //!
    //!
    //! \param tactics Pre-allocated buffer to which the tactic values should be written
    //! \param nbTactics The number of tactics advertised through getNbTactics()
    //!
    //! \note The provided tactic values must be unique and non-zero. The tactic value 0 is reserved for the default
    //! tactic attached to each format combination.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
    {
        // Default: no custom tactics are advertised.
        return 0;
    }

    //!
    //! \brief Query for the number of custom tactics the plugin intends to use
    //!
    virtual int32_t getNbTactics() noexcept
    {
        // Default: no custom tactics.
        return 0;
    }

    //!
    //! \brief Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creation.
    //!
    //! \return Suffix to use for timing cache ID, considering only the creation state of the plugin.
    //! Returning nullptr will disable timing caching for the plugin altogether.
    //!
    //! \note If timing caching is enabled for the plugin (by returning non-null), the I/O shape and format information
    //! will be automatically considered to form the prefix of the timing cache ID. Therefore, only other factors
    //! determining the creation state of the plugin, such as its attribute values, should be considered to compose the
    //! return value.
    //!
    virtual char const* getTimingCacheID() noexcept
    {
        // Default: timing caching disabled for this plugin.
        return nullptr;
    }

    //!
    //! \brief Return the maximum number of format combinations that will be timed by TensorRT during the build phase
    //!
    virtual int32_t getFormatCombinationLimit() noexcept
    {
        return kDEFAULT_FORMAT_COMBINATION_LIMIT;
    }

    //!
    //! \brief Query for a string representing the configuration of the plugin. May be called anytime after
    //! plugin creation.
    //!
    //! \return A string representing the plugin's creation state, especially with regard to its attribute values.
    //!
    virtual char const* getMetadataString() noexcept
    {
        // Default: no metadata string.
        return nullptr;
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Capability interface expressing the runtime (execution) properties and behavior of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneRuntime alias.
//!
class IPluginV3OneRuntime : public IPluginCapability
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
    }

    //!
    //! \brief Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised, this
    //! will have a value of 0, which is designated as the default tactic.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t setTactic(int32_t tactic) noexcept
    {
        // Default: accept any tactic selection without additional state.
        return 0;
    }

    //!
    //! \brief Called when a plugin is being prepared for execution for specific dimensions. This could
    //! happen multiple times in the execution phase, both during creation of an engine by IBuilder and execution of an
    //! engine by IExecutionContext.
    //!  * IBuilder will call this function once per profile, with `in` resolved to the values specified by the
    //!    kOPT field of the current profile.
    //!  * IExecutionContext will call this during the next subsequent instance of enqueueV3() or executeV2() if:
    //!    - The optimization profile is changed via setOptimizationProfile() or setOptimizationProfileAsync().
    //!    - An input binding is changed via setInputTensorAddress() or setTensorAddress() or setInputShape().
    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
    //! called from IBuilder. Performance bottlenecks of onShapeChange() will not show up during engine building but
    //! will be visible during execution if any triggering functions are called.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    virtual int32_t onShapeChange(
        PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Execute the layer.
    //!
    //! \param inputDesc how to interpret the memory for the input tensors.
    //! \param outputDesc how to interpret the memory for the output tensors.
    //! \param inputs The memory for the input tensors.
    //! \param outputs The memory for the output tensors.
    //! \param workspace Workspace for execution.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;

    //!
    //! \brief Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin
    //! access to some context resources.
    //!
    //! This function is called automatically for each plugin when a new execution context is created. The plugin may
    //! use resources provided by the IPluginResourceContext until the plugin is deleted by TensorRT.
    //!
    //! If the plugin needs per-context resources, it can be allocated here.
    //!
    //! \param context A resource context that exposes methods to get access to execution context specific resources.
    //!                A different resource context is guaranteed for each different execution context to which the
    //!                plugin is attached.
    //! \see IPluginResourceContext
    //!
    //! \note This method should clone the entire IPluginV3 object, not just the runtime interface
    //!
    //! \return A clone of the IPluginV3 object whose runtime interface on which this method is invoked, which has
    //! attached to the provided resource context.
    //!
    virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;

    //!
    //! \brief Get the plugin fields which should be serialized.
    //!
    //! \note The set of plugin fields returned does not necessarily need to match that advertised through
    //! getFieldNames() of the corresponding plugin creator.

    //! \note To serialize arbitrary plugin data, use a PluginField of
    //! PluginFieldType::kUNKNOWN, with the length of the PluginField set to the correct number of bytes.
    //!
    virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
};
|
|||
|
|
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
namespace v_2_0
{

//!
//! \brief Version 2 of the build capability interface: extends v_1_0::IPluginV3OneBuild with I/O aliasing.
//! Refer to this class through the IPluginV3OneBuildV2 alias.
//!
class IPluginV3OneBuild : public v_1_0::IPluginV3OneBuild
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
    }

    //!
    //! \brief Communicates to TensorRT that the output at the specified output index is aliased to the input at the
    //! returned index
    //!
    //! Enables read-modify-write behavior in plugins. TensorRT may insert copies to facilitate this capability.
    //!
    //! \return An integer denoting the index of the input which is aliased to the output at outputIndex.
    //!         Returning -1 indicates that the output is not aliased to any input. Otherwise, the valid range for
    //!         return value is [0, nbInputs - 1].
    //!
    //! \note A given plugin input can only be aliased to a single plugin output.
    //!
    //! \note This API will only be called and have an effect when PreviewFeature::kALIASED_PLUGIN_IO_10_03 is turned
    //! on.
    //!
    //! \warning If an input is not shallow copyable, a copy inserted by TensorRT may not work as intended. Therefore,
    //! using this feature with tensors requiring deep copies is not supported.
    //!
    //! \warning If a given tensor is requested to be aliased by two different plugins, this may result in divergent
    //! copies of the tensor after writes from each plugin. e.g. In the below example, t1 and t2 could be divergent.
    //!
    //!          +-----+           +--------+
    //!     +--->|Copy +--> t* --->|Plugin0 +--> t1
    //!     |    +-----+           +--------+
    //!     t
    //!     |    +-----+           +--------+
    //!     +--->|Copy +--> t** -->|Plugin1 +--> t2
    //!          +-----+           +--------+
    //!
    virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
    {
        // Default: no output is aliased to any input.
        return -1;
    }
};

} // namespace v_2_0
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneCore
//!
//! \brief A plugin capability interface that enables the core capability (PluginCapabilityType::kCORE).
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneCore = v_1_0::IPluginV3OneCore;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneBuild
//!
//! \brief A plugin capability interface that enables the build capability (PluginCapabilityType::kBUILD). Exposes
//! methods that allow the expression of the build time properties and behavior of a plugin.
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneBuild = v_1_0::IPluginV3OneBuild;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneRuntime
//!
//! \brief A plugin capability interface that enables the runtime capability (PluginCapabilityType::kRUNTIME). Exposes
//! methods that allow the expression of the runtime properties and behavior of a plugin.
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneRuntime = v_1_0::IPluginV3OneRuntime;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneBuildV2
//!
//! \brief A plugin capability interface that extends IPluginV3OneBuild by providing I/O aliasing functionality.
//!
//! \see IPluginV3OneBuild
//!
using IPluginV3OneBuildV2 = v_2_0::IPluginV3OneBuild;
|
|||
|
|
|
|||
|
|
namespace v_1_0
{
//!
//! \brief Application-implemented interface for per-layer profiling.
//! Refer to this class through the IProfiler alias.
//!
class IProfiler
{
public:
    //!
    //! \brief Layer time reporting callback.
    //!
    //! \param layerName The name of the layer, set when constructing the network definition. If the engine is built
    //! with profiling verbosity set to kNONE, the layerName is the decimal index of the layer.
    //! \param ms The time in milliseconds to execute the layer.
    //!
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for profiling.
//!
//! When this class is added to an execution context, the profiler will be called once per layer for each invocation of
//! executeV2()/enqueueV3().
//!
//! It is not recommended to run inference with profiler enabled when the inference execution time is critical since the
//! profiler may affect execution time negatively.
//!
using IProfiler = v_1_0::IProfiler;
|
|||
|
|
|
|||
|
|
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses particular Weights.
//!
//! The power weights of an IScaleLayer are omitted. Refitting those is not supported.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< kernel for IConvolutionLayer or IDeconvolutionLayer
    kBIAS = 1,     //!< bias for IConvolutionLayer or IDeconvolutionLayer
    kSHIFT = 2,    //!< shift part of IScaleLayer
    kSCALE = 3,    //!< scale part of IScaleLayer
    kCONSTANT = 4, //!< weights for IConstantLayer
    kANY = 5,      //!< Any other weights role
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in WeightsRole enum. \see WeightsRole
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<WeightsRole>() noexcept
|
|||
|
|
{
|
|||
|
|
return 6;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \enum DeviceType
//! \brief The device that this layer/network will execute on.
//!
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU Device
    kDLA = 1, //!< DLA Core
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in DeviceType enum. \see DeviceType
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<DeviceType>() noexcept
|
|||
|
|
{
|
|||
|
|
return 2;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags used to control TensorRT's behavior when creating executable temporary files.
//!
//! On some platforms the TensorRT runtime may need to create files in a temporary directory or use platform-specific
//! APIs to create files in-memory to load temporary DLLs that implement runtime code. These flags allow the
//! application to explicitly control TensorRT's use of these files. This will preclude the use of certain TensorRT
//! APIs for deserializing and loading lean runtimes.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in-memory (or unnamed files).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    //!
    //! \see IRuntime::setTemporaryDirectory()
    kALLOW_TEMPORARY_FILES = 1,
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in TempfileControlFlag enum. \see TempfileControlFlag
template <>
constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
{
    return 2;
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operations.
//!
//! \see TempfileControlFlag,
//!      IRuntime::setTempfileControlFlags(),
//!      IRuntime::getTempfileControlFlags()
using TempfileControlFlags = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum TensorFormat
//!
//! \brief Format of the input/output tensors.
//!
//! This enum is used by both plugins and network I/O tensors.
//!
//! \see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat()
//!
//! Many of the formats are **vector-major** or **vector-minor**. These formats specify
//! a <em>vector dimension</em> and <em>scalars per vector</em>.
//! For example, suppose that the tensor has dimensions [M,N,C,H,W],
//! the vector dimension is C and there are V scalars per vector.
//!
//! * A **vector-major** format splits the vectorized dimension into two axes in the
//!   memory layout. The vectorized dimension is replaced by an axis of length ceil(C/V)
//!   and a new dimension of length V is appended. For the example tensor, the memory layout
//!   is equivalent to an array with dimensions [M][N][ceil(C/V)][H][W][V].
//!   Tensor coordinate (m,n,c,h,w) maps to array location [m][n][c/V][h][w][c\%V].
//!
//! * A **vector-minor** format moves the vectorized dimension to become the last axis
//!   in the memory layout. For the example tensor, the memory layout is equivalent to an
//!   array with dimensions [M][N][H][W][ceil(C/V)*V]. Tensor coordinate (m,n,c,h,w) maps
//!   array location subscript [m][n][h][w][c].
//!
//! In interfaces that refer to "components per element", that's the value of V above.
//!
//! For more information about data formats, see the topic "Data Format Description" located in the
//! TensorRT Developer Guide.
//! https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/advanced.html#i-o-formats
//!
enum class TensorFormat : int32_t
{
    //! Memory layout is similar to an array in C or C++.
    //! The stride of each dimension is the product of the dimensions after it.
    //! The last dimension has unit stride.
    //!
    //! This format supports all TensorRT types.
    //! For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192].
    kLINEAR = 0,

    //! Vector-major format with two scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires FP16 or BF16 and at least three dimensions.
    kCHW2 = 1,

    //! Vector-minor format with eight scalars per vector.
    //! Vector dimension is third to last.
    //! This format requires FP16 or BF16 and at least three dimensions.
    kHWC8 = 2,

    //! Vector-major format with four scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires INT8 and at least three dimensions.
    //! For INT8, the length of the vector dimension must be a build-time constant.
    //!
    //! Deprecated usage:
    //!
    //! If running on the DLA, this format can be used for acceleration
    //! with the caveat that C must be less than or equal to 4.
    //! If used as DLA input and the build option kGPU_FALLBACK is not specified,
    //! it needs to meet line stride requirement of DLA format. Column stride in
    //! bytes must be a multiple of 64 on Orin.
    kCHW4 = 3,

    //! Vector-major format with 16 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format is only supported by DLA and requires FP16 and at least three dimensions.
    //! This format maps to the native feature format for FP16,
    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
    kCHW16 = 4,

    //! Vector-major format with 32 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires INT8, FP32, or FP16 and at least three dimensions.
    //!
    //! For DLA usage, this format maps to the native feature format for INT8,
    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
    kCHW32 = 5,

    //! Vector-minor format with eight scalars per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP16 or BF16 and at least four dimensions.
    kDHWC8 = 6,

    //! Vector-major format with 32 scalars per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP16 or INT8 and at least four dimensions.
    kCDHW32 = 7,

    //! Vector-minor format where channel dimension is third to last and unpadded.
    //!
    //! This format requires either FP32 or UINT8 and at least three dimensions.
    kHWC = 8,

    //! DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis
    //! always has unit stride. The stride for stepping along the H axis is
    //! rounded up to 64 bytes.
    //!
    //! The memory layout is equivalent to a C array with dimensions
    //! [N][C][H][roundUp(W, 64/elementSize)] where elementSize is
    //! 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w)
    //! mapping to array subscript [n][c][h][w].
    kDLA_LINEAR = 9,

    //! DLA image format. For a tensor with dimension {N, C, H, W} the C axis
    //! always has unit stride. The stride for stepping along the H axis is rounded up
    //! to 64 bytes on Orin. C can only be 1, 3 or 4.
    //! If C == 1, it will map to grayscale format.
    //! If C == 3 or C == 4, it will map to color image format. And if C == 3,
    //! the stride for stepping along the W axis needs to be padded to 4 in elements.
    //!
    //! When C is {1, 3, 4}, then C' is {1, 4, 4} respectively,
    //! the memory layout is equivalent to a C array with dimensions
    //! [N][H][roundUp(W, 64/C'/elementSize)][C'] on Orin
    //! where elementSize is 2 for FP16
    //! and 1 for Int8. The tensor coordinates (n, c, h, w) mapping to array
    //! subscript [n][h][w][c].
    kDLA_HWC4 = 10,

    //! Vector-minor format with 16 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This requires FP16, INT8 or FP8 and at least three dimensions.
    kHWC16 = 11,

    //! Vector-minor format with one scalar per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP32 and at least four dimensions.
    kDHWC = 12
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in TensorFormat enum. \see TensorFormat
template <>
struct EnumMaxImpl<TensorFormat>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the TensorFormat enum.
    static constexpr int32_t kVALUE = 13;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \enum AllocatorFlag
//!
//! \brief Allowed type of memory allocation.
//!
enum class AllocatorFlag : int32_t
{
    //! TensorRT may call realloc() on this allocation.
    kRESIZABLE = 0,
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in AllocatorFlag enum. \see AllocatorFlag
template <>
struct EnumMaxImpl<AllocatorFlag>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the AllocatorFlag enum.
    static constexpr int32_t kVALUE = 1;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//! Bitmask of AllocatorFlag values combined with bitwise-OR. \see AllocatorFlag
using AllocatorFlags = uint32_t;

//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
//! The name v_1_0 may change in future versions of TensorRT.
|
|||
|
|
|
|||
|
|
//!
//! \class ILogger
//!
//! \brief Application-implemented logging interface for the builder, refitter and runtime.
//!
//! The logger used to create an instance of IBuilder, IRuntime or IRefitter is used for all objects created through
//! that interface. The logger must be valid until all objects created are released.
//!
//! The Logger object implementation must be thread safe. All locking and synchronization is pushed to the
//! interface implementation and TensorRT does not hold any synchronization primitives when calling the interface
//! functions.
//!
class ILogger
{
public:
    //!
    //! \enum Severity
    //!
    //! \brief The severity corresponding to a log message.
    //!
    enum class Severity : int32_t
    {
        //! An internal error has occurred. Execution is unrecoverable.
        kINTERNAL_ERROR = 0,
        //! An application error has occurred.
        kERROR = 1,
        //! An application error has been discovered, but TensorRT has recovered or fallen back to a default.
        kWARNING = 2,
        //! Informational messages with instructional information.
        kINFO = 3,
        //! Verbose messages with debugging information.
        kVERBOSE = 4,
    };

    //!
    //! \brief A callback implemented by the application to handle logging messages;
    //!
    //! \param severity The severity of the message.
    //! \param msg A null-terminated log message.
    //!
    //! \warning Loggers used in the safety certified runtime must set a maximum message length and truncate
    //!          messages exceeding this length. It is up to the implementer of the derived class to define
    //!          a suitable limit that will prevent buffer overruns, resource exhaustion, and other security
    //!          vulnerabilities in their implementation. The TensorRT safety certified runtime will never
    //!          emit messages longer than 1024 bytes.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
    //!     when multiple execution contexts are used during runtime, or if the same logger is used
    //!     for multiple runtimes, builders, or refitters.
    //!
    virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;

    ILogger() = default;
    virtual ~ILogger() = default;

protected:
// @cond SuppressDoxyWarnings
    ILogger(ILogger const&) = default;
    ILogger(ILogger&&) = default;
    ILogger& operator=(ILogger const&) & = default;
    ILogger& operator=(ILogger&&) & = default;
// @endcond
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in ILogger::Severity enum. \see ILogger::Severity
template <>
struct EnumMaxImpl<ILogger::Severity>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the ILogger::Severity enum.
    static constexpr int32_t kVALUE = 5;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
namespace v_1_0
{

class IGpuAllocator : public IVersionedInterface
{
public:
    //!
    //! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
    //!
    //! \param size The size of the memory block required (in bytes).
    //! \param alignment The required alignment of memory. Alignment will be zero
    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
    //!        An alignment value of zero indicates any alignment is acceptable.
    //! \param flags Reserved for future use. In the current release, 0 will be passed.
    //!
    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
    //!         If an allocation request of size 0 is made, nullptr must be returned.
    //!         If an allocation request cannot be satisfied, nullptr must be returned.
    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
    //!
    TRT_DEPRECATED virtual void* allocate(
        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;

    ~IGpuAllocator() override = default;
    IGpuAllocator() = default;

    //!
    //! \brief A thread-safe callback implemented by the application to resize an existing allocation.
    //!
    //! Only allocations which were allocated with AllocatorFlag::kRESIZABLE will be resized.
    //!
    //! Options are one of:
    //! * resize in place leaving min(oldSize, newSize) bytes unchanged and return the original address
    //! * move min(oldSize, newSize) bytes to a new location of sufficient size and return its address
    //! * return nullptr, to indicate that the request could not be fulfilled.
    //!
    //! If nullptr is returned, TensorRT will assume that resize() is not implemented, and that the
    //! allocation at baseAddr is still valid.
    //!
    //! This method is made available for use cases where delegating the resize
    //! strategy to the application provides an opportunity to improve memory management.
    //! One possible implementation is to allocate a large virtual device buffer and
    //! progressively commit physical memory with cuMemMap. CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
    //! is suggested in this case.
    //!
    //! TensorRT may call realloc to increase the buffer by relatively small amounts.
    //!
    //! \param baseAddr the address of the original allocation, which will have been returned by previously calling
    //!        allocate() or reallocate() on the same object.
    //! \param alignment The alignment used by the original allocation. This will be the same value that was previously
    //!        passed to the allocate() or reallocate() call that returned baseAddr.
    //! \param newSize The new memory size required (in bytes).
    //!
    //! \return The address of the reallocated memory, or nullptr. If a non-null address is returned, it is
    //!         guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
    {
        return nullptr;
    }

    //!
    //! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
    //!
    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
    //!
    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
    //!        allocator object.
    //!
    //! \return True if the acquired memory is released successfully.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
    //!
    TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;

    //!
    //! \brief A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU memory.
    //!
    //! The default behavior is to call method allocate(), which is synchronous and thus loses
    //! any performance benefits of asynchronous allocation. If you want the benefits of asynchronous
    //! allocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
    //! for nvinfer1::IGpuAllocator.
    //!
    //! \param size The size of the memory block required (in bytes).
    //! \param alignment The required alignment of memory. Alignment will be zero
    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
    //!        An alignment value of zero indicates any alignment is acceptable.
    //! \param flags Reserved for future use. In the current release, 0 will be passed.
    //! \param stream specifies the cudaStream for asynchronous usage.
    //!
    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
    //!         If an allocation request of size 0 is made, nullptr must be returned.
    //!         If an allocation request cannot be satisfied, nullptr must be returned.
    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual void* allocateAsync(
        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
    {
        return allocate(size, alignment, flags);
    }

    //!
    //! \brief A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
    //!
    //! The default behavior is to call method deallocate(), which is synchronous and thus loses
    //! any performance benefits of asynchronous deallocation. If you want the benefits of asynchronous
    //! deallocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
    //! for nvinfer1::IGpuAllocator.
    //!
    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
    //!
    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
    //!        allocator object.
    //! \param stream specifies the cudaStream for asynchronous usage.
    //!
    //! \return True if the acquired memory is released successfully.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
    //!       albeit doing so will lose the performance advantage of asynchronous deallocation.
    //!       Either way, it is critical that it not actually free the memory until the current
    //!       stream position is reached.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
    {
        return deallocate(memory);
    }

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IGpuAllocator", 1, 0};
    }

protected:
// @cond SuppressDoxyWarnings
    IGpuAllocator(IGpuAllocator const&) = default;
    IGpuAllocator(IGpuAllocator&&) = default;
    IGpuAllocator& operator=(IGpuAllocator const&) & = default;
    IGpuAllocator& operator=(IGpuAllocator&&) & = default;
// @endcond
};

} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IGpuAllocator
//!
//! \brief Application-implemented class for controlling allocation on the GPU.
//!
//! \warning The lifetime of an IGpuAllocator object must exceed that of all objects that use it.
//!
//! This class is intended as a base class for allocators that implement synchronous allocation.
//! If you want the benefits of asynchronous allocation, you can do either of:
//!
//! * Derive your class from IGpuAllocator and override all four of its virtual methods
//!   for allocation/deallocation, including the two deprecated methods.
//!
//! * Derive your class from IGpuAsyncAllocator and override its two pure virtual
//!   methods for allocation/deallocation.
//!
//! The latter style is preferred because it does not tie code to deprecated methods.
//!
//! \see IGpuAsyncAllocator.
//!
using IGpuAllocator = v_1_0::IGpuAllocator;
|
|||
|
|
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class IRuntime
|
|||
|
|
//!
|
|||
|
|
//! \brief Allows a serialized functionally unsafe engine to be deserialized.
|
|||
|
|
//!
|
|||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
|||
|
|
//!
|
|||
|
|
class IRuntime : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~IRuntime() noexcept = default;
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Sets the DLA core used by the network. Defaults to -1.
    //!
    //! \param dlaCore The DLA core to execute the engine on, in the range [0,getNbDlaCores()).
    //!
    //! This function is used to specify which DLA core to use via indexing, if multiple DLA cores are available.
    //!
    //! \warning if getNbDLACores() returns 0, then this function does nothing.
    //!
    //! \see getDLACore()
    //!
    void setDLACore(int32_t dlaCore) noexcept
    {
        mImpl->setDLACore(dlaCore);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Get the DLA core that the engine executes on.
    //!
    //! \return assigned DLA core or -1 for DLA not present or unset.
    //!
    int32_t getDLACore() const noexcept
    {
        return mImpl->getDLACore();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
    //!
    int32_t getNbDLACores() const noexcept
    {
        return mImpl->getNbDLACores();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the GPU allocator.
    //!
    //! \param allocator Set the GPU allocator to be used by the runtime. All GPU memory acquired will use this
    //!        allocator. If NULL is passed, the default allocator will be used.
    //!
    //! Default: allocateAsync uses cudaMallocAsync if cudaDevAttrMemoryPoolsSupported returns true, otherwise falls
    //! back to cudaMalloc. allocate always uses cudaMalloc.
    //!
    //! If nullptr is passed, the default allocator will be used.
    //!
    void setGpuAllocator(IGpuAllocator* allocator) noexcept
    {
        mImpl->setGpuAllocator(allocator);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from host memory.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the engine.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param blob The memory that holds the serialized engine.
    //! \param size The size of the memory.
    //!
    //! \return The engine, or nullptr if it could not be deserialized.
    //!
    ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
    {
        return mImpl->deserializeCudaEngine(blob, size);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from a stream.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the
    //! engine.
    //!
    //! This deserialization path will reduce host memory usage when weight streaming is enabled.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param streamReader a read-only stream from which TensorRT will deserialize a
    //!        previously serialized engine.
    //!
    //! \return The engine, or nullptr if it could not be deserialized.
    //!
    //! \deprecated Deprecated in TensorRT 10.7. Superseded by deserializeCudaEngine that takes an IStreamReaderV2
    //!             instead of IStreamReader.
    //!
    TRT_DEPRECATED ICudaEngine* deserializeCudaEngine(IStreamReader& streamReader)
    {
        return mImpl->deserializeCudaEngine(streamReader);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and
    //!        device pointers.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the
    //! engine.
    //!
    //! This deserialization path will reduce engine load time when applied with GDS (GPU Direct storage), or when
    //! weight streaming is enabled.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param streamReader a read-only stream from which TensorRT will deserialize a previously serialized engine.
    //!
    //! \return The engine, or nullptr if it could not be deserialized. The pointer may not be valid immediately after
    //!         the function returns.
    //!
    ICudaEngine* deserializeCudaEngine(IStreamReaderV2& streamReader)
    {
        return mImpl->deserializeCudaEngineV2(streamReader);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief get the logger with which the runtime was created
    //!
    //! \return the logger
    //!
    ILogger* getLogger() const noexcept
    {
        return mImpl->getLogger();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the maximum number of threads.
    //!
    //! \param maxThreads The maximum number of threads that can be used by the runtime.
    //! \return True if successful, false otherwise.
    //!
    //! The default value is 1 and includes the current thread.
    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
    //!
    bool setMaxThreads(int32_t maxThreads) noexcept
    {
        return mImpl->setMaxThreads(maxThreads);
    }
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the maximum number of threads that can be used by the runtime.
//!
//! Retrieves the maximum number of threads that can be used by the runtime.
//!
//! \return The maximum number of threads that can be used by the runtime.
//!
//! \see setMaxThreads()
//!
int32_t getMaxThreads() const noexcept
{
    return mImpl->getMaxThreads();
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the directory that will be used by this runtime for temporary files.
|
|||
|
|
//!
|
|||
|
|
//! On some platforms the TensorRT runtime may need to create and use temporary files
|
|||
|
|
//! with read/write/execute permissions to implement runtime functionality.
|
|||
|
|
//!
|
|||
|
|
//! \param path Path to the temporary directory for use, or nullptr.
|
|||
|
|
//!
|
|||
|
|
//! If path is nullptr, then TensorRT will use platform-specific heuristics to pick
|
|||
|
|
//! a default temporary directory if required:
|
|||
|
|
//!
|
|||
|
|
//! - On UNIX/Linux platforms, TensorRT will first try the TMPDIR environment variable, then fall back to /tmp
|
|||
|
|
//! - On Windows, TensorRT will try the TEMP environment variable.
|
|||
|
|
//!
|
|||
|
|
//! See the TensorRT Developer Guide for more information.
|
|||
|
|
//!
|
|||
|
|
//! The default value is nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning If path is not nullptr, it must be a non-empty string representing a relative
|
|||
|
|
//! or absolute path in the format expected by the host operating system.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string path must be null-terminated, and be at most 4096 bytes including the
|
|||
|
|
//! terminator. Note that the operating system may have stricter path length requirements.
|
|||
|
|
//!
|
|||
|
|
//! \warning The process using TensorRT must have rwx permissions for the temporary directory,
|
|||
|
|
//! and the directory shall be configured to disallow other users from modifying created files
|
|||
|
|
//! (e.g. on Linux, if the directory is shared with other users, the sticky bit must be set).
|
|||
|
|
//!
|
|||
|
|
//! \see getTemporaryDirectory()
|
|||
|
|
//!
|
|||
|
|
void setTemporaryDirectory(char const* path) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setTemporaryDirectory(path);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the directory that will be used by this runtime for temporary files.
//!
//! \returns A path to the temporary directory in use, or nullptr if no path is specified.
//!
//! \see setTemporaryDirectory()
//!
char const* getTemporaryDirectory() const noexcept
{
    return mImpl->getTemporaryDirectory();
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the tempfile control flags for this runtime.
|
|||
|
|
//!
|
|||
|
|
//! \param flags The flags to set.
|
|||
|
|
//!
|
|||
|
|
//! The default value is all flags set, i.e.
|
|||
|
|
//!
|
|||
|
|
//! (1U << static_cast<uint32_t>(kALLOW_IN_MEMORY_FILES)) | (1U << static_cast<uint32_t>(kALLOW_TEMPORARY_FILES))
|
|||
|
|
//!
|
|||
|
|
//! \see TempfileControlFlag, TempfileControlFlags, getTempfileControlFlags()
|
|||
|
|
//!
|
|||
|
|
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setTempfileControlFlags(flags);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the tempfile control flags for this runtime.
//!
//! \return The flags currently set.
//!
//! \see TempfileControlFlag, TempfileControlFlags, setTempfileControlFlags()
//!
TempfileControlFlags getTempfileControlFlags() const noexcept
{
    return mImpl->getTempfileControlFlags();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the local plugin registry that can be used by the runtime.
//!
//! \return The local plugin registry that can be used by the runtime.
//!
IPluginRegistry& getPluginRegistry() noexcept
{
    return mImpl->getPluginRegistry();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Load IRuntime from the file.
//!
//! This method loads a runtime library from a shared library file. The runtime can then be used to execute
//! a plan file built with BuilderFlag::kVERSION_COMPATIBLE and BuilderFlag::kEXCLUDE_LEAN_RUNTIME both set
//! and built with the same version of TensorRT as the loaded runtime library.
//!
//! \param path Path to the runtime lean library.
//!
//! \return the runtime library, or nullptr if it could not be loaded
//!
//! \warning The path string must be null-terminated, and be at most 4096 bytes including the terminator.
//!
IRuntime* loadRuntime(char const* path) noexcept
{
    return mImpl->loadRuntime(path);
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set whether the runtime is allowed to deserialize engines with host executable code.
|
|||
|
|
//!
|
|||
|
|
//! \param allowed Whether the runtime is allowed to deserialize engines with host executable code.
|
|||
|
|
//!
|
|||
|
|
//! The default value is false.
|
|||
|
|
//!
|
|||
|
|
void setEngineHostCodeAllowed(bool allowed) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setEngineHostCodeAllowed(allowed);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get whether the runtime is allowed to deserialize engines with host executable code.
//!
//! \return Whether the runtime is allowed to deserialize engines with host executable code.
//!
bool getEngineHostCodeAllowed() const noexcept
{
    return mImpl->getEngineHostCodeAllowed();
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VRuntime* mImpl;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
//!
//! \class IRefitter
//!
//! \brief Updates weights in an engine.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IRefitter : public INoCopy
{
public:
    virtual ~IRefitter() noexcept = default;

    //!
    //! \brief Specify new weights for a layer of given name.
    //!
    //! Returns true on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * There is no such layer by that name.
    //! * The layer does not have weights with the specified role.
    //! * The count of weights is inconsistent with the layer's original specification.
    //! * The type of weights is inconsistent with the layer's original specification.
    //!
    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
    //! behavior.
    //!
    //! \warning The string layerName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
    {
        return mImpl->setWeights(layerName, role, weights);
    }

    //!
    //! \brief Refits associated engine.
    //!
    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
    //! If false is returned, a subset of weights may have been refitted.
    //!
    //! The behavior is undefined if the engine has pending enqueued work.
    //! Provided weights on CPU or GPU can be unset and released, or updated after refitCudaEngine returns.
    //!
    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead.
    //!
    bool refitCudaEngine() noexcept
    {
        return mImpl->refitCudaEngine();
    }

    //!
    //! \brief Get description of missing weights.
    //!
    //! For example, if some Weights have been set, but the engine was optimized
    //! in a way that combines weights, any unsupplied Weights in the combination
    //! are considered missing.
    //!
    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
    //! \param layerNames Where to write the layer names.
    //! \param roles Where to write the weights roles.
    //!
    //! \return The number of missing Weights.
    //!
    //! If layerNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
    {
        return mImpl->getMissing(size, layerNames, roles);
    }

    //!
    //! \brief Get description of all weights that could be refit.
    //!
    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
    //! \param layerNames Where to write the layer names.
    //! \param roles Where to write the weights roles.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If layerNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
    {
        return mImpl->getAll(size, layerNames, roles);
    }

    //!
    //! \brief Update dynamic range for a tensor.
    //!
    //! \param tensorName The name of an ITensor in the network.
    //! \param min The minimum of the dynamic range for the tensor.
    //! \param max The maximum of the dynamic range for the tensor.
    //!
    //! \return True if successful; false otherwise.
    //!
    //! Returns false if there is no Int8 engine tensor derived from
    //! a network tensor of that name. If successful, then getMissing
    //! may report that some weights need to be supplied.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
    {
        return mImpl->setDynamicRange(tensorName, min, max);
    }

    //!
    //! \brief Get minimum of dynamic range.
    //!
    //! \return Minimum of dynamic range.
    //!
    //! If the dynamic range was never set, returns the minimum computed during calibration.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
    {
        return mImpl->getDynamicRangeMin(tensorName);
    }

    //!
    //! \brief Get maximum of dynamic range.
    //!
    //! \return Maximum of dynamic range.
    //!
    //! If the dynamic range was never set, returns the maximum computed during calibration.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
    {
        return mImpl->getDynamicRangeMax(tensorName);
    }

    //!
    //! \brief Get names of all tensors that have refittable dynamic ranges.
    //!
    //! \param size The number of items that can be safely written to a non-null tensorNames.
    //! \param tensorNames Where to write the layer names.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If tensorNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
    {
        return mImpl->getTensorsWithDynamicRange(size, tensorNames);
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }

    //!
    //! \brief Specify new weights of given name.
    //!
    //! \param name The name of the weights to be refit.
    //! \param weights The new weights to associate with the name.
    //!
    //! Returns true on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * The name of weights is nullptr or does not correspond to any refittable weights.
    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
    //! same name.
    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
    //! same name.
    //!
    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
    //! behavior.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setNamedWeights(char const* name, Weights weights) noexcept
    {
        return mImpl->setNamedWeights(name, weights);
    }

    //!
    //! \brief Get names of missing weights.
    //!
    //! For example, if some Weights have been set, but the engine was optimized
    //! in a way that combines weights, any unsupplied Weights in the combination
    //! are considered missing.
    //!
    //! \param size The number of weights names that can be safely written to.
    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
    //!
    //! \return The number of missing Weights.
    //!
    //! If weightsNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
    {
        return mImpl->getMissingWeights(size, weightsNames);
    }

    //!
    //! \brief Get names of all weights that could be refit.
    //!
    //! \param size The number of weights names that can be safely written to.
    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If weightsNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
    {
        return mImpl->getAllWeights(size, weightsNames);
    }

    //!
    //! \brief Get the logger with which the refitter was created.
    //!
    //! \return The logger.
    //!
    ILogger* getLogger() const noexcept
    {
        return mImpl->getLogger();
    }

    //!
    //! \brief Set the maximum number of threads.
    //!
    //! \param maxThreads The maximum number of threads that can be used by the refitter.
    //!
    //! \return True if successful, false otherwise.
    //!
    //! The default value is 1 and includes the current thread.
    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
    //!
    bool setMaxThreads(int32_t maxThreads) noexcept
    {
        return mImpl->setMaxThreads(maxThreads);
    }

    //!
    //! \brief Get the maximum number of threads that can be used by the refitter.
    //!
    //! Retrieves the maximum number of threads that can be used by the refitter.
    //!
    //! \return The maximum number of threads that can be used by the refitter.
    //!
    //! \see setMaxThreads()
    //!
    int32_t getMaxThreads() const noexcept
    {
        return mImpl->getMaxThreads();
    }

    //!
    //! \brief Specify new weights on a specified device of given name.
    //!
    //! \param name The name of the weights to be refitted.
    //! \param weights The new weights on the specified device.
    //! \param location The location (host vs. device) of the new weights.
    //!
    //! \return True on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * The name of the weights is nullptr or does not correspond to any refittable weights.
    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
    //! same name.
    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
    //! same name.
    //!
    //! It is allowed to provide some weights on CPU and others on GPU.
    //! Modifying the weights before the method refitCudaEngine() or refitCudaEngineAsync() completes will result in
    //! undefined behavior.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
    {
        return mImpl->setNamedWeightsWithLocation(name, weights, location);
    }

    //!
    //! \brief Get weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Weights associated with the given name.
    //!
    //! If the weights were never set, returns null weights and reports an error to the refitter errorRecorder.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Weights getNamedWeights(char const* weightsName) const noexcept
    {
        return mImpl->getNamedWeights(weightsName);
    }

    //!
    //! \brief Get location for the weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Location for the weights associated with the given name.
    //!
    //! If the weights were never set, returns TensorLocation::kHOST and reports an error to the refitter errorRecorder.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    TensorLocation getWeightsLocation(char const* weightsName) const noexcept
    {
        return mImpl->getWeightsLocation(weightsName);
    }

    //!
    //! \brief Unset weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return False if the weights were never set, returns true otherwise.
    //!
    //! Unset weights before releasing them.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool unsetNamedWeights(char const* weightsName) noexcept
    {
        return mImpl->unsetNamedWeights(weightsName);
    }

    //!
    //! \brief Set whether to validate weights during refitting.
    //!
    //! \param weightsValidation Indicate whether to validate weights during refitting.
    //!
    //! When set to true, TensorRT will validate weights during FP32 to FP16/BF16 weights conversions or
    //! sparsifying weights in the refit call. If provided weights are not proper for some weights transformations,
    //! TensorRT will issue a warning and continue the transformation for minor issues (such as overflow during
    //! narrowing conversion), or issue an error and stop the refitting process for severe issues (such as sparsifying
    //! dense weights). By default the flag is true. Set the flag to false for faster refitting performance.
    //!
    void setWeightsValidation(bool weightsValidation) noexcept
    {
        return mImpl->setWeightsValidation(weightsValidation);
    }

    //!
    //! \brief Get whether to validate weights values during refitting.
    //!
    bool getWeightsValidation() const noexcept
    {
        return mImpl->getWeightsValidation();
    }

    //!
    //! \brief Enqueue weights refitting of the associated engine on the given stream.
    //!
    //! \param stream The stream to enqueue the weights updating task.
    //!
    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
    //! If false is returned, a subset of weights may have been refitted.
    //!
    //! The behavior is undefined if the engine has pending enqueued work on a different stream from the provided one.
    //! Provided weights on CPU can be unset and released, or updated after refitCudaEngineAsync returns.
    //! Freeing or updating of the provided weights on GPU can be enqueued on the same stream after refitCudaEngineAsync
    //! returns.
    //!
    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead. The weights
    //! updating task should use the same stream as the one used for the refit call.
    //!
    bool refitCudaEngineAsync(cudaStream_t stream) noexcept
    {
        return mImpl->refitCudaEngineAsync(stream);
    }

    //!
    //! \brief Get the Weights prototype associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Weights prototype associated with the given name.
    //!
    //! The type and count of weights prototype is the same as weights used for engine building. The values property
    //! is nullptr for weights prototypes. The count of the weights prototype is -1 when the name of the weights is
    //! nullptr or does not correspond to any refittable weights.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Weights getWeightsPrototype(char const* weightsName) const noexcept
    {
        return mImpl->getWeightsPrototype(weightsName);
    }

protected:
    apiv::VRefitter* mImpl; //!< Opaque implementation; all public methods delegate to it.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum OptProfileSelector
//!
//! \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions),
//! select whether we are interested in the minimum, optimum, or maximum values for these parameters.
//! The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value
//! is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime.
//!
//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues()
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< This is used to set or get the minimum permitted value for dynamic dimensions etc.
    kOPT = 1, //!< This is used to set or get the value that is used in the optimization (kernel selection).
    kMAX = 2  //!< This is used to set or get the maximum permitted value for dynamic dimensions etc.
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Number of different values of OptProfileSelector enum.
//!
//! \see OptProfileSelector
//!
template <>
constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
{
    // kMIN, kOPT, kMAX — must be kept in sync with the OptProfileSelector enumerators above.
    return 3;
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class IOptimizationProfile
|
|||
|
|
//! \brief Optimization profile for dynamic input dimensions and shape tensors.
|
|||
|
|
//!
|
|||
|
|
//! When building an ICudaEngine from an INetworkDefinition that has dynamically resizable inputs (at least
|
|||
|
|
//! one input tensor has one or more of its dimensions specified as -1) or shape input tensors, users need to specify
|
|||
|
|
//! at least one optimization profile. Optimization profiles are numbered 0, 1, ...
|
|||
|
|
//! The first optimization profile that has been defined (with index 0) will be used by the ICudaEngine whenever no
|
|||
|
|
//! optimization profile has been selected explicitly. If none of the inputs are dynamic, the default optimization
|
|||
|
|
//! profile will be generated automatically unless it is explicitly provided by the user (this is possible but not
|
|||
|
|
//! required in this case). If more than a single optimization profile is defined, users may set a target how
|
|||
|
|
//! much additional weight space should be maximally allocated to each additional profile (as a fraction of the
|
|||
|
|
//! maximum, unconstrained memory).
|
|||
|
|
//!
|
|||
|
|
//! Users set optimum input tensor dimensions, as well as minimum and maximum input tensor dimensions. The builder
|
|||
|
|
//! selects the kernels that result in the lowest runtime for the optimum input tensor dimensions, and are valid for
|
|||
|
|
//! all input tensor sizes in the valid range between minimum and maximum dimensions. A runtime error will be raised
|
|||
|
|
//! if the input tensor dimensions fall outside the valid range for this profile. Likewise, users provide minimum,
|
|||
|
|
//! optimum, and maximum values for all shape tensor input values.
|
|||
|
|
//!
|
|||
|
|
//! \see IBuilderConfig::addOptimizationProfile()
|
|||
|
|
//!
|
|||
|
|
class IOptimizationProfile : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
//!
//! \brief Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
//!
//! This function must be called three times (for the minimum, optimum, and maximum) for any network input tensor
//! that has dynamic dimensions. If minDims, optDims, and maxDims are the minimum, optimum, and maximum dimensions,
//! and networkDims are the dimensions for this input tensor that are provided to the INetworkDefinition object,
//! then the following conditions must all hold:
//!
//! (1) minDims.nbDims == optDims.nbDims == maxDims.nbDims == networkDims.nbDims
//! (2) 0 <= minDims.d[i] <= optDims.d[i] <= maxDims.d[i] for i = 0, ..., networkDims.nbDims-1
//! (3) if networkDims.d[i] != -1, then minDims.d[i] == optDims.d[i] == maxDims.d[i] == networkDims.d[i]
//!
//! This function may (but need not be) called for an input tensor that does not have dynamic dimensions. In this
//! case, the third argument must always equal networkDims.
//!
//! \param inputName The input tensor name
//! \param select Whether to set the minimum, optimum, or maximum dimensions
//! \param dims The minimum, optimum, or maximum dimensions for this input tensor
//!
//! \return false if an inconsistency was detected (e.g. the rank does not match another dimension that was
//! previously set for the same input), true if no inconsistency was detected. Note that inputs can be
//! validated only partially; a full validation is performed at engine build time.
//!
//! \warning If run on DLA, minimum, optimum, and maximum dimensions must be the same.
//!
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
{
    return mImpl->setDimensions(inputName, select, dims);
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the dimensions have not been previously set via setDimensions(), return an invalid Dims with nbDims == -1.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getDimensions(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
|
|||
|
|
//! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
|
|||
|
|
//! This function must not be called for any input tensor that is not a shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
|
|||
|
|
//! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
|
|||
|
|
//! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
|
|||
|
|
//! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
|
|||
|
|
//!
|
|||
|
|
//! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
|
|||
|
|
//! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int32_t.
|
|||
|
|
//!
|
|||
|
|
//! Examples:
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
|
|||
|
|
//! The corresponding minVal[i] should be -1.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
|
|||
|
|
//! The values could be positive, negative, or zero.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
|
|||
|
|
//! contain any non-positive values that yield a valid slice operation.
|
|||
|
|
//!
|
|||
|
|
//! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
|
|||
|
|
//!
|
|||
|
|
//! \param inputName The input tensor name
|
|||
|
|
//! \param select Whether to set the minimum, optimum, or maximum input values.
|
|||
|
|
//! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
|
|||
|
|
//! For multidimensional tensors, the array is in row-major order.
|
|||
|
|
//! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
|
|||
|
|
//!
|
|||
|
|
//! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
|
|||
|
|
//! tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
|
|||
|
|
//! time.
|
|||
|
|
//!
|
|||
|
|
//! \warning If run on DLA, minimum, optimum, and maximum shape values must to be the same.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \warning When setShapeValuesV2 is called after setShapeValues, a following call to getShapeValues will
|
|||
|
|
//! return nullptr. Vice versa, a call to setShapeValues undoes the effects of setShapeValuesV2.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.11. Superseded by setShapeValuesV2().
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED bool setShapeValues(
|
|||
|
|
char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setShapeValues(inputName, select, values, nbValues);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the number of values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This will return the number of shape values if setShapeValues() has been called before for this input tensor.
|
|||
|
|
//! Otherwise, return -1.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
int32_t getNbShapeValues(char const* inputName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getNbShapeValues(inputName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the shape values have not been set previously with setShapeValues(), this returns nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.11. Superseded by getShapeValuesV2().
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getShapeValues(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set a target for extra GPU memory that may be used by this profile.
|
|||
|
|
//!
|
|||
|
|
//! \param target Additional memory that the builder should aim to maximally allocate for this profile, as a
|
|||
|
|
//! fraction of the memory it would use if the user did not impose any constraints on memory. This
|
|||
|
|
//! unconstrained case is the default; it corresponds to target == 1.0. If target == 0.0, the builder
|
|||
|
|
//! aims to create the new optimization profile without allocating any additional weight memory.
|
|||
|
|
//! Valid inputs lie between 0.0 and 1.0. This parameter is only a hint, and TensorRT does not guarantee
|
|||
|
|
//! that the target will be reached. This parameter is ignored for the first (default) optimization profile
|
|||
|
|
//! that is defined.
|
|||
|
|
//!
|
|||
|
|
//! \return true if the input is in the valid range (between 0 and 1 inclusive), else false.
|
|||
|
|
//!
|
|||
|
|
bool setExtraMemoryTarget(float target) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setExtraMemoryTarget(target);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the extra memory target that has been defined for this profile.
|
|||
|
|
//!
|
|||
|
|
//! This defaults to 1.0F.
|
|||
|
|
//!
|
|||
|
|
//! \return the valid value set by setExtraMemoryTarget or 1.0F.
|
|||
|
|
//!
|
|||
|
|
float getExtraMemoryTarget() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getExtraMemoryTarget();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Check whether the optimization profile can be passed to an IBuilderConfig object.
|
|||
|
|
//!
|
|||
|
|
//! This function performs partial validation, by e.g. checking that whenever one of the minimum, optimum, or
|
|||
|
|
//! maximum dimensions of a tensor have been set, the others have also been set and have the same rank, as
|
|||
|
|
//! well as checking that the optimum dimensions are always as least as large as the minimum dimensions, and
|
|||
|
|
//! that the maximum dimensions are at least as large as the optimum dimensions. Some validation steps require
|
|||
|
|
//! knowledge of the network definition and are deferred to engine build time.
|
|||
|
|
//!
|
|||
|
|
//!
|
|||
|
|
//! \return true if the optimization profile is valid and may be passed to an IBuilderConfig, else false.
|
|||
|
|
//!
|
|||
|
|
bool isValid() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isValid();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
|
|||
|
|
//! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
|
|||
|
|
//! This function must not be called for any input tensor that is not a shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
|
|||
|
|
//! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
|
|||
|
|
//! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
|
|||
|
|
//! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
|
|||
|
|
//!
|
|||
|
|
//! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
|
|||
|
|
//! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int64_t.
|
|||
|
|
//!
|
|||
|
|
//! Examples:
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
|
|||
|
|
//! The corresponding minVal[i] should be -1.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
|
|||
|
|
//! The values could be positive, negative, or zero.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
|
|||
|
|
//! contain any non-positive values that yield a valid slice operation.
|
|||
|
|
//!
|
|||
|
|
//! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
|
|||
|
|
//!
|
|||
|
|
//! \param inputName The input tensor name
|
|||
|
|
//! \param select Whether to set the minimum, optimum, or maximum input values.
|
|||
|
|
//! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
|
|||
|
|
//! For multidimensional tensors, the array is in row-major order.
|
|||
|
|
//! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
|
|||
|
|
//!
|
|||
|
|
//! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
|
|||
|
|
//! tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
|
|||
|
|
//! time.
|
|||
|
|
//!
|
|||
|
|
//! \warning If run on DLA, minimum, optimum, and maximum shape values must to be the same.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \warning When setShapeValues is called after setShapeValuesV2, input shape would be overwritten as 32 bit
|
|||
|
|
//! and getShapeValuesV2 would return nullptr.
|
|||
|
|
//!
|
|||
|
|
bool setShapeValuesV2(
|
|||
|
|
char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the shape values have not been set previously with setShapeValuesV2(), this returns nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getShapeValuesV2(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VOptimizationProfile* mImpl;
|
|||
|
|
virtual ~IOptimizationProfile() noexcept = default;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
//!
//! \enum TacticSource
//!
//! \brief List of tactic sources for TensorRT.
//!
//! \see TacticSources, IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
//!
enum class TacticSource : int32_t
{
    //! cuBLAS tactics. Disabled by default.
    //! \note Disabling kCUBLAS will cause the cuBLAS handle passed to plugins in attachToContext to be null.
    //! \deprecated Deprecated in TensorRT 10.0.
    kCUBLAS TRT_DEPRECATED_ENUM = 0,

    //! cuBLAS LT tactics. Disabled by default.
    //! \deprecated Deprecated in TensorRT 9.0.
    kCUBLAS_LT TRT_DEPRECATED_ENUM = 1,

    //! cuDNN tactics. Disabled by default.
    //! \note Disabling kCUDNN will cause the cuDNN handle passed to plugins in attachToContext to be null.
    //! \deprecated Deprecated in TensorRT 10.0.
    kCUDNN TRT_DEPRECATED_ENUM = 2,

    //! Enables convolution tactics implemented with edge mask tables. These tactics tradeoff memory for performance by
    //! consuming additional memory space proportional to the input size.
    //! Enabled by default.
    kEDGE_MASK_CONVOLUTIONS = 3,

    //! Enables convolution tactics implemented with source-code JIT fusion. The engine building time may increase
    //! when this is enabled. Enabled by default.
    kJIT_CONVOLUTIONS = 4,
};
|
|||
|
|
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<TacticSource>() noexcept
|
|||
|
|
{
|
|||
|
|
return 5;
|
|||
|
|
} //!< Maximum number of tactic sources in TacticSource enum. \see TacticSource
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents a collection of one or more TacticSource values
//! combined using bitwise-OR operations.
//!
//! \see IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
//!
using TacticSources = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum ProfilingVerbosity
//!
//! \brief List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
//!
//! \see IBuilderConfig::setProfilingVerbosity(),
//!      IBuilderConfig::getProfilingVerbosity(),
//!      IEngineInspector
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names. This is the default setting.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information including layer names and layer parameters.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of profile verbosity levels in ProfilingVerbosity enum. \see ProfilingVerbosity
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents one or more SerializationFlag values using binary OR
//! operations, e.g., 1U << SerializationFlag::kEXCLUDE_LEAN_RUNTIME
//!
//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
//!
using SerializationFlags = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum SerializationFlag
//!
//! \brief List of valid flags that the engine can enable when serializing the bytes.
//!
//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0,      //!< Exclude the weights that can be refitted.
    kEXCLUDE_LEAN_RUNTIME = 1, //!< Exclude the lean runtime.
    kINCLUDE_REFIT = 2,        //!< Remain refittable if originally so.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of serialization flags in SerializationFlag enum. \see SerializationFlag
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \class ISerializationConfig
//!
//! \brief Holds properties for configuring an engine to serialize the binary.
//!
//! \see SerializationFlag
//!
class ISerializationConfig : public INoCopy
{
public:
    virtual ~ISerializationConfig() noexcept = default;

    //!
    //! \brief Set the serialization flags to turn on for this config.
    //!
    //! The flags are listed in the SerializationFlag enum.
    //!
    //! \param serializationFlags The serialization flags for an engine.
    //!
    //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
    //!
    //! \see getFlags()
    //!
    bool setFlags(SerializationFlags serializationFlags) noexcept
    {
        return mImpl->setFlags(serializationFlags);
    }

    //!
    //! \brief Get the serialization flags for this config.
    //!
    //! \return The serialization flags as a bitmask.
    //!
    //! \see setFlags()
    //!
    SerializationFlags getFlags() const noexcept
    {
        return mImpl->getFlags();
    }

    //!
    //! \brief Clear a serialization flag.
    //!
    //! Clears the serialization flag from the config.
    //!
    //! \see setFlags()
    //!
    bool clearFlag(SerializationFlag serializationFlag) noexcept
    {
        return mImpl->clearFlag(serializationFlag);
    }

    //!
    //! \brief Set a serialization flag.
    //!
    //! Add the input serialization flag to the already enabled flags.
    //!
    //! \see setFlags()
    //!
    bool setFlag(SerializationFlag serializationFlag) noexcept
    {
        return mImpl->setFlag(serializationFlag);
    }

    //!
    //! \brief Returns true if the serialization flag is set
    //!
    //! \see getFlags()
    //!
    //! \return True if flag is set, false if unset.
    //!
    bool getFlag(SerializationFlag serializationFlag) const noexcept
    {
        return mImpl->getFlag(serializationFlag);
    }

protected:
    //! Pointer to the API-version-stable implementation object; all public methods delegate to it.
    apiv::VSerializationConfig* mImpl;
};
|
|||
|
|
|
|||
|
|
//!
//! \enum ExecutionContextAllocationStrategy
//!
//! \brief Different memory allocation behaviors for IExecutionContext.
//!
//! IExecutionContext requires a block of device memory for internal activation tensors during inference. The user can
//! either let the execution context manage the memory in various ways or allocate the memory themselves.
//!
//! \see ICudaEngine::createExecutionContext()
//! \see IExecutionContext::setDeviceMemory()
//!
enum class ExecutionContextAllocationStrategy : int32_t
{
    kSTATIC = 0,            //!< Default static allocation with the maximum size across all profiles.
    kON_PROFILE_CHANGE = 1, //!< Reallocate for a profile when it's selected.
    kUSER_MANAGED = 2,      //!< The user supplies custom allocation to the execution context.
};
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
|
|||
|
|
//!
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//!
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
//! \class IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
//! \brief A class for runtime configuration. This class is used during execution context creation.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntime, IBuilderConfig
|
|||
|
|
//!
|
|||
|
|
class IRuntimeConfig : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~IRuntimeConfig() noexcept = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the execution context allocation strategy. Default value is kSTATIC.
|
|||
|
|
//!
|
|||
|
|
//! \param strategy The execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setExecutionContextAllocationStrategy(strategy);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
//! \return The execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getExecutionContextAllocationStrategy();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VRuntimeConfig* mImpl;
|
|||
|
|
}; // class IRuntimeConfig
|
|||
|
|
|
|||
|
|
//!
//! \enum EngineStat
//!
//! \brief The kind of engine statistics that can be queried from the ICudaEngine.
//!
//! \see ICudaEngine::getEngineStat()
//! \see BuilderFlag::kSTRIP_PLAN
//!
enum class EngineStat : int32_t
{
    //! Return the total weight size in bytes.
    kTOTAL_WEIGHTS_SIZE = 0,

    //! Return the stripped weight size in bytes for engines built with BuilderFlag::kSTRIP_PLAN.
    kSTRIPPED_WEIGHTS_SIZE = 1,
};
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Maximum number of engine statistic kinds in EngineStat enum.
|
|||
|
|
//!
|
|||
|
|
//! \see EngineStat
|
|||
|
|
//!
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<EngineStat>() noexcept
|
|||
|
|
{
|
|||
|
|
return 2;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class ICudaEngine
|
|||
|
|
//!
|
|||
|
|
//! \brief An engine for executing inference on a built network, with functionally unsafe features.
|
|||
|
|
//!
|
|||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
|||
|
|
//!
|
|||
|
|
class ICudaEngine : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~ICudaEngine() noexcept = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get shape of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return shape of the tensor, with -1 in place of each dynamic runtime dimension,
|
|||
|
|
//! or Dims{-1, {}} if the provided name does not map to an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
Dims getTensorShape(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorShape(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Determine the required data type for a buffer from its tensor name.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return The type of the data in the buffer, or DataType::kFLOAT if the provided name does not map to an input or
|
|||
|
|
//! output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
DataType getTensorDataType(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorDataType(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the number of layers in the network.
|
|||
|
|
//!
|
|||
|
|
//! The number of layers in the network is not necessarily the number in the original network definition, as layers
|
|||
|
|
//! may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer
|
|||
|
|
//! tables, such as when aggregating profiling data over a number of executions.
|
|||
|
|
//!
|
|||
|
|
//! \return The number of layers in the network.
|
|||
|
|
//!
|
|||
|
|
int32_t getNbLayers() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getNbLayers();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Serialize the network to a stream.
|
|||
|
|
//!
|
|||
|
|
//! \return A IHostMemory object that contains the serialized engine.
|
|||
|
|
//!
|
|||
|
|
//! The network may be deserialized with IRuntime::deserializeCudaEngine().
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntime::deserializeCudaEngine()
|
|||
|
|
//!
|
|||
|
|
IHostMemory* serialize() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->serialize();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an execution context and specify the strategy for allocating internal activation memory.
|
|||
|
|
//!
|
|||
|
|
//! The default value for the allocation strategy is ExecutionContextAllocationStrategy::kSTATIC, which means the
|
|||
|
|
//! context will pre-allocate a block of device memory that is sufficient for all profiles. The newly created
|
|||
|
|
//! execution context will be assigned optimization profile 0. If an error recorder has been set for the engine, it
|
|||
|
|
//! will also be passed to the execution context.
|
|||
|
|
//!
|
|||
|
|
//! \see IExecutionContext
|
|||
|
|
//! \see IExecutionContext::setOptimizationProfileAsync()
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//!
|
|||
|
|
IExecutionContext* createExecutionContext(
|
|||
|
|
ExecutionContextAllocationStrategy strategy = ExecutionContextAllocationStrategy::kSTATIC) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContext(strategy);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get whether an input or output tensor must be on GPU or CPU.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return TensorLocation::kDEVICE if tensorName must be on GPU, or TensorLocation::kHOST if on CPU, or
|
|||
|
|
//! TensorLocation::kDEVICE if the provided name does not map to an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! The location is established at build time. E.g. shape tensors inputs are typically required to be on the CPU.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
TensorLocation getTensorLocation(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorLocation(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief True if tensor is required as input for shape calculations or is output from shape calculations.
|
|||
|
|
//!
|
|||
|
|
//! Return true for either of the following conditions:
|
|||
|
|
//!
|
|||
|
|
//! * The tensor is a network input, and its value is required for IExecutionContext::getTensorShape()
|
|||
|
|
//! to return the shape of a network output.
|
|||
|
|
//!
|
|||
|
|
//! * The tensor is a network output, and inferShape() will compute its values.
|
|||
|
|
//!
|
|||
|
|
//! For example, if a network uses an input tensor "foo" as an addend to an IElementWiseLayer
|
|||
|
|
//! that computes the "reshape dimensions" for IShuffleLayer, then isShapeInferenceIO("foo") == true.
|
|||
|
|
//! If the network copies said input tensor "foo" to an output "bar", then
|
|||
|
|
//! isShapeInferenceIO("bar") == true and IExecutionContext::inferShapes() will write to "bar".
|
|||
|
|
//!
|
|||
|
|
bool isShapeInferenceIO(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isShapeInferenceIO(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Determine whether a tensor is an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return kINPUT if tensorName is an input, kOUTPUT if tensorName is an output, or kNONE if neither.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorIOMode(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the input tensor name that an output tensor should alias with.
|
|||
|
|
//!
|
|||
|
|
//! Some operations (e.g., KVCacheUpdate) require that certain output tensors share memory with input tensors.
|
|||
|
|
//! This method returns the name of the input tensor that a given output tensor should alias with.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return The name of the input tensor to alias with, or nullptr if tensorName is not an output tensor or
|
|||
|
|
//! the output does not alias with any input.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the
|
|||
|
|
//! terminator.
|
|||
|
|
//!
|
|||
|
|
TRT_NODISCARD char const* getAliasedInputTensor(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getAliasedInputTensor(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief create an execution context without any device memory allocated
|
|||
|
|
//!
|
|||
|
|
//! The memory for execution of this device context must be supplied by the application.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by createExecutionContext() with parameter.
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContextWithoutDeviceMemory();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an execution context with TensorRT JIT runtime config.
|
|||
|
|
//!
|
|||
|
|
//! \param runtimeConfig The runtime config for TensorRT JIT.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
IExecutionContext* createExecutionContext(IRuntimeConfig* runtimeConfig) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create a runtime config for TensorRT JIT.
|
|||
|
|
//! The caller is responsible for ownership of the returned IRuntimeConfig object.
|
|||
|
|
//!
|
|||
|
|
//! \return A IRuntimeConfig object.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
IRuntimeConfig* createRuntimeConfig() noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createRuntimeConfig();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the maximum device memory required by the context over all profiles.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeV2().
    //!
    //! \see IExecutionContext::setDeviceMemory()
    //!
    TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
    {
        return mImpl->getDeviceMemorySize();
    }

    //!
    //! \brief Return the maximum device memory required by the context for a profile.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeForProfileV2(int32_t).
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //!
    TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
    {
        return mImpl->getDeviceMemorySizeForProfile(profileIndex);
    }

    //!
    //! \brief Return the maximum device memory required by the context over all profiles.
    //!
    //! This API is stateful, so its call returns different values based on the following calls:
    //! * setWeightStreamingBudget()
    //! * setWeightStreamingBudgetV2()
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //! \see setWeightStreamingBudget()
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getDeviceMemorySizeV2() const noexcept
    {
        return mImpl->getDeviceMemorySizeV2();
    }

    //!
    //! \brief Return the maximum device memory required by the context for a profile.
    //!
    //! This API is stateful, so its call returns different values based on the following calls:
    //! * setWeightStreamingBudget()
    //! * setWeightStreamingBudgetV2()
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //! \see setWeightStreamingBudget()
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
    {
        return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return true if an engine can be refit.
    //!
    //! \see nvinfer1::createInferRefitter()
    //!
    bool isRefittable() const noexcept
    {
        return mImpl->isRefittable();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the number of bytes per component of an element, or -1 if the
    //! tensor is not vectorized or provided name does not map to an input or output tensor.
    //!
    //! The vector component size is returned if getTensorVectorizedDim(tensorName) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorBytesPerComponent with profileIndex when there are multiple
    //! profiles.
    //!
    //! \see getTensorVectorizedDim()
    //! \see getTensorBytesPerComponent(tensorName, profileIndex)
    //!
    int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
    {
        return mImpl->getTensorBytesPerComponent(tensorName);
    }

    //!
    //! \brief Return the number of bytes per component of an element of a given profile, or -1 if the tensor is not
    //! vectorized or provided name does not map to an input or output tensor.
    //!
    //! The vector component size is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getTensorVectorizedDim(tensorName, profileIndex)
    //!
    int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the number of components included in one element, or -1 if tensor is
    //! not vectorized or if the provided name does not map to an input or output tensor.
    //!
    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there
    //! are multiple profiles in the engine, use getTensorComponentsPerElement with profileIndex when there are
    //! multiple profiles.
    //!
    //! \see getTensorVectorizedDim()
    //! \see getTensorComponentsPerElement(tensorName, profileIndex)
    //!
    int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
    {
        return mImpl->getTensorComponentsPerElement(tensorName);
    }

    //!
    //! \brief Return the number of components included in one element of given profile, or -1 if tensor is not
    //! vectorized or the provided name does not map to an input or output tensor.
    //!
    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getTensorVectorizedDim(tensorName, profileIndex)
    //!
    int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or
    //! output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning This API can only return the tensor format of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorFormat with profileIndex when there are multiple profiles.
    //!
    //! \see getTensorFormat(tensorName, profileIndex)
    //!
    TensorFormat getTensorFormat(char const* tensorName) const noexcept
    {
        return mImpl->getTensorFormat(tensorName);
    }

    //!
    //! \brief Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map to
    //! an input or output tensor.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorFormatV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the human readable description of the tensor format, or empty string if the provided name does not
    //! map to an input or output tensor.
    //!
    //! The description includes the order, vectorization, data type, and strides.
    //! Examples are shown as follows:
    //!     Example 1: kCHW + FP32
    //!         "Row-major linear FP32 format"
    //!     Example 2: kCHW2 + FP16
    //!         "Two-wide channel vectorized row-major FP16 format"
    //!     Example 3: kHWC8 + FP16 + Line Stride = 32
    //!         "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorFormatDesc with profileIndex when there are multiple profiles.
    //!
    char const* getTensorFormatDesc(char const* tensorName) const noexcept
    {
        return mImpl->getTensorFormatDesc(tensorName);
    }

    //!
    //! \brief Return the human readable description of the tensor format of given profile, or empty string if the
    //! provided name does not map to an input or output tensor.
    //!
    //! The description includes the order, vectorization, data type, and strides.
    //! Examples are shown as follows:
    //!     Example 1: kCHW + FP32
    //!         "Row-major linear FP32 format"
    //!     Example 2: kCHW2 + FP16
    //!         "Two-wide channel vectorized row-major FP16 format"
    //!     Example 3: kHWC8 + FP16 + Line Stride = 32
    //!         "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the dimension index that the buffer is vectorized, or -1 if the provided name does not
    //! map to an input or output tensor.
    //!
    //! Specifically -1 is returned if scalars per vector is 1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorVectorizedDim with profileIndex when there are multiple profiles.
    //!
    int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
    {
        return mImpl->getTensorVectorizedDim(tensorName);
    }

    //!
    //! \brief Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name
    //! does not map to an input or output tensor.
    //!
    //! Specifically -1 is returned if scalars per vector is 1.
    //!
    //! \param tensorName The name of an input.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns the name of the network associated with the engine.
    //!
    //! The name is set during network creation and is retrieved after
    //! building or deserialization.
    //!
    //! \see INetworkDefinition::setName(), INetworkDefinition::getName()
    //!
    //! \return A null-terminated C-style string representing the name of the network.
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

    //!
    //! \brief Get the number of optimization profiles defined for this engine.
    //!
    //! \return Number of optimization profiles. It is always at least 1.
    //!
    //! \see IExecutionContext::setOptimizationProfileAsync()
    //!
    int32_t getNbOptimizationProfiles() const noexcept
    {
        return mImpl->getNbOptimizationProfiles();
    }

    //!
    //! \brief Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimization
    //! profile.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum dimensions for this input tensor.
    //!
    //! \return The minimum / optimum / maximum dimensions for an input tensor in this profile.
    //! If the profileIndex is invalid or provided name does not map to an input tensor, return Dims{-1, {}}
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileShape(tensorName, profileIndex, select);
    }

    //!
    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
    //! its name under an optimization profile. These correspond to the values set using
    //! IOptimizationProfile::setShapeValues when the engine was built.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
    //!
    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
    //! nullptr.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.11. Superseded by getProfileTensorValuesV2().
    //! \warning If input shapes are set with setShapeValuesV2, getProfileTensorValues will return nullptr
    //!
    TRT_DEPRECATED int32_t const* getProfileTensorValues(
        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Determine what execution capability this engine has.
    //!
    //! If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid.
    //! If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid.
    //! If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions
    //! are valid.
    //!
    //! \return The EngineCapability flag that the engine was built for.
    //!
    EngineCapability getEngineCapability() const noexcept
    {
        return mImpl->getEngineCapability();
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        return mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Query whether the engine was built with an implicit batch dimension.
    //!
    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
    //!
    //! \see createNetworkV2
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
    //!
    TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
    {
        return mImpl->hasImplicitBatchDimension();
    }

    //!
    //! \brief return the tactic sources required by this engine.
    //!
    //! The value returned is equal to zero or more tactics sources set
    //! at build time via setTacticSources() in IBuilderConfig. Sources
    //! set by the latter but not returned by \ref ICudaEngine::getTacticSources
    //! do not reduce overall engine execution time, and can be removed from
    //! future builds to reduce build time.
    //!
    //! \see IBuilderConfig::setTacticSources()
    //!
    TacticSources getTacticSources() const noexcept
    {
        return mImpl->getTacticSources();
    }

    //!
    //! \brief Return the \ref ProfilingVerbosity the builder config was set to when the engine was built.
    //!
    //! \return the profiling verbosity the builder config was set to when the engine was built.
    //!
    //! \see IBuilderConfig::setProfilingVerbosity()
    //!
    ProfilingVerbosity getProfilingVerbosity() const noexcept
    {
        return mImpl->getProfilingVerbosity();
    }

    //!
    //! \brief Create a new engine inspector which prints the layer information in an engine or an execution context.
    //!
    //! \see IEngineInspector.
    //!
    IEngineInspector* createEngineInspector() const noexcept
    {
        return mImpl->createEngineInspector();
    }

    //!
    //! \brief Return number of IO tensors.
    //!
    //! It is the number of input and output tensors for the network from which the engine was built.
    //! The names of the IO tensors can be discovered by calling getIOTensorName(i) for i in 0 to getNbIOTensors()-1.
    //!
    //! \see getIOTensorName()
    //!
    int32_t getNbIOTensors() const noexcept
    {
        return mImpl->getNbIOTensors();
    }

    //!
    //! \brief Return name of an IO tensor.
    //!
    //! \param index value between 0 and getNbIOTensors()-1
    //!
    //! \see getNbIOTensors()
    //!
    char const* getIOTensorName(int32_t index) const noexcept
    {
        return mImpl->getIOTensorName(index);
    }

    //!
    //! \brief Return the hardware compatibility level of this engine.
    //!
    //! \return hardwareCompatibilityLevel The level of hardware
    //!        compatibility.
    //!
    HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
    {
        return mImpl->getHardwareCompatibilityLevel();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the number of auxiliary streams used by this engine.
    //!
    //! This number will be less than or equal to the maximum allowed number of auxiliary streams set by
    //! IBuilderConfig::setMaxAuxStreams() API call when the engine was built.
    //!
    //! \return the number of auxiliary streams used by this engine.
    //!
    //! \see IBuilderConfig::setMaxAuxStreams(), IExecutionContext::setAuxStreams()
    //!
    int32_t getNbAuxStreams() const noexcept
    {
        return mImpl->getNbAuxStreams();
    }

    //!
    //! \brief Create a serialization configuration object.
    //!
    //! \see ISerializationConfig
    //!
    ISerializationConfig* createSerializationConfig() noexcept
    {
        return mImpl->createSerializationConfig();
    }

    //!
    //! \brief Serialize the network to a stream with the provided SerializationConfig.
    //!
    //! \return An IHostMemory object that contains the serialized engine.
    //!
    //! The network may be deserialized with IRuntime::deserializeCudaEngine().
    //! Serializing plan file with SerializationFlag::kEXCLUDE_WEIGHTS requires building the engine with kREFIT,
    //! kREFIT_IDENTICAL or kREFIT_INDIVIDUAL.
    //!
    //! The only applicable scenario for SerializationFlag::kINCLUDE_REFIT is when serializing weight-stripping
    //! engines without kEXCLUDE_WEIGHTS. By default, the resulting serialized engine is unrefittable. Setting
    //! SerializationFlag::kINCLUDE_REFIT ensures that the serialized engine remains refittable.
    //!
    //! \see IRuntime::deserializeCudaEngine()
    //!
    IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept
    {
        return mImpl->serializeWithConfig(config);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Limit the maximum amount of GPU memory usable for network weights
    //! in bytes.
    //!
    //! \param gpuMemoryBudget  This parameter may take on 3 types of values:
    //!  -1: Allows TensorRT to choose the budget according to the streamable weights size.
    //!      Free CUDA memory will be queried at createExecutionContext() and accordingly:
    //!       * If streamable weights all fit: weight streaming is not required and disabled.
    //!       * Otherwise: Budget is set to getMinimumWeightStreamingBudget
    //!   0: (default) Disables weight streaming. The execution may fail if the network is too large for GPU memory.
    //!  >0: The maximum bytes of GPU memory that weights can occupy. It must be bounded by
    //!      [getMinimumWeightStreamingBudget, free GPU memory)].
    //!
    //! By setting a weight limit, users can expect a GPU memory usage reduction
    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
    //! when gpuMemoryBudget is set to getMinimumWeightStreamingBudget(). Creating additional
    //! IExecutionContexts will increase memory usage by O(getMinimumStreamingBudget()).
    //!
    //! Streaming larger amounts of memory will likely result in lower performance
    //! except in some boundary cases where streaming weights allows the user to
    //! run larger batch sizes. The higher throughput offsets the increased
    //! latency in these cases. Tuning the value of the memory limit is
    //! recommended for best performance.
    //!
    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
    //! streaming or destroying the ICudaEngine.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
    //!
    //! \return true if the memory limit is valid and the call was successful, false otherwise.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see getWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
    {
        return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
    }

    //!
    //! \brief Returns the current weight streaming device memory budget in bytes.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible
    //!          values.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING,
    //! \see setWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
    {
        return mImpl->getWeightStreamingBudget();
    }

    //!
    //! \brief The minimum number of bytes of GPU memory required by network
    //! weights for successful weight streaming.
    //!
    //! This is a positive integer for engines with streamable weights because a
    //! staging buffer on the GPU is required to temporarily hold the streamed
    //! weights. The size of the staging buffer is determined by TensorRT and must
    //! be at least as large as the size of the largest streamable weight in the
    //! network.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The minimum number of bytes of GPU memory required for streaming.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. The minimum budget is 0 in the V2 APIs.
    //!
    //! \see setWeightStreamingBudget()
    //!
    TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
    {
        return mImpl->getMinimumWeightStreamingBudget();
    }

    //!
    //! \brief Get the total size in bytes of all streamable weights.
    //!
    //! The set of streamable weights is a subset of all network weights. The
    //! total size may exceed free GPU memory.
    //!
    //! \returns The total size in bytes of all streamable weights.
    //!          Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building.
    //!
    //! \see setWeightStreamingBudget()
    //!
    int64_t getStreamableWeightsSize() const noexcept
    {
        return mImpl->getStreamableWeightsSize();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Limit the maximum amount of GPU memory usable for network weights in bytes.
    //!
    //! \param gpuMemoryBudget  This parameter must be a non-negative value.
    //!   0: Only small amounts of scratch memory will required to run the model.
    //!  >= getStreamableWeightsSize (default): Disables weight streaming.
    //!      The execution may fail if the network is too large for GPU memory.
    //!
    //! By setting a weight limit, users can expect a GPU memory usage reduction on the order
    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
    //! when gpuMemoryBudget is set to 0. Each IExecutionContext will require getWeightStreamingScratchMemorySize()
    //! bytes of additional device memory if the engine is streaming its weights (budget < getStreamableWeightsSize()).
    //!
    //! Streaming larger amounts of memory will likely result in lower performance
    //! except in some boundary cases where streaming weights allows the user to
    //! run larger batch sizes. The higher throughput offsets the increased
    //! latency in these cases. Tuning the value of the memory limit is
    //! recommended for best performance.
    //!
    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
    //! streaming or destroying the ICudaEngine.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
    //!
    //! \warning Using the V2 weight streaming APIs with V1 APIs (setWeightStreamingBudget(),
    //! getWeightStreamingBudget(), getWeightStreamingMinimumBudget()) leads to undefined behavior.
    //!
    //! \return true if the memory limit is valid and the call was successful, false otherwise.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see getWeightStreamingBudgetV2()
    //! \see getWeightStreamingScratchMemorySize()
    //! \see getWeightStreamingAutomaticBudget()
    //! \see getStreamableWeightsSize()
    //!
    bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
    {
        return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
    }

    //!
    //! \brief Returns the current weight streaming device memory budget in bytes.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudgetV2() for the possible
    //!          return values. Returns getStreamableWeightsSize() if weight streaming is disabled.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    int64_t getWeightStreamingBudgetV2() const noexcept
    {
        return mImpl->getWeightStreamingBudgetV2();
    }

    //!
    //! \brief TensorRT automatically determines a device memory budget for the model to run. The budget is close to the
    //! current free memory size, leaving some space for other memory needs in the user's application. If the budget
    //! exceeds the size obtained from getStreamableWeightsSize(), it is capped to that size, effectively disabling
    //! weight streaming. Since TensorRT lacks information about the user's allocations, the remaining memory size might
    //! be larger than required, leading to wasted memory, or smaller than required, causing an out-of-memory error. For
    //! optimal memory allocation, it is recommended to manually calculate and set the budget.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The return value may change between TensorRT minor versions.
    //!
    //! \warning Setting the returned budget with V1 APIs (setWeightStreamingBudget()) will lead to undefined behavior.
    //! Please use V2 APIs.
    //!
    //! \returns The weight streaming budget in bytes. Please set with setWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getWeightStreamingAutomaticBudget() const noexcept
    {
        return mImpl->getWeightStreamingAutomaticBudget();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns the size of the scratch memory required by the current weight streaming budget.
    //!
    //! Weight streaming requires small amounts of scratch memory on the GPU to stage CPU weights right before
    //! execution. This value is typically much smaller than the total streamable weights size. Each IExecutionContext
    //! will then allocate this additional memory or the user can provide the additional memory through
    //! getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2().
    //!
    //! The return value of this call depends on the budget most recently set by either
    //! 1. setWeightStreamingBudget(), or
    //! 2. setWeightStreamingBudgetV2()
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming scratch memory in bytes. Returns 0 if weight streaming is disabled.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudgetV2()
    //! \see getStreamableWeightsSize()
    //! \see getDeviceMemorySizeV2()
    //! \see getDeviceMemorySizeForProfileV2()
    //! \see IExecutionContext::setDeviceMemoryV2()
    //!
    int64_t getWeightStreamingScratchMemorySize() const noexcept
    {
        return mImpl->getWeightStreamingScratchMemorySize();
    }
//!
|
|||
|
|
//! \brief Check if a tensor is marked as a debug tensor.
|
|||
|
|
//!
|
|||
|
|
//! Determine whether the given name corresponds to a debug tensor.
|
|||
|
|
//!
|
|||
|
|
//! \returns True if tensor is a debug tensor, false otherwise.
|
|||
|
|
//!
|
|||
|
|
//! \see INetworkDefinition::markDebug
|
|||
|
|
//!
|
|||
|
|
bool isDebugTensor(char const* name) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isDebugTensor(name);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
    //! its name under an optimization profile. These correspond to the values set using
    //! IOptimizationProfile::setShapeValuesV2 when the engine was built.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
    //!
    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
    //! nullptr.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \warning If input shapes are set with the V1 API setShapeValues(), getProfileTensorValuesV2() will return
    //! nullptr.
    //!
    int64_t const* getProfileTensorValuesV2(
        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
    }
    //!
    //! \brief Get engine statistics according to the given enum value.
    //!
    //! \param stat The kind of statistics to query.
    //!
    //! If stat is kTOTAL_WEIGHTS_SIZE, the return value is the total weights size in bytes in the engine.
    //! If stat is kSTRIPPED_WEIGHTS_SIZE, the return value is the stripped weight size in bytes for engines
    //! built with BuilderFlag::kSTRIP_PLAN.
    //!
    //! When the BuilderFlag::kWEIGHT_STREAMING flag is enabled, engine weights may not be fully copied to the device.
    //! The reported total weight size reflects the sum of all weights utilized by the engine,
    //! which does not necessarily correspond to the actual GPU memory allocated.
    //!
    //! \return The kind of statistics specified by EngineStat.
    //!
    //! \warning If kSTRIPPED_WEIGHTS_SIZE is passed to query a normal (non-stripped) engine, this function will
    //! return -1 to indicate an invalid enum value.
    //!
    //! \see EngineStat
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    int64_t getEngineStat(EngineStat stat) const noexcept
    {
        return mImpl->getEngineStat(stat);
    }

protected:
    // Pointer to the versioned implementation object; all public methods delegate through it
    // so that the public header stays ABI-stable across TensorRT releases.
    apiv::VCudaEngine* mImpl;
};
namespace v_1_0
{
//! Versioned interface for user-supplied output allocation callbacks (see the
//! IOutputAllocator alias below for the user-facing documentation).
class IOutputAllocator : public IVersionedInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IOutputAllocator", 1, 0};
    }

    //!
    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
    //! If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
    //! If currentMemory is known to be big enough, one option is to return currentMemory.
    //!
    //! \param tensorName name of the output tensor.
    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
    //! \param size number of bytes required. Always positive, even for an empty tensor.
    //! \param alignment required alignment of the allocation.
    //!
    //! \return A pointer to memory to use for the output tensor or nullptr.
    //!
    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
    //! and have reallocateOutput return nullptr if that memory is not big enough.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by reallocateOutputAsync with cudaStream_t argument
    //!
    // The default implementation intentionally ignores its parameters and allocates nothing;
    // legacy implementors were expected to override it.
    TRT_DEPRECATED virtual void* reallocateOutput(
        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
    {
        return nullptr;
    }

    //!
    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
    //! If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
    //! If currentMemory is known to be big enough, one option is to return currentMemory.
    //!
    //! \param tensorName name of the output tensor.
    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
    //! \param size number of bytes required. Always positive, even for an empty tensor.
    //! \param alignment required alignment of the allocation.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return A pointer to memory to use for the output tensor or nullptr.
    //!
    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
    //! and have reallocateOutputAsync return nullptr if that memory is not big enough.
    //!
    //! The default definition exists for sake of backward compatibility with earlier versions of TensorRT.
    //! Eventually this method will become a pure virtual method that requires an override, and method
    //! reallocateOutput() will disappear. Code moving away from TensorRT 9.x should override method
    //! reallocateOutputAsync() and NOT override method reallocateOutput().
    //!
    // Default implementation forwards to the deprecated synchronous overload, ignoring the stream.
    virtual void* reallocateOutputAsync(
        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
    {
        return reallocateOutput(tensorName, currentMemory, size, alignment);
    }

    //!
    //! \brief Called by TensorRT when the shape of the output tensor is known.
    //!
    //! Called by TensorRT sometime between when it calls reallocateOutput and enqueueV3 returns.
    //!
    //! \param dims dimensions of the output
    //! \param tensorName name of the tensor
    //!
    virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
};
} // namespace v_1_0

//!
//! \class IOutputAllocator
//!
//! \brief Callback from ExecutionContext::enqueueV3()
//!
//! \see IExecutionContext::enqueueV3()
//!
using IOutputAllocator = v_1_0::IOutputAllocator;
namespace v_1_0
{
//! Versioned interface for user-supplied debug-tensor callbacks (see the
//! IDebugListener alias below for the user-facing documentation).
class IDebugListener : public IVersionedInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IDebugListener", 1, 0};
    }

    //!
    //! \brief Callback function that is called when a debug tensor's value is updated and the debug state of the
    //! tensor is set to true. Content in the given address is only guaranteed to be valid for the duration of the
    //! callback.
    //!
    //! \param location TensorLocation of the tensor.
    //! \param addr pointer to buffer.
    //! \param type data Type of the tensor.
    //! \param shape shape of the tensor.
    //! \param name name of the tensor.
    //! \param stream CUDA stream object.
    //!
    //! \return True on success, false otherwise.
    //!
    virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
        char const* name, cudaStream_t stream)
        = 0;

    ~IDebugListener() override = default;
};
} // namespace v_1_0

//!
//! \class IDebugListener
//!
//! \brief User-implemented callback for notification when value of a debug tensor is updated.
//!
using IDebugListener = v_1_0::IDebugListener;
//!
//! \class IExecutionContext
//!
//! \brief Context for executing inference using an engine, with functionally unsafe features.
//!
//! Multiple execution contexts may exist for one ICudaEngine instance, allowing the same
//! engine to be used for the execution of multiple batches simultaneously. If the engine supports
//! dynamic shapes, each execution context in concurrent use must use a separate optimization profile.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IExecutionContext : public INoCopy
{
public:
    virtual ~IExecutionContext() noexcept = default;

    //!
    //! \brief Set the debug sync flag.
    //!
    //! If this flag is set to true, the engine will log the successful execution for each kernel during executeV2().
    //! It has no effect when using enqueueV3().
    //!
    //! \param sync The new value of the debug sync flag.
    //!
    //! \see getDebugSync()
    //!
    void setDebugSync(bool sync) noexcept
    {
        mImpl->setDebugSync(sync);
    }

    //!
    //! \brief Get the debug sync flag.
    //!
    //! \return The current value of the debug sync flag.
    //!
    //! \see setDebugSync()
    //!
    bool getDebugSync() const noexcept
    {
        return mImpl->getDebugSync();
    }
    //!
    //! \brief Set the profiler.
    //!
    //! \param profiler The profiler to attach to this execution context.
    //!
    //! \see IProfiler getProfiler()
    //!
    void setProfiler(IProfiler* profiler) noexcept
    {
        mImpl->setProfiler(profiler);
    }

    //!
    //! \brief Get the profiler.
    //!
    //! \return The profiler attached to this execution context, if any.
    //!
    //! \see IProfiler setProfiler()
    //!
    IProfiler* getProfiler() const noexcept
    {
        return mImpl->getProfiler();
    }
    //!
    //! \brief Get the associated engine.
    //!
    //! \return A reference to the ICudaEngine this context was created from.
    //!
    //! \see ICudaEngine
    //!
    ICudaEngine const& getEngine() const noexcept
    {
        return mImpl->getEngine();
    }
    //!
    //! \brief Set the name of the execution context.
    //!
    //! This method copies the name string.
    //!
    //! \param name The name to assign to this execution context.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }

    //!
    //! \brief Return the name of the execution context.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }
    //!
    //! \brief Set the device memory for use by this execution context.
    //!
    //! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
    //! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
    //! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if
    //! the reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
    //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
    //! Releasing or otherwise using the memory for other purposes, including using it in another execution context
    //! running in parallel, during this time will result in undefined behavior.
    //!
    //! \param memory The device memory to use, or nullptr if the reported size is 0.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setDeviceMemoryV2().
    //!
    //! \warning Weight streaming related scratch memory will be allocated by TensorRT if the memory is set by this
    //! API. Please use setDeviceMemoryV2() instead.
    //!
    //! \see ICudaEngine::getDeviceMemorySize()
    //! \see ICudaEngine::getDeviceMemorySizeForProfile()
    //! \see ExecutionContextAllocationStrategy
    //! \see ICudaEngine::createExecutionContext()
    //! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
    //!
    void setDeviceMemory(void* memory) noexcept
    {
        mImpl->setDeviceMemory(memory);
    }
//!
|
|||
|
|
//! \brief Set the device memory and its corresponding size for use by this execution context.
|
|||
|
|
//!
|
|||
|
|
//! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
|
|||
|
|
//! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
|
|||
|
|
//! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the
|
|||
|
|
//! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
|
|||
|
|
//! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
|
|||
|
|
//! Releasing or otherwise using the memory for other purposes, including using it in another execution context
|
|||
|
|
//! running in parallel, during this time will result in undefined behavior.
|
|||
|
|
//!
|
|||
|
|
//! \see ICudaEngine::getDeviceMemorySizeV2()
|
|||
|
|
//! \see ICudaEngine::getDeviceMemorySizeForProfileV2()
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//! \see ICudaEngine::createExecutionContext()
|
|||
|
|
//! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
|
|||
|
|
//!
|
|||
|
|
void setDeviceMemoryV2(void* memory, int64_t size) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setDeviceMemoryV2(memory, size);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the strides of the buffer for the given tensor name.
    //!
    //! The strides are in units of elements, not components or bytes.
    //! For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars.
    //!
    //! Note that strides can be different for different execution contexts
    //! with dynamic shapes.
    //!
    //! If the provided name does not map to an input or output tensor, or there are dynamic dimensions that have not
    //! been set yet, return Dims{-1, {}}
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getTensorStrides(char const* tensorName) const noexcept
    {
        return mImpl->getTensorStrides(tensorName);
    }
public:
    //!
    //! \brief Get the index of the currently selected optimization profile.
    //!
    //! If the profile index has not been set yet (implicitly to 0 if no other execution context has been set to
    //! profile 0, or explicitly for all subsequent contexts), an invalid value of -1 will be returned
    //! and all calls to enqueueV3()/executeV2() will fail until a valid profile index has been set.
    //! This behavior is deprecated in TensorRT 8.6, all profiles will default to optimization
    //! profile 0 and -1 will no longer be returned.
    //!
    //! \return The index of the selected optimization profile, or -1 as described above.
    //!
    int32_t getOptimizationProfile() const noexcept
    {
        return mImpl->getOptimizationProfile();
    }
    //!
    //! \brief Set shape of given input.
    //!
    //! \param tensorName The name of an input tensor.
    //! \param dims The shape of an input tensor.
    //!
    //! \return True on success, false if the provided name does not map to an input tensor, or if some other error
    //! occurred.
    //!
    //! Each dimension must agree with the network dimension unless the latter was -1.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setInputShape(char const* tensorName, Dims const& dims) noexcept
    {
        return mImpl->setInputShape(tensorName, dims);
    }
    //!
    //! \brief Return the shape of the given input or output.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! Return Dims{-1, {}} if the provided name does not map to an input or output tensor.
    //! Otherwise return the shape of the input or output tensor.
    //!
    //! A dimension in an input tensor will have a -1 wildcard value if all the following are true:
    //! * setInputShape() has not yet been called for this tensor
    //! * The dimension is a runtime dimension that is not implicitly constrained to be a single value.
    //!
    //! A dimension in an output tensor will have a -1 wildcard value if the dimension depends
    //! on values of execution tensors OR if all the following are true:
    //! * It is a runtime dimension.
    //! * setInputShape() has NOT been called for some input tensor(s) with a runtime shape.
    //! * setTensorAddress() has NOT been called for some input tensor(s) with isShapeInferenceIO() = true.
    //!
    //! An output tensor may also have -1 wildcard dimensions if its shape depends on values of tensors supplied to
    //! enqueueV3().
    //!
    //! If the request is for the shape of an output tensor with runtime dimensions,
    //! all input tensors with isShapeInferenceIO() = true should have their value already set,
    //! since these values might be needed to compute the output shape.
    //!
    //! Examples of an input dimension that is implicitly constrained to a single value:
    //! * The optimization profile specifies equal min and max values.
    //! * The dimension is named and only one value meets the optimization profile requirements
    //!   for dimensions with that name.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getTensorShape(char const* tensorName) const noexcept
    {
        return mImpl->getTensorShape(tensorName);
    }
    //!
    //! \brief Whether all dynamic dimensions of input tensors have been specified
    //!
    //! \return True if all dynamic dimensions of input tensors have been specified
    //! by calling setInputShape().
    //!
    //! Trivially true if network has no dynamically shaped input tensors.
    //!
    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
    //! IExecutionContext::inferShapes() instead.
    //!
    bool allInputDimensionsSpecified() const noexcept
    {
        return mImpl->allInputDimensionsSpecified();
    }
    //!
    //! \brief Whether all input shape bindings have been specified
    //!
    //! \return True if all input shape bindings have been specified by setInputShapeBinding().
    //!
    //! Trivially true if network has no input shape bindings.
    //!
    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
    //! IExecutionContext::inferShapes() instead.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. setInputShapeBinding() is removed since TensorRT 10.0.
    //!
    TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
    {
        return mImpl->allInputShapesSpecified();
    }
    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
    //!
    //! \brief Synchronously execute a network.
    //!
    //! This method requires an array of input and output buffers. The mapping
    //! from indices to tensor names can be queried using ICudaEngine::getIOTensorName().
    //!
    //! \param bindings An array of pointers to input and output buffers for the network.
    //!
    //! \return True if execution succeeded.
    //!
    //! \see ICudaEngine::getIOTensorName()
    //!
    bool executeV2(void* const* bindings) noexcept
    {
        return mImpl->executeV2(bindings);
    }
    //!
    //! \brief Select an optimization profile for the current context with async
    //! semantics.
    //!
    //! \param profileIndex Index of the profile. The value must lie between 0 and
    //! getEngine().getNbOptimizationProfiles() - 1
    //!
    //! \param stream A CUDA stream on which the cudaMemcpyAsyncs may be
    //! enqueued
    //!
    //! When an optimization profile is switched via this API, TensorRT may
    //! require that data is copied via cudaMemcpyAsync. It is the
    //! application's responsibility to guarantee that synchronization between
    //! the profile sync stream and the enqueue stream occurs.
    //!
    //! The selected profile will be used in subsequent calls to executeV2()/enqueueV3().
    //! If the associated CUDA engine has inputs with dynamic shapes, the optimization profile must
    //! be set with its corresponding profileIndex before calling execute or enqueue. The newly created execution
    //! context will be assigned optimization profile 0.
    //!
    //! If the associated CUDA engine does not have inputs with dynamic shapes,
    //! this method need not be called, in which case the default profile index
    //! of 0 will be used.
    //!
    //! setOptimizationProfileAsync() must be called before calling
    //! setInputShape() for all dynamic input
    //! tensors or input shape tensors, which in turn must be called before
    //! executeV2()/enqueueV3().
    //!
    //! \warning This function will trigger layer resource updates on the next call of
    //! executeV2()/enqueueV3(), possibly resulting in performance bottlenecks.
    //!
    //! \warning Not synchronizing the stream used at enqueue with the stream
    //! used to set optimization profile asynchronously using this API will
    //! result in undefined behavior.
    //!
    //! \return true if the call succeeded, else false (e.g. input out of range)
    //!
    //! \see ICudaEngine::getNbOptimizationProfiles()
    //!
    bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
    {
        return mImpl->setOptimizationProfileAsync(profileIndex, stream);
    }
    //!
    //! \brief Set whether enqueue emits layer timing to the profiler
    //!
    //! If set to true (default), enqueue is synchronous and does layer timing profiling implicitly if
    //! there is a profiler attached.
    //! If set to false, enqueue will be asynchronous if there is a profiler attached. An extra method
    //! reportToProfiler() needs to be called to obtain the profiling data and report to the profiler attached.
    //!
    //! \param enqueueEmitsProfile The new value of the flag.
    //!
    //! \see IExecutionContext::getEnqueueEmitsProfile()
    //! \see IExecutionContext::reportToProfiler()
    //!
    void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
    {
        mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
    }

    //!
    //! \brief Get the enqueueEmitsProfile state.
    //!
    //! \return The enqueueEmitsProfile state.
    //!
    //! \see IExecutionContext::setEnqueueEmitsProfile()
    //!
    bool getEnqueueEmitsProfile() const noexcept
    {
        return mImpl->getEnqueueEmitsProfile();
    }
    //!
    //! \brief Calculate layer timing info for the current optimization profile in IExecutionContext
    //! and update the profiler after one iteration of inference launch.
    //!
    //! If IExecutionContext::getEnqueueEmitsProfile() returns true, the enqueue function will calculate layer timing
    //! implicitly if a profiler is provided. This function returns true and does nothing.
    //!
    //! If IExecutionContext::getEnqueueEmitsProfile() returns false, the enqueue function will record the CUDA event
    //! timers if a profiler is provided. But it will not perform the layer timing calculation.
    //! IExecutionContext::reportToProfiler() needs to be called explicitly to calculate layer timing for the previous
    //! inference launch.
    //!
    //! In the CUDA graph launch scenario, it will record the same set of CUDA events
    //! as in regular enqueue functions if the graph is captured from an IExecutionContext with profiler enabled.
    //! This function needs to be called after graph launch to report the layer timing info to the profiler.
    //!
    //! \warning profiling CUDA graphs is only available from CUDA 11.1 onwards.
    //! \warning reportToProfiler uses the stream of the previous enqueue call, so the stream must be live otherwise
    //! behavior is undefined.
    //!
    //! \return true if the call succeeded, else false (e.g. profiler not provided, in CUDA graph capture mode, etc.)
    //!
    //! \see IExecutionContext::setEnqueueEmitsProfile()
    //! \see IExecutionContext::getEnqueueEmitsProfile()
    //!
    bool reportToProfiler() const noexcept
    {
        return mImpl->reportToProfiler();
    }
//!
//! \brief Set memory address for given input or output tensor.
//!
//! \param tensorName The name of an input or output tensor.
//! \param data The pointer (void*) to the data owned by the user.
//!
//! \return True on success, false if error occurred.
//!
//! An address defaults to nullptr.
//! Pass data=nullptr to reset to the default state.
//!
//! Return false if the provided name does not map to an input or output tensor.
//!
//! If an input pointer has type (void const*), use setInputTensorAddress() instead.
//!
//! Before calling enqueueV3(), each input must have a non-null address and
//! each output must have a non-null address or an IOutputAllocator to set it later.
//!
//! If the TensorLocation of the tensor is kHOST:
//! - The pointer must point to a host buffer of sufficient size.
//! - Data representing shape values is not copied until enqueueV3 is invoked.
//!
//! If the TensorLocation of the tensor is kDEVICE:
//! - The pointer must point to a device buffer of sufficient size and alignment, or
//! - Be nullptr if the tensor is an output tensor that will be allocated by IOutputAllocator.
//!
//! If getTensorShape(name) reports a -1 for any dimension of an output after all
//! input shapes have been set, use setOutputAllocator() to associate an IOutputAllocator
//! to which the dimensions will be reported when known.
//!
//! Calling both setTensorAddress and setOutputAllocator() for the same output is allowed,
//! and can be useful for preallocating memory, and then reallocating if it's not big enough.
//!
//! The pointer must have at least 256-byte alignment.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setInputTensorAddress() setOutputTensorAddress() getTensorShape() setOutputAllocator() IOutputAllocator
//!
bool setTensorAddress(char const* tensorName, void* data) noexcept
{
    return mImpl->setTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get memory address bound to given input or output tensor, or nullptr if the provided name does not map to
//! an input or output tensor.
//!
//! \param tensorName The name of an input or output tensor.
//!
//! \return The address bound to the tensor (possibly nullptr if never set), or nullptr for an unknown name.
//!
//! Use method getOutputTensorAddress() if a non-const pointer for an output tensor is required.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see getOutputTensorAddress()
//!
void const* getTensorAddress(char const* tensorName) const noexcept
{
    return mImpl->getTensorAddress(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the memory address for a given output tensor.
//!
//! \param tensorName The name of an output tensor.
//! \param data The pointer to the buffer to which to write the output.
//!
//! \return True on success, false if the provided name does not map to an output tensor, does not meet alignment
//! requirements, or some other error occurred.
//!
//! Output addresses can also be set using method setTensorAddress. This method is provided for applications which
//! prefer to use different methods for setting input and output tensors.
//!
//! See setTensorAddress() for alignment and data type constraints.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setTensorAddress()
//!
bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
{
    return mImpl->setOutputTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set memory address for given input.
//!
//! \param tensorName The name of an input tensor.
//! \param data The pointer (void const*) to the const data owned by the user.
//!
//! \return True on success, false if the provided name does not map to an input tensor, does not meet alignment
//! requirements, or some other error occurred.
//!
//! Input addresses can also be set using method setTensorAddress, which requires a (void*).
//!
//! See description of method setTensorAddress() for alignment and data type constraints.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setTensorAddress()
//!
bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
{
    return mImpl->setInputTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get memory address for given output.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return Raw output data pointer (void*) for given output tensor, or nullptr if the provided name does not map to
//! an output tensor.
//!
//! If only a (void const*) pointer is needed, an alternative is to call method getTensorAddress().
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see getTensorAddress()
//!
void* getOutputTensorAddress(char const* tensorName) const noexcept
{
    return mImpl->getOutputTensorAddress(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Run shape calculations.
//!
//! \param nbMaxNames Maximum number of names to write to tensorNames.
//!        When the return value is a positive value n and tensorNames != nullptr,
//!        the names of min(n,nbMaxNames) insufficiently specified input tensors are
//!        written to tensorNames.
//!
//! \param tensorNames Buffer in which to place names of insufficiently specified input tensors.
//!
//! \return 0 on success.
//!         Positive value n if n input tensors were not sufficiently specified.
//!         -1 for other errors.
//!
//! An input tensor is insufficiently specified if either of the following is true:
//!
//! * It has dynamic dimensions and its runtime dimensions have not yet
//!   been specified via IExecutionContext::setInputShape.
//!
//! * isShapeInferenceIO(t)=true and the tensor's address has not yet been set.
//!
//! If an output tensor has isShapeInferenceIO(t)=true and its address has been specified,
//! then its value is written.
//!
//! Returns -1 if tensorNames == nullptr and nbMaxNames != 0.
//! Returns -1 if nbMaxNames < 0.
//! Returns -1 if a tensor's dimensions are invalid, e.g. a tensor ends up with a negative dimension.
//!
int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
{
    return mImpl->inferShapes(nbMaxNames, tensorNames);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Recompute the internal activation buffer sizes based on the current input shapes, and return the total
//! amount of memory required.
//!
//! Users can allocate the device memory based on the size returned and provide the memory to TRT with
//! IExecutionContext::setDeviceMemory(). Must specify all input shapes and the optimization profile to use before
//! calling this function, otherwise the partition will be invalidated.
//!
//! \return Total amount of memory required on success, 0 if error occurred.
//!
//! \see IExecutionContext::setDeviceMemory()
//!
size_t updateDeviceMemorySizeForShapes() noexcept
{
    return mImpl->updateDeviceMemorySizeForShapes();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Mark input as consumed.
//!
//! \param event The CUDA event that is triggered after all input tensors have been consumed.
//!
//! \warning The set event must be valid during the inference.
//!
//! \return True on success, false if error occurred.
//!
//! Passing event==nullptr removes whatever event was set, if any.
//!
bool setInputConsumedEvent(cudaEvent_t event) noexcept
{
    return mImpl->setInputConsumedEvent(event);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief The event associated with consuming the input.
//!
//! \return The CUDA event. Nullptr will be returned if the event is not set yet.
//!
//! \see setInputConsumedEvent()
//!
cudaEvent_t getInputConsumedEvent() const noexcept
{
    return mImpl->getInputConsumedEvent();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set output allocator to use for output tensor of given name.
//! Pass nullptr to outputAllocator to unset.
//! The allocator is called by enqueueV3().
//!
//! \param tensorName The name of an output tensor.
//! \param outputAllocator IOutputAllocator for the tensors.
//!
//! \return True if success, false if the provided name does not map to an output or, if some other error occurred.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see enqueueV3() IOutputAllocator
//!
bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
{
    return mImpl->setOutputAllocator(tensorName, outputAllocator);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get output allocator associated with output tensor of given name, or nullptr if the provided name does
//! not map to an output tensor.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return The associated IOutputAllocator, or nullptr if none was set or the name is unknown.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see IOutputAllocator
//!
IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
{
    return mImpl->getOutputAllocator(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get upper bound on an output tensor's size, in bytes, based on
//! the current optimization profile and input dimensions.
//!
//! If the profile or input dimensions are not yet set, or the provided name
//! does not map to an output, returns -1.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return Upper bound in bytes, or -1 on the error conditions above.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
int64_t getMaxOutputSize(char const* tensorName) const noexcept
{
    return mImpl->getMaxOutputSize(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Specify allocator to use for internal temporary storage.
//!
//! This allocator is used only by enqueueV3() for temporary storage whose size cannot be
//! predicted ahead of enqueueV3(). It is not used for output tensors, because memory
//! allocation for those is allocated by the allocator set by setOutputAllocator().
//! All memory allocated is freed by the time enqueueV3() returns.
//!
//! \param allocator pointer to allocator to use. Pass nullptr to revert to using TensorRT's
//!        default allocator.
//!
//! \return True on success, false if error occurred.
//!
//! \see enqueueV3() setOutputAllocator()
//!
bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept
{
    return mImpl->setTemporaryStorageAllocator(allocator);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get allocator set by setTemporaryStorageAllocator.
//!
//! Returns a nullptr if a nullptr was passed with setTemporaryStorageAllocator().
//!
//! \see setTemporaryStorageAllocator()
//!
IGpuAllocator* getTemporaryStorageAllocator() const noexcept
{
    return mImpl->getTemporaryStorageAllocator();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Enqueue inference on a stream.
//!
//! \param stream A CUDA stream on which the inference kernels will be enqueued.
//!
//! \return True if the kernels were enqueued successfully, false otherwise.
//!
//! Modifying or releasing memory that has been registered for the tensors before stream
//! synchronization or before the event passed to setInputConsumedEvent has been triggered results in undefined
//! behavior.
//! Input tensors can be released after the setInputConsumedEvent whereas output tensors require stream
//! synchronization.
//!
//! \warning Using default stream may lead to performance issues due to additional cudaDeviceSynchronize() calls by
//! TensorRT to ensure correct synchronizations. Please use non-default stream instead.
//!
//! \warning If the Engine is streaming weights, enqueueV3 will become synchronous, and
//! the graph will not be capturable.
//!
bool enqueueV3(cudaStream_t stream) noexcept
{
    return mImpl->enqueueV3(stream);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the maximum size for persistent cache usage.
//!
//! This function sets the maximum persistent L2 cache that this execution context may use for activation caching.
//! Activation caching is not supported on all architectures - see "How TensorRT uses Memory" in the developer guide
//! for details.
//!
//! \param size the size of persistent cache limitation in bytes.
//!        The default is 0 Bytes.
//!
//! \see getPersistentCacheLimit
//!
void setPersistentCacheLimit(size_t size) noexcept
{
    mImpl->setPersistentCacheLimit(size);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the maximum size for persistent cache usage.
//!
//! \returns The size of the persistent cache limit, in bytes.
//!
//! \see setPersistentCacheLimit
//!
size_t getPersistentCacheLimit() const noexcept
{
    return mImpl->getPersistentCacheLimit();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the verbosity of the NVTX markers in the execution context.
//!
//! Building with kDETAILED verbosity will generally increase latency in enqueueV3(). Call this method
//! to select NVTX verbosity in this execution context at runtime.
//!
//! The default is the verbosity with which the engine was built, and the verbosity may not be raised above that
//! level.
//!
//! This function does not affect how IEngineInspector interacts with the engine.
//!
//! \param verbosity The verbosity of the NVTX markers.
//!
//! \return True if the NVTX verbosity is set successfully. False if the provided verbosity level is higher than the
//! profiling verbosity of the corresponding engine.
//!
//! \see getNvtxVerbosity()
//! \see ICudaEngine::getProfilingVerbosity()
//!
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
{
    return mImpl->setNvtxVerbosity(verbosity);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the NVTX verbosity of the execution context.
//!
//! \return The current NVTX verbosity of the execution context.
//!
//! \see setNvtxVerbosity()
//!
ProfilingVerbosity getNvtxVerbosity() const noexcept
{
    return mImpl->getNvtxVerbosity();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
//!
//! If set, TensorRT will launch the kernels that are supposed to run on the auxiliary streams using the streams
//! provided by the user with this API. If this API is not called before the enqueueV3() call, then TensorRT will
//! use the auxiliary streams created by TensorRT internally.
//!
//! TensorRT will always insert event synchronizations between the main stream provided via enqueueV3() call and the
//! auxiliary streams:
//! - At the beginning of the enqueueV3() call, TensorRT will make sure that all the auxiliary streams wait on
//!   the activities on the main stream.
//! - At the end of the enqueueV3() call, TensorRT will make sure that the main stream waits on the activities on
//!   all the auxiliary streams.
//!
//! \param auxStreams The pointer to an array of cudaStream_t with the array length equal to nbStreams.
//! \param nbStreams The number of auxiliary streams provided. If nbStreams is greater than
//!        `engine->getNbAuxStreams()`, then only the first `engine->getNbAuxStreams()` streams will be used. If
//!        `nbStreams` is less than `engine->getNbAuxStreams()`, such as setting `nbStreams` to 0, then TensorRT
//!        will use the provided streams for the first `nbStreams` auxiliary streams, and will create additional
//!        streams internally for the rest of the auxiliary streams.
//!
//! \note The provided auxiliary streams must not be the default stream and must all be different to avoid
//!       deadlocks.
//!
//! \see enqueueV3(), IBuilderConfig::setMaxAuxStreams(), ICudaEngine::getNbAuxStreams()
//!
void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
{
    mImpl->setAuxStreams(auxStreams, nbStreams);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set DebugListener for this execution context.
//!
//! \param listener DebugListener for this execution context.
//!
//! \return true if succeed, false if failure.
//!
//! \see getDebugListener()
//!
bool setDebugListener(IDebugListener* listener) noexcept
{
    return mImpl->setDebugListener(listener);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the DebugListener of this execution context.
//!
//! \return DebugListener of this execution context.
//!
//! \see setDebugListener()
//!
IDebugListener* getDebugListener() noexcept
{
    return mImpl->getDebugListener();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set debug state of tensor given the tensor name.
//!
//! Turn the debug state of a tensor on or off.
//! A tensor with the parameter tensor name must exist in the network, and the tensor must have
//! been marked as a debug tensor during build time. Otherwise, an error is thrown.
//!
//! \param name Name of target tensor.
//!
//! \param flag True if turning on debug state, false if turning off debug state of tensor.
//!        The default is off.
//!
//! \return True if successful, false otherwise.
//!
bool setTensorDebugState(char const* name, bool flag) noexcept
{
    return mImpl->setTensorDebugState(name, flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the debug state.
//!
//! \param name Name of target tensor.
//!
//! \return true if there is a debug tensor with the given name and it has debug state turned on.
//!
//! \see setTensorDebugState()
//!
bool getDebugState(char const* name) const noexcept
{
    return mImpl->getDebugState(name);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the runtime config object used during execution context creation.
//!
//! \return The runtime config object.
//!
IRuntimeConfig* getRuntimeConfig() const noexcept
{
    return mImpl->getRuntimeConfig();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Turn the debug state of all debug tensors on or off.
//!
//! \param flag true if turning on debug state, false if turning off debug state.
//!
//! \return true if successful, false otherwise.
//!
//! The default is off.
//!
bool setAllTensorsDebugState(bool flag) noexcept
{
    return mImpl->setAllTensorsDebugState(flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Turn the debug state of unfused tensors on or off.
//!
//! The default is off.
//!
//! \param flag true if turning on debug state, false if turning off debug state.
//!
//! \return true if successful, false otherwise.
//!
//! \see INetworkDefinition::markUnfusedTensorsAsDebugTensors()
//!
bool setUnfusedTensorsDebugState(bool flag) noexcept
{
    return mImpl->setUnfusedTensorsDebugState(flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the debug state of unfused tensors.
//!
//! \return true if unfused tensors debug state is on. False if unfused tensors debug state is off.
//!
//! \see setUnfusedTensorsDebugState()
//!
bool getUnfusedTensorsDebugState() const noexcept
{
    return mImpl->getUnfusedTensorsDebugState();
}
|
|||
|
|
#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
    //!
    //! \brief Check if a subsequent call to enqueueV3 is graph-capturable on the provided stream.
    //!
    //! \param stream The stream to check.
    //!
    //! \return true if a subsequent call to enqueueV3 is graph-capturable on the provided stream.
    //! Reasons why graph capture may fail include:
    //! - blocking runtime allocation due to large dynamically sized tensors that cannot be
    //!   statically allocated,
    //! - dynamically shaped tensors whose size depends on the tensor contents, like the output
    //!   of an INonZeroLayer,
    //! - conditional control flow depending on the contents of on-device tensors, like an
    //!   ITripLimitLayer whose input tensor resides on the device,
    //! - engines that have been built for weight streaming.
    //!
    //! \note If this API returns false, enqueueV3 may not be called on a capturable stream
    //! (i.e. users may not call cudaStreamBeginCapture before starting inference); otherwise,
    //! inference will fail with an error message.
    bool isStreamCapturable(cudaStream_t stream) const noexcept
    {
        return mImpl->isStreamCapturable(stream);
    }
#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION

protected:
    apiv::VExecutionContext* mImpl; //!< Opaque implementation; every public method above delegates to it.
}; // class IExecutionContext
|
|||
|
|
|
|||
|
|
//!
//! \enum LayerInformationFormat
//!
//! \brief The format in which the IEngineInspector prints the layer information.
//!
//! \see IEngineInspector::getLayerInformation(), IEngineInspector::getEngineInformation()
//!
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0, //!< Print layer information in one line per layer.
    kJSON = 1,    //!< Print layer information in JSON format.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of layer information formats in LayerInformationFormat enum.
//! \see LayerInformationFormat
template <>
constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
{
    return 2; // kONELINE and kJSON
}
|
|||
|
|
|
|||
|
|
//!
//! \class IEngineInspector
//!
//! \brief An engine inspector which prints out the layer information of an engine or an execution context.
//!
//! The amount of printed information depends on the profiling verbosity setting of the builder config when the engine
//! is built:
//! - ProfilingVerbosity::kLAYER_NAMES_ONLY: only layer names will be printed.
//! - ProfilingVerbosity::kNONE: no layer information will be printed.
//! - ProfilingVerbosity::kDETAILED: layer names and layer parameters will be printed.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see ProfilingVerbosity, IEngineInspector
//!
class IEngineInspector : public INoCopy
{
public:
    virtual ~IEngineInspector() noexcept = default;

    //!
    //! \brief Set an execution context as the inspection source.
    //!
    //! Setting the execution context and specifying all the input shapes allows the inspector
    //! to calculate concrete dimensions for any dynamic shapes and display their format information.
    //! Otherwise, values dependent on input shapes will be displayed as -1 and format information
    //! will not be shown.
    //!
    //! Passing nullptr will remove any association with an execution context.
    //!
    //! \param context The execution context to inspect, or nullptr to clear the association.
    //!
    //! \return Whether the action succeeds.
    //!
    bool setExecutionContext(IExecutionContext const* context) noexcept
    {
        return mImpl->setExecutionContext(context);
    }

    //!
    //! \brief Get the context currently being inspected.
    //!
    //! \return The pointer to the context currently being inspected.
    //!
    //! \see setExecutionContext()
    //!
    IExecutionContext const* getExecutionContext() const noexcept
    {
        return mImpl->getExecutionContext();
    }

    //!
    //! \brief Get a string describing the information about a specific layer in the current engine or the execution
    //! context.
    //!
    //! \param layerIndex the index of the layer. It must lie in range [0, engine.getNbLayers()).
    //!
    //! \param format the format the layer information should be printed in.
    //!
    //! \return A null-terminated C-style string describing the information about a specific layer in the current
    //! engine or the execution context.
    //!
    //! \warning The content of the returned string may change when another execution context has
    //! been set, or when another getLayerInformation() or getEngineInformation() has been called.
    //!
    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
    //! inspection source. If the inspection source changes, the data that is being pointed to can change.
    //! Copy the string to another buffer before releasing the lock in order to guarantee consistency.
    //!
    //! \see LayerInformationFormat
    //!
    char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
    {
        return mImpl->getLayerInformation(layerIndex, format);
    }

    //!
    //! \brief Get a string describing the information about all the layers in the current engine or the execution
    //! context.
    //!
    //! \param format the format the layer information should be printed in.
    //!
    //! \return A null-terminated C-style string describing the information about all the layers in the current
    //! engine or the execution context.
    //!
    //! \warning The content of the returned string may change when another execution context has
    //! been set, or when another getLayerInformation() or getEngineInformation() has been called.
    //!
    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
    //! inspection source. If the inspection source changes, the data that is being pointed to can change.
    //! Copy the string to another buffer before releasing the lock in order to guarantee consistency.
    //!
    //! \see LayerInformationFormat
    //!
    char const* getEngineInformation(LayerInformationFormat format) const noexcept
    {
        return mImpl->getEngineInformation(format);
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }

protected:
    apiv::VEngineInspector* mImpl; //!< Opaque implementation; every public method above delegates to it.
}; // class IEngineInspector
|
|||
|
|
|
|||
|
|
} // namespace nvinfer1
|
|||
|
|
|
|||
|
|
//!
//! Internal C entry point for creating IRuntime.
//! @private
//!
extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;

//!
//! Internal C entry point for creating IRefitter.
//! @private
//!
extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;

//!
//! \brief Return the plugin registry
//!
extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;

//!
//! \brief Return the logger object.
//! \note the global logger is used only by standalone functions which have no associated builder, runtime
//! or refitter.
//!
extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
namespace nvinfer1
|
|||
|
|
{
|
|||
|
|
namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
|
|||
|
|
// header.
|
|||
|
|
{
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an instance of an IRuntime class.
|
|||
|
|
//!
|
|||
|
|
//! \param logger The logging class for the runtime.
|
|||
|
|
//!
|
|||
|
|
inline IRuntime* createInferRuntime(ILogger& logger) noexcept
|
|||
|
|
{
|
|||
|
|
return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an instance of an IRefitter class.
|
|||
|
|
//!
|
|||
|
|
//! \param engine The engine class for the refitter.
|
|||
|
|
//! \param logger The logging class for the refitter.
|
|||
|
|
//!
|
|||
|
|
inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
|
|||
|
|
{
|
|||
|
|
return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
} // namespace
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Register the plugin creator to the registry
|
|||
|
|
//! The static registry object will be instantiated when the plugin library is
|
|||
|
|
//! loaded. This static object will register all creators available in the
|
|||
|
|
//! library to the registry.
|
|||
|
|
//!
|
|||
|
|
//! \warning Statically registering plugins should be avoided in the automotive
|
|||
|
|
//! safety context as the application developer should first register an error recorder
|
|||
|
|
//! with the plugin registry via IPluginRegistry::setErrorRecorder() before using
|
|||
|
|
//! IPluginRegistry::registerCreator() or other methods.
|
|||
|
|
//!
|
|||
|
|
template <typename T>
|
|||
|
|
class PluginRegistrar
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
PluginRegistrar()
|
|||
|
|
{
|
|||
|
|
getPluginRegistry()->registerCreator(instance, "");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
private:
|
|||
|
|
//! Plugin instance.
|
|||
|
|
T instance{};
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
} // namespace nvinfer1

//!
//! \brief Declare a file-scope PluginRegistrar<name> whose constructor registers the
//! plugin creator \p name with the global plugin registry during static initialization.
//!
#define REGISTER_TENSORRT_PLUGIN(name) \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}

namespace nvinfer1
{
//!
//! \class ILoggerFinder
//!
//! \brief A virtual base class to find a logger.
//! Allows a plugin to find an instance of a logger if it needs to emit a log message.
//! A pointer to an instance of this class is passed to a plugin shared library on initialization when that plugin
//! is serialized as part of a version-compatible plan. See the plugin chapter in the developer guide for details.
//!
class ILoggerFinder
{
public:
    //!
    //! \brief Get the logger used by the engine or execution context which called the plugin method.
    //!
    //! \warning Must be called from the thread in which the plugin method was called.
    //!
    //! \return A pointer to the logger.
    //!
    virtual ILogger* findLogger() = 0;

protected:
    //! Protected destructor: callers cannot destroy an object through this interface.
    virtual ~ILoggerFinder() = default;
};
//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
|
|||
|
|
//! The name v_1_0 may change in future versions of TensorRT.
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
|
|||
|
|
class IGpuAsyncAllocator : public IGpuAllocator
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
IGpuAsyncAllocator() = default;
|
|||
|
|
~IGpuAsyncAllocator() override = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
|
|||
|
|
//! acquisition of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! \param size The size of the memory block required (in bytes).
|
|||
|
|
//! \param alignment The required alignment of memory. Alignment will be zero
|
|||
|
|
//! or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
|
|||
|
|
//! Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
|
|||
|
|
//! An alignment value of zero indicates any alignment is acceptable.
|
|||
|
|
//! \param flags Reserved for future use. In the current release, 0 will be passed.
|
|||
|
|
//!
|
|||
|
|
//! \param stream Specifies the cudastream for the asynchronous allocation. If nullptr or 0 is
|
|||
|
|
//! passed, the default stream will be used.
|
|||
|
|
//!
|
|||
|
|
//! \return If the allocation was successful, the start address of a device memory block of the requested size.
|
|||
|
|
//! If an allocation request of size 0 is made, nullptr must be returned.
|
|||
|
|
//! If an allocation request cannot be satisfied, nullptr must be returned.
|
|||
|
|
//! If a non-null address is returned, it is guaranteed to have the specified alignment.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
|
|||
|
|
//! albeit doing so will lose the performance advantage of asynchronous allocation.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//!
|
|||
|
|
void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
|
|||
|
|
cudaStream_t /*stream*/) noexcept override = 0;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
|
|||
|
|
//! release of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
|
|||
|
|
//!
|
|||
|
|
//! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
|
|||
|
|
//! allocator object.
|
|||
|
|
//!
|
|||
|
|
//! \param stream Specifies the cudastream for the asynchronous deallocation. If nullptr or 0 is
|
|||
|
|
//! passed, the default stream will be used.
|
|||
|
|
//!
|
|||
|
|
//! \return True if the acquired memory is released successfully.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
|
|||
|
|
//! albeit doing so will lose the performance advantage of asynchronous deallocation.
|
|||
|
|
//! Either way, it is critical that it not actually free the memory until the current
|
|||
|
|
//! stream position is reached.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! \param size The size of the memory block required (in bytes).
|
|||
|
|
//! \param alignment The required alignment of memory. Alignment will be zero
|
|||
|
|
//! or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
|
|||
|
|
//! Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
|
|||
|
|
//! An alignment value of zero indicates any alignment is acceptable.
|
|||
|
|
//! \param flags Reserved for future use. In the current release, 0 will be passed.
|
|||
|
|
//!
|
|||
|
|
//! \return If the allocation was successful, the start address of a device memory block of the requested size.
|
|||
|
|
//! If an allocation request of size 0 is made, nullptr must be returned.
|
|||
|
|
//! If an allocation request cannot be satisfied, nullptr must be returned.
|
|||
|
|
//! If a non-null address is returned, it is guaranteed to have the specified alignment.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync/reallocate
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED void* allocate(
|
|||
|
|
uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
|
|||
|
|
{
|
|||
|
|
return allocateAsync(size, alignment, flags, nullptr);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
|
|||
|
|
//!
|
|||
|
|
//! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
|
|||
|
|
//! allocator object.
|
|||
|
|
//!
|
|||
|
|
//! \return True if the acquired memory is released successfully.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
|
|||
|
|
{
|
|||
|
|
return deallocateAsync(memory, nullptr);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
|||
|
|
//!
|
|||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
|||
|
|
{
|
|||
|
|
return {"IGpuAllocator", 1, 0};
|
|||
|
|
}
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
class IPluginCreatorV3One : public IPluginCreatorInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
    }

    //!
    //! \brief Return a plugin object. Return nullptr in case of error.
    //!
    //! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
    //! \param fc A pointer to a collection of fields needed for constructing the plugin.
    //! \param phase The TensorRT phase in which the plugin is being created
    //!
    //! When the phase is TensorRTPhase::kRUNTIME, the PluginFieldCollection provided for serialization by the plugin's
    //! runtime interface will be passed as fc.
    //!
    //! \note The returned plugin object must be in an initialized state
    //!
    //! \note If invoked by the user (e.g. with TensorRTPhase::kBUILD, to add to the network definition with
    //! addPluginV3()), it is the user's responsibility to delete the plugin object. If invoked by TensorRT (e.g. during
    //! engine deserialization), TensorRT will delete any objects it creates.
    //!
    virtual IPluginV3* createPlugin(
        AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;

    //!
    //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the
    //! TensorRT build phase.
    //!
    //! \see PluginFieldCollection
    //!
    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;

    //!
    //! \brief Return the plugin name.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginName() const noexcept = 0;

    //!
    //! \brief Return the plugin version.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;

    //!
    //! \brief Return the plugin namespace.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;

    IPluginCreatorV3One() = default;
    virtual ~IPluginCreatorV3One() = default;

protected:
    // Copy/move operations are protected so that only derived classes may slice-copy themselves.
    IPluginCreatorV3One(IPluginCreatorV3One const&) = default;
    IPluginCreatorV3One(IPluginCreatorV3One&&) = default;
    IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
    IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
};
} // namespace v_1_0

//!
//! \class IGpuAsyncAllocator
//!
//! \brief Application-implemented class for controlling asynchronous (stream ordered) memory allocation on the GPU.
//!
//! \warning The lifetime of an IGpuAsyncAllocator object must exceed that of all objects that use it.
//!
//! The advantage of deriving from IGpuAsyncAllocator instead of IGpuAllocator is that you only have
//! to override two methods: allocateAsync() and deallocateAsync() to implement an allocator with
//! asynchronous capability, whereas deriving from IGpuAllocator requires overriding four methods,
//! including two deprecated methods.
//!
//! \see IGpuAllocator
using IGpuAsyncAllocator = v_1_0::IGpuAsyncAllocator;

//!
//! \class IPluginCreatorV3One
//!
//! \brief A plugin creator class capable of producing IPluginV3 objects
//!
//! \see IPluginV3
//! \see IPluginRegistry
//!
using IPluginCreatorV3One = v_1_0::IPluginCreatorV3One;

} // namespace nvinfer1
//!
//! \brief Return the library major version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
//!
//! \brief Return the library minor version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
//!
//! \brief Return the library patch version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
//!
//! \brief Return the library build version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;

#endif // NV_INFER_RUNTIME_H