5769 lines
229 KiB
C
5769 lines
229 KiB
C
|
|
/*
|
|||
|
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|||
|
|
* SPDX-License-Identifier: Apache-2.0
|
|||
|
|
*
|
|||
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|||
|
|
* you may not use this file except in compliance with the License.
|
|||
|
|
* You may obtain a copy of the License at
|
|||
|
|
*
|
|||
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|||
|
|
*
|
|||
|
|
* Unless required by applicable law or agreed to in writing, software
|
|||
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|||
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
|
|
* See the License for the specific language governing permissions and
|
|||
|
|
* limitations under the License.
|
|||
|
|
*/
|
|||
|
|
|
|||
|
|
#ifndef NV_INFER_RUNTIME_H
|
|||
|
|
#define NV_INFER_RUNTIME_H
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \file NvInferRuntime.h
|
|||
|
|
//!
|
|||
|
|
//! This is the top-level API file for TensorRT extended runtime library.
|
|||
|
|
//!
|
|||
|
|
|
|||
|
|
#include "NvInferImpl.h" // IWYU pragma: export
|
|||
|
|
#define NV_INFER_INTERNAL_INCLUDE 1
|
|||
|
|
#include "NvInferPluginBase.h" // IWYU pragma: export
|
|||
|
|
#undef NV_INFER_INTERNAL_INCLUDE
|
|||
|
|
#include "NvInferRuntimeCommon.h" // IWYU pragma: export
|
|||
|
|
|
|||
|
|
namespace nvinfer1
|
|||
|
|
{
|
|||
|
|
|
|||
|
|
class IExecutionContext; //!< Forward declaration of IExecutionContext for use by other interfaces.
class ICudaEngine;       //!< Forward declaration of ICudaEngine for use by other interfaces.
class IPluginFactory;    //!< Forward declaration of IPluginFactory for use by other interfaces.
class IEngineInspector;  //!< Forward declaration of IEngineInspector for use by other interfaces.
|
|||
|
|
|
|||
|
|
//!
//! \class INoCopy
//!
//! \brief Base class for all TensorRT interfaces that are implemented by the TensorRT libraries
//!
//! Objects of such classes are not movable or copyable, and should only be manipulated
//! via pointers.
//!

class INoCopy
{
protected:
    INoCopy() = default;
    virtual ~INoCopy() = default;
    INoCopy(INoCopy const& other) = delete;            //!< Not copy-constructible.
    INoCopy& operator=(INoCopy const& other) = delete; //!< Not copy-assignable.
    INoCopy(INoCopy&& other) = delete;                 //!< Not move-constructible.
    INoCopy& operator=(INoCopy&& other) = delete;      //!< Not move-assignable.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum EngineCapability
//!
//! \brief List of supported engine capability flows.
//!
//! \details The EngineCapability determines the restrictions of a network during build time and what runtime
//! it targets. When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide
//! any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard
//! runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network
//! operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe
//! runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of
//! network operations that are DLA compatible and the resulting serialized engine can be executed using standalone
//! DLA runtime APIs. See sampleCudla for an example of integrating cuDLA APIs with TensorRT APIs.
//!
enum class EngineCapability : int32_t
{
    //!
    //! Standard: TensorRT flow without targeting the safety runtime.
    //! This flow supports both DeviceType::kGPU and DeviceType::kDLA.
    //!
    kSTANDARD = 0,

    //!
    //! Safety: TensorRT flow with restrictions targeting the safety runtime.
    //! See safety documentation for list of supported layers and formats.
    //! This flow supports only DeviceType::kGPU.
    //!
    //! This flag is only supported in NVIDIA Drive(R) products.
    kSAFETY = 1,

    //!
    //! DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes.
    //! See DLA documentation for list of supported layers and formats.
    //! This flow supports only DeviceType::kDLA.
    //!
    kDLA_STANDALONE = 2,
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in EngineCapability enum. \see EngineCapability
template <>
struct EnumMaxImpl<EngineCapability>
{
    //! Number of enumerators in EngineCapability (kSTANDARD, kSAFETY, kDLA_STANDALONE).
    static constexpr int32_t kVALUE = 3;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \class Weights
//!
//! \brief An array of weights used as a layer parameter.
//!
//! When using the DLA, the cumulative size of all Weights used in a network
//! must be less than 512MB in size. If the build option kGPU_FALLBACK is specified,
//! then multiple DLA sub-networks may be generated from the single original network.
//!
//! The weights are held by reference until the engine has been built. Therefore the data referenced
//! by \p values field should be preserved until the build is complete.
//!
//! The term "empty weights" refers to Weights with no weight coefficients ( \p count == 0 and \p values == nullptr).
//!
class Weights
{
public:
    DataType type;      //!< The type of the weights.
    void const* values; //!< The weight values, in a contiguous array.
    int64_t count;      //!< The number of weights in the array.
};
|
|||
|
|
|
|||
|
|
//!
//! \class IHostMemory
//!
//! \brief Class to handle library allocated memory that is accessible to the user.
//!
//! The memory allocated via the host memory object is owned by the library and will
//! be de-allocated when the destroy method is called.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IHostMemory : public INoCopy
{
public:
    virtual ~IHostMemory() noexcept = default;

    //! A pointer to the raw data that is owned by the library.
    void* data() const noexcept
    {
        return mImpl->data();
    }

    //! The size in bytes of the data that was allocated.
    std::size_t size() const noexcept
    {
        return mImpl->size();
    }

    //! The type of the memory that was allocated.
    DataType type() const noexcept
    {
        return mImpl->type();
    }

protected:
    apiv::VHostMemory* mImpl; //!< Library-provided implementation that the methods above delegate to.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum DimensionOperation
//!
//! \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations.
//!
//! For example, given two IDimensionExpr x and y and an IExprBuilder& eb,
//! eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y.
//!
//! \see IDimensionExpr, IExprBuilder
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Subtract the second operand from the first.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
    kCEIL_DIV = 8   //!< Division of the first element by the second, rounding up.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in DimensionOperation enum. \see DimensionOperation
template <>
constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
{
    // Nine enumerators: kSUM (0) through kCEIL_DIV (8).
    return 9;
}
|
|||
|
|
|
|||
|
|
//!
//! \enum TensorLocation
//!
//! \brief The location for tensor data storage, device or host.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data stored on device.
    kHOST = 1,   //!< Data stored on host.
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in TensorLocation enum. \see TensorLocation
template <>
struct EnumMaxImpl<TensorLocation>
{
    //! Number of enumerators in TensorLocation (kDEVICE, kHOST).
    static constexpr int32_t kVALUE = 2;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \class IDimensionExpr
//!
//! \brief An IDimensionExpr represents an integer expression constructed from constants,
//! input dimensions, and binary operations. These expressions can be used
//! in overrides of IPluginV2DynamicExt::getOutputDimensions or IPluginV3OneBuild::getOutputShapes() to define output
//! dimensions in terms of input dimensions.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see DimensionOperation, IPluginV2DynamicExt::getOutputDimensions, IPluginV3OneBuild::getOutputShapes()
//!
class IDimensionExpr : public INoCopy
{
public:
    //!
    //! \brief Return true if expression is a build-time constant.
    //!
    bool isConstant() const noexcept
    {
        return mImpl->isConstant();
    }

    //!
    //! \brief Get the value of the constant.
    //!
    //! If isConstant(), returns value of the constant.
    //! If !isConstant(), return std::numeric_limits<int64_t>::min().
    //!
    int64_t getConstantValue() const noexcept
    {
        return mImpl->getConstantValue();
    }

protected:
    apiv::VDimensionExpr* mImpl; //!< Library-provided implementation that the methods of this class delegate to.
    virtual ~IDimensionExpr() noexcept = default;

public:
    //!
    //! \brief Return true if this denotes the value of a size tensor.
    //!
    //! \return True if this was created with method IExprBuilder::declareSizeTensor, false otherwise
    //!
    bool isSizeTensor() const noexcept
    {
        return mImpl->isSizeTensor();
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \class IExprBuilder
//!
//! \brief Object for constructing IDimensionExpr.
//!
//! There is no public way to construct an IExprBuilder. It appears as an argument to
//! method IPluginV2DynamicExt::getOutputDimensions() and IPluginV3OneBuild::getOutputShapes(). Overrides of that
//! method can use that IExprBuilder argument to construct expressions that define output dimensions in terms of input
//! dimensions.
//!
//! Clients should assume that any values constructed by the IExprBuilder are destroyed
//! after IPluginV2DynamicExt::getOutputDimensions() or IPluginV3OneBuild::getOutputShapes() returns.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see IDimensionExpr
//!
class IExprBuilder : public INoCopy
{
public:
    //!
    //! \brief Return pointer to IDimensionExpr for given value.
    //!
    IDimensionExpr const* constant(int64_t value) noexcept
    {
        return mImpl->constant(value);
    }

    //!
    //! \brief Get the operation.
    //!
    //! Return pointer to IDimensionExpr that represents the given operation applied to first and second.
    //! Returns nullptr if op is not a valid DimensionOperation.
    //!
    IDimensionExpr const* operation(
        DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
    {
        return mImpl->operation(op, first, second);
    }

protected:
    apiv::VExprBuilder* mImpl; //!< Library-provided implementation that the methods of this class delegate to.
    virtual ~IExprBuilder() noexcept = default;

public:
    //!
    //! \brief Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bound.
    //!
    //! A size tensor allows a plugin to have output dimensions that cannot be computed solely from input dimensions.
    //! For example, suppose a plugin implements the equivalent of INonZeroLayer for 2D input. The plugin can
    //! have one output for the indices of non-zero elements, and a second output containing the number of non-zero
    //! elements. Suppose the input has size [M,N] and has K non-zero elements. The plugin can write K to the second
    //! output. When telling TensorRT that the first output has shape [2,K], plugin uses IExprBuilder::constant() and
    //! IExprBuilder::declareSizeTensor(1,...) to create the IDimensionExpr that respectively denote 2 and K.
    //!
    //! TensorRT also needs to know the value of K to use for auto-tuning and an upper bound on K so that it can
    //! allocate memory for the output tensor. In the example, supposed typically half of the plugin's input elements
    //! are non-zero, and all the elements might be nonzero. then using M*N/2 might be a good expression for the opt
    //! parameter, and M*N for the upper bound. IDimensionsExpr for these expressions can be constructed from
    //! IDimensionsExpr for the input dimensions.
    //!
    //! \param outputIndex index of a plugin output that is a size tensor.
    //! \param opt formula for computing auto-tuning value. Must not depend on a size tensor.
    //! \param upper Upper bound on the size tensor.
    //!
    //! \return IDimensionExpr denoting the value of the size tensor.
    //!
    //! \see IPluginV3OneBuild::getOutputShapes()
    //!
    IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
    {
        return mImpl->declareSizeTensor(outputIndex, opt, upper);
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \class DimsExprs
//!
//! \brief Analog of class Dims with expressions instead of constants for the dimensions.
//!
class DimsExprs
{
public:
    int32_t nbDims;                          //!< The number of dimensions.
    IDimensionExpr const* d[Dims::MAX_DIMS]; //!< The extent of each dimension.
};
|
|||
|
|
|
|||
|
|
//!
//! \struct DynamicPluginTensorDesc
//!
//! \brief Summarizes tensors that a plugin might see for an input or output.
//!
struct DynamicPluginTensorDesc
{
    //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension.
    PluginTensorDesc desc;

    //! Lower bounds on tensor's dimensions
    Dims min;

    //! Upper bounds on tensor's dimensions
    Dims max;

    //! Optimum value of tensor's dimensions specified for auto-tuning
    Dims opt;
};
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV2DynamicExt
//!
//! \brief Similar to IPluginV2Ext, but with support for dynamic shapes.
//!
//! Clients should override the public methods, including the following inherited methods:
//!
//! * virtual int32_t getNbOutputs() const noexcept = 0;
//!
//! * virtual DataType getOutputDataType(int32_t index, DataType const* inputTypes,
//!   int32_t nbInputs) const noexcept = 0;
//!
//! * virtual size_t getSerializationSize() const noexcept = 0;
//!
//! * virtual void serialize(void* buffer) const noexcept = 0;
//!
//! * virtual void destroy() noexcept = 0;
//!
//! * virtual void setPluginNamespace(char const* pluginNamespace) noexcept = 0;
//!
//! * virtual char const* getPluginNamespace() const noexcept = 0;
//!
//! For weakly typed networks, the inputTypes will always be DataType::kFLOAT or DataType::kINT32,
//! and the returned type is canonicalized to DataType::kFLOAT if it is DataType::kHALF or DataType::kINT8.
//! For strongly typed networks, inputTypes are inferred from previous operations, and getOutputDataType
//! specifies the returned type based on the inputTypes.
//! Details about the floating-point precision are elicited later by method supportsFormatCombination.
//!
//! \deprecated Deprecated in TensorRT 10.0. Please implement IPluginV3 instead.
//!
class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext
{
public:
    //!
    //! \brief Clone the plugin object. Covariant return narrows the inherited clone() to IPluginV2DynamicExt.
    //!
    IPluginV2DynamicExt* clone() const noexcept override = 0;

    //!
    //! \brief Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
    //!
    //! \param outputIndex The index of the output tensor
    //! \param inputs Expressions for dimensions of the input tensors
    //! \param nbInputs The number of input tensors
    //! \param exprBuilder Object for generating new expressions
    //!
    //! This function is called by the implementations of IBuilder during analysis of the network.
    //!
    //! Example #1: A plugin has a single output that transposes the last two dimensions of the plugin's single input.
    //! The body of the override of getOutputDimensions can be:
    //!
    //!     DimsExprs output(inputs[0]);
    //!     std::swap(output.d[output.nbDims-1], output.d[output.nbDims-2]);
    //!     return output;
    //!
    //! Example #2: A plugin concatenates its two inputs along the first dimension.
    //! The body of the override of getOutputDimensions can be:
    //!
    //!     DimsExprs output(inputs[0]);
    //!     output.d[0] = exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[0], *inputs[1].d[0]);
    //!     return output;
    //!
    virtual DimsExprs getOutputDimensions(
        int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;

    //!
    //! \brief Limit on number of format combinations accepted.
    //!
    static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;

    //!
    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
    //!
    //! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
    //! Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs.
    //!
    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
    //! by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos]
    //! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
    //! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
    //! that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1],
    //! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
    //!
    //! Some examples:
    //!
    //! * A definition for a plugin that supports only FP16 NCHW:
    //!
    //!     return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
    //!
    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
    //!   and FP32 NCHW for its single output:
    //!
    //!     return inOut[pos].format == TensorFormat::kLINEAR && (inOut[pos].type == (pos < 2 ? DataType::kHALF :
    //!     DataType::kFLOAT));
    //!
    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
    //!   any format or type, but the inputs and output must have the same format and type:
    //!
    //!     return pos == 0 || (inOut[pos].format == inOut.format[0] && inOut[pos].type == inOut[0].type);
    //!
    //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations.
    //!
    virtual bool supportsFormatCombination(
        int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Configure the plugin.
    //!
    //! configurePlugin() can be called multiple times in both the build and execution phases. The build phase happens
    //! before initialize() is called and only occurs during creation of an engine by IBuilder. The execution phase
    //! happens after initialize() is called and occurs during both creation of an engine by IBuilder and execution
    //! of an engine by IExecutionContext.
    //!
    //! Build phase:
    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for profiling but not for any
    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
    //! input and output formats, along with the bound of possible dimensions. The min and max value of the
    //! DynamicPluginTensorDesc correspond to the kMIN and kMAX value of the current profile that the plugin is being
    //! profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network creation.
    //! Wildcard dimensions will exist during this phase in the desc.dims field.
    //!
    //! Execution phase:
    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for executing the plugin for a
    //! specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the
    //! explicit input dimensions stored in desc.dims field.
    //! * IBuilder will call this function once per profile, with desc.dims resolved to the values specified by the
    //!   kOPT
    //!   field of the current profile. Wildcard dimensions will not exist during this phase.
    //! * IExecutionContext will call this during the next subsequent instance enqueue[V2]() or execute[V2]() if:
    //!   - The batch size is changed from previous call of execute()/enqueue() if hasImplicitBatchDimension() returns
    //!     true.
    //!   - The optimization profile is changed via setOptimizationProfileAsync().
    //!   - An input execution binding is changed via setInputShape().
    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
    //! called from IBuilder. Performance bottlenecks of configurePlugin won't show up during engine building but will
    //! be visible during execution after calling functions that trigger layer resource updates.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Find the workspace size required by the layer.
    //!
    //! This function is called after the plugin is configured, and possibly during execution.
    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
    //! or any smaller problem.
    //!
    //! \return The workspace size.
    //!
    virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
        int32_t nbOutputs) const noexcept = 0;

    //!
    //! \brief Execute the layer.
    //!
    //! \param inputDesc how to interpret the memory for the input tensors.
    //! \param outputDesc how to interpret the memory for the output tensors.
    //! \param inputs The memory for the input tensors.
    //! \param outputs The memory for the output tensors.
    //! \param workspace Workspace for execution.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination).
    //!
    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;

protected:
    //!
    //! \brief Return the API version with which this plugin was built. The
    //! upper byte reserved by TensorRT and is used to differentiate this from IPluginV2.
    //!
    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
    //! plugins.
    //!
    int32_t getTensorRTVersion() const noexcept override
    {
        // Upper byte encodes the plugin kind; lower three bytes carry the TensorRT version.
        return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
    }

    virtual ~IPluginV2DynamicExt() noexcept {}

private:
    // Following are obsolete base class methods, and must not be implemented or used.

    //!
    //! \brief Set plugin configuration
    //!
    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
        bool const*, PluginFormat, int32_t) noexcept override final
    {
    }

    //!
    //! \brief Check if provided data type is supported
    //!
    bool supportsFormat(DataType, PluginFormat) const noexcept override final
    {
        return false;
    }

    //!
    //! \brief Get output dimensions.
    //!
    Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
    {
        return Dims{-1, {}};
    }

    //!
    //! \brief Is output broadcasted across batch.
    //!
    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
    //!
    TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
    {
        return false;
    }

    //!
    //! \brief Can output broadcasted across batch.
    //!
    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
    //!
    TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
    {
        return true;
    }

    //!
    //! \brief Get required workspace size in bytes.
    //!
    size_t getWorkspaceSize(int32_t) const noexcept override final
    {
        return 0;
    }

    //!
    //! \brief Run inference.
    //!
    int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
    {
        return 1;
    }
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
class IStreamReader : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamReader defined by the
    //! application.
    //!
    ~IStreamReader() override = default;
    IStreamReader() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"IStreamReader", 1, 0};
    }

    //!
    //! \brief Read the next number of bytes in the stream.
    //!
    //! \param destination The memory to write to
    //! \param nbBytes The number of bytes to read
    //!
    //! \returns The number of bytes read. Negative values will be considered an automatic error.
    //!
    virtual int64_t read(void* destination, int64_t nbBytes) = 0;

protected:
    // Copy/move operations are protected: only derived classes may use them, preventing slicing through the base.
    IStreamReader(IStreamReader const&) = default;
    IStreamReader(IStreamReader&&) = default;
    IStreamReader& operator=(IStreamReader const&) & = default;
    IStreamReader& operator=(IStreamReader&&) & = default;
};
|
|||
|
|
|
|||
|
|
class IStreamWriter : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamWriter defined by the
    //! application.
    //!
    ~IStreamWriter() override = default;
    IStreamWriter() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept final
    {
        return InterfaceInfo{"IStreamWriter", 1, 0};
    }

    //!
    //! \brief write nbBytes of data into the stream.
    //!
    //! \param data The data to be written to stream
    //! \param nbBytes The number of bytes to write
    //!
    //! \returns The number of bytes written. A value that is negative or less than nbBytes indicates that an error
    //! occurred and TensorRT will give up on writing to the stream.
    //!
    virtual int64_t write(void const* data, int64_t nbBytes) = 0;

protected:
    // Copy/move operations are protected: only derived classes may use them, preventing slicing through the base.
    IStreamWriter(IStreamWriter const&) = default;
    IStreamWriter(IStreamWriter&&) = default;
    IStreamWriter& operator=(IStreamWriter const&) & = default;
    IStreamWriter& operator=(IStreamWriter&&) & = default;
};
|
|||
|
|
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamReader
//!
//! \brief Application-implemented class for reading data in a stream-based manner.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReader, not
//! v_1_0::IStreamReader
//!
using IStreamReader = v_1_0::IStreamReader;
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamWriter
//!
//! \brief Application-implemented class for writing data in a stream-based manner.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamWriter, not
//! v_1_0::IStreamWriter
//!
using IStreamWriter = v_1_0::IStreamWriter;
|
|||
|
|
|
|||
|
|
//!
//! \enum SeekPosition
//! \brief Controls the seek mode of IStreamReaderV2.
//!
enum class SeekPosition : int32_t
{
    //! From the beginning of the file.
    kSET = 0,

    //! From the current position of the file.
    kCUR = 1,

    //! From the tail of the file.
    kEND = 2,
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
{
//!
//! \brief Application-implemented class for reading data in a stream-based manner asynchronously,
//! with support for seeking. Refer to this class through the IStreamReaderV2 alias.
//!
class IStreamReaderV2 : public IVersionedInterface
{
public:
    //!
    //! TensorRT never calls the destructor for an IStreamReaderV2 defined by the
    //! application.
    //!
    ~IStreamReaderV2() override = default;
    IStreamReaderV2() = default;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"IStreamReaderV2", 1, 0};
    }

    //!
    //! \brief Read the next number of bytes in the stream asynchronously.
    //!
    //! \param destination The memory to write to, call cudaPointerGetAttributes to get the memory location
    //! \param nbBytes The number of bytes to read
    //! \param stream The CUDA stream used to do the copy
    //!
    //! \returns The number of bytes read. Negative values indicate an unrecoverable error.
    //! A zero indicates that the end of the stream has been reached.
    //!
    virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;

    //!
    //! \brief Sets the position of the stream to the given offset.
    //!
    //! \param offset The number of bytes to offset from where.
    //! \param where The position from where the offset is added. \see SeekPosition
    //!
    //! \returns True if the position is updated successfully.
    //!
    virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;

protected:
    // Copy and move operations are protected: the abstract interface cannot be
    // copied or moved directly, only by derived classes.
    IStreamReaderV2(IStreamReaderV2 const&) = default;
    IStreamReaderV2(IStreamReaderV2&&) = default;
    IStreamReaderV2& operator=(IStreamReaderV2 const&) & = default;
    IStreamReaderV2& operator=(IStreamReaderV2&&) & = default;
};
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IStreamReaderV2
//!
//! \brief Application-implemented class for reading data in a stream-based manner asynchronously. Intended for use with
//! the GDS API for optimizing load times.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReaderV2, not
//! v_1_0::IStreamReaderV2
//!
using IStreamReaderV2 = v_1_0::IStreamReaderV2;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginResourceContext
//!
//! \brief Interface for plugins to access per context resources provided by TensorRT
//!
//! There is no public way to construct an IPluginResourceContext. It appears as an argument to
//! IPluginV3OneRuntime::attachToContext(). Overrides of that method can use the IPluginResourceContext object to access
//! any available per context resources.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see IPluginV3OneRuntime::attachToContext()
//!
class IPluginResourceContext
{
public:
    //! \brief Get the GPU allocator associated with the resource context
    //!
    //! \see IPluginV3OneRuntime::attachToContext()
    //!
    virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;

    //! \brief Get the error recorder associated with the resource context
    //!
    //! \see IPluginV3OneRuntime::attachToContext()
    //!
    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;

    virtual ~IPluginResourceContext() noexcept = default;

protected:
    // Construction and copy/move are restricted to TensorRT-internal implementations.
    IPluginResourceContext() = default;
    IPluginResourceContext(IPluginResourceContext const&) = default;
    IPluginResourceContext(IPluginResourceContext&&) = default;
    IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
    IPluginResourceContext& operator=(IPluginResourceContext&&) & = default;
};
|
|||
|
|
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
//!
//! \brief Capability interface exposing the core identity (name, version, namespace) of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneCore alias.
//!
class IPluginV3OneCore : public IPluginCapability
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
    }

    //!
    //! \brief Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginName()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginName() const noexcept = 0;

    //!
    //! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginVersion()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;

    //!
    //! \brief Return the namespace of the plugin object. Should match the plugin namespace returned by the
    //! corresponding plugin creator.
    //!
    //! \see IPluginCreatorV3One::getPluginNamespace()
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
    //! NULL terminator.
    //!
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Capability interface expressing the build-time properties and behavior of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneBuild alias.
//!
class IPluginV3OneBuild : public IPluginCapability
{
public:
    //!
    //! \brief The default maximum number of format combinations that will be timed by TensorRT during the build phase
    //!
    //! \see getFormatCombinationLimit
    //!
    static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
    }

    //!
    //! \brief Configure the plugin.
    //!
    //! configurePlugin() can be called multiple times in the build phase during creation of an engine by IBuilder.
    //!
    //! configurePlugin() is called when a plugin is being prepared for profiling but not for any
    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
    //! input and output formats, along with the bound of possible dimensions. The min, opt and max value of the
    //! DynamicPluginTensorDesc correspond to the kMIN, kOPT and kMAX value of the current profile that the plugin is
    //! being profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network
    //! creation. Wildcard dimensions may exist during this phase in the desc.dims field.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT).
    //!
    virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided.
    //!
    //! \param outputTypes Pre-allocated array to which the output data types should be written.
    //! \param nbOutputs The number of output tensors. This matches the value returned from getNbOutputs().
    //! \param inputTypes The input data types.
    //! \param nbInputs The number of input tensors.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    //! \note Provide `DataType::kFLOAT`s if the layer has no inputs. The data type for any size tensor outputs must be
    //! `DataType::kINT32`. The returned data types must each have a format that is supported by the plugin.
    //!
    //! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
    //!
    virtual int32_t getOutputDataTypes(
        DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;

    //!
    //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors.
    //!
    //! \param inputs Expressions for dimensions of the input tensors
    //! \param nbInputs The number of input tensors
    //! \param shapeInputs Expressions for values of the shape tensor inputs
    //! \param nbShapeInputs The number of shape tensor inputs
    //! \param outputs Pre-allocated array to which the output dimensions must be written
    //! \param nbOutputs Number of outputs.
    //! \param exprBuilder Object for generating new dimension expressions
    //!
    //! \note Any size tensor outputs must be declared to be 0D.
    //!
    //! \note The declaration of shapeInputs as DimsExprs is slightly abusive, because the "dimensions"
    //! are actually the values of the shape tensor. For example, if the input shape tensor
    //! is a 2x3 matrix, the DimsExprs will have six "dimensions": the three values from the first
    //! row of the matrix followed by the three values from the second row of the matrix.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). Returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
        int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;

    //!
    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
    //!
    //! For this method inputs are numbered 0.. (nbInputs - 1) and outputs are numbered nbInputs.. (nbInputs + nbOutputs
    //! - 1). Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs + nbOutputs - 1.
    //!
    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
    //! by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos]
    //! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
    //! make its result conditional on the formats/datatypes in inOut[0.. pos - 1], which will be set to values
    //! that the plugin supports. The override should not inspect inOut[pos1.. nbInputs + nbOutputs - 1],
    //! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
    //!
    //! Some examples:
    //!
    //! * A definition for a plugin that supports only FP16 NCHW:
    //!
    //!         return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF;
    //!
    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
    //!   and FP32 NCHW for its single output:
    //!
    //!         return inOut.format[pos] == TensorFormat::kLINEAR && (inOut.type[pos] == pos < 2 ? DataType::kHALF :
    //!         DataType::kFLOAT);
    //!
    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
    //!   any format or type, but the inputs and output must have the same format and type:
    //!
    //!         return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]);
    //!
    //! \warning TensorRT will stop querying once it finds getFormatCombinationLimit() of combinations.
    //!
    //! \see getFormatCombinationLimit
    //!
    virtual bool supportsFormatCombination(
        int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Get the number of outputs from the plugin.
    //!
    //! \return The number of outputs, which must be a positive integer.
    //!
    virtual int32_t getNbOutputs() const noexcept = 0;

    //!
    //! \brief Find the workspace size required by the layer.
    //!
    //! This function is called after the plugin is configured, and possibly during execution.
    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
    //! or any smaller problem.
    //!
    //! \return The workspace size.
    //!
    virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
        DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
    {
        // Default: the plugin requires no scratch workspace.
        return 0;
    }

    //!
    //! \brief Query for any custom tactics that the plugin intends to use
    //!
    //! This method queries for the set of tactics T(f) supported by the plugin for the format combination f indicated
    //! by the immediately preceding call to configurePlugin(). It is guaranteed to be called after configurePlugin().
    //!
    //! For each format combination provided through configurePlugin(), up to a maximum of getFormatCombinationLimit(),
    //! the plugin will be timed for each tactic advertised through this method for that format combination. i.e. The
    //! plugin will be timed \f$N = \sum_{i=0}^{i<getFormatCombinationLimit()} (T(f[i]))\f$ times. If \f$N = 1\f$, the
    //! plugin may not be timed. In pseudocode, the timing protocol appears as the following:
    //!
    //!     counter = 0
    //!     for each supported format combination
    //!         ++counter
    //!         if counter > getFormatCombinationLimit()
    //!             goto done
    //!         configurePlugin(...)
    //!         for each tactic in getValidTactics(...)
    //!             time tactic
    //!     done:
    //!
    //!
    //! \param tactics Pre-allocated buffer to which the tactic values should be written
    //! \param nbTactics The number of tactics advertised through getNbTactics()
    //!
    //! \note The provided tactic values must be unique and non-zero. The tactic value 0 is reserved for the default
    //! tactic attached to each format combination.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
    {
        // Default: no custom tactics are advertised.
        return 0;
    }

    //!
    //! \brief Query for the number of custom tactics the plugin intends to use
    //!
    virtual int32_t getNbTactics() noexcept
    {
        // Default: no custom tactics.
        return 0;
    }

    //!
    //! \brief Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creation.
    //!
    //! \return Suffix to use for timing cache ID, considering only the creation state of the plugin.
    //! Returning nullptr will disable timing caching for the plugin altogether.
    //!
    //! \note If timing caching is enabled for the plugin (by returning non-null), the I/O shape and format information
    //! will be automatically considered to form the prefix of the timing cache ID. Therefore, only other factors
    //! determining the creation state of the plugin, such as its attribute values, should be considered to compose the
    //! return value.
    //!
    virtual char const* getTimingCacheID() noexcept
    {
        // Default: timing caching disabled for this plugin.
        return nullptr;
    }

    //!
    //! \brief Return the maximum number of format combinations that will be timed by TensorRT during the build phase
    //!
    virtual int32_t getFormatCombinationLimit() noexcept
    {
        return kDEFAULT_FORMAT_COMBINATION_LIMIT;
    }

    //!
    //! \brief Query for a string representing the configuration of the plugin. May be called anytime after
    //! plugin creation.
    //!
    //! \return A string representing the plugin's creation state, especially with regard to its attribute values.
    //!
    virtual char const* getMetadataString() noexcept
    {
        // Default: no metadata string.
        return nullptr;
    }
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Capability interface expressing the runtime (execution) properties and behavior of an IPluginV3 plugin.
//! Refer to this class through the IPluginV3OneRuntime alias.
//!
class IPluginV3OneRuntime : public IPluginCapability
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
    }

    //!
    //! \brief Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised, this
    //! will have a value of 0, which is designated as the default tactic.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t setTactic(int32_t tactic) noexcept
    {
        // Default: accept any tactic selection without additional state.
        return 0;
    }

    //!
    //! \brief Called when a plugin is being prepared for execution for specific dimensions. This could
    //! happen multiple times in the execution phase, both during creation of an engine by IBuilder and execution of an
    //! engine by IExecutionContext.
    //!  * IBuilder will call this function once per profile, with `in` resolved to the values specified by the
    //!    kOPT field of the current profile.
    //!  * IExecutionContext will call this during the next subsequent instance of enqueueV3() or executeV2() if:
    //!    - The optimization profile is changed via setOptimizationProfile() or setOptimizationProfileAsync().
    //!    - An input binding is changed via setInputTensorAddress() or setTensorAddress() or setInputShape().
    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
    //! called from IBuilder. Performance bottlenecks of onShapeChange() will not show up during engine building but
    //! will be visible during execution if any triggering functions are called.
    //!
    //! \param in The input tensors attributes that are used for configuration.
    //! \param nbInputs Number of input tensors.
    //! \param out The output tensors attributes that are used for configuration.
    //! \param nbOutputs Number of output tensors.
    //!
    virtual int32_t onShapeChange(
        PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;

    //!
    //! \brief Execute the layer.
    //!
    //! \param inputDesc how to interpret the memory for the input tensors.
    //! \param outputDesc how to interpret the memory for the output tensors.
    //! \param inputs The memory for the input tensors.
    //! \param outputs The memory for the output tensors.
    //! \param workspace Workspace for execution.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
    //! through the error recorder.
    //!
    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;

    //!
    //! \brief Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin
    //! access to some context resources.
    //!
    //! This function is called automatically for each plugin when a new execution context is created. The plugin may
    //! use resources provided by the IPluginResourceContext until the plugin is deleted by TensorRT.
    //!
    //! If the plugin needs per-context resources, it can be allocated here.
    //!
    //! \param context A resource context that exposes methods to get access to execution context specific resources.
    //!                A different resource context is guaranteed for each different execution context to which the
    //!                plugin is attached.
    //! \see IPluginResourceContext
    //!
    //! \note This method should clone the entire IPluginV3 object, not just the runtime interface
    //!
    //! \return A clone of the IPluginV3 object whose runtime interface on which this method is invoked, which has
    //! attached to the provided resource context.
    //!
    virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;

    //!
    //! \brief Get the plugin fields which should be serialized.
    //!
    //! \note The set of plugin fields returned does not necessarily need to match that advertised through
    //! getFieldNames() of the corresponding plugin creator.

    //! \note To serialize arbitrary plugin data, use a PluginField of
    //! PluginFieldType::kUNKNOWN, with the length of the PluginField set to the correct number of bytes.
    //!
    virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
};
|
|||
|
|
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
namespace v_2_0
{

//!
//! \brief Version 2 of the build capability interface: extends v_1_0::IPluginV3OneBuild with I/O aliasing.
//! Refer to this class through the IPluginV3OneBuildV2 alias.
//!
class IPluginV3OneBuild : public v_1_0::IPluginV3OneBuild
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
    }

    //!
    //! \brief Communicates to TensorRT that the output at the specified output index is aliased to the input at the
    //! returned index
    //!
    //! Enables read-modify-write behavior in plugins. TensorRT may insert copies to facilitate this capability.
    //!
    //! \return An integer denoting the index of the input which is aliased to the output at outputIndex.
    //!         Returning -1 indicates that the output is not aliased to any input. Otherwise, the valid range for
    //!         return value is [0, nbInputs - 1].
    //!
    //! \note A given plugin input can only be aliased to a single plugin output.
    //!
    //! \note This API will only be called and have an effect when PreviewFeature::kALIASED_PLUGIN_IO_10_03 is turned
    //! on.
    //!
    //! \warning If an input is not shallow copyable, a copy inserted by TensorRT may not work as intended. Therefore,
    //! using this feature with tensors requiring deep copies is not supported.
    //!
    //! \warning If a given tensor is requested to be aliased by two different plugins, this may result in divergent
    //! copies of the tensor after writes from each plugin. e.g. In the below example, t1 and t2 could be divergent.
    //!
    //!          +-----+           +--------+
    //!     +--->|Copy +--> t* --->|Plugin0 +--> t1
    //!     |    +-----+           +--------+
    //!     t
    //!     |    +-----+           +--------+
    //!     +--->|Copy +--> t** -->|Plugin1 +--> t2
    //!          +-----+           +--------+
    //!
    virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
    {
        // Default: no output is aliased to any input.
        return -1;
    }
};

} // namespace v_2_0
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneCore
//!
//! \brief A plugin capability interface that enables the core capability (PluginCapabilityType::kCORE).
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneCore = v_1_0::IPluginV3OneCore;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneBuild
//!
//! \brief A plugin capability interface that enables the build capability (PluginCapabilityType::kBUILD). Exposes
//! methods that allow the expression of the build time properties and behavior of a plugin.
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneBuild = v_1_0::IPluginV3OneBuild;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneRuntime
//!
//! \brief A plugin capability interface that enables the runtime capability (PluginCapabilityType::kRUNTIME). Exposes
//! methods that allow the expression of the runtime properties and behavior of a plugin.
//!
//! \see IPluginCapability
//! \see PluginCapabilityType
//! \see IPluginV3::getCapabilityInterface()
//!
using IPluginV3OneRuntime = v_1_0::IPluginV3OneRuntime;
|
|||
|
|
|
|||
|
|
//!
//! \class IPluginV3OneBuildV2
//!
//! \brief A plugin capability interface that extends IPluginV3OneBuild by providing I/O aliasing functionality.
//!
//! \see IPluginV3OneBuild
//!
using IPluginV3OneBuildV2 = v_2_0::IPluginV3OneBuild;
|
|||
|
|
|
|||
|
|
namespace v_1_0
{
//!
//! \brief Application-implemented interface for per-layer profiling.
//! Refer to this class through the IProfiler alias.
//!
class IProfiler
{
public:
    //!
    //! \brief Layer time reporting callback.
    //!
    //! \param layerName The name of the layer, set when constructing the network definition. If the engine is built
    //! with profiling verbosity set to kNONE, the layerName is the decimal index of the layer.
    //! \param ms The time in milliseconds to execute the layer.
    //!
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for profiling.
//!
//! When this class is added to an execution context, the profiler will be called once per layer for each invocation of
//! executeV2()/enqueueV3().
//!
//! It is not recommended to run inference with profiler enabled when the inference execution time is critical since the
//! profiler may affect execution time negatively.
//!
using IProfiler = v_1_0::IProfiler;
|
|||
|
|
|
|||
|
|
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses particular Weights.
//!
//! The power weights of an IScaleLayer are omitted. Refitting those is not supported.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< kernel for IConvolutionLayer or IDeconvolutionLayer
    kBIAS = 1,     //!< bias for IConvolutionLayer or IDeconvolutionLayer
    kSHIFT = 2,    //!< shift part of IScaleLayer
    kSCALE = 3,    //!< scale part of IScaleLayer
    kCONSTANT = 4, //!< weights for IConstantLayer
    kANY = 5,      //!< Any other weights role
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in WeightsRole enum. \see WeightsRole
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<WeightsRole>() noexcept
|
|||
|
|
{
|
|||
|
|
return 6;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \enum DeviceType
//! \brief The device that this layer/network will execute on.
//!
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU Device
    kDLA = 1, //!< DLA Core
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in DeviceType enum. \see DeviceType
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<DeviceType>() noexcept
|
|||
|
|
{
|
|||
|
|
return 2;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags used to control TensorRT's behavior when creating executable temporary files.
//!
//! On some platforms the TensorRT runtime may need to create files in a temporary directory or use platform-specific
//! APIs to create files in-memory to load temporary DLLs that implement runtime code. These flags allow the
//! application to explicitly control TensorRT's use of these files. This will preclude the use of certain TensorRT
//! APIs for deserializing and loading lean runtimes.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in-memory (or unnamed files).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    //!
    //! \see IRuntime::setTemporaryDirectory()
    kALLOW_TEMPORARY_FILES = 1,
};
|
|||
|
|
|
|||
|
|
//! Maximum number of elements in TempfileControlFlag enum. \see TempfileControlFlag
template <>
constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
{
    return 2;
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operations.
//!
//! \see TempfileControlFlag,
//!      IRuntime::setTempfileControlFlags(),
//!      IRuntime::getTempfileControlFlags()
using TempfileControlFlags = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum TensorFormat
//!
//! \brief Format of the input/output tensors.
//!
//! This enum is used by both plugins and network I/O tensors.
//!
//! \see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat()
//!
//! Many of the formats are **vector-major** or **vector-minor**. These formats specify
//! a <em>vector dimension</em> and <em>scalars per vector</em>.
//! For example, suppose that the tensor has dimensions [M,N,C,H,W],
//! the vector dimension is C and there are V scalars per vector.
//!
//! * A **vector-major** format splits the vectorized dimension into two axes in the
//!   memory layout. The vectorized dimension is replaced by an axis of length ceil(C/V)
//!   and a new dimension of length V is appended. For the example tensor, the memory layout
//!   is equivalent to an array with dimensions [M][N][ceil(C/V)][H][W][V].
//!   Tensor coordinate (m,n,c,h,w) maps to array location [m][n][c/V][h][w][c\%V].
//!
//! * A **vector-minor** format moves the vectorized dimension to become the last axis
//!   in the memory layout. For the example tensor, the memory layout is equivalent to an
//!   array with dimensions [M][N][H][W][ceil(C/V)*V]. Tensor coordinate (m,n,c,h,w) maps
//!   array location subscript [m][n][h][w][c].
//!
//! In interfaces that refer to "components per element", that's the value of V above.
//!
//! For more information about data formats, see the topic "Data Format Description" located in the
//! TensorRT Developer Guide.
//! https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/advanced.html#i-o-formats
//!
enum class TensorFormat : int32_t
{
    //! Memory layout is similar to an array in C or C++.
    //! The stride of each dimension is the product of the dimensions after it.
    //! The last dimension has unit stride.
    //!
    //! This format supports all TensorRT types.
    //! For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192].
    kLINEAR = 0,

    //! Vector-major format with two scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires FP16 or BF16 and at least three dimensions.
    kCHW2 = 1,

    //! Vector-minor format with eight scalars per vector.
    //! Vector dimension is third to last.
    //! This format requires FP16 or BF16 and at least three dimensions.
    kHWC8 = 2,

    //! Vector-major format with four scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires INT8 and at least three dimensions.
    //! For INT8, the length of the vector dimension must be a build-time constant.
    //!
    //! Deprecated usage:
    //!
    //! If running on the DLA, this format can be used for acceleration
    //! with the caveat that C must be less than or equal to 4.
    //! If used as DLA input and the build option kGPU_FALLBACK is not specified,
    //! it needs to meet line stride requirement of DLA format. Column stride in
    //! bytes must be a multiple of 64 on Orin.
    kCHW4 = 3,

    //! Vector-major format with 16 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format is only supported by DLA and requires FP16 and at least three dimensions.
    //! This format maps to the native feature format for FP16,
    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
    kCHW16 = 4,

    //! Vector-major format with 32 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This format requires INT8, FP32, or FP16 and at least three dimensions.
    //!
    //! For DLA usage, this format maps to the native feature format for INT8,
    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
    kCHW32 = 5,

    //! Vector-minor format with eight scalars per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP16 or BF16 and at least four dimensions.
    kDHWC8 = 6,

    //! Vector-major format with 32 scalars per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP16 or INT8 and at least four dimensions.
    kCDHW32 = 7,

    //! Vector-minor format where channel dimension is third to last and unpadded.
    //!
    //! This format requires either FP32 or UINT8 and at least three dimensions.
    kHWC = 8,

    //! DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis
    //! always has unit stride. The stride for stepping along the H axis is
    //! rounded up to 64 bytes.
    //!
    //! The memory layout is equivalent to a C array with dimensions
    //! [N][C][H][roundUp(W, 64/elementSize)] where elementSize is
    //! 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w)
    //! mapping to array subscript [n][c][h][w].
    kDLA_LINEAR = 9,

    //! DLA image format. For a tensor with dimension {N, C, H, W} the C axis
    //! always has unit stride. The stride for stepping along the H axis is rounded up
    //! to 64 bytes on Orin. C can only be 1, 3 or 4.
    //! If C == 1, it will map to grayscale format.
    //! If C == 3 or C == 4, it will map to color image format. And if C == 3,
    //! the stride for stepping along the W axis needs to be padded to 4 in elements.
    //!
    //! When C is {1, 3, 4}, then C' is {1, 4, 4} respectively,
    //! the memory layout is equivalent to a C array with dimensions
    //! [N][H][roundUp(W, 64/C'/elementSize)][C'] on Orin
    //! where elementSize is 2 for FP16
    //! and 1 for Int8. The tensor coordinates (n, c, h, w) mapping to array
    //! subscript [n][h][w][c].
    kDLA_HWC4 = 10,

    //! Vector-minor format with 16 scalars per vector.
    //! Vector dimension is third to last.
    //!
    //! This requires FP16, INT8 or FP8 and at least three dimensions.
    kHWC16 = 11,

    //! Vector-minor format with one scalar per vector.
    //! Vector dimension is fourth to last.
    //!
    //! This format requires FP32 and at least four dimensions.
    kDHWC = 12
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in TensorFormat enum. \see TensorFormat
template <>
struct EnumMaxImpl<TensorFormat>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the TensorFormat enum.
    static constexpr int32_t kVALUE = 13;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//!
//! \enum AllocatorFlag
//!
//! \brief Allowed type of memory allocation.
//!
enum class AllocatorFlag : int32_t
{
    //! TensorRT may call realloc() on this allocation.
    kRESIZABLE = 0,
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in AllocatorFlag enum. \see AllocatorFlag
template <>
struct EnumMaxImpl<AllocatorFlag>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the AllocatorFlag enum.
    static constexpr int32_t kVALUE = 1;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
//! Bitmask of AllocatorFlag values combined with bitwise-OR. \see AllocatorFlag
using AllocatorFlags = uint32_t;

//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
//! The name v_1_0 may change in future versions of TensorRT.
|
|||
|
|
|
|||
|
|
//!
//! \class ILogger
//!
//! \brief Application-implemented logging interface for the builder, refitter and runtime.
//!
//! The logger used to create an instance of IBuilder, IRuntime or IRefitter is used for all objects created through
//! that interface. The logger must be valid until all objects created are released.
//!
//! The Logger object implementation must be thread safe. All locking and synchronization is pushed to the
//! interface implementation and TensorRT does not hold any synchronization primitives when calling the interface
//! functions.
//!
class ILogger
{
public:
    //!
    //! \enum Severity
    //!
    //! \brief The severity corresponding to a log message.
    //!
    enum class Severity : int32_t
    {
        //! An internal error has occurred. Execution is unrecoverable.
        kINTERNAL_ERROR = 0,
        //! An application error has occurred.
        kERROR = 1,
        //! An application error has been discovered, but TensorRT has recovered or fallen back to a default.
        kWARNING = 2,
        //! Informational messages with instructional information.
        kINFO = 3,
        //! Verbose messages with debugging information.
        kVERBOSE = 4,
    };

    //!
    //! \brief A callback implemented by the application to handle logging messages;
    //!
    //! \param severity The severity of the message.
    //! \param msg A null-terminated log message.
    //!
    //! \warning Loggers used in the safety certified runtime must set a maximum message length and truncate
    //!          messages exceeding this length. It is up to the implementer of the derived class to define
    //!          a suitable limit that will prevent buffer overruns, resource exhaustion, and other security
    //!          vulnerabilities in their implementation. The TensorRT safety certified runtime will never
    //!          emit messages longer than 1024 bytes.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
    //!     when multiple execution contexts are used during runtime, or if the same logger is used
    //!     for multiple runtimes, builders, or refitters.
    //!
    virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;

    ILogger() = default;
    virtual ~ILogger() = default;

protected:
// @cond SuppressDoxyWarnings
    ILogger(ILogger const&) = default;
    ILogger(ILogger&&) = default;
    ILogger& operator=(ILogger const&) & = default;
    ILogger& operator=(ILogger&&) & = default;
// @endcond
};
|
|||
|
|
|
|||
|
|
namespace impl
{
//! Maximum number of elements in ILogger::Severity enum. \see ILogger::Severity
template <>
struct EnumMaxImpl<ILogger::Severity>
{
    //! Declaration of kVALUE that represents the maximum number of elements in the ILogger::Severity enum.
    static constexpr int32_t kVALUE = 5;
};
} // namespace impl
|
|||
|
|
|
|||
|
|
namespace v_1_0
{

class IGpuAllocator : public IVersionedInterface
{
public:
    //!
    //! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
    //!
    //! \param size The size of the memory block required (in bytes).
    //! \param alignment The required alignment of memory. Alignment will be zero
    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
    //!        An alignment value of zero indicates any alignment is acceptable.
    //! \param flags Reserved for future use. In the current release, 0 will be passed.
    //!
    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
    //!         If an allocation request of size 0 is made, nullptr must be returned.
    //!         If an allocation request cannot be satisfied, nullptr must be returned.
    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
    //!
    TRT_DEPRECATED virtual void* allocate(
        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;

    ~IGpuAllocator() override = default;
    IGpuAllocator() = default;

    //!
    //! \brief A thread-safe callback implemented by the application to resize an existing allocation.
    //!
    //! Only allocations which were allocated with AllocatorFlag::kRESIZABLE will be resized.
    //!
    //! Options are one of:
    //! * resize in place leaving min(oldSize, newSize) bytes unchanged and return the original address
    //! * move min(oldSize, newSize) bytes to a new location of sufficient size and return its address
    //! * return nullptr, to indicate that the request could not be fulfilled.
    //!
    //! If nullptr is returned, TensorRT will assume that resize() is not implemented, and that the
    //! allocation at baseAddr is still valid.
    //!
    //! This method is made available for use cases where delegating the resize
    //! strategy to the application provides an opportunity to improve memory management.
    //! One possible implementation is to allocate a large virtual device buffer and
    //! progressively commit physical memory with cuMemMap. CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
    //! is suggested in this case.
    //!
    //! TensorRT may call realloc to increase the buffer by relatively small amounts.
    //!
    //! \param baseAddr the address of the original allocation, which will have been returned by previously calling
    //!        allocate() or reallocate() on the same object.
    //! \param alignment The alignment used by the original allocation. This will be the same value that was previously
    //!        passed to the allocate() or reallocate() call that returned baseAddr.
    //! \param newSize The new memory size required (in bytes).
    //!
    //! \return The address of the reallocated memory, or nullptr. If a non-null address is returned, it is
    //!         guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
    {
        return nullptr;
    }

    //!
    //! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
    //!
    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
    //!
    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
    //!        allocator object.
    //!
    //! \return True if the acquired memory is released successfully.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
    //!
    TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;

    //!
    //! \brief A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU memory.
    //!
    //! The default behavior is to call method allocate(), which is synchronous and thus loses
    //! any performance benefits of asynchronous allocation. If you want the benefits of asynchronous
    //! allocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
    //! for nvinfer1::IGpuAllocator.
    //!
    //! \param size The size of the memory block required (in bytes).
    //! \param alignment The required alignment of memory. Alignment will be zero
    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
    //!        An alignment value of zero indicates any alignment is acceptable.
    //! \param flags Reserved for future use. In the current release, 0 will be passed.
    //! \param stream specifies the cudaStream for asynchronous usage.
    //!
    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
    //!         If an allocation request of size 0 is made, nullptr must be returned.
    //!         If an allocation request cannot be satisfied, nullptr must be returned.
    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual void* allocateAsync(
        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
    {
        return allocate(size, alignment, flags);
    }

    //!
    //! \brief A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
    //!
    //! The default behavior is to call method deallocate(), which is synchronous and thus loses
    //! any performance benefits of asynchronous deallocation. If you want the benefits of asynchronous
    //! deallocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
    //! for nvinfer1::IGpuAllocator.
    //!
    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
    //!
    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
    //!        allocator object.
    //! \param stream specifies the cudaStream for asynchronous usage.
    //!
    //! \return True if the acquired memory is released successfully.
    //!
    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
    //!       requests.
    //!
    //! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
    //!       albeit doing so will lose the performance advantage of asynchronous deallocation.
    //!       Either way, it is critical that it not actually free the memory until the current
    //!       stream position is reached.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
    //!
    virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
    {
        return deallocate(memory);
    }

    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IGpuAllocator", 1, 0};
    }

protected:
// @cond SuppressDoxyWarnings
    IGpuAllocator(IGpuAllocator const&) = default;
    IGpuAllocator(IGpuAllocator&&) = default;
    IGpuAllocator& operator=(IGpuAllocator const&) & = default;
    IGpuAllocator& operator=(IGpuAllocator&&) & = default;
// @endcond
};

} // namespace v_1_0
|
|||
|
|
|
|||
|
|
//!
//! \class IGpuAllocator
//!
//! \brief Application-implemented class for controlling allocation on the GPU.
//!
//! \warning The lifetime of an IGpuAllocator object must exceed that of all objects that use it.
//!
//! This class is intended as a base class for allocators that implement synchronous allocation.
//! If you want the benefits of asynchronous allocation, you can do either of:
//!
//! * Derive your class from IGpuAllocator and override all four of its virtual methods
//!   for allocation/deallocation, including the two deprecated methods.
//!
//! * Derive your class from IGpuAsyncAllocator and override its two pure virtual
//!   methods for allocation/deallocation.
//!
//! The latter style is preferred because it does not tie code to deprecated methods.
//!
//! \see IGpuAsyncAllocator.
//!
using IGpuAllocator = v_1_0::IGpuAllocator;
|
|||
|
|
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class IRuntime
|
|||
|
|
//!
|
|||
|
|
//! \brief Allows a serialized functionally unsafe engine to be deserialized.
|
|||
|
|
//!
|
|||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
|||
|
|
//!
|
|||
|
|
class IRuntime : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~IRuntime() noexcept = default;
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Sets the DLA core used by the network. Defaults to -1.
    //!
    //! \param dlaCore The DLA core to execute the engine on, in the range [0,getNbDlaCores()).
    //!
    //! This function is used to specify which DLA core to use via indexing, if multiple DLA cores are available.
    //!
    //! \warning if getNbDLACores() returns 0, then this function does nothing.
    //!
    //! \see getDLACore()
    //!
    void setDLACore(int32_t dlaCore) noexcept
    {
        mImpl->setDLACore(dlaCore);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Get the DLA core that the engine executes on.
    //!
    //! \return assigned DLA core or -1 for DLA not present or unset.
    //!
    int32_t getDLACore() const noexcept
    {
        return mImpl->getDLACore();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
    //!
    int32_t getNbDLACores() const noexcept
    {
        return mImpl->getNbDLACores();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the GPU allocator.
    //!
    //! \param allocator Set the GPU allocator to be used by the runtime. All GPU memory acquired will use this
    //!        allocator. If NULL is passed, the default allocator will be used.
    //!
    //! Default: allocateAsync uses cudaMallocAsync if cudaDevAttrMemoryPoolsSupported returns true, otherwise falls
    //! back to cudaMalloc. allocate always uses cudaMalloc.
    //!
    //! If nullptr is passed, the default allocator will be used.
    //!
    void setGpuAllocator(IGpuAllocator* allocator) noexcept
    {
        mImpl->setGpuAllocator(allocator);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from host memory.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the engine.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param blob The memory that holds the serialized engine.
    //! \param size The size of the memory.
    //!
    //! \return The engine, or nullptr if it could not be deserialized.
    //!
    ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
    {
        return mImpl->deserializeCudaEngine(blob, size);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from a stream.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the
    //! engine.
    //!
    //! This deserialization path will reduce host memory usage when weight streaming is enabled.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param streamReader a read-only stream from which TensorRT will deserialize a
    //!        previously serialized engine.
    //!
    //! \return The engine, or nullptr if it could not be deserialized.
    //!
    //! \deprecated Deprecated in TensorRT 10.7. Superseded by deserializeCudaEngine that takes an IStreamReaderV2
    //!             instead of IStreamReader.
    //!
    TRT_DEPRECATED ICudaEngine* deserializeCudaEngine(IStreamReader& streamReader)
    {
        return mImpl->deserializeCudaEngine(streamReader);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and
    //!        device pointers.
    //!
    //! If an error recorder has been set for the runtime, it will also be passed to the
    //! engine.
    //!
    //! This deserialization path will reduce engine load time when applied with GDS (GPU Direct storage), or when
    //! weight streaming is enabled.
    //!
    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
    //!          behavior.
    //!
    //! \param streamReader a read-only stream from which TensorRT will deserialize a previously serialized engine.
    //!
    //! \return The engine, or nullptr if it could not be deserialized. The pointer may not be valid immediately after
    //!         the function returns.
    //!
    ICudaEngine* deserializeCudaEngine(IStreamReaderV2& streamReader)
    {
        return mImpl->deserializeCudaEngineV2(streamReader);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief get the logger with which the runtime was created
    //!
    //! \return the logger
    //!
    ILogger* getLogger() const noexcept
    {
        return mImpl->getLogger();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Set the maximum number of threads.
    //!
    //! \param maxThreads The maximum number of threads that can be used by the runtime.
    //! \return True if successful, false otherwise.
    //!
    //! The default value is 1 and includes the current thread.
    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
    //!
    bool setMaxThreads(int32_t maxThreads) noexcept
    {
        return mImpl->setMaxThreads(maxThreads);
    }
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the maximum number of threads that can be used by the runtime.
//!
//! Retrieves the maximum number of threads that can be used by the runtime.
//!
//! \return The maximum number of threads that can be used by the runtime.
//!
//! \see setMaxThreads()
//!
int32_t getMaxThreads() const noexcept
{
    return mImpl->getMaxThreads();
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the directory that will be used by this runtime for temporary files.
|
|||
|
|
//!
|
|||
|
|
//! On some platforms the TensorRT runtime may need to create and use temporary files
|
|||
|
|
//! with read/write/execute permissions to implement runtime functionality.
|
|||
|
|
//!
|
|||
|
|
//! \param path Path to the temporary directory for use, or nullptr.
|
|||
|
|
//!
|
|||
|
|
//! If path is nullptr, then TensorRT will use platform-specific heuristics to pick
|
|||
|
|
//! a default temporary directory if required:
|
|||
|
|
//!
|
|||
|
|
//! - On UNIX/Linux platforms, TensorRT will first try the TMPDIR environment variable, then fall back to /tmp
|
|||
|
|
//! - On Windows, TensorRT will try the TEMP environment variable.
|
|||
|
|
//!
|
|||
|
|
//! See the TensorRT Developer Guide for more information.
|
|||
|
|
//!
|
|||
|
|
//! The default value is nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning If path is not nullptr, it must be a non-empty string representing a relative
|
|||
|
|
//! or absolute path in the format expected by the host operating system.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string path must be null-terminated, and be at most 4096 bytes including the
|
|||
|
|
//! terminator. Note that the operating system may have stricter path length requirements.
|
|||
|
|
//!
|
|||
|
|
//! \warning The process using TensorRT must have rwx permissions for the temporary directory,
|
|||
|
|
//! and the directory shall be configured to disallow other users from modifying created files
|
|||
|
|
//! (e.g. on Linux, if the directory is shared with other users, the sticky bit must be set).
|
|||
|
|
//!
|
|||
|
|
//! \see getTemporaryDirectory()
|
|||
|
|
//!
|
|||
|
|
void setTemporaryDirectory(char const* path) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setTemporaryDirectory(path);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the directory that will be used by this runtime for temporary files.
//!
//! \returns A path to the temporary directory in use, or nullptr if no path is specified.
//!
//! \see setTemporaryDirectory()
//!
char const* getTemporaryDirectory() const noexcept
{
    return mImpl->getTemporaryDirectory();
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the tempfile control flags for this runtime.
|
|||
|
|
//!
|
|||
|
|
//! \param flags The flags to set.
|
|||
|
|
//!
|
|||
|
|
//! The default value is all flags set, i.e.
|
|||
|
|
//!
|
|||
|
|
//! (1U << static_cast<uint32_t>(kALLOW_IN_MEMORY_FILES)) | (1U << static_cast<uint32_t>(kALLOW_TEMPORARY_FILES))
|
|||
|
|
//!
|
|||
|
|
//! \see TempfileControlFlag, TempfileControlFlags, getTempfileControlFlags()
|
|||
|
|
//!
|
|||
|
|
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setTempfileControlFlags(flags);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the tempfile control flags for this runtime.
//!
//! \return The flags currently set.
//!
//! \see TempfileControlFlag, TempfileControlFlags, setTempfileControlFlags()
//!
TempfileControlFlags getTempfileControlFlags() const noexcept
{
    return mImpl->getTempfileControlFlags();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the local plugin registry that can be used by the runtime.
//!
//! \return The local plugin registry that can be used by the runtime.
//!
IPluginRegistry& getPluginRegistry() noexcept
{
    return mImpl->getPluginRegistry();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Load IRuntime from the file.
//!
//! This method loads a runtime library from a shared library file. The runtime can then be used to execute
//! a plan file built with BuilderFlag::kVERSION_COMPATIBLE and BuilderFlag::kEXCLUDE_LEAN_RUNTIME both set
//! and built with the same version of TensorRT as the loaded runtime library.
//!
//! \param path Path to the runtime lean library.
//!
//! \return the runtime library, or nullptr if it could not be loaded
//!
//! \warning The path string must be null-terminated, and be at most 4096 bytes including the terminator.
//!
IRuntime* loadRuntime(char const* path) noexcept
{
    return mImpl->loadRuntime(path);
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set whether the runtime is allowed to deserialize engines with host executable code.
|
|||
|
|
//!
|
|||
|
|
//! \param allowed Whether the runtime is allowed to deserialize engines with host executable code.
|
|||
|
|
//!
|
|||
|
|
//! The default value is false.
|
|||
|
|
//!
|
|||
|
|
void setEngineHostCodeAllowed(bool allowed) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setEngineHostCodeAllowed(allowed);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get whether the runtime is allowed to deserialize engines with host executable code.
//!
//! \return Whether the runtime is allowed to deserialize engines with host executable code.
//!
bool getEngineHostCodeAllowed() const noexcept
{
    return mImpl->getEngineHostCodeAllowed();
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VRuntime* mImpl;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
//!
//! \class IRefitter
//!
//! \brief Updates weights in an engine.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IRefitter : public INoCopy
{
public:
    virtual ~IRefitter() noexcept = default;

    //!
    //! \brief Specify new weights for a layer of given name.
    //!
    //! Returns true on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * There is no such layer by that name.
    //! * The layer does not have weights with the specified role.
    //! * The count of weights is inconsistent with the layer's original specification.
    //! * The type of weights is inconsistent with the layer's original specification.
    //!
    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
    //! behavior.
    //!
    //! \warning The string layerName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
    {
        return mImpl->setWeights(layerName, role, weights);
    }

    //!
    //! \brief Refits associated engine.
    //!
    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
    //! If false is returned, a subset of weights may have been refitted.
    //!
    //! The behavior is undefined if the engine has pending enqueued work.
    //! Provided weights on CPU or GPU can be unset and released, or updated after refitCudaEngine returns.
    //!
    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead.
    //!
    bool refitCudaEngine() noexcept
    {
        return mImpl->refitCudaEngine();
    }

    //!
    //! \brief Get description of missing weights.
    //!
    //! For example, if some Weights have been set, but the engine was optimized
    //! in a way that combines weights, any unsupplied Weights in the combination
    //! are considered missing.
    //!
    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
    //! \param layerNames Where to write the layer names.
    //! \param roles Where to write the weights roles.
    //!
    //! \return The number of missing Weights.
    //!
    //! If layerNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
    {
        return mImpl->getMissing(size, layerNames, roles);
    }

    //!
    //! \brief Get description of all weights that could be refit.
    //!
    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
    //! \param layerNames Where to write the layer names.
    //! \param roles Where to write the weights roles.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If layerNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
    {
        return mImpl->getAll(size, layerNames, roles);
    }

    //!
    //! \brief Update dynamic range for a tensor.
    //!
    //! \param tensorName The name of an ITensor in the network.
    //! \param min The minimum of the dynamic range for the tensor.
    //! \param max The maximum of the dynamic range for the tensor.
    //!
    //! \return True if successful; false otherwise.
    //!
    //! Returns false if there is no Int8 engine tensor derived from
    //! a network tensor of that name. If successful, then getMissing
    //! may report that some weights need to be supplied.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
    {
        return mImpl->setDynamicRange(tensorName, min, max);
    }

    //!
    //! \brief Get minimum of dynamic range.
    //!
    //! \return Minimum of dynamic range.
    //!
    //! If the dynamic range was never set, returns the minimum computed during calibration.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
    {
        return mImpl->getDynamicRangeMin(tensorName);
    }

    //!
    //! \brief Get maximum of dynamic range.
    //!
    //! \return Maximum of dynamic range.
    //!
    //! If the dynamic range was never set, returns the maximum computed during calibration.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
    {
        return mImpl->getDynamicRangeMax(tensorName);
    }

    //!
    //! \brief Get names of all tensors that have refittable dynamic ranges.
    //!
    //! \param size The number of items that can be safely written to a non-null tensorNames.
    //! \param tensorNames Where to write the layer names.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If tensorNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
    {
        return mImpl->getTensorsWithDynamicRange(size, tensorNames);
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }

    //!
    //! \brief Specify new weights of given name.
    //!
    //! \param name The name of the weights to be refit.
    //! \param weights The new weights to associate with the name.
    //!
    //! Returns true on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * The name of weights is nullptr or does not correspond to any refittable weights.
    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
    //! same name.
    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
    //! same name.
    //!
    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
    //! behavior.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setNamedWeights(char const* name, Weights weights) noexcept
    {
        return mImpl->setNamedWeights(name, weights);
    }

    //!
    //! \brief Get names of missing weights.
    //!
    //! For example, if some Weights have been set, but the engine was optimized
    //! in a way that combines weights, any unsupplied Weights in the combination
    //! are considered missing.
    //!
    //! \param size The number of weights names that can be safely written to.
    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
    //!
    //! \return The number of missing Weights.
    //!
    //! If weightsNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
    {
        return mImpl->getMissingWeights(size, weightsNames);
    }

    //!
    //! \brief Get names of all weights that could be refit.
    //!
    //! \param size The number of weights names that can be safely written to.
    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
    //!
    //! \return The number of Weights that could be refit.
    //!
    //! If weightsNames!=nullptr, each written pointer points to a string owned by
    //! the engine being refit, and becomes invalid when the engine is destroyed.
    //!
    int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
    {
        return mImpl->getAllWeights(size, weightsNames);
    }

    //!
    //! \brief Get the logger with which the refitter was created.
    //!
    //! \return The logger.
    //!
    ILogger* getLogger() const noexcept
    {
        return mImpl->getLogger();
    }

    //!
    //! \brief Set the maximum number of threads.
    //!
    //! \param maxThreads The maximum number of threads that can be used by the refitter.
    //!
    //! \return True if successful, false otherwise.
    //!
    //! The default value is 1 and includes the current thread.
    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
    //!
    bool setMaxThreads(int32_t maxThreads) noexcept
    {
        return mImpl->setMaxThreads(maxThreads);
    }

    //!
    //! \brief Get the maximum number of threads that can be used by the refitter.
    //!
    //! Retrieves the maximum number of threads that can be used by the refitter.
    //!
    //! \return The maximum number of threads that can be used by the refitter.
    //!
    //! \see setMaxThreads()
    //!
    int32_t getMaxThreads() const noexcept
    {
        return mImpl->getMaxThreads();
    }

    //!
    //! \brief Specify new weights on a specified device of given name.
    //!
    //! \param name The name of the weights to be refitted.
    //! \param weights The new weights on the specified device.
    //! \param location The location (host vs. device) of the new weights.
    //!
    //! \return True on success, or false if new weights are rejected.
    //! Possible reasons for rejection are:
    //!
    //! * The name of the weights is nullptr or does not correspond to any refittable weights.
    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
    //! same name.
    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
    //! same name.
    //!
    //! It is allowed to provide some weights on CPU and others on GPU.
    //! Modifying the weights before the method refitCudaEngine() or refitCudaEngineAsync() completes will result in
    //! undefined behavior.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
    {
        return mImpl->setNamedWeightsWithLocation(name, weights, location);
    }

    //!
    //! \brief Get weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Weights associated with the given name.
    //!
    //! If the weights were never set, returns null weights and reports an error to the refitter errorRecorder.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Weights getNamedWeights(char const* weightsName) const noexcept
    {
        return mImpl->getNamedWeights(weightsName);
    }

    //!
    //! \brief Get location for the weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Location for the weights associated with the given name.
    //!
    //! If the weights were never set, returns TensorLocation::kHOST and reports an error to the refitter errorRecorder.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    TensorLocation getWeightsLocation(char const* weightsName) const noexcept
    {
        return mImpl->getWeightsLocation(weightsName);
    }

    //!
    //! \brief Unset weights associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return False if the weights were never set, returns true otherwise.
    //!
    //! Unset weights before releasing them.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool unsetNamedWeights(char const* weightsName) noexcept
    {
        return mImpl->unsetNamedWeights(weightsName);
    }

    //!
    //! \brief Set whether to validate weights during refitting.
    //!
    //! \param weightsValidation Indicate whether to validate weights during refitting.
    //!
    //! When set to true, TensorRT will validate weights during FP32 to FP16/BF16 weights conversions or
    //! sparsifying weights in the refit call. If provided weights are not proper for some weights transformations,
    //! TensorRT will issue a warning and continue the transformation for minor issues (such as overflow during
    //! narrowing conversion), or issue an error and stop the refitting process for severe issues (such as sparsifying
    //! dense weights). By default the flag is true. Set the flag to false for faster refitting performance.
    //!
    void setWeightsValidation(bool weightsValidation) noexcept
    {
        return mImpl->setWeightsValidation(weightsValidation);
    }

    //!
    //! \brief Get whether to validate weights values during refitting.
    //!
    bool getWeightsValidation() const noexcept
    {
        return mImpl->getWeightsValidation();
    }

    //!
    //! \brief Enqueue weights refitting of the associated engine on the given stream.
    //!
    //! \param stream The stream to enqueue the weights updating task.
    //!
    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
    //! If false is returned, a subset of weights may have been refitted.
    //!
    //! The behavior is undefined if the engine has pending enqueued work on a different stream from the provided one.
    //! Provided weights on CPU can be unset and released, or updated after refitCudaEngineAsync returns.
    //! Freeing or updating of the provided weights on GPU can be enqueued on the same stream after refitCudaEngineAsync
    //! returns.
    //!
    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead. The weights
    //! updating task should use the same stream as the one used for the refit call.
    //!
    bool refitCudaEngineAsync(cudaStream_t stream) noexcept
    {
        return mImpl->refitCudaEngineAsync(stream);
    }

    //!
    //! \brief Get the Weights prototype associated with the given name.
    //!
    //! \param weightsName The name of the weights to be refitted.
    //!
    //! \return Weights prototype associated with the given name.
    //!
    //! The type and count of weights prototype is the same as weights used for engine building. The values property
    //! is nullptr for weights prototypes. The count of the weights prototype is -1 when the name of the weights is
    //! nullptr or does not correspond to any refittable weights.
    //!
    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Weights getWeightsPrototype(char const* weightsName) const noexcept
    {
        return mImpl->getWeightsPrototype(weightsName);
    }

protected:
    apiv::VRefitter* mImpl; //!< Opaque implementation; all public methods delegate to it.
};
|
|||
|
|
|
|||
|
|
//!
//! \enum OptProfileSelector
//!
//! \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions),
//! select whether we are interested in the minimum, optimum, or maximum values for these parameters.
//! The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value
//! is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime.
//!
//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues()
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< This is used to set or get the minimum permitted value for dynamic dimensions etc.
    kOPT = 1, //!< This is used to set or get the value that is used in the optimization (kernel selection).
    kMAX = 2  //!< This is used to set or get the maximum permitted value for dynamic dimensions etc.
};
|
|||
|
|
|
|||
|
|
//!
//! \brief Number of different values of OptProfileSelector enum.
//!
//! \see OptProfileSelector
//!
template <>
constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
{
    // kMIN, kOPT, kMAX — must be kept in sync with the OptProfileSelector enumerators above.
    return 3;
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class IOptimizationProfile
|
|||
|
|
//! \brief Optimization profile for dynamic input dimensions and shape tensors.
|
|||
|
|
//!
|
|||
|
|
//! When building an ICudaEngine from an INetworkDefinition that has dynamically resizable inputs (at least
|
|||
|
|
//! one input tensor has one or more of its dimensions specified as -1) or shape input tensors, users need to specify
|
|||
|
|
//! at least one optimization profile. Optimization profiles are numbered 0, 1, ...
|
|||
|
|
//! The first optimization profile that has been defined (with index 0) will be used by the ICudaEngine whenever no
|
|||
|
|
//! optimization profile has been selected explicitly. If none of the inputs are dynamic, the default optimization
|
|||
|
|
//! profile will be generated automatically unless it is explicitly provided by the user (this is possible but not
|
|||
|
|
//! required in this case). If more than a single optimization profile is defined, users may set a target how
|
|||
|
|
//! much additional weight space should be maximally allocated to each additional profile (as a fraction of the
|
|||
|
|
//! maximum, unconstrained memory).
|
|||
|
|
//!
|
|||
|
|
//! Users set optimum input tensor dimensions, as well as minimum and maximum input tensor dimensions. The builder
|
|||
|
|
//! selects the kernels that result in the lowest runtime for the optimum input tensor dimensions, and are valid for
|
|||
|
|
//! all input tensor sizes in the valid range between minimum and maximum dimensions. A runtime error will be raised
|
|||
|
|
//! if the input tensor dimensions fall outside the valid range for this profile. Likewise, users provide minimum,
|
|||
|
|
//! optimum, and maximum values for all shape tensor input values.
|
|||
|
|
//!
|
|||
|
|
//! \see IBuilderConfig::addOptimizationProfile()
|
|||
|
|
//!
|
|||
|
|
class IOptimizationProfile : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
//!
//! \brief Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
//!
//! This function must be called three times (for the minimum, optimum, and maximum) for any network input tensor
//! that has dynamic dimensions. If minDims, optDims, and maxDims are the minimum, optimum, and maximum dimensions,
//! and networkDims are the dimensions for this input tensor that are provided to the INetworkDefinition object,
//! then the following conditions must all hold:
//!
//! (1) minDims.nbDims == optDims.nbDims == maxDims.nbDims == networkDims.nbDims
//! (2) 0 <= minDims.d[i] <= optDims.d[i] <= maxDims.d[i] for i = 0, ..., networkDims.nbDims-1
//! (3) if networkDims.d[i] != -1, then minDims.d[i] == optDims.d[i] == maxDims.d[i] == networkDims.d[i]
//!
//! This function may (but need not be) called for an input tensor that does not have dynamic dimensions. In this
//! case, the third argument must always equal networkDims.
//!
//! \param inputName The input tensor name
//! \param select Whether to set the minimum, optimum, or maximum dimensions
//! \param dims The minimum, optimum, or maximum dimensions for this input tensor
//!
//! \return false if an inconsistency was detected (e.g. the rank does not match another dimension that was
//! previously set for the same input), true if no inconsistency was detected. Note that inputs can be
//! validated only partially; a full validation is performed at engine build time.
//!
//! \warning If run on DLA, minimum, optimum, and maximum dimensions must be the same.
//!
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
{
    return mImpl->setDimensions(inputName, select, dims);
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the dimensions have not been previously set via setDimensions(), return an invalid Dims with nbDims == -1.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getDimensions(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
|
|||
|
|
//! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
|
|||
|
|
//! This function must not be called for any input tensor that is not a shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
|
|||
|
|
//! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
|
|||
|
|
//! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
|
|||
|
|
//! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
|
|||
|
|
//!
|
|||
|
|
//! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
|
|||
|
|
//! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int32_t.
|
|||
|
|
//!
|
|||
|
|
//! Examples:
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
|
|||
|
|
//! The corresponding minVal[i] should be -1.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
|
|||
|
|
//! The values could be positive, negative, or zero.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
|
|||
|
|
//! contain any non-positive values that yield a valid slice operation.
|
|||
|
|
//!
|
|||
|
|
//! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
|
|||
|
|
//!
|
|||
|
|
//! \param inputName The input tensor name
|
|||
|
|
//! \param select Whether to set the minimum, optimum, or maximum input values.
|
|||
|
|
//! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
|
|||
|
|
//! For multidimensional tensors, the array is in row-major order.
|
|||
|
|
//! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
|
|||
|
|
//!
|
|||
|
|
//! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
|
|||
|
|
//! tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
|
|||
|
|
//! time.
|
|||
|
|
//!
|
|||
|
|
//! \warning If run on DLA, minimum, optimum, and maximum shape values must to be the same.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \warning When setShapeValuesV2 is called after setShapeValues, a following call to getShapeValues will
|
|||
|
|
//! return nullptr. Vice versa, a call to setShapeValues undoes the effects of setShapeValuesV2.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.11. Superseded by setShapeValuesV2().
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED bool setShapeValues(
|
|||
|
|
char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setShapeValues(inputName, select, values, nbValues);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the number of values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This will return the number of shape values if setShapeValues() has been called before for this input tensor.
|
|||
|
|
//! Otherwise, return -1.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
int32_t getNbShapeValues(char const* inputName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getNbShapeValues(inputName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the shape values have not been set previously with setShapeValues(), this returns nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.11. Superseded by getShapeValuesV2().
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getShapeValues(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set a target for extra GPU memory that may be used by this profile.
|
|||
|
|
//!
|
|||
|
|
//! \param target Additional memory that the builder should aim to maximally allocate for this profile, as a
|
|||
|
|
//! fraction of the memory it would use if the user did not impose any constraints on memory. This
|
|||
|
|
//! unconstrained case is the default; it corresponds to target == 1.0. If target == 0.0, the builder
|
|||
|
|
//! aims to create the new optimization profile without allocating any additional weight memory.
|
|||
|
|
//! Valid inputs lie between 0.0 and 1.0. This parameter is only a hint, and TensorRT does not guarantee
|
|||
|
|
//! that the target will be reached. This parameter is ignored for the first (default) optimization profile
|
|||
|
|
//! that is defined.
|
|||
|
|
//!
|
|||
|
|
//! \return true if the input is in the valid range (between 0 and 1 inclusive), else false.
|
|||
|
|
//!
|
|||
|
|
bool setExtraMemoryTarget(float target) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setExtraMemoryTarget(target);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the extra memory target that has been defined for this profile.
|
|||
|
|
//!
|
|||
|
|
//! This defaults to 1.0F.
|
|||
|
|
//!
|
|||
|
|
//! \return the valid value set by setExtraMemoryTarget or 1.0F.
|
|||
|
|
//!
|
|||
|
|
float getExtraMemoryTarget() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getExtraMemoryTarget();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Check whether the optimization profile can be passed to an IBuilderConfig object.
|
|||
|
|
//!
|
|||
|
|
//! This function performs partial validation, by e.g. checking that whenever one of the minimum, optimum, or
|
|||
|
|
//! maximum dimensions of a tensor have been set, the others have also been set and have the same rank, as
|
|||
|
|
//! well as checking that the optimum dimensions are always as least as large as the minimum dimensions, and
|
|||
|
|
//! that the maximum dimensions are at least as large as the optimum dimensions. Some validation steps require
|
|||
|
|
//! knowledge of the network definition and are deferred to engine build time.
|
|||
|
|
//!
|
|||
|
|
//!
|
|||
|
|
//! \return true if the optimization profile is valid and may be passed to an IBuilderConfig, else false.
|
|||
|
|
//!
|
|||
|
|
bool isValid() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isValid();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
|
|||
|
|
//! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
|
|||
|
|
//! This function must not be called for any input tensor that is not a shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
|
|||
|
|
//! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
|
|||
|
|
//! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
|
|||
|
|
//! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
|
|||
|
|
//!
|
|||
|
|
//! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
|
|||
|
|
//! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int64_t.
|
|||
|
|
//!
|
|||
|
|
//! Examples:
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
|
|||
|
|
//! The corresponding minVal[i] should be -1.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
|
|||
|
|
//! The values could be positive, negative, or zero.
|
|||
|
|
//!
|
|||
|
|
//! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
|
|||
|
|
//! contain any non-positive values that yield a valid slice operation.
|
|||
|
|
//!
|
|||
|
|
//! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
|
|||
|
|
//!
|
|||
|
|
//! \param inputName The input tensor name
|
|||
|
|
//! \param select Whether to set the minimum, optimum, or maximum input values.
|
|||
|
|
//! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
|
|||
|
|
//! For multidimensional tensors, the array is in row-major order.
|
|||
|
|
//! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
|
|||
|
|
//!
|
|||
|
|
//! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
|
|||
|
|
//! tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
|
|||
|
|
//! time.
|
|||
|
|
//!
|
|||
|
|
//! \warning If run on DLA, minimum, optimum, and maximum shape values must to be the same.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
//! \warning When setShapeValues is called after setShapeValuesV2, input shape would be overwritten as 32 bit
|
|||
|
|
//! and getShapeValuesV2 would return nullptr.
|
|||
|
|
//!
|
|||
|
|
bool setShapeValuesV2(
|
|||
|
|
char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the minimum / optimum / maximum values for an input shape tensor.
|
|||
|
|
//!
|
|||
|
|
//! If the shape values have not been set previously with setShapeValuesV2(), this returns nullptr.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getShapeValuesV2(inputName, select);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VOptimizationProfile* mImpl;
|
|||
|
|
virtual ~IOptimizationProfile() noexcept = default;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
//!
//! \enum TacticSource
//!
//! \brief List of tactic sources for TensorRT.
//!
//! \see TacticSources, IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
//!
enum class TacticSource : int32_t
{
    //! cuBLAS tactics. Disabled by default.
    //! \note Disabling kCUBLAS will cause the cuBLAS handle passed to plugins in attachToContext to be null.
    //! \deprecated Deprecated in TensorRT 10.0.
    kCUBLAS TRT_DEPRECATED_ENUM = 0,

    //! cuBLAS LT tactics. Disabled by default.
    //! \deprecated Deprecated in TensorRT 9.0.
    kCUBLAS_LT TRT_DEPRECATED_ENUM = 1,

    //! cuDNN tactics. Disabled by default.
    //! \note Disabling kCUDNN will cause the cuDNN handle passed to plugins in attachToContext to be null.
    //! \deprecated Deprecated in TensorRT 10.0.
    kCUDNN TRT_DEPRECATED_ENUM = 2,

    //! Enables convolution tactics implemented with edge mask tables. These tactics tradeoff memory for performance by
    //! consuming additional memory space proportional to the input size.
    //! Enabled by default.
    kEDGE_MASK_CONVOLUTIONS = 3,

    //! Enables convolution tactics implemented with source-code JIT fusion. The engine building time may increase
    //! when this is enabled. Enabled by default.
    kJIT_CONVOLUTIONS = 4,
};
|
|||
|
|
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<TacticSource>() noexcept
|
|||
|
|
{
|
|||
|
|
return 5;
|
|||
|
|
} //!< Maximum number of tactic sources in TacticSource enum. \see TacticSource
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents a collection of one or more TacticSource values
//! combined using bitwise-OR operations.
//!
//! \see IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
//!
using TacticSources = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum ProfilingVerbosity
//!
//! \brief List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
//!
//! \see IBuilderConfig::setProfilingVerbosity(),
//!      IBuilderConfig::getProfilingVerbosity(),
//!      IEngineInspector
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names. This is the default setting.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information including layer names and layer parameters.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of profile verbosity levels in ProfilingVerbosity enum. \see ProfilingVerbosity
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Represents one or more SerializationFlag values using binary OR
//! operations, e.g., 1U << SerializationFlag::kEXCLUDE_LEAN_RUNTIME
//!
//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
//!
using SerializationFlags = uint32_t;
|
|||
|
|
|
|||
|
|
//!
//! \enum SerializationFlag
//!
//! \brief List of valid flags that the engine can enable when serializing the bytes.
//!
//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0,      //!< Exclude the weights that can be refitted.
    kEXCLUDE_LEAN_RUNTIME = 1, //!< Exclude the lean runtime.
    kINCLUDE_REFIT = 2,        //!< Remain refittable if originally so.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of serialization flags in SerializationFlag enum. \see SerializationFlag
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
//! \class ISerializationConfig
//!
//! \brief Holds properties for configuring an engine to serialize the binary.
//!
//! \see SerializationFlag
//!
class ISerializationConfig : public INoCopy
{
public:
    virtual ~ISerializationConfig() noexcept = default;

    //!
    //! \brief Set the serialization flags to turn on for this config.
    //!
    //! The flags are listed in the SerializationFlag enum.
    //!
    //! \param serializationFlags The serialization flags for an engine.
    //!
    //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
    //!
    //! \see getFlags()
    //!
    bool setFlags(SerializationFlags serializationFlags) noexcept
    {
        return mImpl->setFlags(serializationFlags);
    }

    //!
    //! \brief Get the serialization flags for this config.
    //!
    //! \return The serialization flags as a bitmask.
    //!
    //! \see setFlags()
    //!
    SerializationFlags getFlags() const noexcept
    {
        return mImpl->getFlags();
    }

    //!
    //! \brief Clear a serialization flag.
    //!
    //! Clears the serialization flag from the config.
    //!
    //! \see setFlags()
    //!
    bool clearFlag(SerializationFlag serializationFlag) noexcept
    {
        return mImpl->clearFlag(serializationFlag);
    }

    //!
    //! \brief Set a serialization flag.
    //!
    //! Add the input serialization flag to the already enabled flags.
    //!
    //! \see setFlags()
    //!
    bool setFlag(SerializationFlag serializationFlag) noexcept
    {
        return mImpl->setFlag(serializationFlag);
    }

    //!
    //! \brief Returns true if the serialization flag is set
    //!
    //! \see getFlags()
    //!
    //! \return True if flag is set, false if unset.
    //!
    bool getFlag(SerializationFlag serializationFlag) const noexcept
    {
        return mImpl->getFlag(serializationFlag);
    }

protected:
    //! Pointer to the API-version-stable implementation object; all public methods delegate to it.
    apiv::VSerializationConfig* mImpl;
};
|
|||
|
|
|
|||
|
|
//!
//! \enum ExecutionContextAllocationStrategy
//!
//! \brief Different memory allocation behaviors for IExecutionContext.
//!
//! IExecutionContext requires a block of device memory for internal activation tensors during inference. The user can
//! either let the execution context manage the memory in various ways or allocate the memory themselves.
//!
//! \see ICudaEngine::createExecutionContext()
//! \see IExecutionContext::setDeviceMemory()
//!
enum class ExecutionContextAllocationStrategy : int32_t
{
    kSTATIC = 0,            //!< Default static allocation with the maximum size across all profiles.
    kON_PROFILE_CHANGE = 1, //!< Reallocate for a profile when it's selected.
    kUSER_MANAGED = 2,      //!< The user supplies custom allocation to the execution context.
};
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
|
|||
|
|
//!
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//!
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
|
|||
|
|
{
|
|||
|
|
return 3;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
//! \class IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
//! \brief A class for runtime configuration. This class is used during execution context creation.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntime, IBuilderConfig
|
|||
|
|
//!
|
|||
|
|
class IRuntimeConfig : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~IRuntimeConfig() noexcept = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Set the execution context allocation strategy. Default value is kSTATIC.
|
|||
|
|
//!
|
|||
|
|
//! \param strategy The execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setExecutionContextAllocationStrategy(strategy);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
//! \return The execution context allocation strategy.
|
|||
|
|
//!
|
|||
|
|
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getExecutionContextAllocationStrategy();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
protected:
|
|||
|
|
apiv::VRuntimeConfig* mImpl;
|
|||
|
|
}; // class IRuntimeConfig
|
|||
|
|
|
|||
|
|
//!
//! \enum EngineStat
//!
//! \brief The kind of engine statistics that can be queried from the ICudaEngine.
//!
//! \see ICudaEngine::getEngineStat()
//! \see BuilderFlag::kSTRIP_PLAN
//!
enum class EngineStat : int32_t
{
    //! Return the total weight size in bytes.
    kTOTAL_WEIGHTS_SIZE = 0,

    //! Return the stripped weight size in bytes for engines built with BuilderFlag::kSTRIP_PLAN.
    kSTRIPPED_WEIGHTS_SIZE = 1,
};
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Maximum number of engine statistic kinds in EngineStat enum.
|
|||
|
|
//!
|
|||
|
|
//! \see EngineStat
|
|||
|
|
//!
|
|||
|
|
template <>
|
|||
|
|
constexpr inline int32_t EnumMax<EngineStat>() noexcept
|
|||
|
|
{
|
|||
|
|
return 2;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \class ICudaEngine
|
|||
|
|
//!
|
|||
|
|
//! \brief An engine for executing inference on a built network, with functionally unsafe features.
|
|||
|
|
//!
|
|||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
|||
|
|
//!
|
|||
|
|
class ICudaEngine : public INoCopy
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
virtual ~ICudaEngine() noexcept = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get shape of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return shape of the tensor, with -1 in place of each dynamic runtime dimension,
|
|||
|
|
//! or Dims{-1, {}} if the provided name does not map to an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
Dims getTensorShape(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorShape(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Determine the required data type for a buffer from its tensor name.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return The type of the data in the buffer, or DataType::kFLOAT if the provided name does not map to an input or
|
|||
|
|
//! output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
DataType getTensorDataType(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorDataType(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the number of layers in the network.
|
|||
|
|
//!
|
|||
|
|
//! The number of layers in the network is not necessarily the number in the original network definition, as layers
|
|||
|
|
//! may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer
|
|||
|
|
//! tables, such as when aggregating profiling data over a number of executions.
|
|||
|
|
//!
|
|||
|
|
//! \return The number of layers in the network.
|
|||
|
|
//!
|
|||
|
|
int32_t getNbLayers() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getNbLayers();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Serialize the network to a stream.
|
|||
|
|
//!
|
|||
|
|
//! \return A IHostMemory object that contains the serialized engine.
|
|||
|
|
//!
|
|||
|
|
//! The network may be deserialized with IRuntime::deserializeCudaEngine().
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntime::deserializeCudaEngine()
|
|||
|
|
//!
|
|||
|
|
IHostMemory* serialize() const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->serialize();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an execution context and specify the strategy for allocating internal activation memory.
|
|||
|
|
//!
|
|||
|
|
//! The default value for the allocation strategy is ExecutionContextAllocationStrategy::kSTATIC, which means the
|
|||
|
|
//! context will pre-allocate a block of device memory that is sufficient for all profiles. The newly created
|
|||
|
|
//! execution context will be assigned optimization profile 0. If an error recorder has been set for the engine, it
|
|||
|
|
//! will also be passed to the execution context.
|
|||
|
|
//!
|
|||
|
|
//! \see IExecutionContext
|
|||
|
|
//! \see IExecutionContext::setOptimizationProfileAsync()
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//!
|
|||
|
|
IExecutionContext* createExecutionContext(
|
|||
|
|
ExecutionContextAllocationStrategy strategy = ExecutionContextAllocationStrategy::kSTATIC) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContext(strategy);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get whether an input or output tensor must be on GPU or CPU.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return TensorLocation::kDEVICE if tensorName must be on GPU, or TensorLocation::kHOST if on CPU, or
|
|||
|
|
//! TensorLocation::kDEVICE if the provided name does not map to an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! The location is established at build time. E.g. shape tensors inputs are typically required to be on the CPU.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
TensorLocation getTensorLocation(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorLocation(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief True if tensor is required as input for shape calculations or is output from shape calculations.
|
|||
|
|
//!
|
|||
|
|
//! Return true for either of the following conditions:
|
|||
|
|
//!
|
|||
|
|
//! * The tensor is a network input, and its value is required for IExecutionContext::getTensorShape()
|
|||
|
|
//! to return the shape of a network output.
|
|||
|
|
//!
|
|||
|
|
//! * The tensor is a network output, and inferShape() will compute its values.
|
|||
|
|
//!
|
|||
|
|
//! For example, if a network uses an input tensor "foo" as an addend to an IElementWiseLayer
|
|||
|
|
//! that computes the "reshape dimensions" for IShuffleLayer, then isShapeInferenceIO("foo") == true.
|
|||
|
|
//! If the network copies said input tensor "foo" to an output "bar", then
|
|||
|
|
//! isShapeInferenceIO("bar") == true and IExecutionContext::inferShapes() will write to "bar".
|
|||
|
|
//!
|
|||
|
|
bool isShapeInferenceIO(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isShapeInferenceIO(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Determine whether a tensor is an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an input or output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return kINPUT if tensorName is an input, kOUTPUT if tensorName is an output, or kNONE if neither.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
|
|||
|
|
//!
|
|||
|
|
TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getTensorIOMode(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Get the input tensor name that an output tensor should alias with.
|
|||
|
|
//!
|
|||
|
|
//! Some operations (e.g., KVCacheUpdate) require that certain output tensors share memory with input tensors.
|
|||
|
|
//! This method returns the name of the input tensor that a given output tensor should alias with.
|
|||
|
|
//!
|
|||
|
|
//! \param tensorName The name of an output tensor.
|
|||
|
|
//!
|
|||
|
|
//! \return The name of the input tensor to alias with, or nullptr if tensorName is not an output tensor or
|
|||
|
|
//! the output does not alias with any input.
|
|||
|
|
//!
|
|||
|
|
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the
|
|||
|
|
//! terminator.
|
|||
|
|
//!
|
|||
|
|
TRT_NODISCARD char const* getAliasedInputTensor(char const* tensorName) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->getAliasedInputTensor(tensorName);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief create an execution context without any device memory allocated
|
|||
|
|
//!
|
|||
|
|
//! The memory for execution of this device context must be supplied by the application.
|
|||
|
|
//!
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by createExecutionContext() with parameter.
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContextWithoutDeviceMemory();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an execution context with TensorRT JIT runtime config.
|
|||
|
|
//!
|
|||
|
|
//! \param runtimeConfig The runtime config for TensorRT JIT.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
IExecutionContext* createExecutionContext(IRuntimeConfig* runtimeConfig) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create a runtime config for TensorRT JIT.
|
|||
|
|
//! The caller is responsible for ownership of the returned IRuntimeConfig object.
|
|||
|
|
//!
|
|||
|
|
//! \return A IRuntimeConfig object.
|
|||
|
|
//!
|
|||
|
|
//! \see IRuntimeConfig
|
|||
|
|
//!
|
|||
|
|
IRuntimeConfig* createRuntimeConfig() noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->createRuntimeConfig();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the maximum device memory required by the context over all profiles.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeV2().
    //!
    //! \see IExecutionContext::setDeviceMemory()
    //!
    TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
    {
        return mImpl->getDeviceMemorySize();
    }

    //!
    //! \brief Return the maximum device memory required by the context for a profile.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeForProfileV2(int32_t).
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //!
    TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
    {
        return mImpl->getDeviceMemorySizeForProfile(profileIndex);
    }

    //!
    //! \brief Return the maximum device memory required by the context over all profiles.
    //!
    //! This API is stateful, so its call returns different values based on the following calls:
    //! * setWeightStreamingBudget()
    //! * setWeightStreamingBudgetV2()
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //! \see setWeightStreamingBudget()
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getDeviceMemorySizeV2() const noexcept
    {
        return mImpl->getDeviceMemorySizeV2();
    }

    //!
    //! \brief Return the maximum device memory required by the context for a profile.
    //!
    //! This API is stateful, so its call returns different values based on the following calls:
    //! * setWeightStreamingBudget()
    //! * setWeightStreamingBudgetV2()
    //!
    //! \see IExecutionContext::setDeviceMemoryV2()
    //! \see setWeightStreamingBudget()
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
    {
        return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return true if an engine can be refit.
    //!
    //! \see nvinfer1::createInferRefitter()
    //!
    bool isRefittable() const noexcept
    {
        return mImpl->isRefittable();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the number of bytes per component of an element, or -1 if the
    //! tensor is not vectorized or provided name does not map to an input or output tensor.
    //!
    //! The vector component size is returned if getTensorVectorizedDim(tensorName) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorBytesPerComponent with profileIndex when there are multiple
    //! profiles.
    //!
    //! \see getTensorVectorizedDim()
    //! \see getTensorBytesPerComponent(tensorName, profileIndex)
    //!
    int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
    {
        return mImpl->getTensorBytesPerComponent(tensorName);
    }

    //!
    //! \brief Return the number of bytes per component of an element of a given profile, or -1 if the tensor is not
    //! vectorized or provided name does not map to an input or output tensor.
    //!
    //! The vector component size is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getTensorVectorizedDim(tensorName, profileIndex)
    //!
    int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the number of components included in one element, or -1 if tensor is
    //! not vectorized or if the provided name does not map to an input or output tensor.
    //!
    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there
    //! are multiple profiles in the engine, use getTensorComponentsPerElement with profileIndex when there are
    //! multiple profiles.
    //!
    //! \see getTensorVectorizedDim()
    //! \see getTensorComponentsPerElement(tensorName, profileIndex)
    //!
    int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
    {
        return mImpl->getTensorComponentsPerElement(tensorName);
    }

    //!
    //! \brief Return the number of components included in one element of given profile, or -1 if tensor is not
    //! vectorized or the provided name does not map to an input or output tensor.
    //!
    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getTensorVectorizedDim(tensorName, profileIndex)
    //!
    int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or
    //! output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning This API can only return the tensor format of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorFormat with profileIndex when there are multiple profiles.
    //!
    //! \see getTensorFormat(tensorName, profileIndex)
    //!
    TensorFormat getTensorFormat(char const* tensorName) const noexcept
    {
        return mImpl->getTensorFormat(tensorName);
    }

    //!
    //! \brief Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map to
    //! an input or output tensor.
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorFormatV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the human readable description of the tensor format, or empty string if the provided name does not
    //! map to an input or output tensor.
    //!
    //! The description includes the order, vectorization, data type, and strides.
    //! Examples are shown as follows:
    //!     Example 1: kCHW + FP32
    //!         "Row-major linear FP32 format"
    //!     Example 2: kCHW2 + FP16
    //!         "Two-wide channel vectorized row-major FP16 format"
    //!     Example 3: kHWC8 + FP16 + Line Stride = 32
    //!         "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorFormatDesc with profileIndex when there are multiple profiles.
    //!
    char const* getTensorFormatDesc(char const* tensorName) const noexcept
    {
        return mImpl->getTensorFormatDesc(tensorName);
    }

    //!
    //! \brief Return the human readable description of the tensor format of given profile, or empty string if the
    //! provided name does not map to an input or output tensor.
    //!
    //! The description includes the order, vectorization, data type, and strides.
    //! Examples are shown as follows:
    //!     Example 1: kCHW + FP32
    //!         "Row-major linear FP32 format"
    //!     Example 2: kCHW2 + FP16
    //!         "Two-wide channel vectorized row-major FP16 format"
    //!     Example 3: kHWC8 + FP16 + Line Stride = 32
    //!         "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
    //!
    //! \param tensorName The name of an input or output tensor.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
    }

    //!
    //! \brief Return the dimension index that the buffer is vectorized, or -1 if the provided name does not
    //! map to an input or output tensor.
    //!
    //! Specifically -1 is returned if scalars per vector is 1.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
    //! multiple profiles in the engine, use getTensorVectorizedDim with profileIndex when there are multiple profiles.
    //!
    int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
    {
        return mImpl->getTensorVectorizedDim(tensorName);
    }

    //!
    //! \brief Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name
    //! does not map to an input or output tensor.
    //!
    //! Specifically -1 is returned if scalars per vector is 1.
    //!
    //! \param tensorName The name of an input.
    //! \param profileIndex The profile index to query the format for.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
    {
        return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns the name of the network associated with the engine.
    //!
    //! The name is set during network creation and is retrieved after
    //! building or deserialization.
    //!
    //! \see INetworkDefinition::setName(), INetworkDefinition::getName()
    //!
    //! \return A null-terminated C-style string representing the name of the network.
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

    //!
    //! \brief Get the number of optimization profiles defined for this engine.
    //!
    //! \return Number of optimization profiles. It is always at least 1.
    //!
    //! \see IExecutionContext::setOptimizationProfileAsync()
    //!
    int32_t getNbOptimizationProfiles() const noexcept
    {
        return mImpl->getNbOptimizationProfiles();
    }

    //!
    //! \brief Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimization
    //! profile.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum dimensions for this input tensor.
    //!
    //! \return The minimum / optimum / maximum dimensions for an input tensor in this profile.
    //! If the profileIndex is invalid or provided name does not map to an input tensor, return Dims{-1, {}}
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileShape(tensorName, profileIndex, select);
    }

    //!
    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
    //! its name under an optimization profile. These correspond to the values set using
    //! IOptimizationProfile::setShapeValues when the engine was built.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
    //!
    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
    //! nullptr.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \deprecated Deprecated in TensorRT 10.11. Superseded by getProfileTensorValuesV2().
    //! \warning If input shapes are set with setShapeValuesV2, getProfileTensorValues will return nullptr
    //!
    TRT_DEPRECATED int32_t const* getProfileTensorValues(
        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Determine what execution capability this engine has.
    //!
    //! If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid.
    //! If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid.
    //! If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions
    //! are valid.
    //!
    //! \return The EngineCapability flag that the engine was built for.
    //!
    EngineCapability getEngineCapability() const noexcept
    {
        return mImpl->getEngineCapability();
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        return mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Query whether the engine was built with an implicit batch dimension.
    //!
    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
    //!
    //! \see createNetworkV2
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
    //!
    TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
    {
        return mImpl->hasImplicitBatchDimension();
    }

    //!
    //! \brief return the tactic sources required by this engine.
    //!
    //! The value returned is equal to zero or more tactics sources set
    //! at build time via setTacticSources() in IBuilderConfig. Sources
    //! set by the latter but not returned by \ref ICudaEngine::getTacticSources
    //! do not reduce overall engine execution time, and can be removed from
    //! future builds to reduce build time.
    //!
    //! \see IBuilderConfig::setTacticSources()
    //!
    TacticSources getTacticSources() const noexcept
    {
        return mImpl->getTacticSources();
    }

    //!
    //! \brief Return the \ref ProfilingVerbosity the builder config was set to when the engine was built.
    //!
    //! \return the profiling verbosity the builder config was set to when the engine was built.
    //!
    //! \see IBuilderConfig::setProfilingVerbosity()
    //!
    ProfilingVerbosity getProfilingVerbosity() const noexcept
    {
        return mImpl->getProfilingVerbosity();
    }

    //!
    //! \brief Create a new engine inspector which prints the layer information in an engine or an execution context.
    //!
    //! \see IEngineInspector.
    //!
    IEngineInspector* createEngineInspector() const noexcept
    {
        return mImpl->createEngineInspector();
    }

    //!
    //! \brief Return number of IO tensors.
    //!
    //! It is the number of input and output tensors for the network from which the engine was built.
    //! The names of the IO tensors can be discovered by calling getIOTensorName(i) for i in 0 to getNbIOTensors()-1.
    //!
    //! \see getIOTensorName()
    //!
    int32_t getNbIOTensors() const noexcept
    {
        return mImpl->getNbIOTensors();
    }

    //!
    //! \brief Return name of an IO tensor.
    //!
    //! \param index value between 0 and getNbIOTensors()-1
    //!
    //! \see getNbIOTensors()
    //!
    char const* getIOTensorName(int32_t index) const noexcept
    {
        return mImpl->getIOTensorName(index);
    }

    //!
    //! \brief Return the hardware compatibility level of this engine.
    //!
    //! \return hardwareCompatibilityLevel The level of hardware
    //!        compatibility.
    //!
    HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
    {
        return mImpl->getHardwareCompatibilityLevel();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the number of auxiliary streams used by this engine.
    //!
    //! This number will be less than or equal to the maximum allowed number of auxiliary streams set by
    //! IBuilderConfig::setMaxAuxStreams() API call when the engine was built.
    //!
    //! \return the number of auxiliary streams used by this engine.
    //!
    //! \see IBuilderConfig::setMaxAuxStreams(), IExecutionContext::setAuxStreams()
    //!
    int32_t getNbAuxStreams() const noexcept
    {
        return mImpl->getNbAuxStreams();
    }

    //!
    //! \brief Create a serialization configuration object.
    //!
    //! \see ISerializationConfig
    //!
    ISerializationConfig* createSerializationConfig() noexcept
    {
        return mImpl->createSerializationConfig();
    }

    //!
    //! \brief Serialize the network to a stream with the provided SerializationConfig.
    //!
    //! \return An IHostMemory object that contains the serialized engine.
    //!
    //! The network may be deserialized with IRuntime::deserializeCudaEngine().
    //! Serializing plan file with SerializationFlag::kEXCLUDE_WEIGHTS requires building the engine with kREFIT,
    //! kREFIT_IDENTICAL or kREFIT_INDIVIDUAL.
    //!
    //! The only applicable scenario for SerializationFlag::kINCLUDE_REFIT is when serializing weight-stripping
    //! engines without kEXCLUDE_WEIGHTS. By default, the resulting serialized engine is unrefittable. Setting
    //! SerializationFlag::kINCLUDE_REFIT ensures that the serialized engine remains refittable.
    //!
    //! \see IRuntime::deserializeCudaEngine()
    //!
    IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept
    {
        return mImpl->serializeWithConfig(config);
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Limit the maximum amount of GPU memory usable for network weights
    //! in bytes.
    //!
    //! \param gpuMemoryBudget  This parameter may take on 3 types of values:
    //!  -1: Allows TensorRT to choose the budget according to the streamable weights size.
    //!      Free CUDA memory will be queried at createExecutionContext() and accordingly:
    //!       * If streamable weights all fit: weight streaming is not required and disabled.
    //!       * Otherwise: Budget is set to getMinimumWeightStreamingBudget
    //!   0: (default) Disables weight streaming. The execution may fail if the network is too large for GPU memory.
    //!  >0: The maximum bytes of GPU memory that weights can occupy. It must be bounded by
    //!      [getMinimumWeightStreamingBudget, free GPU memory)].
    //!
    //! By setting a weight limit, users can expect a GPU memory usage reduction
    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
    //! when gpuMemoryBudget is set to getMinimumWeightStreamingBudget(). Creating additional
    //! IExecutionContexts will increase memory usage by O(getMinimumStreamingBudget()).
    //!
    //! Streaming larger amounts of memory will likely result in lower performance
    //! except in some boundary cases where streaming weights allows the user to
    //! run larger batch sizes. The higher throughput offsets the increased
    //! latency in these cases. Tuning the value of the memory limit is
    //! recommended for best performance.
    //!
    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
    //! streaming or destroying the ICudaEngine.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
    //!
    //! \return true if the memory limit is valid and the call was successful, false otherwise.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see getWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
    {
        return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
    }

    //!
    //! \brief Returns the current weight streaming device memory budget in bytes.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible
    //!          values.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING,
    //! \see setWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
    {
        return mImpl->getWeightStreamingBudget();
    }

    //!
    //! \brief The minimum number of bytes of GPU memory required by network
    //! weights for successful weight streaming.
    //!
    //! This is a positive integer for engines with streamable weights because a
    //! staging buffer on the GPU is required to temporarily hold the streamed
    //! weights. The size of the staging buffer is determined by TensorRT and must
    //! be at least as large as the size of the largest streamable weight in the
    //! network.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The minimum number of bytes of GPU memory required for streaming.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. The minimum budget is 0 in the V2 APIs.
    //!
    //! \see setWeightStreamingBudget()
    //!
    TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
    {
        return mImpl->getMinimumWeightStreamingBudget();
    }

    //!
    //! \brief Get the total size in bytes of all streamable weights.
    //!
    //! The set of streamable weights is a subset of all network weights. The
    //! total size may exceed free GPU memory.
    //!
    //! \returns The total size in bytes of all streamable weights.
    //!          Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building.
    //!
    //! \see setWeightStreamingBudget()
    //!
    int64_t getStreamableWeightsSize() const noexcept
    {
        return mImpl->getStreamableWeightsSize();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Limit the maximum amount of GPU memory usable for network weights in bytes.
    //!
    //! \param gpuMemoryBudget  This parameter must be a non-negative value.
    //!   0: Only small amounts of scratch memory will required to run the model.
    //!  >= getStreamableWeightsSize (default): Disables weight streaming.
    //!      The execution may fail if the network is too large for GPU memory.
    //!
    //! By setting a weight limit, users can expect a GPU memory usage reduction on the order
    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
    //! when gpuMemoryBudget is set to 0. Each IExecutionContext will require getWeightStreamingScratchMemorySize()
    //! bytes of additional device memory if the engine is streaming its weights (budget < getStreamableWeightsSize()).
    //!
    //! Streaming larger amounts of memory will likely result in lower performance
    //! except in some boundary cases where streaming weights allows the user to
    //! run larger batch sizes. The higher throughput offsets the increased
    //! latency in these cases. Tuning the value of the memory limit is
    //! recommended for best performance.
    //!
    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
    //! streaming or destroying the ICudaEngine.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
    //!
    //! \warning Using the V2 weight streaming APIs with V1 APIs (setWeightStreamingBudget(),
    //! getWeightStreamingBudget(), getWeightStreamingMinimumBudget()) leads to undefined behavior.
    //!
    //! \return true if the memory limit is valid and the call was successful, false otherwise.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see getWeightStreamingBudgetV2()
    //! \see getWeightStreamingScratchMemorySize()
    //! \see getWeightStreamingAutomaticBudget()
    //! \see getStreamableWeightsSize()
    //!
    bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
    {
        return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
    }

    //!
    //! \brief Returns the current weight streaming device memory budget in bytes.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudgetV2() for the possible
    //!          return values. Returns getStreamableWeightsSize() if weight streaming is disabled.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudget()
    //! \see getMinimumWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    int64_t getWeightStreamingBudgetV2() const noexcept
    {
        return mImpl->getWeightStreamingBudgetV2();
    }

    //!
    //! \brief TensorRT automatically determines a device memory budget for the model to run. The budget is close to the
    //! current free memory size, leaving some space for other memory needs in the user's application. If the budget
    //! exceeds the size obtained from getStreamableWeightsSize(), it is capped to that size, effectively disabling
    //! weight streaming. Since TensorRT lacks information about the user's allocations, the remaining memory size might
    //! be larger than required, leading to wasted memory, or smaller than required, causing an out-of-memory error. For
    //! optimal memory allocation, it is recommended to manually calculate and set the budget.
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \warning The return value may change between TensorRT minor versions.
    //!
    //! \warning Setting the returned budget with V1 APIs (setWeightStreamingBudget()) will lead to undefined behavior.
    //! Please use V2 APIs.
    //!
    //! \returns The weight streaming budget in bytes. Please set with setWeightStreamingBudgetV2().
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudgetV2()
    //!
    int64_t getWeightStreamingAutomaticBudget() const noexcept
    {
        return mImpl->getWeightStreamingAutomaticBudget();
    }
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Returns the size of the scratch memory required by the current weight streaming budget.
    //!
    //! Weight streaming requires small amounts of scratch memory on the GPU to stage CPU weights right before
    //! execution. This value is typically much smaller than the total streamable weights size. Each IExecutionContext
    //! will then allocate this additional memory or the user can provide the additional memory through
    //! getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2().
    //!
    //! The return value of this call depends on the budget most recently set by either
    //! 1. setWeightStreamingBudget(), or
    //! 2. setWeightStreamingBudgetV2()
    //!
    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
    //!
    //! \returns The weight streaming scratch memory in bytes. Returns 0 if weight streaming is disabled.
    //!
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudgetV2()
    //! \see getStreamableWeightsSize()
    //! \see getDeviceMemorySizeV2()
    //! \see getDeviceMemorySizeForProfileV2()
    //! \see IExecutionContext::setDeviceMemoryV2()
    //!
    int64_t getWeightStreamingScratchMemorySize() const noexcept
    {
        return mImpl->getWeightStreamingScratchMemorySize();
    }
//!
|
|||
|
|
//! \brief Check if a tensor is marked as a debug tensor.
|
|||
|
|
//!
|
|||
|
|
//! Determine whether the given name corresponds to a debug tensor.
|
|||
|
|
//!
|
|||
|
|
//! \returns True if tensor is a debug tensor, false otherwise.
|
|||
|
|
//!
|
|||
|
|
//! \see INetworkDefinition::markDebug
|
|||
|
|
//!
|
|||
|
|
bool isDebugTensor(char const* name) const noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->isDebugTensor(name);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
    //! its name under an optimization profile. These correspond to the values set using
    //! IOptimizationProfile::setShapeValuesV2 when the engine was built.
    //!
    //! \param tensorName The name of an input tensor.
    //!
    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
    //!
    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
    //!
    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
    //! nullptr.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \warning If input shapes are set with the V1 API setShapeValues(), getProfileTensorValuesV2() will return
    //! nullptr.
    //!
    int64_t const* getProfileTensorValuesV2(
        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
    {
        return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
    }
    //!
    //! \brief Get engine statistics according to the given enum value.
    //!
    //! \param stat The kind of statistics to query.
    //!
    //! If stat is kTOTAL_WEIGHTS_SIZE, the return value is the total weights size in bytes in the engine.
    //! If stat is kSTRIPPED_WEIGHTS_SIZE, the return value is the stripped weight size in bytes for engines
    //! built with BuilderFlag::kSTRIP_PLAN.
    //!
    //! When the BuilderFlag::kWEIGHT_STREAMING flag is enabled, engine weights may not be fully copied to the device.
    //! The reported total weight size reflects the sum of all weights utilized by the engine,
    //! which does not necessarily correspond to the actual GPU memory allocated.
    //!
    //! \return The kind of statistics specified by EngineStat.
    //!
    //! \warning If kSTRIPPED_WEIGHTS_SIZE is passed to query a normal (non-stripped) engine, this function will
    //! return -1 to indicate an invalid enum value.
    //!
    //! \see EngineStat
    //! \see BuilderFlag::kWEIGHT_STREAMING
    //! \see setWeightStreamingBudget()
    //! \see getStreamableWeightsSize()
    //!
    int64_t getEngineStat(EngineStat stat) const noexcept
    {
        return mImpl->getEngineStat(stat);
    }

protected:
    // Pointer to the versioned implementation object; all public methods delegate through it
    // so that the public header stays ABI-stable across TensorRT releases.
    apiv::VCudaEngine* mImpl;
};
namespace v_1_0
{
//! Versioned interface for user-supplied output allocation callbacks (see the
//! IOutputAllocator alias below for the user-facing documentation).
class IOutputAllocator : public IVersionedInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IOutputAllocator", 1, 0};
    }

    //!
    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
    //! If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
    //! If currentMemory is known to be big enough, one option is to return currentMemory.
    //!
    //! \param tensorName name of the output tensor.
    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
    //! \param size number of bytes required. Always positive, even for an empty tensor.
    //! \param alignment required alignment of the allocation.
    //!
    //! \return A pointer to memory to use for the output tensor or nullptr.
    //!
    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
    //! and have reallocateOutput return nullptr if that memory is not big enough.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Superseded by reallocateOutputAsync with cudaStream_t argument
    //!
    // The default implementation intentionally ignores its parameters and allocates nothing;
    // legacy implementors were expected to override it.
    TRT_DEPRECATED virtual void* reallocateOutput(
        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
    {
        return nullptr;
    }

    //!
    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
    //! If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
    //! If currentMemory is known to be big enough, one option is to return currentMemory.
    //!
    //! \param tensorName name of the output tensor.
    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
    //! \param size number of bytes required. Always positive, even for an empty tensor.
    //! \param alignment required alignment of the allocation.
    //! \param stream The stream in which to execute the kernels.
    //!
    //! \return A pointer to memory to use for the output tensor or nullptr.
    //!
    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
    //! and have reallocateOutputAsync return nullptr if that memory is not big enough.
    //!
    //! The default definition exists for sake of backward compatibility with earlier versions of TensorRT.
    //! Eventually this method will become a pure virtual method that requires an override, and method
    //! reallocateOutput() will disappear. Code moving away from TensorRT 9.x should override method
    //! reallocateOutputAsync() and NOT override method reallocateOutput().
    //!
    // Default implementation forwards to the deprecated synchronous overload, ignoring the stream.
    virtual void* reallocateOutputAsync(
        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
    {
        return reallocateOutput(tensorName, currentMemory, size, alignment);
    }

    //!
    //! \brief Called by TensorRT when the shape of the output tensor is known.
    //!
    //! Called by TensorRT sometime between when it calls reallocateOutput and enqueueV3 returns.
    //!
    //! \param dims dimensions of the output
    //! \param tensorName name of the tensor
    //!
    virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
};
} // namespace v_1_0

//!
//! \class IOutputAllocator
//!
//! \brief Callback from ExecutionContext::enqueueV3()
//!
//! \see IExecutionContext::enqueueV3()
//!
using IOutputAllocator = v_1_0::IOutputAllocator;
namespace v_1_0
{
//! Versioned interface for user-supplied debug-tensor callbacks (see the
//! IDebugListener alias below for the user-facing documentation).
class IDebugListener : public IVersionedInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IDebugListener", 1, 0};
    }

    //!
    //! \brief Callback function that is called when a debug tensor's value is updated and the debug state of the
    //! tensor is set to true. Content in the given address is only guaranteed to be valid for the duration of the
    //! callback.
    //!
    //! \param location TensorLocation of the tensor.
    //! \param addr pointer to buffer.
    //! \param type data Type of the tensor.
    //! \param shape shape of the tensor.
    //! \param name name of the tensor.
    //! \param stream CUDA stream object.
    //!
    //! \return True on success, false otherwise.
    //!
    virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
        char const* name, cudaStream_t stream)
        = 0;

    ~IDebugListener() override = default;
};
} // namespace v_1_0

//!
//! \class IDebugListener
//!
//! \brief User-implemented callback for notification when value of a debug tensor is updated.
//!
using IDebugListener = v_1_0::IDebugListener;
//!
//! \class IExecutionContext
//!
//! \brief Context for executing inference using an engine, with functionally unsafe features.
//!
//! Multiple execution contexts may exist for one ICudaEngine instance, allowing the same
//! engine to be used for the execution of multiple batches simultaneously. If the engine supports
//! dynamic shapes, each execution context in concurrent use must use a separate optimization profile.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IExecutionContext : public INoCopy
{
public:
    virtual ~IExecutionContext() noexcept = default;

    //!
    //! \brief Set the debug sync flag.
    //!
    //! If this flag is set to true, the engine will log the successful execution for each kernel during executeV2().
    //! It has no effect when using enqueueV3().
    //!
    //! \param sync The new value of the debug sync flag.
    //!
    //! \see getDebugSync()
    //!
    void setDebugSync(bool sync) noexcept
    {
        mImpl->setDebugSync(sync);
    }

    //!
    //! \brief Get the debug sync flag.
    //!
    //! \return The current value of the debug sync flag.
    //!
    //! \see setDebugSync()
    //!
    bool getDebugSync() const noexcept
    {
        return mImpl->getDebugSync();
    }
    //!
    //! \brief Set the profiler.
    //!
    //! \param profiler The profiler to attach to this execution context.
    //!
    //! \see IProfiler getProfiler()
    //!
    void setProfiler(IProfiler* profiler) noexcept
    {
        mImpl->setProfiler(profiler);
    }

    //!
    //! \brief Get the profiler.
    //!
    //! \return The profiler attached to this execution context, if any.
    //!
    //! \see IProfiler setProfiler()
    //!
    IProfiler* getProfiler() const noexcept
    {
        return mImpl->getProfiler();
    }
    //!
    //! \brief Get the associated engine.
    //!
    //! \return A reference to the ICudaEngine this context was created from.
    //!
    //! \see ICudaEngine
    //!
    ICudaEngine const& getEngine() const noexcept
    {
        return mImpl->getEngine();
    }
    //!
    //! \brief Set the name of the execution context.
    //!
    //! This method copies the name string.
    //!
    //! \param name The name to assign to this execution context.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }

    //!
    //! \brief Return the name of the execution context.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }
    //!
    //! \brief Set the device memory for use by this execution context.
    //!
    //! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
    //! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
    //! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if
    //! the reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
    //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
    //! Releasing or otherwise using the memory for other purposes, including using it in another execution context
    //! running in parallel, during this time will result in undefined behavior.
    //!
    //! \param memory The device memory to use, or nullptr if the reported size is 0.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setDeviceMemoryV2().
    //!
    //! \warning Weight streaming related scratch memory will be allocated by TensorRT if the memory is set by this
    //! API. Please use setDeviceMemoryV2() instead.
    //!
    //! \see ICudaEngine::getDeviceMemorySize()
    //! \see ICudaEngine::getDeviceMemorySizeForProfile()
    //! \see ExecutionContextAllocationStrategy
    //! \see ICudaEngine::createExecutionContext()
    //! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
    //!
    void setDeviceMemory(void* memory) noexcept
    {
        mImpl->setDeviceMemory(memory);
    }
//!
|
|||
|
|
//! \brief Set the device memory and its corresponding size for use by this execution context.
|
|||
|
|
//!
|
|||
|
|
//! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
|
|||
|
|
//! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
|
|||
|
|
//! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the
|
|||
|
|
//! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
|
|||
|
|
//! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
|
|||
|
|
//! Releasing or otherwise using the memory for other purposes, including using it in another execution context
|
|||
|
|
//! running in parallel, during this time will result in undefined behavior.
|
|||
|
|
//!
|
|||
|
|
//! \see ICudaEngine::getDeviceMemorySizeV2()
|
|||
|
|
//! \see ICudaEngine::getDeviceMemorySizeForProfileV2()
|
|||
|
|
//! \see ExecutionContextAllocationStrategy
|
|||
|
|
//! \see ICudaEngine::createExecutionContext()
|
|||
|
|
//! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
|
|||
|
|
//!
|
|||
|
|
void setDeviceMemoryV2(void* memory, int64_t size) noexcept
|
|||
|
|
{
|
|||
|
|
return mImpl->setDeviceMemoryV2(memory, size);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
    //!
    //! \brief Return the strides of the buffer for the given tensor name.
    //!
    //! The strides are in units of elements, not components or bytes.
    //! For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars.
    //!
    //! Note that strides can be different for different execution contexts
    //! with dynamic shapes.
    //!
    //! If the provided name does not map to an input or output tensor, or there are dynamic dimensions that have not
    //! been set yet, return Dims{-1, {}}
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getTensorStrides(char const* tensorName) const noexcept
    {
        return mImpl->getTensorStrides(tensorName);
    }
public:
    //!
    //! \brief Get the index of the currently selected optimization profile.
    //!
    //! If the profile index has not been set yet (implicitly to 0 if no other execution context has been set to
    //! profile 0, or explicitly for all subsequent contexts), an invalid value of -1 will be returned
    //! and all calls to enqueueV3()/executeV2() will fail until a valid profile index has been set.
    //! This behavior is deprecated in TensorRT 8.6, all profiles will default to optimization
    //! profile 0 and -1 will no longer be returned.
    //!
    //! \return The index of the selected optimization profile, or -1 as described above.
    //!
    int32_t getOptimizationProfile() const noexcept
    {
        return mImpl->getOptimizationProfile();
    }
    //!
    //! \brief Set shape of given input.
    //!
    //! \param tensorName The name of an input tensor.
    //! \param dims The shape of an input tensor.
    //!
    //! \return True on success, false if the provided name does not map to an input tensor, or if some other error
    //! occurred.
    //!
    //! Each dimension must agree with the network dimension unless the latter was -1.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    bool setInputShape(char const* tensorName, Dims const& dims) noexcept
    {
        return mImpl->setInputShape(tensorName, dims);
    }
    //!
    //! \brief Return the shape of the given input or output.
    //!
    //! \param tensorName The name of an input or output tensor.
    //!
    //! Return Dims{-1, {}} if the provided name does not map to an input or output tensor.
    //! Otherwise return the shape of the input or output tensor.
    //!
    //! A dimension in an input tensor will have a -1 wildcard value if all the following are true:
    //! * setInputShape() has not yet been called for this tensor
    //! * The dimension is a runtime dimension that is not implicitly constrained to be a single value.
    //!
    //! A dimension in an output tensor will have a -1 wildcard value if the dimension depends
    //! on values of execution tensors OR if all the following are true:
    //! * It is a runtime dimension.
    //! * setInputShape() has NOT been called for some input tensor(s) with a runtime shape.
    //! * setTensorAddress() has NOT been called for some input tensor(s) with isShapeInferenceIO() = true.
    //!
    //! An output tensor may also have -1 wildcard dimensions if its shape depends on values of tensors supplied to
    //! enqueueV3().
    //!
    //! If the request is for the shape of an output tensor with runtime dimensions,
    //! all input tensors with isShapeInferenceIO() = true should have their value already set,
    //! since these values might be needed to compute the output shape.
    //!
    //! Examples of an input dimension that is implicitly constrained to a single value:
    //! * The optimization profile specifies equal min and max values.
    //! * The dimension is named and only one value meets the optimization profile requirements
    //!   for dimensions with that name.
    //!
    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    Dims getTensorShape(char const* tensorName) const noexcept
    {
        return mImpl->getTensorShape(tensorName);
    }
    //!
    //! \brief Whether all dynamic dimensions of input tensors have been specified
    //!
    //! \return True if all dynamic dimensions of input tensors have been specified
    //! by calling setInputShape().
    //!
    //! Trivially true if network has no dynamically shaped input tensors.
    //!
    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
    //! IExecutionContext::inferShapes() instead.
    //!
    bool allInputDimensionsSpecified() const noexcept
    {
        return mImpl->allInputDimensionsSpecified();
    }
    //!
    //! \brief Whether all input shape bindings have been specified
    //!
    //! \return True if all input shape bindings have been specified by setInputShapeBinding().
    //!
    //! Trivially true if network has no input shape bindings.
    //!
    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
    //! IExecutionContext::inferShapes() instead.
    //!
    //! \deprecated Deprecated in TensorRT 10.0. setInputShapeBinding() is removed since TensorRT 10.0.
    //!
    TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
    {
        return mImpl->allInputShapesSpecified();
    }
    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }
    //!
    //! \brief Synchronously execute a network.
    //!
    //! This method requires an array of input and output buffers. The mapping
    //! from indices to tensor names can be queried using ICudaEngine::getIOTensorName().
    //!
    //! \param bindings An array of pointers to input and output buffers for the network.
    //!
    //! \return True if execution succeeded.
    //!
    //! \see ICudaEngine::getIOTensorName()
    //!
    bool executeV2(void* const* bindings) noexcept
    {
        return mImpl->executeV2(bindings);
    }
    //!
    //! \brief Select an optimization profile for the current context with async
    //! semantics.
    //!
    //! \param profileIndex Index of the profile. The value must lie between 0 and
    //! getEngine().getNbOptimizationProfiles() - 1
    //!
    //! \param stream A CUDA stream on which the cudaMemcpyAsyncs may be
    //! enqueued
    //!
    //! When an optimization profile is switched via this API, TensorRT may
    //! require that data is copied via cudaMemcpyAsync. It is the
    //! application's responsibility to guarantee that synchronization between
    //! the profile sync stream and the enqueue stream occurs.
    //!
    //! The selected profile will be used in subsequent calls to executeV2()/enqueueV3().
    //! If the associated CUDA engine has inputs with dynamic shapes, the optimization profile must
    //! be set with its corresponding profileIndex before calling execute or enqueue. The newly created execution
    //! context will be assigned optimization profile 0.
    //!
    //! If the associated CUDA engine does not have inputs with dynamic shapes,
    //! this method need not be called, in which case the default profile index
    //! of 0 will be used.
    //!
    //! setOptimizationProfileAsync() must be called before calling
    //! setInputShape() for all dynamic input
    //! tensors or input shape tensors, which in turn must be called before
    //! executeV2()/enqueueV3().
    //!
    //! \warning This function will trigger layer resource updates on the next call of
    //! executeV2()/enqueueV3(), possibly resulting in performance bottlenecks.
    //!
    //! \warning Not synchronizing the stream used at enqueue with the stream
    //! used to set optimization profile asynchronously using this API will
    //! result in undefined behavior.
    //!
    //! \return true if the call succeeded, else false (e.g. input out of range)
    //!
    //! \see ICudaEngine::getNbOptimizationProfiles()
    //!
    bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
    {
        return mImpl->setOptimizationProfileAsync(profileIndex, stream);
    }
    //!
    //! \brief Set whether enqueue emits layer timing to the profiler
    //!
    //! If set to true (default), enqueue is synchronous and does layer timing profiling implicitly if
    //! there is a profiler attached.
    //! If set to false, enqueue will be asynchronous if there is a profiler attached. An extra method
    //! reportToProfiler() needs to be called to obtain the profiling data and report to the profiler attached.
    //!
    //! \param enqueueEmitsProfile The new value of the flag.
    //!
    //! \see IExecutionContext::getEnqueueEmitsProfile()
    //! \see IExecutionContext::reportToProfiler()
    //!
    void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
    {
        mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
    }

    //!
    //! \brief Get the enqueueEmitsProfile state.
    //!
    //! \return The enqueueEmitsProfile state.
    //!
    //! \see IExecutionContext::setEnqueueEmitsProfile()
    //!
    bool getEnqueueEmitsProfile() const noexcept
    {
        return mImpl->getEnqueueEmitsProfile();
    }
    //!
    //! \brief Calculate layer timing info for the current optimization profile in IExecutionContext
    //! and update the profiler after one iteration of inference launch.
    //!
    //! If IExecutionContext::getEnqueueEmitsProfile() returns true, the enqueue function will calculate layer timing
    //! implicitly if a profiler is provided. This function returns true and does nothing.
    //!
    //! If IExecutionContext::getEnqueueEmitsProfile() returns false, the enqueue function will record the CUDA event
    //! timers if a profiler is provided. But it will not perform the layer timing calculation.
    //! IExecutionContext::reportToProfiler() needs to be called explicitly to calculate layer timing for the previous
    //! inference launch.
    //!
    //! In the CUDA graph launch scenario, it will record the same set of CUDA events
    //! as in regular enqueue functions if the graph is captured from an IExecutionContext with profiler enabled.
    //! This function needs to be called after graph launch to report the layer timing info to the profiler.
    //!
    //! \warning profiling CUDA graphs is only available from CUDA 11.1 onwards.
    //! \warning reportToProfiler uses the stream of the previous enqueue call, so the stream must be live otherwise
    //! behavior is undefined.
    //!
    //! \return true if the call succeeded, else false (e.g. profiler not provided, in CUDA graph capture mode, etc.)
    //!
    //! \see IExecutionContext::setEnqueueEmitsProfile()
    //! \see IExecutionContext::getEnqueueEmitsProfile()
    //!
    bool reportToProfiler() const noexcept
    {
        return mImpl->reportToProfiler();
    }
//!
//! \brief Set memory address for given input or output tensor.
//!
//! \param tensorName The name of an input or output tensor.
//! \param data The pointer (void*) to the data owned by the user.
//!
//! \return True on success, false if error occurred.
//!
//! An address defaults to nullptr.
//! Pass data=nullptr to reset to the default state.
//!
//! Return false if the provided name does not map to an input or output tensor.
//!
//! If an input pointer has type (void const*), use setInputTensorAddress() instead.
//!
//! Before calling enqueueV3(), each input must have a non-null address and
//! each output must have a non-null address or an IOutputAllocator to set it later.
//!
//! If the TensorLocation of the tensor is kHOST:
//! - The pointer must point to a host buffer of sufficient size.
//! - Data representing shape values is not copied until enqueueV3 is invoked.
//!
//! If the TensorLocation of the tensor is kDEVICE:
//! - The pointer must point to a device buffer of sufficient size and alignment, or
//! - Be nullptr if the tensor is an output tensor that will be allocated by IOutputAllocator.
//!
//! If getTensorShape(name) reports a -1 for any dimension of an output after all
//! input shapes have been set, use setOutputAllocator() to associate an IOutputAllocator
//! to which the dimensions will be reported when known.
//!
//! Calling both setTensorAddress and setOutputAllocator() for the same output is allowed,
//! and can be useful for preallocating memory, and then reallocating if it's not big enough.
//!
//! The pointer must have at least 256-byte alignment.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setInputTensorAddress() setOutputTensorAddress() getTensorShape() setOutputAllocator() IOutputAllocator
//!
bool setTensorAddress(char const* tensorName, void* data) noexcept
{
    return mImpl->setTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get memory address bound to given input or output tensor, or nullptr if the provided name does not map to
//! an input or output tensor.
//!
//! \param tensorName The name of an input or output tensor.
//!
//! \return The address bound to the tensor (possibly nullptr if never set), or nullptr for an unknown name.
//!
//! Use method getOutputTensorAddress() if a non-const pointer for an output tensor is required.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see getOutputTensorAddress()
//!
void const* getTensorAddress(char const* tensorName) const noexcept
{
    return mImpl->getTensorAddress(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the memory address for a given output tensor.
//!
//! \param tensorName The name of an output tensor.
//! \param data The pointer to the buffer to which to write the output.
//!
//! \return True on success, false if the provided name does not map to an output tensor, does not meet alignment
//! requirements, or some other error occurred.
//!
//! Output addresses can also be set using method setTensorAddress. This method is provided for applications which
//! prefer to use different methods for setting input and output tensors.
//!
//! See setTensorAddress() for alignment and data type constraints.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setTensorAddress()
//!
bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
{
    return mImpl->setOutputTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set memory address for given input.
//!
//! \param tensorName The name of an input tensor.
//! \param data The pointer (void const*) to the const data owned by the user.
//!
//! \return True on success, false if the provided name does not map to an input tensor, does not meet alignment
//! requirements, or some other error occurred.
//!
//! Input addresses can also be set using method setTensorAddress, which requires a (void*).
//!
//! See description of method setTensorAddress() for alignment and data type constraints.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see setTensorAddress()
//!
bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
{
    return mImpl->setInputTensorAddress(tensorName, data);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get memory address for given output.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return Raw output data pointer (void*) for given output tensor, or nullptr if the provided name does not map to
//! an output tensor.
//!
//! If only a (void const*) pointer is needed, an alternative is to call method getTensorAddress().
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see getTensorAddress()
//!
void* getOutputTensorAddress(char const* tensorName) const noexcept
{
    return mImpl->getOutputTensorAddress(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Run shape calculations.
//!
//! \param nbMaxNames Maximum number of names to write to tensorNames.
//!        When the return value is a positive value n and tensorNames != nullptr,
//!        the names of min(n,nbMaxNames) insufficiently specified input tensors are
//!        written to tensorNames.
//!
//! \param tensorNames Buffer in which to place names of insufficiently specified input tensors.
//!
//! \return 0 on success.
//!         Positive value n if n input tensors were not sufficiently specified.
//!         -1 for other errors.
//!
//! An input tensor is insufficiently specified if either of the following is true:
//!
//! * It has dynamic dimensions and its runtime dimensions have not yet
//!   been specified via IExecutionContext::setInputShape.
//!
//! * isShapeInferenceIO(t)=true and the tensor's address has not yet been set.
//!
//! If an output tensor has isShapeInferenceIO(t)=true and its address has been specified,
//! then its value is written.
//!
//! Returns -1 if tensorNames == nullptr and nbMaxNames != 0.
//! Returns -1 if nbMaxNames < 0.
//! Returns -1 if a tensor's dimensions are invalid, e.g. a tensor ends up with a negative dimension.
//!
int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
{
    return mImpl->inferShapes(nbMaxNames, tensorNames);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Recompute the internal activation buffer sizes based on the current input shapes, and return the total
//! amount of memory required.
//!
//! Users can allocate the device memory based on the size returned and provide the memory to TRT with
//! IExecutionContext::setDeviceMemory(). Must specify all input shapes and the optimization profile to use before
//! calling this function, otherwise the partition will be invalidated.
//!
//! \return Total amount of memory required on success, 0 if error occurred.
//!
//! \see IExecutionContext::setDeviceMemory()
//!
size_t updateDeviceMemorySizeForShapes() noexcept
{
    return mImpl->updateDeviceMemorySizeForShapes();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Mark input as consumed.
//!
//! \param event The CUDA event that is triggered after all input tensors have been consumed.
//!
//! \warning The set event must be valid during the inference.
//!
//! \return True on success, false if error occurred.
//!
//! Passing event==nullptr removes whatever event was set, if any.
//!
bool setInputConsumedEvent(cudaEvent_t event) noexcept
{
    return mImpl->setInputConsumedEvent(event);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief The event associated with consuming the input.
//!
//! \return The CUDA event. Nullptr will be returned if the event is not set yet.
//!
//! \see setInputConsumedEvent()
//!
cudaEvent_t getInputConsumedEvent() const noexcept
{
    return mImpl->getInputConsumedEvent();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set output allocator to use for output tensor of given name.
//! Pass nullptr to outputAllocator to unset.
//! The allocator is called by enqueueV3().
//!
//! \param tensorName The name of an output tensor.
//! \param outputAllocator IOutputAllocator for the tensors.
//!
//! \return True if success, false if the provided name does not map to an output or, if some other error occurred.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see enqueueV3() IOutputAllocator
//!
bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
{
    return mImpl->setOutputAllocator(tensorName, outputAllocator);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get output allocator associated with output tensor of given name, or nullptr if the provided name does
//! not map to an output tensor.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return The associated IOutputAllocator, or nullptr if none was set or the name is unknown.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see IOutputAllocator
//!
IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
{
    return mImpl->getOutputAllocator(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get upper bound on an output tensor's size, in bytes, based on
//! the current optimization profile and input dimensions.
//!
//! If the profile or input dimensions are not yet set, or the provided name
//! does not map to an output, returns -1.
//!
//! \param tensorName The name of an output tensor.
//!
//! \return Upper bound in bytes, or -1 on the error conditions above.
//!
//! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
//!
int64_t getMaxOutputSize(char const* tensorName) const noexcept
{
    return mImpl->getMaxOutputSize(tensorName);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Specify allocator to use for internal temporary storage.
//!
//! This allocator is used only by enqueueV3() for temporary storage whose size cannot be
//! predicted ahead of enqueueV3(). It is not used for output tensors, because memory
//! allocation for those is allocated by the allocator set by setOutputAllocator().
//! All memory allocated is freed by the time enqueueV3() returns.
//!
//! \param allocator pointer to allocator to use. Pass nullptr to revert to using TensorRT's
//!        default allocator.
//!
//! \return True on success, false if error occurred.
//!
//! \see enqueueV3() setOutputAllocator()
//!
bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept
{
    return mImpl->setTemporaryStorageAllocator(allocator);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get allocator set by setTemporaryStorageAllocator.
//!
//! Returns a nullptr if a nullptr was passed with setTemporaryStorageAllocator().
//!
//! \see setTemporaryStorageAllocator()
//!
IGpuAllocator* getTemporaryStorageAllocator() const noexcept
{
    return mImpl->getTemporaryStorageAllocator();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Enqueue inference on a stream.
//!
//! \param stream A CUDA stream on which the inference kernels will be enqueued.
//!
//! \return True if the kernels were enqueued successfully, false otherwise.
//!
//! Modifying or releasing memory that has been registered for the tensors before stream
//! synchronization or before the event passed to setInputConsumedEvent has been triggered results in undefined
//! behavior.
//! Input tensors can be released after the setInputConsumedEvent whereas output tensors require stream
//! synchronization.
//!
//! \warning Using default stream may lead to performance issues due to additional cudaDeviceSynchronize() calls by
//! TensorRT to ensure correct synchronizations. Please use non-default stream instead.
//!
//! \warning If the Engine is streaming weights, enqueueV3 will become synchronous, and
//! the graph will not be capturable.
//!
bool enqueueV3(cudaStream_t stream) noexcept
{
    return mImpl->enqueueV3(stream);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the maximum size for persistent cache usage.
//!
//! This function sets the maximum persistent L2 cache that this execution context may use for activation caching.
//! Activation caching is not supported on all architectures - see "How TensorRT uses Memory" in the developer guide
//! for details.
//!
//! \param size the size of persistent cache limitation in bytes.
//!        The default is 0 Bytes.
//!
//! \see getPersistentCacheLimit
//!
void setPersistentCacheLimit(size_t size) noexcept
{
    mImpl->setPersistentCacheLimit(size);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the maximum size for persistent cache usage.
//!
//! \returns The size of the persistent cache limit, in bytes.
//!
//! \see setPersistentCacheLimit
//!
size_t getPersistentCacheLimit() const noexcept
{
    return mImpl->getPersistentCacheLimit();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the verbosity of the NVTX markers in the execution context.
//!
//! Building with kDETAILED verbosity will generally increase latency in enqueueV3(). Call this method
//! to select NVTX verbosity in this execution context at runtime.
//!
//! The default is the verbosity with which the engine was built, and the verbosity may not be raised above that
//! level.
//!
//! This function does not affect how IEngineInspector interacts with the engine.
//!
//! \param verbosity The verbosity of the NVTX markers.
//!
//! \return True if the NVTX verbosity is set successfully. False if the provided verbosity level is higher than the
//! profiling verbosity of the corresponding engine.
//!
//! \see getNvtxVerbosity()
//! \see ICudaEngine::getProfilingVerbosity()
//!
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
{
    return mImpl->setNvtxVerbosity(verbosity);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the NVTX verbosity of the execution context.
//!
//! \return The current NVTX verbosity of the execution context.
//!
//! \see setNvtxVerbosity()
//!
ProfilingVerbosity getNvtxVerbosity() const noexcept
{
    return mImpl->getNvtxVerbosity();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
//!
//! If set, TensorRT will launch the kernels that are supposed to run on the auxiliary streams using the streams
//! provided by the user with this API. If this API is not called before the enqueueV3() call, then TensorRT will
//! use the auxiliary streams created by TensorRT internally.
//!
//! TensorRT will always insert event synchronizations between the main stream provided via enqueueV3() call and the
//! auxiliary streams:
//! - At the beginning of the enqueueV3() call, TensorRT will make sure that all the auxiliary streams wait on
//!   the activities on the main stream.
//! - At the end of the enqueueV3() call, TensorRT will make sure that the main stream waits on the activities on
//!   all the auxiliary streams.
//!
//! \param auxStreams The pointer to an array of cudaStream_t with the array length equal to nbStreams.
//! \param nbStreams The number of auxiliary streams provided. If nbStreams is greater than
//!        `engine->getNbAuxStreams()`, then only the first `engine->getNbAuxStreams()` streams will be used. If
//!        `nbStreams` is less than `engine->getNbAuxStreams()`, such as setting `nbStreams` to 0, then TensorRT
//!        will use the provided streams for the first `nbStreams` auxiliary streams, and will create additional
//!        streams internally for the rest of the auxiliary streams.
//!
//! \note The provided auxiliary streams must not be the default stream and must all be different to avoid
//!       deadlocks.
//!
//! \see enqueueV3(), IBuilderConfig::setMaxAuxStreams(), ICudaEngine::getNbAuxStreams()
//!
void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
{
    mImpl->setAuxStreams(auxStreams, nbStreams);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set DebugListener for this execution context.
//!
//! \param listener DebugListener for this execution context.
//!
//! \return true if succeed, false if failure.
//!
//! \see getDebugListener()
//!
bool setDebugListener(IDebugListener* listener) noexcept
{
    return mImpl->setDebugListener(listener);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the DebugListener of this execution context.
//!
//! \return DebugListener of this execution context.
//!
//! \see setDebugListener()
//!
IDebugListener* getDebugListener() noexcept
{
    return mImpl->getDebugListener();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Set debug state of tensor given the tensor name.
//!
//! Turn the debug state of a tensor on or off.
//! A tensor with the parameter tensor name must exist in the network, and the tensor must have
//! been marked as a debug tensor during build time. Otherwise, an error is thrown.
//!
//! \param name Name of target tensor.
//!
//! \param flag True if turning on debug state, false if turning off debug state of tensor.
//!        The default is off.
//!
//! \return True if successful, false otherwise.
//!
bool setTensorDebugState(char const* name, bool flag) noexcept
{
    return mImpl->setTensorDebugState(name, flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the debug state.
//!
//! \param name Name of target tensor.
//!
//! \return true if there is a debug tensor with the given name and it has debug state turned on.
//!
//! \see setTensorDebugState()
//!
bool getDebugState(char const* name) const noexcept
{
    return mImpl->getDebugState(name);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the runtime config object used during execution context creation.
//!
//! \return The runtime config object.
//!
IRuntimeConfig* getRuntimeConfig() const noexcept
{
    return mImpl->getRuntimeConfig();
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Turn the debug state of all debug tensors on or off.
//!
//! \param flag true if turning on debug state, false if turning off debug state.
//!
//! \return true if successful, false otherwise.
//!
//! The default is off.
//!
bool setAllTensorsDebugState(bool flag) noexcept
{
    return mImpl->setAllTensorsDebugState(flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Turn the debug state of unfused tensors on or off.
//!
//! The default is off.
//!
//! \param flag true if turning on debug state, false if turning off debug state.
//!
//! \return true if successful, false otherwise.
//!
//! \see INetworkDefinition::markUnfusedTensorsAsDebugTensors()
//!
bool setUnfusedTensorsDebugState(bool flag) noexcept
{
    return mImpl->setUnfusedTensorsDebugState(flag);
}
|
|||
|
|
|
|||
|
|
//!
//! \brief Get the debug state of unfused tensors.
//!
//! \return true if unfused tensors debug state is on. False if unfused tensors debug state is off.
//!
//! \see setUnfusedTensorsDebugState()
//!
bool getUnfusedTensorsDebugState() const noexcept
{
    return mImpl->getUnfusedTensorsDebugState();
}
|
|||
|
|
#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
    //!
    //! \brief Check if a subsequent call to enqueueV3 is graph-capturable on the provided stream.
    //!
    //! \param stream The stream to check.
    //!
    //! \return true if a subsequent call to enqueueV3 is graph-capturable on the provided stream.
    //! Reasons why graph capture may fail include:
    //! - blocking runtime allocation due to large dynamically sized tensors that cannot be
    //!   statically allocated,
    //! - dynamically shaped tensors whose size depends on the tensor contents, like the output
    //!   of an INonZeroLayer,
    //! - conditional control flow depending on the contents of on-device tensors, like an
    //!   ITripLimitLayer whose input tensor resides on the device,
    //! - engines that have been built for weight streaming.
    //!
    //! \note If this API returns false, enqueueV3 may not be called on a capturable stream
    //! (i.e. users may not call cudaStreamBeginCapture before starting inference); otherwise,
    //! inference will fail with an error message.
    bool isStreamCapturable(cudaStream_t stream) const noexcept
    {
        return mImpl->isStreamCapturable(stream);
    }
#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION

protected:
    apiv::VExecutionContext* mImpl; //!< Opaque implementation; every public method above delegates to it.
}; // class IExecutionContext
|
|||
|
|
|
|||
|
|
//!
//! \enum LayerInformationFormat
//!
//! \brief The format in which the IEngineInspector prints the layer information.
//!
//! \see IEngineInspector::getLayerInformation(), IEngineInspector::getEngineInformation()
//!
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0, //!< Print layer information in one line per layer.
    kJSON = 1,    //!< Print layer information in JSON format.
};
|
|||
|
|
|
|||
|
|
//! Maximum number of layer information formats in LayerInformationFormat enum.
//! \see LayerInformationFormat
template <>
constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
{
    return 2; // kONELINE and kJSON
}
|
|||
|
|
|
|||
|
|
//!
//! \class IEngineInspector
//!
//! \brief An engine inspector which prints out the layer information of an engine or an execution context.
//!
//! The amount of printed information depends on the profiling verbosity setting of the builder config when the engine
//! is built:
//! - ProfilingVerbosity::kLAYER_NAMES_ONLY: only layer names will be printed.
//! - ProfilingVerbosity::kNONE: no layer information will be printed.
//! - ProfilingVerbosity::kDETAILED: layer names and layer parameters will be printed.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
//! \see ProfilingVerbosity, IEngineInspector
//!
class IEngineInspector : public INoCopy
{
public:
    virtual ~IEngineInspector() noexcept = default;

    //!
    //! \brief Set an execution context as the inspection source.
    //!
    //! Setting the execution context and specifying all the input shapes allows the inspector
    //! to calculate concrete dimensions for any dynamic shapes and display their format information.
    //! Otherwise, values dependent on input shapes will be displayed as -1 and format information
    //! will not be shown.
    //!
    //! Passing nullptr will remove any association with an execution context.
    //!
    //! \param context The execution context to inspect, or nullptr to clear the association.
    //!
    //! \return Whether the action succeeds.
    //!
    bool setExecutionContext(IExecutionContext const* context) noexcept
    {
        return mImpl->setExecutionContext(context);
    }

    //!
    //! \brief Get the context currently being inspected.
    //!
    //! \return The pointer to the context currently being inspected.
    //!
    //! \see setExecutionContext()
    //!
    IExecutionContext const* getExecutionContext() const noexcept
    {
        return mImpl->getExecutionContext();
    }

    //!
    //! \brief Get a string describing the information about a specific layer in the current engine or the execution
    //! context.
    //!
    //! \param layerIndex the index of the layer. It must lie in range [0, engine.getNbLayers()).
    //!
    //! \param format the format the layer information should be printed in.
    //!
    //! \return A null-terminated C-style string describing the information about a specific layer in the current
    //! engine or the execution context.
    //!
    //! \warning The content of the returned string may change when another execution context has
    //! been set, or when another getLayerInformation() or getEngineInformation() has been called.
    //!
    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
    //! inspection source. If the inspection source changes, the data that is being pointed to can change.
    //! Copy the string to another buffer before releasing the lock in order to guarantee consistency.
    //!
    //! \see LayerInformationFormat
    //!
    char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
    {
        return mImpl->getLayerInformation(layerIndex, format);
    }

    //!
    //! \brief Get a string describing the information about all the layers in the current engine or the execution
    //! context.
    //!
    //! \param format the format the layer information should be printed in.
    //!
    //! \return A null-terminated C-style string describing the information about all the layers in the current
    //! engine or the execution context.
    //!
    //! \warning The content of the returned string may change when another execution context has
    //! been set, or when another getLayerInformation() or getEngineInformation() has been called.
    //!
    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
    //! inspection source. If the inspection source changes, the data that is being pointed to can change.
    //! Copy the string to another buffer before releasing the lock in order to guarantee consistency.
    //!
    //! \see LayerInformationFormat
    //!
    char const* getEngineInformation(LayerInformationFormat format) const noexcept
    {
        return mImpl->getEngineInformation(format);
    }

    //!
    //! \brief Set the ErrorRecorder for this interface
    //!
    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
    //! a recorder has been registered.
    //!
    //! If an error recorder is not set, messages will be sent to the global log stream.
    //!
    //! \param recorder The error recorder to register with this interface.
    //!
    //! \see getErrorRecorder()
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }

    //!
    //! \brief Get the ErrorRecorder assigned to this interface.
    //!
    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
    //! an error handler has not been set.
    //!
    //! \return A pointer to the IErrorRecorder object that has been registered.
    //!
    //! \see setErrorRecorder()
    //!
    IErrorRecorder* getErrorRecorder() const noexcept
    {
        return mImpl->getErrorRecorder();
    }

protected:
    apiv::VEngineInspector* mImpl; //!< Opaque implementation; every public method above delegates to it.
}; // class IEngineInspector
|
|||
|
|
|
|||
|
|
} // namespace nvinfer1
|
|||
|
|
|
|||
|
|
//!
//! Internal C entry point for creating IRuntime.
//! @private
//!
extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;

//!
//! Internal C entry point for creating IRefitter.
//! @private
//!
extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;

//!
//! \brief Return the plugin registry
//!
extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;

//!
//! \brief Return the logger object.
//! \note the global logger is used only by standalone functions which have no associated builder, runtime
//! or refitter.
//!
extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
namespace nvinfer1
|
|||
|
|
{
|
|||
|
|
namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
|
|||
|
|
// header.
|
|||
|
|
{
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an instance of an IRuntime class.
|
|||
|
|
//!
|
|||
|
|
//! \param logger The logging class for the runtime.
|
|||
|
|
//!
|
|||
|
|
inline IRuntime* createInferRuntime(ILogger& logger) noexcept
|
|||
|
|
{
|
|||
|
|
return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Create an instance of an IRefitter class.
|
|||
|
|
//!
|
|||
|
|
//! \param engine The engine class for the refitter.
|
|||
|
|
//! \param logger The logging class for the refitter.
|
|||
|
|
//!
|
|||
|
|
inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
|
|||
|
|
{
|
|||
|
|
return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
} // namespace
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Register the plugin creator to the registry
|
|||
|
|
//! The static registry object will be instantiated when the plugin library is
|
|||
|
|
//! loaded. This static object will register all creators available in the
|
|||
|
|
//! library to the registry.
|
|||
|
|
//!
|
|||
|
|
//! \warning Statically registering plugins should be avoided in the automotive
|
|||
|
|
//! safety context as the application developer should first register an error recorder
|
|||
|
|
//! with the plugin registry via IPluginRegistry::setErrorRecorder() before using
|
|||
|
|
//! IPluginRegistry::registerCreator() or other methods.
|
|||
|
|
//!
|
|||
|
|
template <typename T>
|
|||
|
|
class PluginRegistrar
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
PluginRegistrar()
|
|||
|
|
{
|
|||
|
|
getPluginRegistry()->registerCreator(instance, "");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
private:
|
|||
|
|
//! Plugin instance.
|
|||
|
|
T instance{};
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
} // namespace nvinfer1

//!
//! \brief Declare a file-scope PluginRegistrar<name> whose constructor registers the
//! plugin creator \p name with the global plugin registry during static initialization.
//!
#define REGISTER_TENSORRT_PLUGIN(name) \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}

namespace nvinfer1
{
//!
//! \class ILoggerFinder
//!
//! \brief A virtual base class to find a logger.
//! Allows a plugin to find an instance of a logger if it needs to emit a log message.
//! A pointer to an instance of this class is passed to a plugin shared library on initialization when that plugin
//! is serialized as part of a version-compatible plan. See the plugin chapter in the developer guide for details.
//!
class ILoggerFinder
{
public:
    //!
    //! \brief Get the logger used by the engine or execution context which called the plugin method.
    //!
    //! \warning Must be called from the thread in which the plugin method was called.
    //!
    //! \return A pointer to the logger.
    //!
    virtual ILogger* findLogger() = 0;

protected:
    //! Protected destructor: callers cannot destroy an object through this interface.
    virtual ~ILoggerFinder() = default;
};
//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
|
|||
|
|
//! The name v_1_0 may change in future versions of TensorRT.
|
|||
|
|
namespace v_1_0
|
|||
|
|
{
|
|||
|
|
|
|||
|
|
class IGpuAsyncAllocator : public IGpuAllocator
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
IGpuAsyncAllocator() = default;
|
|||
|
|
~IGpuAsyncAllocator() override = default;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
|
|||
|
|
//! acquisition of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! \param size The size of the memory block required (in bytes).
|
|||
|
|
//! \param alignment The required alignment of memory. Alignment will be zero
|
|||
|
|
//! or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
|
|||
|
|
//! Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
|
|||
|
|
//! An alignment value of zero indicates any alignment is acceptable.
|
|||
|
|
//! \param flags Reserved for future use. In the current release, 0 will be passed.
|
|||
|
|
//!
|
|||
|
|
//! \param stream Specifies the cudastream for the asynchronous allocation. If nullptr or 0 is
|
|||
|
|
//! passed, the default stream will be used.
|
|||
|
|
//!
|
|||
|
|
//! \return If the allocation was successful, the start address of a device memory block of the requested size.
|
|||
|
|
//! If an allocation request of size 0 is made, nullptr must be returned.
|
|||
|
|
//! If an allocation request cannot be satisfied, nullptr must be returned.
|
|||
|
|
//! If a non-null address is returned, it is guaranteed to have the specified alignment.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
|
|||
|
|
//! albeit doing so will lose the performance advantage of asynchronous allocation.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//!
|
|||
|
|
void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
|
|||
|
|
cudaStream_t /*stream*/) noexcept override = 0;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
|
|||
|
|
//! release of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
|
|||
|
|
//!
|
|||
|
|
//! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
|
|||
|
|
//! allocator object.
|
|||
|
|
//!
|
|||
|
|
//! \param stream Specifies the cudastream for the asynchronous deallocation. If nullptr or 0 is
|
|||
|
|
//! passed, the default stream will be used.
|
|||
|
|
//!
|
|||
|
|
//! \return True if the acquired memory is released successfully.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
|
|||
|
|
//! albeit doing so will lose the performance advantage of asynchronous deallocation.
|
|||
|
|
//! Either way, it is critical that it not actually free the memory until the current
|
|||
|
|
//! stream position is reached.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! \param size The size of the memory block required (in bytes).
|
|||
|
|
//! \param alignment The required alignment of memory. Alignment will be zero
|
|||
|
|
//! or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
|
|||
|
|
//! Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
|
|||
|
|
//! An alignment value of zero indicates any alignment is acceptable.
|
|||
|
|
//! \param flags Reserved for future use. In the current release, 0 will be passed.
|
|||
|
|
//!
|
|||
|
|
//! \return If the allocation was successful, the start address of a device memory block of the requested size.
|
|||
|
|
//! If an allocation request of size 0 is made, nullptr must be returned.
|
|||
|
|
//! If an allocation request cannot be satisfied, nullptr must be returned.
|
|||
|
|
//! If a non-null address is returned, it is guaranteed to have the specified alignment.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync/reallocate
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED void* allocate(
|
|||
|
|
uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
|
|||
|
|
{
|
|||
|
|
return allocateAsync(size, alignment, flags, nullptr);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
|
|||
|
|
//!
|
|||
|
|
//! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
|
|||
|
|
//!
|
|||
|
|
//! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
|
|||
|
|
//! allocator object.
|
|||
|
|
//!
|
|||
|
|
//! \return True if the acquired memory is released successfully.
|
|||
|
|
//!
|
|||
|
|
//! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
|
|||
|
|
//! requests.
|
|||
|
|
//!
|
|||
|
|
//! \usage
|
|||
|
|
//! - Allowed context for the API call
|
|||
|
|
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
|
|||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
|
|||
|
|
//!
|
|||
|
|
TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
|
|||
|
|
{
|
|||
|
|
return deallocateAsync(memory, nullptr);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
//!
|
|||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
|||
|
|
//!
|
|||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
|||
|
|
{
|
|||
|
|
return {"IGpuAllocator", 1, 0};
|
|||
|
|
}
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
class IPluginCreatorV3One : public IPluginCreatorInterface
{
public:
    //!
    //! \brief Return version information associated with this interface. Applications must not override this method.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
    }

    //!
    //! \brief Return a plugin object. Return nullptr in case of error.
    //!
    //! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
    //! \param fc A pointer to a collection of fields needed for constructing the plugin.
    //! \param phase The TensorRT phase in which the plugin is being created
    //!
    //! When the phase is TensorRTPhase::kRUNTIME, the PluginFieldCollection provided for serialization by the plugin's
    //! runtime interface will be passed as fc.
    //!
    //! \note The returned plugin object must be in an initialized state
    //!
    //! \note If invoked by the user (e.g. with TensorRTPhase::kBUILD, to add to the network definition with
    //! addPluginV3()), it is the user's responsibility to delete the plugin object. If invoked by TensorRT (e.g. during
    //! engine deserialization), TensorRT will delete any objects it creates.
    //!
    virtual IPluginV3* createPlugin(
        AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;

    //!
    //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the
    //! TensorRT build phase.
    //!
    //! \see PluginFieldCollection
    //!
    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;

    //!
    //! \brief Return the plugin name.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginName() const noexcept = 0;

    //!
    //! \brief Return the plugin version.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;

    //!
    //! \brief Return the plugin namespace.
    //!
    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
    //! the NULL terminator.
    //!
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;

    IPluginCreatorV3One() = default;
    virtual ~IPluginCreatorV3One() = default;

protected:
    // Copy/move operations are protected so that only derived classes may slice-copy themselves.
    IPluginCreatorV3One(IPluginCreatorV3One const&) = default;
    IPluginCreatorV3One(IPluginCreatorV3One&&) = default;
    IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
    IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
};
} // namespace v_1_0

//!
//! \class IGpuAsyncAllocator
//!
//! \brief Application-implemented class for controlling asynchronous (stream ordered) memory allocation on the GPU.
//!
//! \warning The lifetime of an IGpuAsyncAllocator object must exceed that of all objects that use it.
//!
//! The advantage of deriving from IGpuAsyncAllocator instead of IGpuAllocator is that you only have
//! to override two methods: allocateAsync() and deallocateAsync() to implement an allocator with
//! asynchronous capability, whereas deriving from IGpuAllocator requires overriding four methods,
//! including two deprecated methods.
//!
//! \see IGpuAllocator
using IGpuAsyncAllocator = v_1_0::IGpuAsyncAllocator;

//!
//! \class IPluginCreatorV3One
//!
//! \brief A plugin creator class capable of producing IPluginV3 objects
//!
//! \see IPluginV3
//! \see IPluginRegistry
//!
using IPluginCreatorV3One = v_1_0::IPluginCreatorV3One;

} // namespace nvinfer1
//!
//! \brief Return the library major version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
//!
//! \brief Return the library minor version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
//!
//! \brief Return the library patch version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
//!
//! \brief Return the library build version number.
//!
extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;

#endif // NV_INFER_RUNTIME_H