11922 lines
398 KiB
C
11922 lines
398 KiB
C
|
|
/*
|
||
|
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||
|
|
* SPDX-License-Identifier: Apache-2.0
|
||
|
|
*
|
||
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
* you may not use this file except in compliance with the License.
|
||
|
|
* You may obtain a copy of the License at
|
||
|
|
*
|
||
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
*
|
||
|
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
* See the License for the specific language governing permissions and
|
||
|
|
* limitations under the License.
|
||
|
|
*/
|
||
|
|
|
||
|
|
#ifndef NV_INFER_H
|
||
|
|
#define NV_INFER_H
|
||
|
|
|
||
|
|
#include "NvInferLegacyDims.h"
|
||
|
|
#include "NvInferRuntime.h" // IWYU pragma: export
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \mainpage
|
||
|
|
//!
|
||
|
|
//! This is the API documentation for the NVIDIA TensorRT library. It provides information on individual
|
||
|
|
//! functions, classes and methods. Use the index on the left to navigate the documentation.
|
||
|
|
//!
|
||
|
|
//! Please see the accompanying user guide and samples for higher-level information and general advice on
|
||
|
|
//! using TensorRT.
|
||
|
|
//!
|
||
|
|
//! TensorRT Versioning follows Semantic Versioning Guidelines specified here: https://semver.org/
|
||
|
|
//!
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \file NvInfer.h
|
||
|
|
//!
|
||
|
|
//! This is the top-level API file for TensorRT.
|
||
|
|
//!
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \namespace nvinfer1
|
||
|
|
//!
|
||
|
|
//! \brief The TensorRT API version 1 namespace.
|
||
|
|
//!
|
||
|
|
namespace nvinfer1
|
||
|
|
{
|
||
|
|
|
||
|
|
//!
//! \enum LayerType
//!
//! \brief The type values of layer classes.
//!
//! Each ILayer subclass reports exactly one of these values; the numeric values are part
//! of the public API and must not be reordered.
//!
//! \see ILayer::getType()
//!
enum class LayerType : int32_t
{
    kCONVOLUTION = 0,        //!< Convolution layer.
    kCAST = 1,               //!< Cast layer.
    kACTIVATION = 2,         //!< Activation layer.
    kPOOLING = 3,            //!< Pooling layer.
    kLRN = 4,                //!< LRN layer.
    kSCALE = 5,              //!< Scale layer.
    kSOFTMAX = 6,            //!< SoftMax layer.
    kDECONVOLUTION = 7,      //!< Deconvolution layer.
    kCONCATENATION = 8,      //!< Concatenation layer.
    kELEMENTWISE = 9,        //!< Elementwise layer.
    kPLUGIN = 10,            //!< Plugin layer.
    kUNARY = 11,             //!< UnaryOp operation Layer.
    kPADDING = 12,           //!< Padding layer.
    kSHUFFLE = 13,           //!< Shuffle layer.
    kREDUCE = 14,            //!< Reduce layer.
    kTOPK = 15,              //!< TopK layer.
    kGATHER = 16,            //!< Gather layer.
    kMATRIX_MULTIPLY = 17,   //!< Matrix multiply layer.
    kRAGGED_SOFTMAX = 18,    //!< Ragged softmax layer.
    kCONSTANT = 19,          //!< Constant layer.
    kIDENTITY = 20,          //!< Identity layer.
    kPLUGIN_V2 = 21,         //!< PluginV2 layer.
    kSLICE = 22,             //!< Slice layer.
    kSHAPE = 23,             //!< Shape layer.
    kPARAMETRIC_RELU = 24,   //!< Parametric ReLU layer.
    kRESIZE = 25,            //!< Resize Layer.
    kTRIP_LIMIT = 26,        //!< Loop Trip limit layer.
    kRECURRENCE = 27,        //!< Loop Recurrence layer.
    kITERATOR = 28,          //!< Loop Iterator layer.
    kLOOP_OUTPUT = 29,       //!< Loop output layer.
    kSELECT = 30,            //!< Select layer.
    kFILL = 31,              //!< Fill layer.
    kQUANTIZE = 32,          //!< Quantize layer.
    kDEQUANTIZE = 33,        //!< Dequantize layer.
    kCONDITION = 34,         //!< Condition layer.
    kCONDITIONAL_INPUT = 35, //!< Conditional Input layer.
    kCONDITIONAL_OUTPUT = 36, //!< Conditional Output layer.
    kSCATTER = 37,           //!< Scatter layer.
    kEINSUM = 38,            //!< Einsum layer.
    kASSERTION = 39,         //!< Assertion layer.
    kONE_HOT = 40,           //!< OneHot layer.
    kNON_ZERO = 41,          //!< NonZero layer.
    kGRID_SAMPLE = 42,       //!< Grid sample layer.
    kNMS = 43,               //!< NMS layer.
    kREVERSE_SEQUENCE = 44,  //!< Reverse sequence layer.
    kNORMALIZATION = 45,     //!< Normalization layer.
    kPLUGIN_V3 = 46,         //!< PluginV3 layer.
    kSQUEEZE = 47,           //!< Squeeze Layer.
    kUNSQUEEZE = 48,         //!< Unsqueeze Layer.
    kCUMULATIVE = 49,        //!< Cumulative layer.
    kDYNAMIC_QUANTIZE = 50,  //!< Dynamic Quantize layer.
    kATTENTION_INPUT = 51,   //!< Attention Input.
    kATTENTION_OUTPUT = 52,  //!< Attention Output.
    kROTARY_EMBEDDING = 53,  //!< Rotary Embedding layer.
    kKVCACHE_UPDATE = 54,    //!< KV Cache Update layer.
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in LayerType enum.
//!
//! \see LayerType
//!
template <>
constexpr inline int32_t EnumMax<LayerType>() noexcept
{
    // 55 enumerators: kCONVOLUTION (0) through kKVCACHE_UPDATE (54), with no gaps.
    return 55;
}
|
||
|
|
|
||
|
|
//!
//! \brief It is capable of representing one or more TensorFormat by binary OR
//! operations, e.g., 1U << TensorFormat::kCHW4 | 1U << TensorFormat::kCHW32.
//!
//! Each bit position corresponds to one TensorFormat enumerator.
//!
//! \see ITensor::getAllowedFormats(), ITensor::setAllowedFormats()
//!
using TensorFormats = uint32_t;
|
||
|
|
|
||
|
|
//!
//! \enum ActivationType
//!
//! \brief Enumerates the types of activation to perform in an activation layer.
//!
enum class ActivationType : int32_t
{
    kRELU = 0,             //!< Rectified linear activation.
    kSIGMOID = 1,          //!< Sigmoid activation.
    kTANH = 2,             //!< TanH activation.
    kLEAKY_RELU = 3,       //!< LeakyRelu activation: x>=0 ? x : alpha * x.
    kELU = 4,              //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
    kSELU = 5,             //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha).
    kSOFTSIGN = 6,         //!< Softsign activation: x / (1+|x|).
    kSOFTPLUS = 7,         //!< Parametric softplus activation: alpha*log(exp(beta*x)+1).
    kCLIP = 8,             //!< Clip activation: max(alpha, min(beta, x)).
    kHARD_SIGMOID = 9,     //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta)).
    kSCALED_TANH = 10,     //!< Scaled tanh activation: alpha*tanh(beta*x).
    kTHRESHOLDED_RELU = 11, //!< Thresholded ReLU activation: x>alpha ? x : 0.
    kGELU_ERF = 12,        //!< GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x)).
    kGELU_TANH = 13        //!< GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x))).
};
|
||
|
|
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in ActivationType enum.
//!
//! \see ActivationType
//!
template <>
struct EnumMaxImpl<ActivationType>
{
    // 14 enumerators: kRELU (0) through kGELU_TANH (13), with no gaps.
    static constexpr int32_t kVALUE = 14;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \class ITensor
//!
//! \brief A tensor in a network definition.
//!
//! To remove a tensor from a network definition, use INetworkDefinition::removeTensor().
//!
//! When using the DLA, the cumulative size of all Tensors that are not marked as Network Input or Output tensors,
//! must be less than 1GB in size to fit into a single subgraph. If the build option kGPU_FALLBACK is specified, then
//! multiple subgraphs can be created, with each subgraph limited to less than 1GB of internal tensors data.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and
//! ABI.
//!
class ITensor : public INoCopy
{
public:
    //!
    //! \brief Set the tensor name.
    //!
    //! For a network input, the name is assigned by the application. For tensors which are layer outputs,
    //! a default name is assigned consisting of the layer name followed by the index of the output in brackets.
    //! Each input and output tensor must have a unique name.
    //!
    //! This method copies the name string.
    //!
    //! \param name The name.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }

    //!
    //! \brief Get the tensor name.
    //!
    //! \return The name as a null-terminated C-style string.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

    //!
    //! \brief Set the dimensions of a tensor.
    //!
    //! For a network input, the dimensions are assigned by the application. For a network output, the dimensions are
    //! computed based on the layer parameters and the inputs to the layer. If a tensor size or a parameter is modified
    //! in the network, the dimensions of all dependent tensors will be recomputed.
    //!
    //! This call is only legal for network input tensors, since the dimensions of layer output tensors are inferred
    //! based on layer inputs and parameters.
    //!
    //! \param dimensions The dimensions of the tensor.
    //!
    //! \see getDimensions()
    //!
    void setDimensions(Dims const& dimensions) noexcept
    {
        mImpl->setDimensions(dimensions);
    }

    //!
    //! \brief Get the dimensions of a tensor.
    //!
    //! \return The dimensions of the tensor.
    //!
    //! \warning getDimensions() returns a -1 for dimensions that are derived from a wildcard dimension.
    //!
    //! \see setDimensions()
    //!
    Dims getDimensions() const noexcept
    {
        return mImpl->getDimensions();
    }

    //!
    //! \brief Set the data type of a tensor.
    //!
    //! \param type The data type of the tensor when the type is not inferred.
    //!
    //! For strongly typed networks, this method should be used only for network inputs,
    //! since the types of all other tensors are inferred. Setting the type of a network
    //! output is tolerated if the type equals the inferred type, otherwise an error occurs
    //! and the type is not updated.
    //!
    //! For weakly typed networks, this method can be used for network outputs too, but
    //! the type merely has to be implicitly convertible from the inferred type to the
    //! specified type. In this case it does not matter whether the type is set first
    //! or the tensor is marked as an output first (via `INetworkDefinition::markOutput`
    //! or `INetworkDefinition::markOutputForShapes`).
    //!
    //! However, marking it first has two advantages:
    //!
    //! * It avoids warnings that the tensor is not yet a network I/O tensor.
    //! * It causes method `getType()` to return the type that was set instead of the inferred type.
    //!
    //! \see getType()
    //!
    //! \note This function does more than just set the type, so `t.setType(t.getType())` is not necessarily a no-op,
    //! particularly for input and output tensors!
    //!
    //! \note Repeated consecutive applications of `t.setType(t.getType())`
    //! would be idempotent, provided the state of the `ITensor` isn't changed between calls.
    //!
    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
    //!
    TRT_DEPRECATED void setType(DataType type) noexcept
    {
        mImpl->setType(type);
    }

    //!
    //! \brief Get the data type of a tensor.
    //!
    //! \return The data type of the tensor.
    //!
    //! The type is the type set by `setType` if the tensor is a network input or output.
    //! Otherwise the type is the inferred type.
    //!
    //! \see setType()
    //!
    DataType getType() const noexcept
    {
        return mImpl->getType();
    }

    //!
    //! \brief Set dynamic range for the tensor.
    //!
    //! Currently, only symmetric ranges are supported.
    //! Therefore, the larger of the absolute values of the provided bounds is used.
    //!
    //! \return Whether the dynamic range was set successfully.
    //!
    //! Requires that min and max be finite, and min <= max.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED bool setDynamicRange(float min, float max) noexcept
    {
        return mImpl->setDynamicRange(min, max);
    }

    //!
    //! \brief Whether the tensor is a network input.
    //!
    bool isNetworkInput() const noexcept
    {
        return mImpl->isNetworkInput();
    }

    //!
    //! \brief Whether the tensor is a network output.
    //!
    bool isNetworkOutput() const noexcept
    {
        return mImpl->isNetworkOutput();
    }

    //!
    //! \brief Set whether to enable broadcast of tensor across the implicit batch dimension.
    //!
    //! \warning This method has no effect other than issuing a warning.
    //!
    //! \param broadcastAcrossBatch Whether to broadcast the tensor across the implicit
    //! batch dimension that was a feature of TensorRT 9.x and prior.
    //!
    //! \see getBroadcastAcrossBatch()
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
    //!
    TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
    {
        mImpl->setBroadcastAcrossBatch(broadcastAcrossBatch);
    }

    //!
    //! \brief Check if tensor is broadcast across the implicit batch dimension.
    //!
    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
    //!
    //! \see setBroadcastAcrossBatch()
    //!
    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
    //!
    TRT_DEPRECATED bool getBroadcastAcrossBatch() const noexcept
    {
        return mImpl->getBroadcastAcrossBatch();
    }

    //!
    //! \brief Get the storage location of a tensor.
    //!
    //! \return The location of tensor data.
    //!
    //! \see setLocation()
    //!
    TensorLocation getLocation() const noexcept
    {
        return mImpl->getLocation();
    }

    //!
    //! \brief Set the storage location of a tensor.
    //!
    //! \param location the location of tensor data
    //!
    //! Only network input tensors for storing sequence lengths for RNNv2 are supported.
    //! Using host storage for layers that do not support it will generate
    //! errors at build time.
    //!
    //! \see getLocation()
    //!
    //! \deprecated Deprecated in TensorRT 10.0. RNNv2 is not supported and the location must
    //! always be TensorLocation::kDEVICE since TensorRT 10.0.
    //!
    TRT_DEPRECATED void setLocation(TensorLocation location) noexcept
    {
        mImpl->setLocation(location);
    }

    //!
    //! \brief Query whether dynamic range is set.
    //!
    //! \return True if dynamic range is set, false otherwise.
    //!
    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
    //!
    TRT_DEPRECATED bool dynamicRangeIsSet() const noexcept
    {
        return mImpl->dynamicRangeIsSet();
    }

    //!
    //! \brief Undo effect of setDynamicRange.
    //!
    void resetDynamicRange() noexcept
    {
        mImpl->resetDynamicRange();
    }

    //!
    //! \brief Get minimum of dynamic range.
    //!
    //! \return Minimum of dynamic range, or quiet NaN if range was not set.
    //!
    float getDynamicRangeMin() const noexcept
    {
        return mImpl->getDynamicRangeMin();
    }

    //!
    //! \brief Get maximum of dynamic range.
    //!
    //! \return Maximum of dynamic range, or quiet NaN if range was not set.
    //!
    float getDynamicRangeMax() const noexcept
    {
        return mImpl->getDynamicRangeMax();
    }

    //!
    //! \brief Set allowed formats for an input or output tensor. By default all formats are allowed.
    //! Shape tensors (for which isShapeTensor() returns true) may only have row-major linear format.
    //!
    //! When running network on DLA and the build option kGPU_FALLBACK is not specified, if DLA format(kCHW4 with Int8,
    //! kCHW4 with FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format
    //! with line stride requirement. Input/output binding with these format should have correct layout during
    //! inference.
    //!
    //! Tensor formats are determined at build time by TensorRT for tensors not marked as input or output.
    //!
    //! \param formats A bitmask of TensorFormat values that are supported for this tensor.
    //!
    //! \see ITensor::getAllowedFormats()
    //!
    //! \see TensorFormats
    //!
    void setAllowedFormats(TensorFormats formats) noexcept
    {
        mImpl->setAllowedFormats(formats);
    }

    //!
    //! \brief Get a bitmask of TensorFormat values that the tensor supports.
    //! For a shape tensor, only row-major linear format is allowed.
    //!
    //! \return The value specified by setAllowedFormats or all possible formats.
    //!
    //! \see ITensor::setAllowedFormats()
    //!
    TensorFormats getAllowedFormats() const noexcept
    {
        return mImpl->getAllowedFormats();
    }

    //!
    //! \brief Whether the tensor is a shape tensor.
    //!
    //! A shape tensor is a tensor that is related to shape calculations.
    //! It must have type Int32, Int64, Bool, or Float, and its shape must be determinable at build time.
    //! Furthermore, it must be needed as a shape tensor, either marked as a network shape
    //! output via markOutputForShapes(), or as a layer input that is required to be a shape
    //! tensor, such as the second input to IShuffleLayer. Some layers are "polymorphic" in
    //! this respect. For example, the inputs to IElementWiseLayer must be shape tensors
    //! if the output is a shape tensor.
    //!
    //! The TensorRT Developer Guide gives the formal rules for what tensors are shape tensors.
    //!
    //! The result of isShapeTensor() is reliable only when network construction is complete.
    //! For example, if a partially built network sums two tensors T1 and T2 to create
    //! tensor T3, and none are yet needed as shape tensors, isShapeTensor() returns false
    //! for all three tensors. Setting the second input of IShuffleLayer to be T3 would
    //! cause all three tensors to be shape tensors, because IShuffleLayer requires that its
    //! second optional input be a shape tensor, and IElementWiseLayer is "polymorphic".
    //!
    //! It is possible for a tensor to be both a shape tensor and an execution tensor.
    //!
    //! \return True if tensor is a shape tensor, false otherwise.
    //!
    //! \see INetworkDefinition::markOutputForShapes()
    //!
    bool isShapeTensor() const noexcept
    {
        return mImpl->isShapeTensor();
    }

    //!
    //! \brief Whether the tensor is an execution tensor.
    //!
    //! Tensors are usually execution tensors. The exceptions are tensors used
    //! solely for shape calculations or whose contents are not needed to compute the outputs.
    //!
    //! The result of isExecutionTensor() is reliable only when network construction is complete.
    //! For example, if a partially built network has no path from a tensor to a network output,
    //! isExecutionTensor() returns false. Completing the path would cause it to become true.
    //!
    //! A tensor with isShapeTensor() == false and isExecutionTensor() == false
    //! can still show up as an input to the engine if its dimensions are required.
    //! In that case, only its dimensions need to be set at runtime and a nullptr
    //! can be passed instead of a pointer to its contents.
    //!
    bool isExecutionTensor() const noexcept
    {
        return mImpl->isExecutionTensor();
    }

    //!
    //! \brief Name a dimension of an input tensor.
    //!
    //! Associate a runtime dimension of an input tensor with a symbolic name.
    //! Dimensions with the same non-empty name must be equal at runtime.
    //! Knowing this equality for runtime dimensions may help the TensorRT optimizer.
    //! Both runtime and build-time dimensions can be named.
    //!
    //! For example, setDimensionName(0, "n") associates the symbolic name "n" with the leading dimension.
    //!
    //! This method copies the name string.
    //! If the function is called again, with the same index, it will overwrite the previous name.
    //! If nullptr is passed as name, it will clear the name of the dimension.
    //!
    //! \param index index of the dimension
    //! \param name of the dimension, as a pointer to a null-terminated character sequence.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getDimensionName()
    //!
    void setDimensionName(int32_t index, char const* name) noexcept
    {
        mImpl->setDimensionName(index, name);
    }

    //!
    //! \brief Get the name of an input dimension.
    //!
    //! \param index index of the dimension
    //!
    //! \return The name of the input dimension, or nullptr if the dimension has no name.
    //! The name is a pointer to a null-terminated character sequence.
    //!
    //! \see setDimensionName()
    //!
    char const* getDimensionName(int32_t index) const noexcept
    {
        return mImpl->getDimensionName(index);
    }

protected:
    //! Versioned-ABI implementation object; every public method above forwards to it.
    apiv::VTensor* mImpl;
    //! Non-public destructor: objects are owned and destroyed by the library, not by users.
    virtual ~ITensor() noexcept = default;
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class ILayer
|
||
|
|
//!
|
||
|
|
//! \brief Base class for all layer classes in a network definition.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class ILayer : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
//! \brief Return the type of a layer.
//!
//! \return The LayerType enumerator identifying the concrete layer class.
//!
//! \see LayerType
//!
LayerType getType() const noexcept
{
    return mLayer->getType();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the name of a layer.
//!
//! This method copies the name string.
//!
//! \param name The name, as a null-terminated C-style string.
//!
//! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see getName()
//!
void setName(char const* name) noexcept
{
    mLayer->setName(name);
}
|
||
|
|
|
||
|
|
//!
//! \brief Return the name of a layer.
//!
//! \return The name as a null-terminated C-style string.
//!
//! \see setName()
//!
char const* getName() const noexcept
{
    return mLayer->getName();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of inputs of a layer.
//!
//! \return The input count; valid indices for getInput() are [0, getNbInputs()).
//!
int32_t getNbInputs() const noexcept
{
    return mLayer->getNbInputs();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the layer input corresponding to the given index.
//!
//! \param index The index of the input tensor.
//!
//! \return The input tensor, or nullptr if the index is out of range or the tensor is optional
//! (\ref ISliceLayer).
//!
ITensor* getInput(int32_t index) const noexcept
{
    return mLayer->getInput(index);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of outputs of a layer.
//!
//! \return The output count; valid indices for getOutput() are [0, getNbOutputs()).
//!
int32_t getNbOutputs() const noexcept
{
    return mLayer->getNbOutputs();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the layer output corresponding to the given index.
//!
//! \param index The index of the output tensor.
//!
//! \return The indexed output tensor, or nullptr if the index is out of range or the tensor is optional.
//!
ITensor* getOutput(int32_t index) const noexcept
{
    return mLayer->getOutput(index);
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Replace an input of this layer with a specific tensor.
|
||
|
|
//!
|
||
|
|
//! \param index the index of the input to modify.
|
||
|
|
//! \param tensor the new input tensor
|
||
|
|
//!
|
||
|
|
//! Except for IFillLayer, ILoopOutputLayer, INMSLayer, IResizeLayer, IShuffleLayer, and ISliceLayer,
|
||
|
|
//! this method cannot change the number of inputs to a layer. The index argument must be
|
||
|
|
//! less than the value of getNbInputs().
|
||
|
|
//!
|
||
|
|
//! See comments for overloads of setInput() for layers with special behavior.
|
||
|
|
//!
|
||
|
|
void setInput(int32_t index, ITensor& tensor) noexcept
|
||
|
|
{
|
||
|
|
return mLayer->setInput(index, tensor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the preferred or required computational precision of this layer in a weakly-typed network.
//!
//! Setting the precision directs TensorRT to choose an implementation that runs at this computational precision.
//! TensorRT could still choose a non-conforming fastest implementation that ignores the requested precision.
//! To force choosing an implementation with the requested precision, set exactly one of the following flags,
//! which differ in what happens if no such implementation exists:
//!
//! * BuilderFlag::kOBEY_PRECISION_CONSTRAINTS - build fails with an error message.
//!
//! * BuilderFlag::kPREFER_PRECISION_CONSTRAINTS - TensorRT falls back to an
//!   implementation without the requested precision.
//!
//! If precision is not set, or falling back, TensorRT will select the layer computational precision
//! and layer input type based on global performance considerations and the flags specified to the builder.
//!
//! For a IIdentityLayer: If it casts to/from float/half/int8/uint8, the precision must be one of those types,
//! otherwise it must be either the input or output type.
//!
//! Strongly-typed networks reject calls to method setPrecision. In strongly-typed networks, the computation
//! precision is typically controlled by casting the input tensors to the desired type.
//!
//! \param dataType the computational precision.
//!
//! \see getPrecision() precisionIsSet() resetPrecision()
//!
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
//!
TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
{
    mLayer->setPrecision(dataType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the computational precision of this layer.
//!
//! \return The computational precision.
//!
//! \see setPrecision() precisionIsSet() resetPrecision()
//!
DataType getPrecision() const noexcept
{
    return mLayer->getPrecision();
}
|
||
|
|
|
||
|
|
//!
//! \brief Query whether the computational precision has been set for this layer.
//!
//! \return Whether the computational precision has been explicitly set via setPrecision().
//!
//! \see setPrecision() getPrecision() resetPrecision()
//!
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
//!
TRT_DEPRECATED bool precisionIsSet() const noexcept
{
    return mLayer->precisionIsSet();
}
|
||
|
|
|
||
|
|
//!
//! \brief Reset the computational precision for this layer, undoing the effect of setPrecision().
//!
//! \see setPrecision() getPrecision() precisionIsSet()
//!
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
//!
TRT_DEPRECATED void resetPrecision() noexcept
{
    mLayer->resetPrecision();
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the output type of this layer in a weakly-typed network.
|
||
|
|
//!
|
||
|
|
//! Setting the output type constrains TensorRT to choose implementations which generate output data with the
|
||
|
|
//! given type. If it is not set, TensorRT will select output type based on layer computational precision. TensorRT
|
||
|
|
//! could still choose non-conforming output type based on fastest implementation. To force choosing the requested
|
||
|
|
//! output type, set exactly one of the following flags, which differ in what happens if no such implementation
|
||
|
|
//! exists:
|
||
|
|
//!
|
||
|
|
//! * BuilderFlag::kOBEY_PRECISION_CONSTRAINTS - build fails with an error message.
|
||
|
|
//!
|
||
|
|
//! * BuilderFlag::kPREFER_PRECISION_CONSTRAINTS - TensorRT falls back to an
|
||
|
|
//! implementation with a non-conforming output type.
|
||
|
|
//!
|
||
|
|
//! In case layer precision is not specified, or falling back, the output type depends on the
|
||
|
|
//! chosen implementation, based on performance considerations and the flags specified to the builder.
|
||
|
|
//!
|
||
|
|
//! This method cannot be used to set the data type of the second output tensor of the TopK layer. The data type of
|
||
|
|
//! the second output tensor of the topK layer is always Int32. Also the output type of all layers that are shape
|
||
|
|
//! operations must be DataType::kINT32, and all attempts to set the output type to some other data type will be
|
||
|
|
//! ignored except for issuing an error message.
|
||
|
|
//!
|
||
|
|
//! Note that the layer output type is generally not identical to the data type of the output tensor, as TensorRT
|
||
|
|
//! may insert implicit reformatting operations to convert the former to the latter. Calling layer->setOutputType(i,
|
||
|
|
//! type) has no effect on the data type of the i-th output tensor of layer, and users need to call
|
||
|
|
//! layer->getOutput(i)->setType(type) to change the tensor data type. This is particularly relevant if the tensor
|
||
|
|
//! is marked as a network output, since only setType() [but not setOutputType()] will affect the data
|
||
|
|
//! representation in the corresponding output binding.
|
||
|
|
//!
|
||
|
|
//! Strongly-typed networks reject calls to method setOutputType. Instead, the output type can be set
|
||
|
|
//! only for layers that define method setToType(). Those layers are:
|
||
|
|
//!
|
||
|
|
//! * ICastLayer
|
||
|
|
//! * IDequantizeLayer
|
||
|
|
//! * IDynamicQuantizeLayer
|
||
|
|
//! * IFillLayer
|
||
|
|
//! * IQuantizeLayer
|
||
|
|
//!
|
||
|
|
//! \param index the index of the output to set
|
||
|
|
//! \param dataType the type of the output
|
||
|
|
//!
|
||
|
|
//! \see getOutputType() outputTypeIsSet() resetOutputType()
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
//!
|
||
|
|
    TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
    {
        // Forwarded to the versioned implementation object (PIMPL).
        mLayer->setOutputType(index, dataType);
    }
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief get the output type of this layer
|
||
|
|
//!
|
||
|
|
//! \param index the index of the output
|
||
|
|
//!
|
||
|
|
//! \return the output precision. If no precision has been set, DataType::kFLOAT will be returned,
|
||
|
|
//! unless the output type is inherently DataType::kINT32.
|
||
|
|
//!
|
||
|
|
//! \see setOutputType() outputTypeIsSet() resetOutputType()
|
||
|
|
//!
|
||
|
|
    DataType getOutputType(int32_t index) const noexcept
    {
        // Forwarded to the versioned implementation object (PIMPL).
        return mLayer->getOutputType(index);
    }
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief whether the output type has been set for this layer
|
||
|
|
//!
|
||
|
|
//! \param index the index of the output
|
||
|
|
//!
|
||
|
|
//! \return whether the output type has been explicitly set
|
||
|
|
//!
|
||
|
|
//! \see setOutputType() getOutputType() resetOutputType()
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
//!
|
||
|
|
    TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
    {
        // Forwarded to the versioned implementation object (PIMPL).
        return mLayer->outputTypeIsSet(index);
    }
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief reset the output type for this layer
|
||
|
|
//!
|
||
|
|
//! \param index the index of the output
|
||
|
|
//!
|
||
|
|
//! \see setOutputType() getOutputType() outputTypeIsSet()
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
|
||
|
|
{
|
||
|
|
return mLayer->resetOutputType(index);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the metadata for this layer.
|
||
|
|
//!
|
||
|
|
//! The metadata is emitted in the JSON returned by IEngineInspector with
|
||
|
|
//! ProfilingVerbosity set to kDETAILED.
|
||
|
|
//!
|
||
|
|
//! \param metadata The per-layer metadata.
|
||
|
|
//!
|
||
|
|
//! \warning The string name must be null-terminated and be at most 4096 bytes including the terminator.
|
||
|
|
//!
|
||
|
|
//! \see getMetadata()
|
||
|
|
//! \see getLayerInformation()
|
||
|
|
//!
|
||
|
|
    void setMetadata(char const* metadata) noexcept
    {
        // Forwarded to the versioned implementation object (PIMPL).
        mLayer->setMetadata(metadata);
    }
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the metadata of the layer.
|
||
|
|
//!
|
||
|
|
//! \return The metadata as a null-terminated C-style string. If setMetadata() has not been called,
|
||
|
|
//! an empty string "" will be returned as a default value.
|
||
|
|
//!
|
||
|
|
//! \see setMetadata()
|
||
|
|
//!
|
||
|
|
    char const* getMetadata() const noexcept
    {
        // Forwarded to the versioned implementation object (PIMPL).
        return mLayer->getMetadata();
    }
|
||
|
|
|
||
|
|
protected:
    // Protected destructor: applications cannot destroy an ILayer through this interface.
    virtual ~ILayer() noexcept = default;
    // Versioned implementation (PIMPL) to which all member functions forward.
    apiv::VLayer* mLayer;
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum PaddingMode
|
||
|
|
//!
|
||
|
|
//! \brief Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,
|
||
|
|
//! padding mode takes precedence if setPaddingMode() and setPrePadding() are also used.
|
||
|
|
//!
|
||
|
|
//! There are two padding styles, EXPLICIT and SAME with each style having two variants.
|
||
|
|
//! The EXPLICIT style determines if the final sampling location is used or not.
//! The SAME style determines if the asymmetry in the padding is on the pre or post padding.
|
||
|
|
//!
|
||
|
|
//! \code
|
||
|
|
//! Shorthand:
|
||
|
|
//! I = dimensions of input image.
|
||
|
|
//! B = prePadding, before the image data.
|
||
|
|
//! A = postPadding, after the image data.
|
||
|
|
//! P = delta between input and output
|
||
|
|
//! S = stride
|
||
|
|
//! F = filter
|
||
|
|
//! O = output
|
||
|
|
//! D = dilation
|
||
|
|
//! M = I + B + A ; The image data plus any padding
|
||
|
|
//! DK = 1 + D * (F - 1)
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
//! Formulas for Convolution:
|
||
|
|
//! - EXPLICIT_ROUND_DOWN:
|
||
|
|
//! \code
|
||
|
|
//! O = floor((M - DK) / S) + 1
|
||
|
|
//! \endcode
|
||
|
|
//! - EXPLICIT_ROUND_UP:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil((M - DK) / S) + 1
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_UPPER:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil(I / S)
|
||
|
|
//! P = floor((I - 1) / S) * S + DK - I;
|
||
|
|
//! B = floor(P / 2)
|
||
|
|
//! A = P - B
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_LOWER:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil(I / S)
|
||
|
|
//! P = floor((I - 1) / S) * S + DK - I;
|
||
|
|
//! A = floor(P / 2)
|
||
|
|
//! B = P - A
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
//! Formulas for Deconvolution:
|
||
|
|
//! - EXPLICIT_ROUND_DOWN:
|
||
|
|
//! - EXPLICIT_ROUND_UP:
|
||
|
|
//! \code
|
||
|
|
//! O = (I - 1) * S + DK - (B + A)
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_UPPER:
|
||
|
|
//! \code
|
||
|
|
//! O = min(I * S, (I - 1) * S + DK)
|
||
|
|
//! P = max(DK - S, 0)
|
||
|
|
//! B = floor(P / 2)
|
||
|
|
//! A = P - B
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_LOWER:
|
||
|
|
//! \code
|
||
|
|
//! O = min(I * S, (I - 1) * S + DK)
|
||
|
|
//! P = max(DK - S, 0)
|
||
|
|
//! A = floor(P / 2)
|
||
|
|
//! B = P - A
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
//! Formulas for Pooling:
|
||
|
|
//! - EXPLICIT_ROUND_DOWN:
|
||
|
|
//! \code
|
||
|
|
//! O = floor((M - F) / S) + 1
|
||
|
|
//! \endcode
|
||
|
|
//! - EXPLICIT_ROUND_UP:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil((M - F) / S) + 1
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_UPPER:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil(I / S)
|
||
|
|
//! P = floor((I - 1) / S) * S + F - I;
|
||
|
|
//! B = floor(P / 2)
|
||
|
|
//! A = P - B
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_LOWER:
|
||
|
|
//! \code
|
||
|
|
//! O = ceil(I / S)
|
||
|
|
//! P = floor((I - 1) / S) * S + F - I;
|
||
|
|
//! A = floor(P / 2)
|
||
|
|
//! B = P - A
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
//! Pooling Example 1:
|
||
|
|
//! \code
|
||
|
|
//! Given I = {6, 6}, B = {3, 3}, A = {2, 2}, S = {2, 2}, F = {3, 3}. What is O?
|
||
|
|
//! (B, A can be calculated for SAME_UPPER and SAME_LOWER mode)
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
//! - EXPLICIT_ROUND_DOWN:
|
||
|
|
//! \code
|
||
|
|
//! Computation:
|
||
|
|
//! M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
|
||
|
|
//! O ==> floor((M - F) / S) + 1
|
||
|
|
//! ==> floor(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
|
||
|
|
//! ==> floor({8, 8} / {2, 2}) + {1, 1}
|
||
|
|
//! ==> {5, 5}
|
||
|
|
//! \endcode
|
||
|
|
//! - EXPLICIT_ROUND_UP:
|
||
|
|
//! \code
|
||
|
|
//! Computation:
|
||
|
|
//! M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
|
||
|
|
//! O ==> ceil((M - F) / S) + 1
|
||
|
|
//! ==> ceil(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
|
||
|
|
//! ==> ceil({8, 8} / {2, 2}) + {1, 1}
|
||
|
|
//! ==> {5, 5}
|
||
|
|
//! \endcode
|
||
|
|
//! The sample points are {0, 2, 4, 6, 8} in each dimension.
|
||
|
|
//!
|
||
|
|
//! - SAME_UPPER:
|
||
|
|
//! \code
|
||
|
|
//! Computation:
|
||
|
|
//! I = {6, 6}
|
||
|
|
//! S = {2, 2}
|
||
|
|
//! O = ceil(I / S) = {3, 3}
|
||
|
|
//! P = floor((I - 1) / S) * S + F - I
|
||
|
|
//! ==> floor(({6, 6} - {1, 1}) / {2, 2}) * {2, 2} + {3, 3} - {6, 6}
|
||
|
|
//! ==> {4, 4} + {3, 3} - {6, 6}
|
||
|
|
//! ==> {1, 1}
|
||
|
|
//! B = floor({1, 1} / {2, 2})
|
||
|
|
//! ==> {0, 0}
|
||
|
|
//! A = {1, 1} - {0, 0}
|
||
|
|
//! ==> {1, 1}
|
||
|
|
//! \endcode
|
||
|
|
//! - SAME_LOWER:
|
||
|
|
//! \code
|
||
|
|
//! Computation:
|
||
|
|
//! I = {6, 6}
|
||
|
|
//! S = {2, 2}
|
||
|
|
//! O = ceil(I / S) = {3, 3}
|
||
|
|
//! P = floor((I - 1) / S) * S + F - I
|
||
|
|
//! ==> {1, 1}
|
||
|
|
//! A = floor({1, 1} / {2, 2})
|
||
|
|
//! ==> {0, 0}
|
||
|
|
//! B = {1, 1} - {0, 0}
|
||
|
|
//! ==> {1, 1}
|
||
|
|
//! \endcode
|
||
|
|
//! The sample pointers are {0, 2, 4} in each dimension.
|
||
|
|
//! SAME_UPPER has {O0, O1, O2, pad} in output in each dimension.
//! SAME_LOWER has {pad, O0, O1, O2} in output in each dimension.
|
||
|
|
//!
|
||
|
|
//! Pooling Example 2:
|
||
|
|
//! \code
|
||
|
|
//! Given I = {6, 6}, B = {3, 3}, A = {3, 3}, S = {2, 2}, F = {3, 3}. What is O?
|
||
|
|
//! \endcode
|
||
|
|
//!
|
||
|
|
enum class PaddingMode : int32_t
{
    kEXPLICIT_ROUND_DOWN = 0, //!< Use explicit padding, rounding output size down.
    kEXPLICIT_ROUND_UP = 1,   //!< Use explicit padding, rounding output size up.
    kSAME_UPPER = 2,          //!< Use SAME padding, with prePadding <= postPadding (extra padding goes after the data).
    kSAME_LOWER = 3,          //!< Use SAME padding, with prePadding >= postPadding (extra padding goes before the data).
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in PaddingMode enum.
//!
//! \see PaddingMode
//!
template <>
struct EnumMaxImpl<PaddingMode>
{
    // One past the largest enumerator value (kSAME_LOWER = 3), i.e. the number of enumerators.
    static constexpr int32_t kVALUE = 4;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IConvolutionLayer
|
||
|
|
//!
|
||
|
|
//! \brief A convolution layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! This layer performs a correlation operation between 3 or 4 dimensional filter with a 4 or 5 dimensional tensor to
|
||
|
|
//! produce another 4 or 5 dimensional tensor.
|
||
|
|
//!
|
||
|
|
//! An optional bias argument is supported, which adds a per-channel constant to each value in the output.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IConvolutionLayer : public ILayer
{
public:
    //!
    //! \brief Set the number of output maps for the convolution.
    //!
    //! If executing this layer on DLA, the number of output maps must be in the range [1,8192].
    //!
    //! \see getNbOutputMaps()
    //!
    void setNbOutputMaps(int64_t nbOutputMaps) noexcept
    {
        mImpl->setNbOutputMaps(nbOutputMaps);
    }

    //!
    //! \brief Get the number of output maps for the convolution.
    //!
    //! \see setNbOutputMaps()
    //!
    int64_t getNbOutputMaps() const noexcept
    {
        return mImpl->getNbOutputMaps();
    }

    //!
    //! \brief Set the number of groups for a convolution.
    //!
    //! The input tensor channels are divided into \p nbGroups groups, and a convolution is executed for each group,
    //! using a filter per group. The results of the group convolutions are concatenated to form the output.
    //!
    //! \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group
    //! count) must be a multiple of 4 for both input and output.
    //!
    //! Default: 1
    //!
    //! If executing this layer on DLA, the max number of groups is 8192.
    //!
    //! \see getNbGroups()
    //!
    void setNbGroups(int64_t nbGroups) noexcept
    {
        mImpl->setNbGroups(nbGroups);
    }

    //!
    //! \brief Get the number of groups of the convolution.
    //!
    //! \see setNbGroups()
    //!
    int64_t getNbGroups() const noexcept
    {
        return mImpl->getNbGroups();
    }

    //!
    //! \brief Set the kernel weights for the convolution.
    //!
    //! The weights are specified as a contiguous array in \p GKCRS order, where \p G is the number of groups, \p K
    //! the number of output feature maps, \p C the number of input channels, and \p R and \p S are the height and
    //! width of the filter.
    //!
    //! \see getKernelWeights()
    //!
    void setKernelWeights(Weights weights) noexcept
    {
        mImpl->setKernelWeights(weights);
    }

    //!
    //! \brief Get the kernel weights of the convolution.
    //!
    //! \see setKernelWeights()
    //!
    Weights getKernelWeights() const noexcept
    {
        return mImpl->getKernelWeights();
    }

    //!
    //! \brief Set the bias weights for the convolution.
    //!
    //! Bias is optional. To omit bias, set the count value of the weights structure to zero.
    //!
    //! The bias is applied per-channel, so the number of weights (if non-zero) must be equal to the number of output
    //! feature maps.
    //!
    //! \see getBiasWeights()
    //!
    void setBiasWeights(Weights weights) noexcept
    {
        mImpl->setBiasWeights(weights);
    }

    //!
    //! \brief Get the bias weights for the convolution.
    //!
    //! \see setBiasWeights()
    //!
    Weights getBiasWeights() const noexcept
    {
        return mImpl->getBiasWeights();
    }

    //!
    //! \brief Set the multi-dimension pre-padding of the convolution.
    //!
    //! The start of the input will be zero-padded by this number of elements in each dimension.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! If executing this layer on DLA, only 2D padding is supported; both height and width of padding must be in the
    //! range [0,31], and the padding must be less than the kernel size.
    //!
    //! \see getPrePadding()
    //!
    void setPrePadding(Dims const& padding) noexcept
    {
        mImpl->setPrePadding(padding);
    }

    //!
    //! \brief Get the pre-padding.
    //!
    //! \see setPrePadding()
    //!
    Dims getPrePadding() const noexcept
    {
        return mImpl->getPrePadding();
    }

    //!
    //! \brief Set the multi-dimension post-padding of the convolution.
    //!
    //! The end of the input will be zero-padded by this number of elements in each dimension.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! If executing this layer on DLA, only 2D padding is supported; both height and width of padding must be in the
    //! range [0,31], and the padding must be less than the kernel size.
    //!
    //! \see getPostPadding()
    //!
    void setPostPadding(Dims const& padding) noexcept
    {
        mImpl->setPostPadding(padding);
    }

    //!
    //! \brief Get the post-padding.
    //!
    //! \see setPostPadding()
    //!
    Dims getPostPadding() const noexcept
    {
        return mImpl->getPostPadding();
    }

    //!
    //! \brief Set the padding mode.
    //!
    //! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
    //!
    //! Default: kEXPLICIT_ROUND_DOWN
    //!
    //! \see getPaddingMode()
    //!
    void setPaddingMode(PaddingMode paddingMode) noexcept
    {
        mImpl->setPaddingMode(paddingMode);
    }

    //!
    //! \brief Get the padding mode.
    //!
    //! Default: kEXPLICIT_ROUND_DOWN
    //!
    //! \see setPaddingMode()
    //!
    PaddingMode getPaddingMode() const noexcept
    {
        return mImpl->getPaddingMode();
    }

    //!
    //! \brief Set the multi-dimension kernel size of the convolution.
    //!
    //! If executing this layer on DLA, only a 2D kernel size is supported; both height and width of the kernel size
    //! must be in the range [1,32].
    //!
    //! \see getKernelSizeNd()
    //!
    void setKernelSizeNd(Dims const& kernelSize) noexcept
    {
        mImpl->setKernelSizeNd(kernelSize);
    }

    //!
    //! \brief Get the multi-dimension kernel size of the convolution.
    //!
    //! \see setKernelSizeNd()
    //!
    Dims getKernelSizeNd() const noexcept
    {
        return mImpl->getKernelSizeNd();
    }

    //!
    //! \brief Set the multi-dimension stride of the convolution.
    //!
    //! Default: (1, 1, ..., 1)
    //!
    //! If executing this layer on DLA, only a 2D stride is supported; both height and width of the stride must be in
    //! the range [1,8].
    //!
    //! \see getStrideNd()
    //!
    void setStrideNd(Dims const& stride) noexcept
    {
        mImpl->setStrideNd(stride);
    }

    //!
    //! \brief Get the multi-dimension stride of the convolution.
    //!
    //! \see setStrideNd()
    //!
    Dims getStrideNd() const noexcept
    {
        return mImpl->getStrideNd();
    }

    //!
    //! \brief Set the multi-dimension padding of the convolution.
    //!
    //! The input will be zero-padded by this number of elements in each dimension.
    //! Padding is symmetric.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! If executing this layer on DLA, only 2D padding is supported; both height and width of padding must be in the
    //! range [0,31], and the padding must be less than the kernel size.
    //!
    //! \see getPaddingNd() setPadding() getPadding()
    //!
    void setPaddingNd(Dims const& padding) noexcept
    {
        mImpl->setPaddingNd(padding);
    }

    //!
    //! \brief Get the multi-dimension padding of the convolution.
    //!
    //! If the padding is asymmetric, the pre-padding is returned.
    //!
    //! \see setPaddingNd()
    //!
    Dims getPaddingNd() const noexcept
    {
        return mImpl->getPaddingNd();
    }

    //!
    //! \brief Set the multi-dimension dilation of the convolution.
    //!
    //! Default: (1, 1, ..., 1)
    //!
    //! If executing this layer on DLA, only 2D dilation is supported; both height and width must be in the
    //! range [1,32].
    //!
    //! \see getDilationNd()
    //!
    void setDilationNd(Dims const& dilation) noexcept
    {
        mImpl->setDilationNd(dilation);
    }

    //!
    //! \brief Get the multi-dimension dilation of the convolution.
    //!
    //! \see setDilationNd()
    //!
    Dims getDilationNd() const noexcept
    {
        return mImpl->getDilationNd();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! The indices are as follows:
    //!
    //! Input 0 is the input activation tensor.
    //! Input 1 is the kernel tensor. If used, the kernel weights parameter must be set to empty weights.
    //! Input 2 is the bias tensor. If used, the bias parameter must be set to empty weights.
    //!
    //! \see getKernelWeights(), setKernelWeights(), getBiasWeights(), setBiasWeights()
    //!
    using ILayer::setInput;

protected:
    // Protected destructor: applications cannot destroy an IConvolutionLayer through this interface.
    virtual ~IConvolutionLayer() noexcept = default;
    // Versioned implementation (PIMPL) to which all member functions forward.
    apiv::VConvolutionLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IActivationLayer
|
||
|
|
//!
|
||
|
|
//! \brief An Activation layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! This layer applies a per-element activation function to its input.
|
||
|
|
//!
|
||
|
|
//! The output has the same shape as the input.
|
||
|
|
//!
|
||
|
|
//! The input is a shape tensor if the output is a shape tensor.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IActivationLayer : public ILayer
{
public:
    //!
    //! \brief Set the type of activation to be performed.
    //!
    //! On the DLA, the valid activation types are kRELU, kSIGMOID, kTANH, and kCLIP.
    //!
    //! \see getActivationType(), ActivationType
    //!
    void setActivationType(ActivationType type) noexcept
    {
        mImpl->setActivationType(type);
    }

    //!
    //! \brief Get the type of activation to be performed.
    //!
    //! \see setActivationType(), ActivationType
    //!
    ActivationType getActivationType() const noexcept
    {
        return mImpl->getActivationType();
    }

    //!
    //! \brief Set the alpha parameter (must be finite).
    //!
    //! This parameter is used by the following activations:
    //! LeakyRelu, Elu, Selu, Softplus, Clip, HardSigmoid, ScaledTanh,
    //! ThresholdedRelu.
    //!
    //! It is ignored by the other activations.
    //!
    //! \see getAlpha(), setBeta()
    //!
    void setAlpha(float alpha) noexcept
    {
        mImpl->setAlpha(alpha);
    }

    //!
    //! \brief Set the beta parameter (must be finite).
    //!
    //! This parameter is used by the following activations:
    //! Selu, Softplus, Clip, HardSigmoid, ScaledTanh.
    //!
    //! It is ignored by the other activations.
    //!
    //! \see getBeta(), setAlpha()
    //!
    void setBeta(float beta) noexcept
    {
        mImpl->setBeta(beta);
    }

    //!
    //! \brief Get the alpha parameter.
    //!
    //! \see setAlpha(), getBeta()
    //!
    float getAlpha() const noexcept
    {
        return mImpl->getAlpha();
    }

    //!
    //! \brief Get the beta parameter.
    //!
    //! \see setBeta(), getAlpha()
    //!
    float getBeta() const noexcept
    {
        return mImpl->getBeta();
    }

protected:
    // Protected destructor: applications cannot destroy an IActivationLayer through this interface.
    virtual ~IActivationLayer() noexcept = default;
    // Versioned implementation (PIMPL) to which all member functions forward.
    apiv::VActivationLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum PoolingType
|
||
|
|
//!
|
||
|
|
//! \brief The type of pooling to perform in a pooling layer.
|
||
|
|
//!
|
||
|
|
enum class PoolingType : int32_t
{
    kMAX = 0,              //!< Maximum over elements
    kAVERAGE = 1,          //!< Average over elements. If the tensor is padded, the count includes the padding
    kMAX_AVERAGE_BLEND = 2 //!< Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in PoolingType enum.
//!
//! \see PoolingType
//!
template <>
struct EnumMaxImpl<PoolingType>
{
    // One past the largest enumerator value (kMAX_AVERAGE_BLEND = 2), i.e. the number of enumerators.
    static constexpr int32_t kVALUE = 3;
};
} // namespace impl
|
||
|
|
|
||
|
|
//! \class IPoolingLayer
|
||
|
|
//!
|
||
|
|
//! \brief A Pooling layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! The layer applies a reduction operation within a window over the input.
|
||
|
|
//!
|
||
|
|
//! \warning When running pooling layer with DeviceType::kDLA in Int8 mode, the dynamic ranges
|
||
|
|
//! for input and output tensors must be equal.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IPoolingLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the type of activation to be performed.
|
||
|
|
//!
|
||
|
|
//! DLA only supports kMAX and kAVERAGE pooling types.
|
||
|
|
//!
|
||
|
|
//! \see getPoolingType(), PoolingType
|
||
|
|
//!
|
||
|
|
void setPoolingType(PoolingType type) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPoolingType(type);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the type of activation to be performed.
|
||
|
|
//!
|
||
|
|
//! \see setPoolingType(), PoolingType
|
||
|
|
//!
|
||
|
|
PoolingType getPoolingType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPoolingType();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the blending factor for the max_average_blend mode:
|
||
|
|
//! max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool
|
||
|
|
//! blendFactor is a user value in [0,1] with the default value of 0.0
|
||
|
|
//! This value only applies for the kMAX_AVERAGE_BLEND mode.
|
||
|
|
//!
|
||
|
|
//! Since DLA does not support kMAX_AVERAGE_BLEND, blendFactor is ignored on the DLA.
|
||
|
|
//!
|
||
|
|
//! \see getBlendFactor()
|
||
|
|
//!
|
||
|
|
void setBlendFactor(float blendFactor) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setBlendFactor(blendFactor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the blending factor for the max_average_blend mode:
|
||
|
|
//! max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool
|
||
|
|
//! blendFactor is a user value in [0,1] with the default value of 0.0
|
||
|
|
//! In modes other than kMAX_AVERAGE_BLEND, blendFactor is ignored.
|
||
|
|
//!
|
||
|
|
//! \see setBlendFactor()
|
||
|
|
//!
|
||
|
|
float getBlendFactor() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBlendFactor();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set whether average pooling uses as a denominator the overlap area between the window
|
||
|
|
//! and the unpadded input.
|
||
|
|
//! If this is not set, the denominator is the overlap between the pooling window and the padded input.
|
||
|
|
//!
|
||
|
|
//! Default: true
|
||
|
|
//!
|
||
|
|
//! \see getAverageCountExcludesPadding()
|
||
|
|
//!
|
||
|
|
void setAverageCountExcludesPadding(bool exclusive) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAverageCountExcludesPadding(exclusive);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get whether average pooling uses as a denominator the overlap area between the window
|
||
|
|
//! and the unpadded input.
|
||
|
|
//!
|
||
|
|
//! \see setAverageCountExcludesPadding()
|
||
|
|
//!
|
||
|
|
bool getAverageCountExcludesPadding() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAverageCountExcludesPadding();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the multi-dimension pre-padding for pooling.
|
||
|
|
//!
|
||
|
|
//! The start of the input will be padded by this number of elements in each dimension.
|
||
|
|
//! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
|
||
|
|
//!
|
||
|
|
//! Default: (0, 0, ..., 0)
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
|
||
|
|
//! [0,7].
|
||
|
|
//!
|
||
|
|
//! \see getPrePadding()
|
||
|
|
//!
|
||
|
|
void setPrePadding(Dims const& padding) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPrePadding(padding);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the pre-padding.
|
||
|
|
//!
|
||
|
|
//! \see setPrePadding()
|
||
|
|
//!
|
||
|
|
Dims getPrePadding() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPrePadding();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the multi-dimension post-padding for pooling.
|
||
|
|
//!
|
||
|
|
//! The end of the input will be padded by this number of elements in each dimension.
|
||
|
|
//! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
|
||
|
|
//!
|
||
|
|
//! Default: (0, 0, ..., 0)
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
|
||
|
|
//! [0,7].
|
||
|
|
//!
|
||
|
|
//! \see getPostPadding()
|
||
|
|
//!
|
||
|
|
void setPostPadding(Dims const& padding) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPostPadding(padding);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the padding.
|
||
|
|
//!
|
||
|
|
//! \see setPostPadding()
|
||
|
|
//!
|
||
|
|
Dims getPostPadding() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPostPadding();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the padding mode.
|
||
|
|
//!
|
||
|
|
//! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
|
||
|
|
//!
|
||
|
|
//! Default: kEXPLICIT_ROUND_DOWN
|
||
|
|
//!
|
||
|
|
//! \see getPaddingMode()
|
||
|
|
void setPaddingMode(PaddingMode paddingMode) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPaddingMode(paddingMode);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the padding mode.
|
||
|
|
//!
|
||
|
|
//! Default: kEXPLICIT_ROUND_DOWN
|
||
|
|
//!
|
||
|
|
//! \see setPaddingMode()
|
||
|
|
PaddingMode getPaddingMode() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPaddingMode();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the multi-dimension window size for pooling.
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, only support 2D window size, both height and width of window size must be in the
|
||
|
|
//! range [1,8].
|
||
|
|
//!
|
||
|
|
//! \see getWindowSizeNd() setWindowSize() getWindowSize()
|
||
|
|
//!
|
||
|
|
void setWindowSizeNd(Dims const& windowSize) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setWindowSizeNd(windowSize);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the multi-dimension window size for pooling.
|
||
|
|
//!
|
||
|
|
//! \see setWindowSizeNd()
|
||
|
|
//!
|
||
|
|
Dims getWindowSizeNd() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getWindowSizeNd();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the multi-dimension stride for pooling.
|
||
|
|
//!
|
||
|
|
//! Default: (1, 1, ..., 1)
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range
|
||
|
|
//! [1,16].
|
||
|
|
//!
|
||
|
|
//! \see getStrideNd()
|
||
|
|
//!
|
||
|
|
void setStrideNd(Dims const& stride) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setStrideNd(stride);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the multi-dimension stride for pooling.
|
||
|
|
//!
|
||
|
|
//! \see setStrideNd()
|
||
|
|
//!
|
||
|
|
Dims getStrideNd() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getStrideNd();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the multi-dimension padding for pooling.
//!
//! \param padding The padding; one element per pooled dimension.
//!
//! The input will be padded by this number of elements in each dimension.
//! Padding is symmetric.
//! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
//!
//! Default: (0, 0, ..., 0)
//!
//! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
//! [0,7].
//!
//! \see getPaddingNd() setPadding() getPadding()
//!
void setPaddingNd(Dims const& padding) noexcept
{
    // Forwarded directly to the implementation object.
    mImpl->setPaddingNd(padding);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the multi-dimension padding for pooling.
//!
//! If the padding is asymmetric, the pre-padding is returned.
//!
//! \return The padding previously set with setPaddingNd(), or the default of all zeros.
//!
//! \see setPaddingNd()
//!
Dims getPaddingNd() const noexcept
{
    return mImpl->getPaddingNd();
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IPoolingLayer() noexcept = default;
|
||
|
|
apiv::VPoolingLayer* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \class ILRNLayer
//!
//! \brief A LRN (Local Response Normalization) layer in a network definition.
//!
//! The output size is the same as the input size.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ILRNLayer : public ILayer
{
public:
    //!
    //! \brief Set the LRN window size.
    //!
    //! The window size must be odd and in the range of [1, 15].
    //!
    //! If executing this layer on the DLA, only values in the set, [3, 5, 7, 9], are valid.
    //!
    //! \see getWindowSize()
    //!
    void setWindowSize(int64_t windowSize) noexcept
    {
        mImpl->setWindowSize(windowSize);
    }

    //!
    //! \brief Get the LRN window size.
    //!
    //! \see setWindowSize()
    //!
    int64_t getWindowSize() const noexcept
    {
        return mImpl->getWindowSize();
    }

    //!
    //! \brief Set the LRN alpha value.
    //!
    //! The valid range is [-1e20, 1e20].
    //!
    //! \see getAlpha()
    //!
    void setAlpha(float alpha) noexcept
    {
        mImpl->setAlpha(alpha);
    }

    //!
    //! \brief Get the LRN alpha value.
    //!
    //! \see setAlpha()
    //!
    float getAlpha() const noexcept
    {
        return mImpl->getAlpha();
    }

    //!
    //! \brief Set the LRN beta value.
    //!
    //! The valid range is [0.01, 1e5f].
    //!
    //! \see getBeta()
    //!
    void setBeta(float beta) noexcept
    {
        mImpl->setBeta(beta);
    }

    //!
    //! \brief Get the LRN beta value.
    //!
    //! \see setBeta()
    //!
    float getBeta() const noexcept
    {
        return mImpl->getBeta();
    }

    //!
    //! \brief Set the LRN K value.
    //!
    //! The valid range is [1e-5, 1e10].
    //!
    //! \see getK()
    //!
    void setK(float k) noexcept
    {
        mImpl->setK(k);
    }

    //!
    //! \brief Get the LRN K value.
    //!
    //! \see setK()
    //!
    float getK() const noexcept
    {
        return mImpl->getK();
    }

protected:
    virtual ~ILRNLayer() noexcept = default;
    apiv::VLRNLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \brief Controls how shift, scale and power are applied in a Scale layer.
//!
//! \see IScaleLayer
//!
enum class ScaleMode : int32_t
{
    kUNIFORM = 0,    //!< Identical coefficients across all elements of the tensor.
    kCHANNEL = 1,    //!< Per-channel coefficients.
    kELEMENTWISE = 2 //!< Elementwise coefficients.
};

//!
//! Maximum number of elements in ScaleMode enum.
//!
//! \see ScaleMode
//!
template <>
constexpr inline int32_t EnumMax<ScaleMode>() noexcept
{
    return 3;
}
|
||
|
|
|
||
|
|
//!
//! \class IScaleLayer
//!
//! \brief A Scale layer in a network definition.
//!
//! This layer applies a per-element computation to its input:
//!
//! \p output = (\p input * \p scale + \p shift)^ \p power
//!
//! The coefficients can be applied on a per-tensor, per-channel, or per-element basis.
//!
//! \note If the number of weights is 0, then a default value is used for shift, power, and scale.
//! The default shift is 0, the default power is 1, and the default scale is 1.
//!
//! The output size is the same as the input size.
//!
//! \note The input tensor is required to have at least 4 dimensions.
//!
//! A scale layer may be used as an INT8 quantization node in a graph, if the output is constrained to INT8 and
//! the input to FP32. Quantization rounds ties to even, and clamps to [-128, 127].
//!
//! \see ScaleMode
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IScaleLayer : public ILayer
{
public:
    //!
    //! \brief Set the scale mode.
    //!
    //! \see getMode()
    //!
    void setMode(ScaleMode mode) noexcept
    {
        mImpl->setMode(mode);
    }

    //!
    //! \brief Get the scale mode.
    //!
    //! \see setMode()
    //!
    ScaleMode getMode() const noexcept
    {
        return mImpl->getMode();
    }

    //!
    //! \brief Set the shift value.
    //!
    //! \see getShift()
    //!
    void setShift(Weights shift) noexcept
    {
        mImpl->setShift(shift);
    }

    //!
    //! \brief Get the shift value.
    //!
    //! \see setShift()
    //!
    Weights getShift() const noexcept
    {
        return mImpl->getShift();
    }

    //!
    //! \brief Set the scale value.
    //!
    //! \see getScale()
    //!
    void setScale(Weights scale) noexcept
    {
        mImpl->setScale(scale);
    }

    //!
    //! \brief Get the scale value.
    //!
    //! \see setScale()
    //!
    Weights getScale() const noexcept
    {
        return mImpl->getScale();
    }

    //!
    //! \brief Set the power value.
    //!
    //! \see getPower()
    //!
    void setPower(Weights power) noexcept
    {
        mImpl->setPower(power);
    }

    //!
    //! \brief Get the power value.
    //!
    //! \see setPower()
    //!
    Weights getPower() const noexcept
    {
        return mImpl->getPower();
    }

    //!
    //! \brief Get the channel axis.
    //!
    //! \return channelAxis parameter passed to addScaleNd() or set by setChannelAxis()
    //!
    //! The value is the index of the channel axis in the input tensor's dimensions.
    //! Scaling happens along the channel axis when ScaleMode::kCHANNEL is enabled.
    //!
    //! \see addScaleNd()
    //!
    int32_t getChannelAxis() const noexcept
    {
        return mImpl->getChannelAxis();
    }

    //!
    //! \brief Set the channel axis.
    //!
    //! The value is the index of the channel axis in the input tensor's dimensions.
    //!
    //! For ScaleMode::kCHANNEL, there can be distinct scale, shift, and power weights for each channel coordinate.
    //! For ScaleMode::kELEMENTWISE, there can be distinct scale, shift, and power weights for each combination of
    //! coordinates from the channel axis and axes after it.
    //!
    //! For example, suppose the input tensor has dimensions [10,20,30,40] and the channel axis is 1.
    //! Let [n,c,h,w] denote an input coordinate.
    //! For ScaleMode::kCHANNEL, the scale, shift, and power weights are indexed by c.
    //! For ScaleMode::kELEMENTWISE, the scale, shift, and power weights are indexed by [c,h,w].
    //!
    //! \see addScaleNd()
    //!
    void setChannelAxis(int32_t channelAxis) noexcept
    {
        mImpl->setChannelAxis(channelAxis);
    }

protected:
    virtual ~IScaleLayer() noexcept = default;
    apiv::VScaleLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class ISoftMaxLayer
//!
//! \brief A Softmax layer in a network definition.
//!
//! This layer applies a per-channel softmax to its input.
//!
//! The output size is the same as the input size.
//!
//! The following constraints must be satisfied to execute this layer on DLA:
//! * Axis must be one of the channel or spatial dimensions.
//! * There are two classes of supported input sizes:
//!   1. Non-axis, non-batch dimensions are all 1 and the axis dimension is at most 8192.
//!      This is the recommended case for using softmax since it is the most accurate.
//!   2. At least one non-axis, non-batch dimension greater than 1 and the axis dimension is at most 1024.
//!      Note that in this case, there may be some approximation error as the axis dimension size approaches
//!      the upper bound. See the TensorRT Developer Guide for more details on the approximation error.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ISoftMaxLayer : public ILayer
{
public:
    //!
    //! \brief Set the axis along which softmax is computed. Currently, only one axis can be set.
    //!
    //! The axis is specified by setting the bit corresponding to the axis to 1.
    //! For example, consider an NCHW tensor as input.
    //!
    //! Bit 0 corresponds to the N dimension boolean.
    //! Bit 1 corresponds to the C dimension boolean.
    //! Bit 2 corresponds to the H dimension boolean.
    //! Bit 3 corresponds to the W dimension boolean.
    //! By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if
    //! there are fewer than 3 axes. For example, if the input is NCHW, the default axis is C. If the input
    //! is NHW, then the default axis is N.
    //!
    //! For example, to perform softmax on axis R of a NPQRCHW input, set bit 3.
    //!
    //! \param axes The axis along which softmax is computed.
    //!        Here axes is a bitmap. For example, when doing softmax along axis 0, bit 0 is set to 1, axes = 1 << axis
    //!        = 1.
    //!
    void setAxes(uint32_t axes) noexcept
    {
        mImpl->setAxes(axes);
    }

    //!
    //! \brief Get the axis along which softmax occurs.
    //!
    //! \see setAxes()
    //!
    uint32_t getAxes() const noexcept
    {
        return mImpl->getAxes();
    }

protected:
    virtual ~ISoftMaxLayer() noexcept = default;
    apiv::VSoftMaxLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IConcatenationLayer
//!
//! \brief A concatenation layer in a network definition.
//!
//! The output dimension along the concatenation axis is the sum of the corresponding input dimensions.
//! Every other output dimension is the same as the corresponding dimension of the inputs.
//!
//! \warning All tensors must have the same dimensions except along the concatenation axis.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IConcatenationLayer : public ILayer
{
public:
    //!
    //! \brief Set the axis along which concatenation occurs.
    //!
    //! The default axis is the number of tensor dimensions minus three, or zero if the tensor has fewer than three
    //! dimensions. For example, for a tensor with dimensions NCHW, it is C.
    //!
    //! When running this layer on the DLA, the concatenation axis must be the third to last axis, e.g. C if tensor
    //! dimensions are NCHW.
    //!
    //! \param axis The axis along which concatenation occurs.
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get the axis along which concatenation occurs.
    //!
    //! \see setAxis()
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

protected:
    virtual ~IConcatenationLayer() noexcept = default;
    apiv::VConcatenationLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IDeconvolutionLayer
//!
//! \brief A deconvolution layer in a network definition.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IDeconvolutionLayer : public ILayer
{
public:
    //!
    //! \brief Set the number of output feature maps for the deconvolution.
    //!
    //! If executing this layer on DLA, the number of output maps must be in the range [1,8192].
    //!
    //! \see getNbOutputMaps()
    //!
    void setNbOutputMaps(int64_t nbOutputMaps) noexcept
    {
        mImpl->setNbOutputMaps(nbOutputMaps);
    }

    //!
    //! \brief Get the number of output feature maps for the deconvolution.
    //!
    //! \see setNbOutputMaps()
    //!
    int64_t getNbOutputMaps() const noexcept
    {
        return mImpl->getNbOutputMaps();
    }

    //!
    //! \brief Set the number of groups for a deconvolution.
    //!
    //! The input tensor channels are divided into \p nbGroups groups, and a deconvolution is executed for each group,
    //! using a filter per group. The results of the group convolutions are concatenated to form the output.
    //!
    //! If executing this layer on DLA, nbGroups must be one
    //!
    //! \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group count)
    //! must be a multiple of 4 for both input and output.
    //!
    //! Default: 1
    //!
    //! \see getNbGroups()
    //!
    void setNbGroups(int64_t nbGroups) noexcept
    {
        mImpl->setNbGroups(nbGroups);
    }

    //!
    //! \brief Get the number of groups for a deconvolution.
    //!
    //! \see setNbGroups()
    //!
    int64_t getNbGroups() const noexcept
    {
        return mImpl->getNbGroups();
    }

    //!
    //! \brief Set the kernel weights for the deconvolution.
    //!
    //! The weights are specified as a contiguous array in \p CKRS order, where \p C the number of
    //! input channels, \p K the number of output feature maps, and \p R and \p S are the height and width
    //! of the filter.
    //!
    //! \see getKernelWeights()
    //!
    void setKernelWeights(Weights weights) noexcept
    {
        mImpl->setKernelWeights(weights);
    }

    //!
    //! \brief Get the kernel weights for the deconvolution.
    //!
    //! \see setKernelWeights()
    //!
    Weights getKernelWeights() const noexcept
    {
        return mImpl->getKernelWeights();
    }

    //!
    //! \brief Set the bias weights for the deconvolution.
    //!
    //! Bias is optional. To omit bias, set the count value of the weights structure to zero.
    //!
    //! The bias is applied per-feature-map, so the number of weights (if non-zero) must be equal to the number of
    //! output feature maps.
    //!
    //! \see getBiasWeights()
    //!
    void setBiasWeights(Weights weights) noexcept
    {
        mImpl->setBiasWeights(weights);
    }

    //!
    //! \brief Get the bias weights for the deconvolution.
    //!
    //! \see setBiasWeights()
    //!
    Weights getBiasWeights() const noexcept
    {
        return mImpl->getBiasWeights();
    }

    //!
    //! \brief Set the multi-dimension pre-padding of the deconvolution.
    //!
    //! The output will be trimmed by this number of elements on the start of every dimension.
    //! In other words, it resembles the inverse of a convolution layer with this padding size.
    //! Negative padding is not supported.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! \see getPrePadding()
    //!
    void setPrePadding(Dims const& padding) noexcept
    {
        mImpl->setPrePadding(padding);
    }

    //!
    //! \brief Get the pre-padding.
    //!
    //! \see setPrePadding()
    //!
    Dims getPrePadding() const noexcept
    {
        return mImpl->getPrePadding();
    }

    //!
    //! \brief Set the multi-dimension post-padding of the deconvolution.
    //!
    //! The output will be trimmed by this number of elements on the end of every dimension.
    //! In other words, it resembles the inverse of a convolution layer with this padding size.
    //! Negative padding is not supported.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! \see getPostPadding()
    //!
    void setPostPadding(Dims const& padding) noexcept
    {
        mImpl->setPostPadding(padding);
    }

    //!
    //! \brief Get the padding.
    //!
    //! \see setPostPadding()
    //!
    Dims getPostPadding() const noexcept
    {
        return mImpl->getPostPadding();
    }

    //!
    //! \brief Set the padding mode.
    //!
    //! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
    //!
    //! Default: kEXPLICIT_ROUND_DOWN
    //!
    //! \see getPaddingMode()
    //!
    void setPaddingMode(PaddingMode paddingMode) noexcept
    {
        mImpl->setPaddingMode(paddingMode);
    }

    //!
    //! \brief Get the padding mode.
    //!
    //! Default: kEXPLICIT_ROUND_DOWN
    //!
    //! \see setPaddingMode()
    //!
    PaddingMode getPaddingMode() const noexcept
    {
        return mImpl->getPaddingMode();
    }

    //!
    //! \brief Set the multi-dimension kernel size of the deconvolution.
    //!
    //! If executing this layer on DLA, there are two restrictions:
    //! 1) Only 2D Kernel is supported.
    //! 2) Kernel height and width must be in the range [1,32] or the combinations of [64, 96, 128] in one
    //!    dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not [64x64].
    //!
    //! \see getKernelSizeNd()
    //!
    void setKernelSizeNd(Dims const& kernelSize) noexcept
    {
        mImpl->setKernelSizeNd(kernelSize);
    }

    //!
    //! \brief Get the multi-dimension kernel size of the deconvolution.
    //!
    //! \see setKernelSizeNd()
    //!
    Dims getKernelSizeNd() const noexcept
    {
        return mImpl->getKernelSizeNd();
    }

    //!
    //! \brief Set the multi-dimension stride of the deconvolution.
    //!
    //! Default: (1, 1, ..., 1)
    //!
    //! If executing this layer on DLA, there are two restrictions:
    //! 1) Only 2D Stride is supported.
    //! 2) Stride height and width must be in the range [1,32] or the combinations of [64, 96, 128] in one
    //!    dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not [64x64].
    //!
    //! \see getStrideNd()
    //!
    void setStrideNd(Dims const& stride) noexcept
    {
        mImpl->setStrideNd(stride);
    }

    //!
    //! \brief Get the multi-dimension stride of the deconvolution.
    //!
    //! \see setStrideNd()
    //!
    Dims getStrideNd() const noexcept
    {
        return mImpl->getStrideNd();
    }

    //!
    //! \brief Set the multi-dimension padding of the deconvolution.
    //!
    //! The output will be trimmed by this number of elements on both sides of every dimension.
    //! In other words, it resembles the inverse of a convolution layer with this padding size.
    //! Padding is symmetric, and negative padding is not supported.
    //!
    //! Default: (0, 0, ..., 0)
    //!
    //! If executing this layer on DLA, padding must be 0.
    //!
    //! \see getPaddingNd() setPadding() getPadding()
    //!
    void setPaddingNd(Dims const& padding) noexcept
    {
        mImpl->setPaddingNd(padding);
    }

    //!
    //! \brief Get the multi-dimension padding of the deconvolution.
    //!
    //! If the padding is asymmetric, the pre-padding is returned.
    //!
    //! \see setPaddingNd()
    //!
    Dims getPaddingNd() const noexcept
    {
        return mImpl->getPaddingNd();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! Input 0 is the input activation tensor.
    //! Input 1 is the kernel tensor. If used, the kernel weights parameter must be set to empty weights.
    //! Input 2 is the bias tensor. If used, the bias parameter must be set to empty weights.
    //!
    //! \see getKernelWeights(), setKernelWeights(), getBiasWeights(), setBiasWeights()
    //!
    using ILayer::setInput;

    //!
    //! \brief Set the multi-dimension dilation of the deconvolution.
    //!
    //! Default: (1, 1, ..., 1)
    //!
    //! \see getDilationNd()
    //!
    void setDilationNd(Dims const& dilation) noexcept
    {
        mImpl->setDilationNd(dilation);
    }

    //!
    //! \brief Get the multi-dimension dilation of the deconvolution.
    //!
    //! \see setDilationNd()
    //!
    Dims getDilationNd() const noexcept
    {
        return mImpl->getDilationNd();
    }

protected:
    virtual ~IDeconvolutionLayer() noexcept = default;
    apiv::VDeconvolutionLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \enum ElementWiseOperation
//!
//! \brief Enumerates the binary operations that may be performed by an ElementWise layer.
//!
//! Operations kAND, kOR, and kXOR must have inputs of DataType::kBOOL.
//!
//! All other operations must have inputs of floating-point type, DataType::kINT8, DataType::kINT32, or
//! DataType::kINT64.
//!
//! \see IElementWiseLayer
//!
enum class ElementWiseOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two elements.
    kPROD = 1,      //!< Product of the two elements.
    kMAX = 2,       //!< Maximum of the two elements.
    kMIN = 3,       //!< Minimum of the two elements.
    kSUB = 4,       //!< Subtract the second element from the first.
    kDIV = 5,       //!< Divide the first element by the second.
    kPOW = 6,       //!< The first element to the power of the second element.
    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
    kAND = 8,       //!< Logical AND of two elements.
    kOR = 9,        //!< Logical OR of two elements.
    kXOR = 10,      //!< Logical XOR of two elements.
    kEQUAL = 11,    //!< Check if two elements are equal.
    kGREATER = 12,  //!< Check if element in first tensor is greater than corresponding element in second tensor.
    kLESS = 13      //!< Check if element in first tensor is less than corresponding element in second tensor.
};

namespace impl
{
//!
//! Maximum number of elements in ElementWiseOperation enum.
//!
//! \see ElementWiseOperation
//!
template <>
struct EnumMaxImpl<ElementWiseOperation>
{
    static constexpr int32_t kVALUE = 14;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IElementWiseLayer
|
||
|
|
//!
|
||
|
|
//! \brief A elementwise layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! This layer applies a per-element binary operation between corresponding elements of two tensors.
|
||
|
|
//!
|
||
|
|
//! The input tensors must have the same rank. For each dimension, their lengths must
|
||
|
|
//! match, or one of them must be one. In the latter case, the tensor is broadcast along that axis.
|
||
|
|
//!
|
||
|
|
//! The output tensor has the same rank as the inputs. For each output dimension,
|
||
|
|
//! its length is equal to the lengths of the corresponding input dimensions if they match,
|
||
|
|
//! otherwise it is equal to the length that is not one.
|
||
|
|
//!
|
||
|
|
//! \warning When running this layer on the DLA with Int8 data type, the dynamic ranges of two input tensors shall be
|
||
|
|
//! equal. If the dynamic ranges are generated using calibrator, the largest value shall be used.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IElementWiseLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the binary operation for the layer.
|
||
|
|
//!
|
||
|
|
//! DLA supports only kSUM, kPROD, kMAX, kMIN, and kSUB.
|
||
|
|
//!
|
||
|
|
//! \see getOperation(), ElementWiseOperation
|
||
|
|
//!
|
||
|
|
//! \see getBiasWeights()
|
||
|
|
//!
|
||
|
|
void setOperation(ElementWiseOperation op) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setOperation(op);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the binary operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setOperation(), ElementWiseOperation
|
||
|
|
//!
|
||
|
|
//! \see setBiasWeights()
|
||
|
|
//!
|
||
|
|
ElementWiseOperation getOperation() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getOperation();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VElementWiseLayer* mImpl;
|
||
|
|
virtual ~IElementWiseLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \brief Control form of IGatherLayer
//!
//! \see IGatherLayer
//!
enum class GatherMode : int32_t
{
    kDEFAULT = 0, //!< Similar to ONNX Gather
    kELEMENT = 1, //!< Similar to ONNX GatherElements
    kND = 2       //!< Similar to ONNX GatherND
};

//!
//! Maximum number of elements in GatherMode enum.
//!
//! \see GatherMode
//!
template <>
constexpr inline int32_t EnumMax<GatherMode>() noexcept
{
    return 3;
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IGatherLayer
|
||
|
|
//!
|
||
|
|
//! \brief A Gather layer in a network definition. Supports several kinds of gathering.
|
||
|
|
//!
|
||
|
|
//! The Gather layer has two input tensors, Data and Indices, and an output tensor Output.
|
||
|
|
//! Additionally, there are three parameters: mode, nbElementwiseDims, and axis that control
|
||
|
|
//! how the indices are interpreted.
|
||
|
|
//!
|
||
|
|
//! * Data is a tensor of rank r >= 1 that stores the values to be gathered in Output.
|
||
|
|
//! * Indices is a tensor of rank q that determines which locations in Data to gather.
|
||
|
|
//! * GatherMode::kDEFAULT: q >= 0
|
||
|
|
//! * GatherMode::kND: q >= 1 and the last dimension of Indices must be a build time constant.
|
||
|
|
//! * GatherMode::kELEMENT: q = r
|
||
|
|
//! * Output stores the gathered results. Its rank s depends on the mode:
|
||
|
|
//! * GatherMode::kDEFAULT: s = q + r - 1 - nbElementwiseDims
|
||
|
|
//! * GatherMode::kND: s = q + r - indices.d[q-1] - 1 - nbElementwiseDims
|
||
|
|
//! * GatherMode::kELEMENT: s = q = r.
|
||
|
|
//!
|
||
|
|
//! The dimensions of the output likewise depends on the mode:
|
||
|
|
//!
|
||
|
|
//! GatherMode::kDEFAULT:
|
||
|
|
//!
|
||
|
|
//! First nbElementwiseDims of output are computed by applying broadcast rules to
|
||
|
|
//! first nbElementwiseDims of indices and data. Note that nbElementwiseDims <= 1.
|
||
|
|
//! Rest of dimensions are computed by copying dimensions of Data, and replacing
|
||
|
|
//! the dimension for axis gatherAxis with the dimensions of indices.
|
||
|
|
//!
|
||
|
|
//! GatherMode::kND:
|
||
|
|
//! If indices.d[q-1] = r - nbElementwiseDims
|
||
|
|
//! output.d = [indices.d[0], ... , indices.d[q-2]]
|
||
|
|
//! Else if indices.d[q-1] < r - nbElementwiseDims
|
||
|
|
//! output.d = [indices.d[0], ... , indices.d[q-1], data.d[nbElementwiseDims + indices.d[q-1] + q],
|
||
|
|
//! data.d[r-1]]
|
||
|
|
//! Else
|
||
|
|
//! This is build time error
|
||
|
|
//!
|
||
|
|
//! GatherMode::kELEMENT:
|
||
|
|
//! The output dimensions match the dimensions of the indices tensor.
|
||
|
|
//!
|
||
|
|
//! The types of Data and Output must be the same, and Indices shall be DataType::kINT32 or DataType::kINT64.
|
||
|
|
//!
|
||
|
|
//! How the elements of Data are gathered depends on the mode:
|
||
|
|
//!
|
||
|
|
//! GatherMode::kDEFAULT:
|
||
|
|
//! Each index in indices is used to index Data along axis gatherAxis.
|
||
|
|
//!
|
||
|
|
//! GatherMode::kND:
|
||
|
|
//! Indices is a rank q integer tensor, best thought of as a rank (q-1) tensor of
|
||
|
|
//! indices into data, where each element defines a slice of data
|
||
|
|
//! The operation can be formulated as output[i_1, ..., i_{q-1}] = data[indices[i_1, ..., i_{q-1}]]
|
||
|
|
//!
|
||
|
|
//! GatherMode::kELEMENT:
|
||
|
|
//!
|
||
|
|
//! Here "axis" denotes the result of getGatherAxis().
|
||
|
|
//! For each element X of indices:
|
||
|
|
//! Let J denote a sequence for the subscripts of X
|
||
|
|
//! Let K = sequence J with element [axis] replaced by X
|
||
|
|
//! output[J] = data[K]
|
||
|
|
//!
|
||
|
|
//! The handling of nbElementWiseDims depends on the mode:
|
||
|
|
//! * GatherMode::kDEFAULT: nbElementWiseDims <= 1. Broadcast is supported across the elementwise dimension if
|
||
|
|
//! present.
|
||
|
|
//! * GatherMode::kND: 0 <= nbElementWiseDims < rank(Data)-1. Broadcast is not supported across the elementwise
|
||
|
|
//! dimensions.
|
||
|
|
//! * GatherMode::kELEMENT: nbElementWiseDims = 0
|
||
|
|
//!
|
||
|
|
//! Notes:
|
||
|
|
//! * For modes GatherMode::kND and GatherMode::kELEMENT, the first nbElementWiseDims dimensions of data and index must
|
||
|
|
//! be equal. If not, an error will be reported at build time or run time.
|
||
|
|
//! * If an axis of Data has dynamic length, using a negative index for it has undefined behavior.
|
||
|
|
//! * No DLA support
|
||
|
|
//! * Zero will be stored for OOB access
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IGatherLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the axis used by GatherMode::kELEMENTS and GatherMode::kDEFAULT
|
||
|
|
//! The axis must be less than the number of dimensions in the data input.
|
||
|
|
//! The axis defaults to 0.
|
||
|
|
//!
|
||
|
|
//! \warning Undefined behavior when used with GatherMode::kND.
|
||
|
|
//!
|
||
|
|
//! \see getGatherAxis()
|
||
|
|
//!
|
||
|
|
void setGatherAxis(int32_t axis) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setGatherAxis(axis);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the axis to gather on.
|
||
|
|
//!
|
||
|
|
//! \warning Undefined behavior when used with GatherMode::kND.
|
||
|
|
//!
|
||
|
|
//! \see setGatherAxis()
|
||
|
|
//!
|
||
|
|
int32_t getGatherAxis() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getGatherAxis();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the number of leading dimensions of indices tensor to be handled elementwise.
|
||
|
|
//!
|
||
|
|
//! The gathering of indexing starts from the dimension of data[NbElementWiseDims:].
|
||
|
|
//! The NbElementWiseDims must be less than the Rank of the data input.
|
||
|
|
//!
|
||
|
|
//! \param elementWiseDims number of dims to be handled as elementwise.
|
||
|
|
//!
|
||
|
|
//! Default: 0
|
||
|
|
//!
|
||
|
|
//! The value of nbElementWiseDims and GatherMode are checked during network validation:
|
||
|
|
//!
|
||
|
|
//! GatherMode::kDEFAULT: nbElementWiseDims can be 0 or 1.
|
||
|
|
//! GatherMode::kND: nbElementWiseDims can be between 0 and one less than rank(data).
|
||
|
|
//! GatherMode::kELEMENT: nbElementWiseDims must be 0
|
||
|
|
//!
|
||
|
|
//! \see getNbElementWiseDims()
|
||
|
|
//!
|
||
|
|
void setNbElementWiseDims(int32_t elementWiseDims) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setNbElementWiseDims(elementWiseDims);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the number of leading dimensions of indices tensor to be handled elementwise.
|
||
|
|
//!
|
||
|
|
//! \see setNbElementWiseDims()
|
||
|
|
//!
|
||
|
|
int32_t getNbElementWiseDims() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getNbElementWiseDims();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the gather mode.
|
||
|
|
//!
|
||
|
|
//! \see getMode()
|
||
|
|
//!
|
||
|
|
void setMode(GatherMode mode) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setMode(mode);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the gather mode.
|
||
|
|
//!
|
||
|
|
//! \see setMode()
|
||
|
|
//!
|
||
|
|
GatherMode getMode() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMode();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VGatherLayer* mImpl;
|
||
|
|
virtual ~IGatherLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IPluginV2Layer
|
||
|
|
//!
|
||
|
|
//! \brief Layer type for pluginV2
|
||
|
|
//!
|
||
|
|
//! \see IPluginV2
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Superseded by IPluginV3Layer.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IPluginV2Layer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Get the plugin for the layer.
|
||
|
|
//!
|
||
|
|
//! \see IPluginV2
|
||
|
|
//!
|
||
|
|
IPluginV2& getPlugin() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPlugin();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VPluginV2Layer* mImpl;
|
||
|
|
virtual ~IPluginV2Layer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IPluginV3Layer
|
||
|
|
//!
|
||
|
|
//! \brief Layer type for V3 plugins
|
||
|
|
//!
|
||
|
|
//! \see IPluginV3
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IPluginV3Layer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Get the plugin for the layer.
|
||
|
|
//!
|
||
|
|
//! \see IPluginV3
|
||
|
|
//!
|
||
|
|
IPluginV3& getPlugin() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPlugin();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VPluginV3Layer* mImpl;
|
||
|
|
virtual ~IPluginV3Layer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \enum UnaryOperation
//!
//! \brief Enumerates the unary operations that may be performed by a Unary layer.
//!
//! Operations kNOT must have inputs of DataType::kBOOL.
//!
//! Operation kSIGN and kABS must have inputs of floating-point type, DataType::kINT8, DataType::kINT32 or
//! DataType::kINT64.
//!
//! Operation kISINF must have inputs of floating-point type.
//!
//! All other operations must have inputs of floating-point type.
//!
//! \see IUnaryLayer
//!
enum class UnaryOperation : int32_t
{
    kEXP = 0,    //!< Exponentiation.
    kLOG = 1,    //!< Log (base e).
    kSQRT = 2,   //!< Square root.
    kRECIP = 3,  //!< Reciprocal.
    kABS = 4,    //!< Absolute value.
    kNEG = 5,    //!< Negation.
    kSIN = 6,    //!< Sine.
    kCOS = 7,    //!< Cosine.
    kTAN = 8,    //!< Tangent.
    kSINH = 9,   //!< Hyperbolic sine.
    kCOSH = 10,  //!< Hyperbolic cosine.
    kASIN = 11,  //!< Inverse sine.
    kACOS = 12,  //!< Inverse cosine.
    kATAN = 13,  //!< Inverse tangent.
    kASINH = 14, //!< Inverse hyperbolic sine.
    kACOSH = 15, //!< Inverse hyperbolic cosine.
    kATANH = 16, //!< Inverse hyperbolic tangent.
    kCEIL = 17,  //!< Ceiling.
    kFLOOR = 18, //!< Floor.
    kERF = 19,   //!< Gauss error function.
    kNOT = 20,   //!< Logical NOT.
    kSIGN = 21,  //!< Sign, If input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
    kROUND = 22, //!< Round to nearest even for floating-point data type.
    kISINF = 23, //!< Return true if input value equals +/- infinity for floating-point data type.
    kISNAN = 24, //!< Return true if input value is a NaN for floating-point data type.
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in UnaryOperation enum.
|
||
|
|
//!
|
||
|
|
//! \see UnaryOperation
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
|
||
|
|
{
|
||
|
|
return 25;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IUnaryLayer
|
||
|
|
//!
|
||
|
|
//! \brief Layer that represents an unary operation.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IUnaryLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the unary operation for the layer.
|
||
|
|
//!
|
||
|
|
//! When running this layer on DLA, only UnaryOperation::kABS is supported.
|
||
|
|
//!
|
||
|
|
//! \see getOperation(), UnaryOperation
|
||
|
|
//!
|
||
|
|
void setOperation(UnaryOperation op) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setOperation(op);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the unary operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setOperation(), UnaryOperation
|
||
|
|
//!
|
||
|
|
UnaryOperation getOperation() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getOperation();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VUnaryLayer* mImpl;
|
||
|
|
virtual ~IUnaryLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \enum ReduceOperation
//!
//! \brief Enumerates the reduce operations that may be performed by a Reduce layer.
//!
//! The table shows the result of reducing across an empty volume of a given type.
//!
//! Operation | kFLOAT and kHALF  | kINT32  | kINT8
//! --------- | ----------------- | ------- | -----
//! kSUM      | 0                 | 0       | 0
//! kPROD     | 1                 | 1       | 1
//! kMAX      | negative infinity | INT_MIN | -128
//! kMIN      | positive infinity | INT_MAX | 127
//! kAVG      | NaN               | 0       | -128
//!
//! The current version of TensorRT usually performs reduction for kINT8 via kFLOAT or kHALF.
//! The kINT8 values show the quantized representations of the floating-point values.
//!
enum class ReduceOperation : int32_t
{
    kSUM = 0,  //!< Sum of the elements.
    kPROD = 1, //!< Product of the elements.
    kMAX = 2,  //!< Maximum of the elements.
    kMIN = 3,  //!< Minimum of the elements.
    kAVG = 4   //!< Average of the elements.
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in ReduceOperation enum.
|
||
|
|
//!
|
||
|
|
//! \see ReduceOperation
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<ReduceOperation>() noexcept
|
||
|
|
{
|
||
|
|
return 5;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IReduceLayer
|
||
|
|
//!
|
||
|
|
//! \brief Layer that represents a reduction across a non-bool tensor.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IReduceLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the reduce operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see getOperation(), ReduceOperation
|
||
|
|
//!
|
||
|
|
void setOperation(ReduceOperation op) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setOperation(op);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the reduce operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setOperation(), ReduceOperation
|
||
|
|
//!
|
||
|
|
ReduceOperation getOperation() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getOperation();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the axes over which to reduce.
|
||
|
|
//!
|
||
|
|
//! \see getReduceAxes
|
||
|
|
//!
|
||
|
|
void setReduceAxes(uint32_t reduceAxes) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setReduceAxes(reduceAxes);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the axes over which to reduce for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setReduceAxes
|
||
|
|
//!
|
||
|
|
uint32_t getReduceAxes() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getReduceAxes();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
|
||
|
|
//!
|
||
|
|
//! \see getKeepDimensions
|
||
|
|
//!
|
||
|
|
void setKeepDimensions(bool keepDimensions) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setKeepDimensions(keepDimensions);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setKeepDimensions
|
||
|
|
//!
|
||
|
|
bool getKeepDimensions() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getKeepDimensions();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VReduceLayer* mImpl;
|
||
|
|
virtual ~IReduceLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IPaddingLayer
|
||
|
|
//!
|
||
|
|
//! \brief Layer that represents a padding operation.
|
||
|
|
//!
|
||
|
|
//! The padding layer adds zero-padding at the start and end of the input tensor. It supports padding
|
||
|
|
//! only the last two dimensions. Applying negative padding results in cropping of the input.
|
||
|
|
//!
|
||
|
|
//! To pad across any subset of dimensions, use ISliceLayer with SampleMode::kFILL.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IPaddingLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the padding that is applied at the start of the tensor.
|
||
|
|
//!
|
||
|
|
//! Negative padding results in trimming the edge by the specified amount.
|
||
|
|
//!
|
||
|
|
//! \warning Only 2 dimensional padding is currently supported.
|
||
|
|
//!
|
||
|
|
//! \see getPrePaddingNd
|
||
|
|
//!
|
||
|
|
void setPrePaddingNd(Dims const& padding) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPrePaddingNd(padding);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the padding that is applied at the start of the tensor.
|
||
|
|
//!
|
||
|
|
//! \warning Only 2 dimensional padding is currently supported.
|
||
|
|
//!
|
||
|
|
//! \see setPrePaddingNd
|
||
|
|
//!
|
||
|
|
Dims getPrePaddingNd() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPrePaddingNd();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the padding that is applied at the end of the tensor.
|
||
|
|
//!
|
||
|
|
//! Negative padding results in trimming the edge by the specified amount
|
||
|
|
//!
|
||
|
|
//! \warning Only 2 dimensional padding is currently supported.
|
||
|
|
//!
|
||
|
|
//! \see getPostPaddingNd
|
||
|
|
//!
|
||
|
|
void setPostPaddingNd(Dims const& padding) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setPostPaddingNd(padding);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the padding that is applied at the end of the tensor.
|
||
|
|
//!
|
||
|
|
//! \warning Only 2 dimensional padding is currently supported.
|
||
|
|
//!
|
||
|
|
//! \see setPostPaddingNd
|
||
|
|
//!
|
||
|
|
Dims getPostPaddingNd() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPostPaddingNd();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VPaddingLayer* mImpl;
|
||
|
|
virtual ~IPaddingLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \struct Permutation
|
||
|
|
//!
|
||
|
|
//! \brief Represents a permutation of dimensions.
|
||
|
|
//!
|
||
|
|
struct Permutation
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! The elements of the permutation.
|
||
|
|
//! The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to
|
||
|
|
//! permute from CHW order to HWC order, the required permutation is [1, 2, 0], and to permute
|
||
|
|
//! from HWC to CHW, the required permutation is [2, 0, 1].
|
||
|
|
//!
|
||
|
|
int32_t order[Dims::MAX_DIMS];
|
||
|
|
};
|
||
|
|
|
||
|
|
//! \class IShuffleLayer
|
||
|
|
//!
|
||
|
|
//! \brief Layer type for shuffling data.
|
||
|
|
//!
|
||
|
|
//! This layer shuffles data by applying in sequence: a transpose operation, a reshape operation
|
||
|
|
//! and a second transpose operation. The dimension types of the output are those of the reshape dimension.
|
||
|
|
//!
|
||
|
|
//! The layer has an optional second input. If present, it must be a 1D tensor of type Int32 or Int64,
|
||
|
|
//! and the reshape dimensions are taken from it.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IShuffleLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the permutation applied by the first transpose operation.
|
||
|
|
//!
|
||
|
|
//! \param permutation The dimension permutation applied before the reshape.
|
||
|
|
//!
|
||
|
|
//! The default is the identity permutation.
|
||
|
|
//!
|
||
|
|
//! \see getFirstTranspose
|
||
|
|
//!
|
||
|
|
void setFirstTranspose(Permutation permutation) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setFirstTranspose(permutation);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the permutation applied by the first transpose operation.
|
||
|
|
//!
|
||
|
|
//! \return The dimension permutation applied before the reshape.
|
||
|
|
//!
|
||
|
|
//! \see setFirstTranspose
|
||
|
|
//!
|
||
|
|
Permutation getFirstTranspose() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getFirstTranspose();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the reshaped dimensions.
|
||
|
|
//!
|
||
|
|
//! \param dimensions The reshaped dimensions.
|
||
|
|
//!
|
||
|
|
//! Two special values can be used as dimensions.
|
||
|
|
//!
|
||
|
|
//! Value 0 copies the corresponding dimension from input. This special value
|
||
|
|
//! can be used more than once in the dimensions. If number of reshape
|
||
|
|
//! dimensions is less than input, 0s are resolved by aligning the most
|
||
|
|
//! significant dimensions of input.
|
||
|
|
//!
|
||
|
|
//! Value -1 infers that particular dimension by looking at input and rest
|
||
|
|
//! of the reshape dimensions. Note that only a maximum of one dimension is
|
||
|
|
//! permitted to be specified as -1.
|
||
|
|
//! Avoid using -1 if the input can have zero volume and any of the other
|
||
|
|
//! reshape dimensions can be zero (after resolving special treatment of 0),
|
||
|
|
//! because the solution for the -1 becomes indeterminate and TensorRT will report an error.
|
||
|
|
//!
|
||
|
|
//! The product of the new dimensions must be equal to the product of the old.
|
||
|
|
//!
|
||
|
|
//! If a second input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
void setReshapeDimensions(Dims const& dimensions) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setReshapeDimensions(dimensions);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the reshaped dimensions.
|
||
|
|
//!
|
||
|
|
//! \return The reshaped dimensions.
|
||
|
|
//!
|
||
|
|
//! If a second input is present and non-null, or setReshapeDimensions has
|
||
|
|
//! not yet been called, this function returns Dims with nbDims == -1.
|
||
|
|
//!
|
||
|
|
Dims getReshapeDimensions() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getReshapeDimensions();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Append or replace an input of this layer with a specific tensor
|
||
|
|
//!
|
||
|
|
//! \param index the index of the input to modify.
|
||
|
|
//! \param tensor the new input tensor
|
||
|
|
//
|
||
|
|
//! Sets the input tensor for the given index. The index must be 0 for a static shuffle layer.
|
||
|
|
//! A static shuffle layer is converted to a dynamic shuffle layer by calling setInput with an index 1.
|
||
|
|
//! A dynamic shuffle layer cannot be converted back to a static shuffle layer.
|
||
|
|
//!
|
||
|
|
//! For a dynamic shuffle layer, the values 0 and 1 are valid.
|
||
|
|
//! The indices in the dynamic case are as follows:
|
||
|
|
//!
|
||
|
|
//! - 0: Data or Shape tensor to be shuffled.
|
||
|
|
//! - 1: The dimensions for the reshape operation, as a 1D tensor of type Int32 or Int64.
|
||
|
|
//!
|
||
|
|
//! If this function is called with the value 1, then the function getNbInputs() changes
|
||
|
|
//! from returning 1 to 2.
|
||
|
|
//!
|
||
|
|
//! The reshape dimensions are treated identically to how they are treated if set statically
|
||
|
|
//! via setReshapeDimensions. In particular, a -1 is treated as a wildcard even if dynamically
|
||
|
|
//! supplied at runtime, and a 0 is treated as a placeholder if getZeroIsPlaceholder() = true,
|
||
|
|
//! which is the default. If the placeholder interpretation of 0 is unwanted because the
|
||
|
|
//! runtime dimension should be 0 when the reshape dimension is 0, be sure to call
|
||
|
|
//! setZeroIsPlacholder(false) on the IShuffleLayer.
|
||
|
|
//!
|
||
|
|
//! \see setReshapeDimensions.
|
||
|
|
//!
|
||
|
|
using ILayer::setInput;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the permutation applied by the second transpose operation.
|
||
|
|
//!
|
||
|
|
//! \param permutation The dimension permutation applied after the reshape.
|
||
|
|
//!
|
||
|
|
//! The default is the identity permutation.
|
||
|
|
//!
|
||
|
|
//! The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to
|
||
|
|
//! permute from CHW order to HWC order, the required permutation is [1, 2, 0].
|
||
|
|
//!
|
||
|
|
//! \see getSecondTranspose
|
||
|
|
//!
|
||
|
|
void setSecondTranspose(Permutation permutation) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setSecondTranspose(permutation);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the permutation applied by the second transpose operation.
|
||
|
|
//!
|
||
|
|
//! \return The dimension permutation applied after the reshape.
|
||
|
|
//!
|
||
|
|
//! \see setSecondTranspose
|
||
|
|
//!
|
||
|
|
Permutation getSecondTranspose() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getSecondTranspose();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set meaning of 0 in reshape dimensions.
|
||
|
|
//!
|
||
|
|
//! If true, then a 0 in the reshape dimensions denotes copying the corresponding
|
||
|
|
//! dimension from the first input tensor. If false, then a 0 in the reshape
|
||
|
|
//! dimensions denotes a zero-length dimension.
|
||
|
|
//!
|
||
|
|
//! Default: true
|
||
|
|
//!
|
||
|
|
//! \see getZeroIsPlaceholder();
|
||
|
|
//!
|
||
|
|
void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get meaning of 0 in reshape dimensions.
|
||
|
|
//!
|
||
|
|
//! \return true if 0 is placeholder for corresponding input dimension,
|
||
|
|
//! false if 0 denotes a zero-length dimension.
|
||
|
|
//!
|
||
|
|
//! \see setZeroIsPlaceholder
|
||
|
|
//!
|
||
|
|
bool getZeroIsPlaceholder() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getZeroIsPlaceholder();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VShuffleLayer* mImpl;
|
||
|
|
virtual ~IShuffleLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \brief Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
//!
//! \see ISliceLayer and IGridSample
//!
enum class SampleMode : int32_t
{
    kSTRICT_BOUNDS = 0, //!< Fail with error when the coordinates are out of bounds.
    kWRAP = 1,          //!< Coordinates wrap around periodically.
    kCLAMP = 2,         //!< Out of bounds indices are clamped to bounds.
    kFILL = 3,          //!< Use fill input value when coordinates are out of bounds.
    kREFLECT = 4,       //!< Coordinates reflect. The axis of reflection is the middle of the perimeter pixel and the
                        //!< reflections are repeated indefinitely within the padded regions. Repeats values for a single
                        //!< pixel and throws error for zero pixels.
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in SampleMode enum.
|
||
|
|
//!
|
||
|
|
//! \see SampleMode
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<SampleMode>() noexcept
|
||
|
|
{
|
||
|
|
return 5;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Slices an input tensor into an output tensor based on the offset and strides.
|
||
|
|
//!
|
||
|
|
//! The slice layer has two variants, static and dynamic. Static slice specifies the start, size, and stride
|
||
|
|
//! dimensions at layer creation time via Dims and can use the get/set accessor functions of the ISliceLayer.
|
||
|
|
//! Static slice layers can also optionally specify axes through the get/set accessor functions of the ISliceLayer.
|
||
|
|
//! Dynamic slice specifies one or more of start, size, stride, or axes as ITensors, by using ILayer::setInput to add
|
||
|
|
//! a second, third, fourth, or sixth input respectively. The corresponding Dims are used if an input
|
||
|
|
//! is missing or null.
|
||
|
|
//!
|
||
|
|
//! An application can determine if the ISliceLayer has a dynamic output shape based on whether
|
||
|
|
//! the size or axes input is present and non-null.
|
||
|
|
//!
|
||
|
|
//! The slice layer selects for each dimension a start location from within the input tensor, and
|
||
|
|
//! copies elements to the output tensor using the specified stride across the input tensor.
|
||
|
|
//! Start, size, and stride tensors must be 1D tensors of type Int32 or Int64 if not specified via Dims.
|
||
|
|
//!
|
||
|
|
//! An example of using slice on a tensor:
|
||
|
|
//! input = {{0, 2, 4}, {1, 3, 5}}
|
||
|
|
//! start = {1, 0}
|
||
|
|
//! size = {1, 2}
|
||
|
|
//! stride = {1, 2}
|
||
|
|
//! output = {{1, 5}}
|
||
|
|
//!
|
||
|
|
//! If axes are provided then starts, ends, and strides must have the same length as axes
|
||
|
|
//! and specifies a subset of dimensions to slice. If axes are not provided, starts, ends, and strides
|
||
|
|
//! must be of the same length as the rank of the input tensor.
|
||
|
|
//!
|
||
|
|
//! An example of using slice on a tensor with axes specified:
|
||
|
|
//! input = {{0, 2, 4}, {1, 3, 5}}
|
||
|
|
//! start = {1}
|
||
|
|
//! size = {2}
|
||
|
|
//! stride = {1}
|
||
|
|
//! axes = {1}
|
||
|
|
//! output = {{2, 4}, {3, 5}}
|
||
|
|
//!
|
||
|
|
//! When the sampleMode is kCLAMP or kREFLECT, for each input dimension, if its size is 0 then the corresponding output
|
||
|
|
//! dimension must be 0 too.
|
||
|
|
//!
|
||
|
|
//! When the sampleMode is kFILL, the fifth input to the slice layer is used to determine the value to fill in out-of-bound
|
||
|
|
//! indices. It is an error to specify the fifth input in any other sampleMode.
|
||
|
|
//!
|
||
|
|
//! A slice layer can produce a shape tensor if the following conditions are met:
|
||
|
|
//!
|
||
|
|
//! * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
|
||
|
|
//! * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
|
||
|
|
//! * The number of elements in the output tensor does not exceed 2 * Dims::MAX_DIMS.
|
||
|
|
//!
|
||
|
|
//! The input tensor is a shape tensor if the output is a shape tensor.
|
||
|
|
//!
|
||
|
|
//! The following constraints must be satisfied to execute this layer on DLA:
|
||
|
|
//! * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
|
||
|
|
//! * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
|
||
|
|
//! * sampleMode is kDEFAULT, kWRAP, or kFILL.
|
||
|
|
//! * Strides are 1 for all dimensions.
|
||
|
|
//! * Slicing is not performed on the first dimension.
|
||
|
|
//! * The input tensor has four dimensions.
|
||
|
|
//! * For kFILL sliceMode, the fill value input is a scalar output of an IConstantLayer with value 0 that is not
|
||
|
|
//! consumed by any other layer.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class ISliceLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the start offset that the slice layer uses to create the output slice.
|
||
|
|
//!
|
||
|
|
//! \param start The start offset to read data from the input tensor.
|
||
|
|
//!
|
||
|
|
//! If a second input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getStart
|
||
|
|
//!
|
||
|
|
void setStart(Dims const& start) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setStart(start);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the start offset for the slice layer.
|
||
|
|
//!
|
||
|
|
//! \return The start offset, or an invalid Dims structure.
|
||
|
|
//!
|
||
|
|
//! If the second input is present and non-null,
|
||
|
|
//! this function returns a Dims with nbDims = -1.
|
||
|
|
//!
|
||
|
|
//! \see setStart
|
||
|
|
//!
|
||
|
|
Dims getStart() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getStart();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the dimensions of the output slice.
|
||
|
|
//!
|
||
|
|
//! \param size The dimensions of the output slice.
|
||
|
|
//!
|
||
|
|
//! If a third input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getSize
|
||
|
|
//!
|
||
|
|
void setSize(Dims const& size) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setSize(size);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get dimensions of the output slice.
|
||
|
|
//!
|
||
|
|
//! \return The output dimension, or an invalid Dims structure.
|
||
|
|
//!
|
||
|
|
//! If the third input is present and non-null,
|
||
|
|
//! this function returns a Dims with nbDims = -1.
|
||
|
|
//!
|
||
|
|
//! \see setSize
|
||
|
|
//!
|
||
|
|
Dims getSize() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getSize();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the stride for computing the output slice data.
|
||
|
|
//!
|
||
|
|
//! \param stride The dimensions of the stride to compute the values to store in the output slice.
|
||
|
|
//!
|
||
|
|
//! If a fourth input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getStride
|
||
|
|
//!
|
||
|
|
void setStride(Dims const& stride) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setStride(stride);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the stride for the output slice.
|
||
|
|
//!
|
||
|
|
//! \return The slicing stride, or an invalid Dims structure.
|
||
|
|
//!
|
||
|
|
//! If the fourth input is present and non-null,
|
||
|
|
//! this function returns a Dims with nbDims = -1.
|
||
|
|
//!
|
||
|
|
//! \see setStride
|
||
|
|
//!
|
||
|
|
Dims getStride() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getStride();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the slice mode.
|
||
|
|
//!
|
||
|
|
//! \see getMode()
|
||
|
|
//!
|
||
|
|
void setMode(SampleMode mode) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setMode(mode);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the slice mode.
|
||
|
|
//!
|
||
|
|
//! \see setMode()
|
||
|
|
//!
|
||
|
|
SampleMode getMode() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMode();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Append or replace an input of this layer with a specific tensor
|
||
|
|
//!
|
||
|
|
//! \param index the index of the input to modify.
|
||
|
|
//! \param tensor the new input tensor
|
||
|
|
//!
|
||
|
|
//! For a slice layer, the values 0-5 are valid.
|
||
|
|
//! The indices are as follows:
|
||
|
|
//!
|
||
|
|
//! - 0: Tensor to be sliced.
|
||
|
|
//! - 1: The start tensor to begin slicing, as a 1D tensor of type Int32 or Int64.
|
||
|
|
//! - 2: The size tensor of the resulting slice, as a 1D tensor of type Int32 or Int64.
|
||
|
|
//! - 3: The stride of the slicing operation, as a 1D tensor of type Int32 or Int64.
|
||
|
|
//! - 4: Value for the kFILL slice mode. The fill value data type should either be the same
|
||
|
|
//! or be implicitly convertible to the input data type.
|
||
|
|
//! Implicit data type conversion is supported among kFLOAT, kHALF, kINT8, and kFP8 data types.
|
||
|
|
//! This input is disallowed for other modes.
|
||
|
|
//! - 5: The axes tensor indicating the corresponding axes that start, size, and stride
|
||
|
|
//! should apply to, as a 1D tensor or type Int32 or Int64. Negative values for axes
|
||
|
|
//! indicate indexing from the back of the input tensor. Values must be unique and be
|
||
|
|
//! within the interval of [-rank(input), rank(input)-1].
|
||
|
|
//!
|
||
|
|
//! Using the corresponding setter resets the input to null.
|
||
|
|
//!
|
||
|
|
//! If this function is called with a value greater than 0, then the function getNbInputs() changes
|
||
|
|
//! from returning 1 to index + 1.
|
||
|
|
//!
|
||
|
|
using ILayer::setInput;
|
||
|
|
|
||
|
|
    //!
    //! \brief Set the axes for this ISliceLayer.
    //!
    //! \param axes The axes on which the starts, ends, and strides parameters of the slice apply.
    //!
    //! If a sixth input had been used to create this layer, that input is reset to null by this method.
    //!
    //! \see getAxes
    //!
    void setAxes(Dims const& axes) noexcept
    {
        mImpl->setAxes(axes);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the axes for this ISliceLayer.
    //!
    //! \return The axes on which the starts, ends, and strides parameters of this slice apply.
    //!
    //! If the sixth input is present and non-null,
    //! this function returns a Dims with nbDims = -1.
    //!
    //! \see setAxes
    //!
    Dims getAxes() const noexcept
    {
        return mImpl->getAxes();
    }
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VSliceLayer* mImpl;
|
||
|
|
virtual ~ISliceLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
//! \class IShapeLayer
//!
//! \brief Layer type for getting shape of a tensor.
//!
//! This layer sets the output to a 1D tensor of type Int64 with the dimensions of the input tensor.
//!
//! For example, if the input is a four-dimensional tensor (of any type) with
//! dimensions [2,3,5,7], the output tensor is a one-dimensional Int64 tensor
//! of length 4 containing the sequence 2, 3, 5, 7.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IShapeLayer : public ILayer
{
protected:
    apiv::VShapeLayer* mImpl;
    virtual ~IShapeLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//!
//! \enum TopKOperation
//!
//! \brief Enumerates the operations that may be performed by a TopK layer.
//!
enum class TopKOperation : int32_t
{
    kMAX = 0, //!< Maximum of the elements.
    kMIN = 1, //!< Minimum of the elements.
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in TopKOperation enum.
//!
//! \see TopKOperation
//!
template <>
constexpr inline int32_t EnumMax<TopKOperation>() noexcept
{
    return 2;
}
|
||
|
|
|
||
|
|
//!
//! \class ITopKLayer
//!
//! \brief Layer that represents a TopK reduction.
//!
//! This layer can accept both static and dynamic k. Static k can be set through the addTopK() API function,
//! or accessed using the getK() and setK() functions after layer creation. For dynamic k, use the setInput()
//! method to pass in k as a tensor with index 1, which overrides the static k value in calculations.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ITopKLayer : public ILayer
{
public:
    //!
    //! \brief Set the operation for the layer.
    //!
    //! \see getOperation(), TopKOperation
    //!
    void setOperation(TopKOperation op) noexcept
    {
        mImpl->setOperation(op);
    }

    //!
    //! \brief Get the operation for the layer.
    //!
    //! \see setOperation(), TopKOperation
    //!
    TopKOperation getOperation() const noexcept
    {
        return mImpl->getOperation();
    }

    //!
    //! \brief Set the static k value for the layer.
    //!
    //! Currently only values up to 3840 are supported.
    //!
    //! If a second input to this layer has been set, it will be reset to null by this method.
    //!
    //! \see getK()
    //!
    void setK(int32_t k) noexcept
    {
        mImpl->setK(k);
    }

    //!
    //! \brief Get the k value for the layer.
    //!
    //! This function will return the static k value passed into addTopK(), or the value passed into setK().
    //!
    //! If a second layer input is present and non-null, this function returns -1.
    //!
    //! \see setK()
    //!
    int32_t getK() const noexcept
    {
        return mImpl->getK();
    }

    //!
    //! \brief Set which axes to reduce for the layer.
    //!
    //! \see getReduceAxes()
    //!
    void setReduceAxes(uint32_t reduceAxes) noexcept
    {
        mImpl->setReduceAxes(reduceAxes);
    }

    //!
    //! \brief Get the axes to reduce for the layer.
    //!
    //! \see setReduceAxes()
    //!
    uint32_t getReduceAxes() const noexcept
    {
        return mImpl->getReduceAxes();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index The index of the input to modify.
    //! \param tensor The new input tensor.
    //!
    //! For a TopK layer, the values 0-1 are valid.
    //! The indices are as follows:
    //!
    //! - 0: Input data tensor.
    //! - 1: A scalar Int32 tensor containing a positive value corresponding to the number of top
    //!      elements to retrieve. Values larger than 3840 will result in a runtime error. If provided,
    //!      this will override the static k value in calculations.
    //!
    using ILayer::setInput;

    //!
    //! \brief Set the indices type for the layer.
    //!
    //! \param type The DataType of the indices tensor.
    //!
    //! \return true if set successfully, false otherwise.
    //!
    //! Set the indices (the second output) type of the TopK layer. Valid values are DataType::kINT32 and
    //! DataType::kINT64, otherwise an error occurs and the type is not updated.
    //!
    bool setIndicesType(DataType type) noexcept
    {
        return mImpl->setIndicesType(type);
    }

    //!
    //! \brief Return the TopK layer indices type.
    //!
    //! \return indices type set during layer creation or by setIndicesType().
    //! The return value is the indices type of the TopK layer.
    //! The default value is DataType::kINT32.
    //!
    DataType getIndicesType() const noexcept
    {
        return mImpl->getIndicesType();
    }

protected:
    apiv::VTopKLayer* mImpl;
    virtual ~ITopKLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//!
//! \enum MatrixOperation
//!
//! \brief Enumerates the operations that may be performed on a tensor
//! by IMatrixMultiplyLayer before multiplication.
//!
enum class MatrixOperation : int32_t
{
    //! Treat x as a matrix if it has two dimensions, or as a collection of
    //! matrices if x has more than two dimensions, where the last two dimensions
    //! are the matrix dimensions. x must have at least two dimensions.
    kNONE = 0,

    //! Like kNONE, but transpose the matrix dimensions.
    kTRANSPOSE = 1,

    //! Treat x as a vector if it has one dimension, or as a collection of
    //! vectors if x has more than one dimension. x must have at least one dimension.
    //!
    //! The first input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor
    //! with dimensions [M, 1, K] with MatrixOperation::kNONE, i.e. is treated as M row vectors of length K,
    //! or dimensions [M, K, 1] with MatrixOperation::kTRANSPOSE.
    //!
    //! The second input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor
    //! with dimensions [M, K, 1] with MatrixOperation::kNONE, i.e. is treated as M column vectors of length K,
    //! or dimensions [M, 1, K] with MatrixOperation::kTRANSPOSE.
    kVECTOR = 2,
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in MatrixOperation enum.
//!
//! \see MatrixOperation
//!
template <>
constexpr inline int32_t EnumMax<MatrixOperation>() noexcept
{
    return 3;
}
|
||
|
|
|
||
|
|
//!
//! \class IMatrixMultiplyLayer
//!
//! \brief Layer that represents a Matrix Multiplication.
//!
//! Let A be op(getInput(0)) and B be op(getInput(1)) where
//! op(x) denotes the corresponding MatrixOperation.
//!
//! When A and B are matrices or vectors, computes the inner product A * B:
//!
//!     matrix * matrix -> matrix
//!     matrix * vector -> vector
//!     vector * matrix -> vector
//!     vector * vector -> scalar
//!
//! Inputs of higher rank are treated as collections of matrices or vectors.
//! The output will be a corresponding collection of matrices, vectors, or scalars.
//!
//! For a dimension that is not one of the matrix or vector dimensions:
//! If the dimension is 1 for one of the tensors but not the other tensor,
//! the former tensor is broadcast along that dimension to match the dimension of the latter tensor.
//! The number of these extra dimensions for A and B must match.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IMatrixMultiplyLayer : public ILayer
{
public:
    //!
    //! \brief Set the operation for an input tensor.
    //!
    //! \param index Input tensor number (0 or 1).
    //! \param op New operation.
    //!
    //! \see getOperation()
    //!
    void setOperation(int32_t index, MatrixOperation op) noexcept
    {
        mImpl->setOperation(index, op);
    }

    //!
    //! \brief Get the operation for an input tensor.
    //!
    //! \param index Input tensor number (0 or 1).
    //!
    //! \see setOperation()
    //!
    MatrixOperation getOperation(int32_t index) const noexcept
    {
        return mImpl->getOperation(index);
    }

protected:
    apiv::VMatrixMultiplyLayer* mImpl;
    virtual ~IMatrixMultiplyLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//! \class INonZeroLayer
//!
//! \brief A NonZero layer in a network.
//!
//! This layer gets the positions of elements that are non-zero in the input.
//! For boolean input, "non-zero" means "true". Semantics are similar to ONNX NonZero.
//!
//! The input may have type kFLOAT, kHALF, kINT32, or kBOOL.
//!
//! The output is a matrix of type kINT32 or kINT64.
//! For an input with dimensions [L1, L2, ..., Lm], the output has dimensions [m,n],
//! where n is the number of non-zero elements. I.e., each column denotes a m-D position.
//!
//! The columns are lexically ordered.
//! E.g., a column with [3,2,4,7] precedes a column with [3,2,5,6].
//!
//! Tip: "compress" can be implemented with INonZero+IShuffle+Gather.
//! For example, to compress a tensor x over axis k using mask vector v,
//! use nonzero(v) to compute the subscripts, shuffle with reshape dimensions = [-1]
//! to make the subscripts 1D, and then gather with the subscripts.
//!
class INonZeroLayer : public ILayer
{
public:
    //!
    //! \brief Set the indices type for the layer.
    //!
    //! \param type The DataType of the indices tensor.
    //!
    //! \return true if set successfully, false otherwise.
    //!
    //! Set the indices (the first output) type of the NonZero layer. Valid values are DataType::kINT32 and
    //! DataType::kINT64, otherwise an error occurs and the type is not updated.
    //!
    bool setIndicesType(DataType type) noexcept
    {
        return mImpl->setIndicesType(type);
    }

    //!
    //! \brief Return the NonZero layer indices type.
    //!
    //! \return indices type set during layer creation or by setIndicesType().
    //! The return value is the indices type of the NonZero layer.
    //! The default value is DataType::kINT32.
    //!
    DataType getIndicesType() const noexcept
    {
        return mImpl->getIndicesType();
    }

protected:
    virtual ~INonZeroLayer() noexcept = default;
    apiv::VNonZeroLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IRaggedSoftMaxLayer
//!
//! \brief A RaggedSoftmax layer in a network definition.
//!
//! This layer takes a ZxS input tensor and an additional Zx1 bounds tensor
//! holding the lengths of the Z sequences.
//!
//! This layer computes a softmax across each of the Z sequences.
//!
//! The output tensor is of the same size as the input tensor.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IRaggedSoftMaxLayer : public ILayer
{
protected:
    apiv::VRaggedSoftMaxLayer* mImpl;
    virtual ~IRaggedSoftMaxLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//! \class IIdentityLayer
//!
//! \brief A layer that represents the identity function.
//!
//! For a strongly typed network, the layer is an identity function, i.e. the output
//! tensor elements are identical to the input tensor elements, possibly with a change
//! in layout. For example, if a network consists of a single IIdentityLayer, the network
//! input and output must have the same type, but the input can have NCHW layout and
//! the output can have NHWC layout.
//!
//! If the network is weakly typed, the layer is additionally permitted some type conversions
//! as described below.
//!
//! If the output type is explicitly specified via setOutputType, IIdentityLayer can be
//! used to convert from one type to another. Other than conversions between the same
//! type (kFLOAT -> kFLOAT for example), the only valid conversions are:
//!
//! (kFLOAT | kHALF | kINT32 | kBOOL) -> (kFLOAT | kHALF | kINT32 | kBOOL)
//!
//! (kFLOAT | kHALF) -> kUINT8
//!
//! kUINT8 -> (kFLOAT | kHALF)
//!
//! Conversion also happens implicitly, without calling setOutputType, if the output
//! tensor is a network output.
//!
//! Two types are compatible if they are identical, or are both in {kFLOAT, kHALF}.
//! Implicit conversion between incompatible types, i.e. without using setOutputType,
//! was recognized as incorrect as of TensorRT 8.4, but was retained for API compatibility
//! within TensorRT 8.x releases. In TensorRT 10.0 onwards it is an error if the network
//! output tensor type is incompatible with the layer output type. E.g., implicit conversion
//! from kFLOAT to kINT32 is not allowed.
//!
//! To explicitly convert kFLOAT to kINT32:
//!
//! * Preferred: use ICastLayer.
//!
//! * Legacy alternative: use IIdentityLayer and setOutputType(DataType::kINT32).
//!
//! Similar advice applies for explicit conversion in the other direction.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IIdentityLayer : public ILayer
{
protected:
    apiv::VIdentityLayer* mImpl;
    virtual ~IIdentityLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//! \class ICastLayer
//!
//! \brief A cast layer in a network.
//!
//! This layer casts a given tensor to the datatype specified by \p toType.
//!
class ICastLayer : public ILayer
{
public:
    //!
    //! \brief Set cast layer output type.
    //!
    //! \param toType The DataType of the output tensor.
    //!
    //! Set the output type of the cast layer.
    //!
    void setToType(DataType toType) noexcept
    {
        mImpl->setToType(toType);
    }

    //!
    //! \brief Return cast layer output type.
    //!
    //! \return toType parameter set during layer creation or by setToType().
    //! The return value is the output type of the cast layer.
    //!
    DataType getToType() const noexcept
    {
        return mImpl->getToType();
    }

protected:
    apiv::VCastLayer* mImpl;
    virtual ~ICastLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//! \class IConstantLayer
//!
//! \brief Layer that represents a constant value.
//!
//! \note This layer does not support boolean types.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IConstantLayer : public ILayer
{
public:
    //!
    //! \brief Set the weights for the layer.
    //!
    //! The output type is weights.type. If the network is weakly typed and the weights have a real type,
    //! the output type might be different per TensorRT's type conversion rules.
    //!
    //! \see getWeights()
    //!
    void setWeights(Weights weights) noexcept
    {
        mImpl->setWeights(weights);
    }

    //!
    //! \brief Get the weights for the layer.
    //!
    //! \see setWeights
    //!
    Weights getWeights() const noexcept
    {
        return mImpl->getWeights();
    }

    //!
    //! \brief Set the dimensions for the layer.
    //!
    //! \param dimensions The dimensions of the layer
    //!
    //! \see getDimensions
    //!
    void setDimensions(Dims const& dimensions) noexcept
    {
        mImpl->setDimensions(dimensions);
    }

    //!
    //! \brief Get the dimensions for the layer.
    //!
    //! \return the dimensions for the layer
    //!
    //! \see setDimensions
    //!
    Dims getDimensions() const noexcept
    {
        return mImpl->getDimensions();
    }

protected:
    apiv::VConstantLayer* mImpl;
    virtual ~IConstantLayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//!
//! \class IParametricReLULayer
//!
//! \brief Layer that represents a parametric ReLU operation.
//!
//! When running this layer on DLA, the slopes input must be a build-time constant.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IParametricReLULayer : public ILayer
{
protected:
    apiv::VParametricReLULayer* mImpl;
    virtual ~IParametricReLULayer() noexcept = default;
};
|
||
|
|
|
||
|
|
//! \enum InterpolationMode
//!
//! \brief Enumerates various modes of interpolation
//!
enum class InterpolationMode : int32_t
{
    kNEAREST = 0, //!< ND (0 < N <= 8) nearest neighbor resizing.
    kLINEAR = 1,  //!< Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation
    kCUBIC = 2    //!< Supports bicubic (2D) interpolation
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in InterpolationMode enum.
//!
//! \see InterpolationMode
//!
template <>
struct EnumMaxImpl<InterpolationMode>
{
    static constexpr int32_t kVALUE = 3;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \enum ResizeCoordinateTransformation
//!
//! \brief The resize coordinate transformation function.
//!
//! \see IResizeLayer::setCoordinateTransformation()
//!
enum class ResizeCoordinateTransformation : int32_t
{
    //! Think of each value in the tensor as a unit volume, and the coordinate is a point inside this volume.
    //! The coordinate point is drawn as a star (*) in the diagram below; a range of multiple values has a length.
    //! Define `x_origin` as the coordinate of axis x in the input tensor, `x_resized` as the coordinate of axis x in
    //! the output tensor, `length_origin` as length of the input tensor in axis x, and `length_resize` as length of the
    //! output tensor in axis x.
    //!
    //! |<--------------length---------->|
    //! |    0     |    1     |    2     |    3     |
    //! *          *          *          *
    //!
    //! x_origin = x_resized * (length_origin - 1) / (length_resize - 1)
    //!
    kALIGN_CORNERS = 0,

    //! |<--------------length--------------------->|
    //! |    0     |    1     |    2     |    3     |
    //! *          *          *          *
    //!
    //! x_origin = x_resized * (length_origin / length_resize)
    //!
    kASYMMETRIC = 1,

    //! |<--------------length--------------------->|
    //! |    0     |    1     |    2     |    3     |
    //!      *          *          *          *
    //!
    //! x_origin = (x_resized + 0.5) * (length_origin / length_resize) - 0.5
    //!
    kHALF_PIXEL = 2,
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in ResizeCoordinateTransformation enum.
//!
//! \see ResizeCoordinateTransformation
//!
template <>
struct EnumMaxImpl<ResizeCoordinateTransformation>
{
    static constexpr int32_t kVALUE = 3;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \enum ResizeSelector
//!
//! \brief The coordinate selector when resize to single pixel output.
//!
//! \see IResizeLayer::setSelectorForSinglePixel()
//!
enum class ResizeSelector : int32_t
{
    //! Use formula to map the original index.
    kFORMULA = 0,

    //! Select the upper left pixel.
    kUPPER = 1,
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in ResizeSelector enum.
//!
//! \see ResizeSelector
//!
template <>
struct EnumMaxImpl<ResizeSelector>
{
    static constexpr int32_t kVALUE = 2;
};
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \enum ResizeRoundMode
//!
//! \brief The rounding mode for nearest neighbor resize.
//!
//! \see IResizeLayer::setNearestRounding()
//!
enum class ResizeRoundMode : int32_t
{
    //! Round half up.
    kHALF_UP = 0,

    //! Round half down.
    kHALF_DOWN = 1,

    //! Round to floor.
    kFLOOR = 2,

    //! Round to ceil.
    kCEIL = 3,
};
|
||
|
|
|
||
|
|
namespace impl
{
//!
//! Maximum number of elements in ResizeRoundMode enum.
//!
//! \see ResizeRoundMode
//!
template <>
struct EnumMaxImpl<ResizeRoundMode>
{
    static constexpr int32_t kVALUE = 4;
};
} // namespace impl
|
||
|
|
|
||
|
|
//! \class IResizeLayer
|
||
|
|
//!
|
||
|
|
//! \brief A resize layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! Resize layer can be used for resizing a N-D tensor.
|
||
|
|
//!
|
||
|
|
//! Resize layer currently supports the following configurations:
|
||
|
|
//! - InterpolationMode::kNEAREST - resizes last `m` dimensions of N-D, where 0 < m <= min(8, N) and N > 0
|
||
|
|
//! - InterpolationMode::kLINEAR - resizes last `m` dimensions of N-D, where 0 < m <= min(3, N) and N > 0
|
||
|
|
//!
|
||
|
|
//! Default resize mode is InterpolationMode::kNEAREST.
|
||
|
|
//!
|
||
|
|
//! The coordinates in the output tensor are mapped to coordinates in the input tensor using a function set by calling
|
||
|
|
//! setCoordinateTransformation(). The default for all InterpolationMode settings (nearest, linear, bilinear, etc.) is
|
||
|
|
//! ResizeCoordinateTransformation::kASYMMETRIC.
|
||
|
|
//!
|
||
|
|
//! The resize layer provides two ways to resize tensor dimensions.
|
||
|
|
//! - Set output dimensions directly. It can be done for static as well as dynamic resize layer.
|
||
|
|
//! Static resize layer requires output dimensions to be known at build-time.
|
||
|
|
//! Dynamic resize layer requires output dimensions to be set as one of the input tensors.
|
||
|
|
//! - Set scales for resize. Each output dimension is calculated as floor(input dimension * scale).
|
||
|
|
//! Only static resize layer allows setting scales where the scales are known at build-time.
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, the following combinations of parameters are supported:
|
||
|
|
//!
|
||
|
|
//! - In kNEAREST mode:
|
||
|
|
//! * (ResizeCoordinateTransformation::kASYMMETRIC, ResizeSelector::kFORMULA, ResizeRoundMode::kFLOOR)
|
||
|
|
//! * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA, ResizeRoundMode::kHALF_DOWN)
|
||
|
|
//! * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA, ResizeRoundMode::kHALF_UP)
|
||
|
|
//!
|
||
|
|
//! - In kLINEAR mode:
|
||
|
|
//! * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA)
|
||
|
|
//! * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kUPPER)
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IResizeLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the output dimensions.
|
||
|
|
//!
|
||
|
|
//! \param dimensions The output dimensions. Number of output dimensions must be the same as the number of input
|
||
|
|
//! dimensions.
|
||
|
|
//!
|
||
|
|
//! If executing this layer on DLA, setOutputDimensions() is not supported.
|
||
|
|
//!
|
||
|
|
//! If there is a second input, i.e. resize layer is dynamic,
|
||
|
|
//! calling setOutputDimensions() is an error and does not update the
|
||
|
|
//! dimensions.
|
||
|
|
//!
|
||
|
|
//! Output dimensions can be specified directly, or via scale factors relative to input dimensions.
|
||
|
|
//! Scales for resize can be provided using setScales().
|
||
|
|
//!
|
||
|
|
//! \see setScales
|
||
|
|
//! \see getOutputDimensions
|
||
|
|
//!
|
||
|
|
void setOutputDimensions(Dims const& dimensions) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setOutputDimensions(dimensions);
|
||
|
|
}
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the output dimensions.
    //!
    //! \return The output dimensions.
    //!
    Dims getOutputDimensions() const noexcept
    {
        return mImpl->getOutputDimensions();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set the resize scales.
    //!
    //! \param scales An array of resize scales.
    //! \param nbScales Number of scales. Number of scales must be equal to the number of input dimensions.
    //!
    //! If executing this layer on DLA, there are three restrictions:
    //! 1) nbScales has to be exactly 4.
    //! 2) the first two elements in scales need to be exactly 1 (for unchanged batch and channel dimensions).
    //! 3) The last two elements in scales, representing the scale values along height and width dimensions,
    //! respectively, need to be integer values in the range of [1, 32] for kNEAREST mode and [1, 4] for kLINEAR.
    //! Example of DLA-supported scales: {1, 1, 2, 2}.
    //!
    //! If there is a second input, i.e. resize layer is dynamic,
    //! calling setScales() is an error and does not update the scales.
    //!
    //! Output dimensions are calculated as follows:
    //! outputDims[i] = floor(inputDims[i] * scales[i])
    //!
    //! Output dimensions can be specified directly, or via scale factors relative to input dimensions.
    //! Output dimensions can be provided directly using setOutputDimensions().
    //!
    //! \see setOutputDimensions
    //! \see getScales
    //!
    void setScales(float const* scales, int32_t nbScales) noexcept
    {
        mImpl->setScales(scales, nbScales);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were set.
    //!
    //! \param size The number of scales to get. If size != nbScales, no scales will be copied.
    //!
    //! \param scales Pointer to where to copy the scales. Scales will be copied only if
    //! size == nbScales and scales != nullptr.
    //!
    //! In case the size is not known consider using size = 0 and scales = nullptr. This method will return
    //! the number of resize scales.
    //!
    //! \return The number of resize scales i.e. nbScales if scales were set.
    //! Return -1 in case no scales were set or resize layer is used in dynamic mode.
    //!
    int32_t getScales(int32_t size, float* scales) const noexcept
    {
        return mImpl->getScales(size, scales);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set resize mode for an input tensor.
    //!
    //! Supported resize modes are Nearest Neighbor and Linear.
    //!
    //! \see InterpolationMode
    //!
    void setResizeMode(InterpolationMode interpolationMode) noexcept
    {
        mImpl->setResizeMode(interpolationMode);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get resize mode for an input tensor.
    //!
    //! \return The resize mode.
    //!
    InterpolationMode getResizeMode() const noexcept
    {
        return mImpl->getResizeMode();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor.
    //!
    //! Sets the input tensor for the given index. The index must be 0 for a static resize layer.
    //! A static resize layer is converted to a dynamic resize layer by calling setInput with an index 1.
    //! A dynamic resize layer cannot be converted back to a static resize layer.
    //!
    //! For a dynamic resize layer, the values 0 and 1 are valid.
    //! The indices in the dynamic case are as follows:
    //!
    //! - 0: Execution tensor to be resized.
    //! - 1: The output dimensions, as a 1D tensor of type Int32 or Int64.
    //!
    //! If this function is called with the value 1, then the function getNbInputs() changes
    //! from returning 1 to 2.
    //!
    using ILayer::setInput;
|
||
|
|
|
||
|
|
    //!
    //! \brief Set coordinate transformation function.
    //!
    //! The function maps a coordinate in the output tensor to a coordinate in the input tensor.
    //!
    //! Default function is ResizeCoordinateTransformation::kASYMMETRIC.
    //!
    //! \see ResizeCoordinateTransformation
    //!
    void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
    {
        mImpl->setCoordinateTransformation(coordTransform);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get coordinate transformation function.
    //!
    //! \return The coordinate transformation function.
    //!
    ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
    {
        return mImpl->getCoordinateTransformation();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set coordinate selector function when resized to single pixel.
    //!
    //! When resize to single pixel image, use this function to decide how to map the coordinate in the original
    //! image.
    //!
    //! Default is ResizeSelector::kFORMULA.
    //!
    //! \see ResizeSelector
    //!
    void setSelectorForSinglePixel(ResizeSelector selector) noexcept
    {
        mImpl->setSelectorForSinglePixel(selector);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the coordinate selector function when resized to single pixel.
    //!
    //! \return The selector function.
    //!
    ResizeSelector getSelectorForSinglePixel() const noexcept
    {
        return mImpl->getSelectorForSinglePixel();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set rounding mode for nearest neighbor resize.
    //!
    //! This value is used for nearest neighbor interpolation rounding. It is applied after coordinate transformation.
    //!
    //! Default is kFLOOR.
    //!
    //! \see ResizeRoundMode
    //!
    void setNearestRounding(ResizeRoundMode value) noexcept
    {
        mImpl->setNearestRounding(value);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get rounding mode for nearest neighbor resize.
    //!
    //! \return The rounding mode.
    //!
    ResizeRoundMode getNearestRounding() const noexcept
    {
        return mImpl->getNearestRounding();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set the coefficient 'A' used in cubic interpolation.
    //!
    //! Cubic uses the coefficient 'A' to calculate the weight of input pixels:
    //!
    //! <pre>
    //! x := The relative distance between the sampled pixels and the input coordinates.
    //!
    //! weight(x) := for |x| <= 1, ((A + 2) * x - (A + 3)) * x * x + 1,
    //!              for 1 < |x| < 2, ((A * x - 5 * A) * x + 8 * A) * x - 4 * A,
    //!              others 0;
    //! </pre>
    //!
    //! This attribute is valid only if "resize mode" is "cubic".
    //!
    //! The default value is -0.75.
    //!
    void setCubicCoeff(float A) noexcept
    {
        mImpl->setCubicCoeff(A);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the coefficient 'A' used in cubic interpolation.
    //!
    //! \see setCubicCoeff()
    //!
    float getCubicCoeff() const noexcept
    {
        return mImpl->getCubicCoeff();
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Set the state for excluding outside pixels.
    //!
    //! If set to true, the weight of sampling locations outside the input tensor will be set to 0, and the weights
    //! will be renormalized so that their sum is 1.0.
    //!
    //! The default value is false.
    //!
    void setExcludeOutside(bool excludeFlag) noexcept
    {
        mImpl->setExcludeOutside(excludeFlag);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the state for excluding outside pixels.
    //!
    //! \see setExcludeOutside()
    //!
    bool getExcludeOutside() const noexcept
    {
        return mImpl->getExcludeOutside();
    }
|
||
|
|
|
||
|
|
protected:
    //! Non-public destructor: instances are not directly deletable through this type.
    virtual ~IResizeLayer() noexcept = default;
    //! Versioned implementation object that the inline methods above forward to.
    apiv::VResizeLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \enum LoopOutput
//!
//! \brief Enum that describes kinds of loop outputs.
//!
enum class LoopOutput : int32_t
{
    //! Output value is value of tensor for last iteration.
    kLAST_VALUE = 0,

    //! Output value is concatenation of values of tensor for each iteration, in forward order.
    kCONCATENATE = 1,

    //! Output value is concatenation of values of tensor for each iteration, in reverse order.
    kREVERSE = 2
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in LoopOutput enum.
//!
//! \see LoopOutput
//!
template <>
constexpr inline int32_t EnumMax<LoopOutput>() noexcept
{
    return 3;
}
|
||
|
|
|
||
|
|
//!
//! \enum TripLimit
//!
//! \brief Enum that describes kinds of trip limits.
//!
enum class TripLimit : int32_t
{

    kCOUNT = 0, //!< Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
    kWHILE = 1  //!< Tensor is a scalar of type kBOOL. Loop terminates when value is false.
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in TripLimit enum.
//!
//! \see TripLimit
//!
template <>
constexpr inline int32_t EnumMax<TripLimit>() noexcept
{
    return 2;
}
|
||
|
|
|
||
|
|
class ILoop; //!< Forward declaration; the full definition appears later in this header.
|
||
|
|
|
||
|
|
//!
//! \class ILoopBoundaryLayer
//!
//! \brief This is a base class for Loop boundary layers.
//!
//! The loop boundary layers are used to define loops within a network, enabling the implementation
//! of recurrences. The boundary layers for a loop are created by class ILoop.
//!
//! There are four kinds of boundary layers.
//! * ITripLimitLayer: controls the number of loop iterations.
//! * IIteratorLayer: iterates over an input tensor.
//! * IRecurrenceLayer: returns an initial value or value from the previous loop iteration.
//! * ILoopOutputLayer: generates an output tensor from the loop iterations.
class ILoopBoundaryLayer : public ILayer
{
public:
    //!
    //! \brief Get a pointer to ILoop associated with this boundary layer.
    //!
    ILoop* getLoop() const noexcept
    {
        return mBoundary->getLoop();
    }

protected:
    virtual ~ILoopBoundaryLayer() noexcept = default;
    apiv::VLoopBoundaryLayer* mBoundary;
};
|
||
|
|
|
||
|
|
//!
//! \class IIfConditionalBoundaryLayer
//!
//! \brief This is a base class for Conditional boundary layers.
//!
//! Boundary layers are used to demarcate the boundaries of Conditionals.
//!
class IIfConditionalBoundaryLayer : public ILayer
{
public:
    //!
    //! \brief Get a pointer to the IIfConditional associated with this boundary layer.
    //!
    IIfConditional* getConditional() const noexcept
    {
        return mBoundary->getConditional();
    }

protected:
    virtual ~IIfConditionalBoundaryLayer() noexcept = default;
    apiv::VConditionalBoundaryLayer* mBoundary;
};
|
||
|
|
|
||
|
|
//!
//! \class IConditionLayer
//!
//! \brief This layer represents a condition input to an IIfConditional.
//!
//! \see IIfConditional::setCondition
//!
class IConditionLayer : public IIfConditionalBoundaryLayer
{
public:
protected:
    virtual ~IConditionLayer() noexcept = default;
    apiv::VConditionLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IIfConditionalOutputLayer
//!
//! \brief This layer represents an output of an IIfConditional.
//!
//! An IIfConditionalOutputLayer has two inputs and one output.
//!
//! \see IIfConditional::addOutput
//!
class IIfConditionalOutputLayer : public IIfConditionalBoundaryLayer
{
public:
protected:
    virtual ~IIfConditionalOutputLayer() noexcept = default;
    apiv::VConditionalOutputLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IIfConditionalInputLayer
//!
//! \brief This layer represents an input to an IIfConditional.
//!
//! \see IIfConditional::addInput
//!
class IIfConditionalInputLayer : public IIfConditionalBoundaryLayer
{
public:
protected:
    virtual ~IIfConditionalInputLayer() noexcept = default;
    apiv::VConditionalInputLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IIfConditional
//!
//! \brief Helper for constructing conditionally-executed subgraphs.
//!
//! An If-conditional conditionally executes part of the network according
//! to the following pseudo-code:
//!
//! If condition is true then:
//!     output = trueSubgraph(trueInputs);
//! Else
//!     output = falseSubgraph(falseInputs);
//! Emit output
//!
//! Condition is a 0D boolean tensor (representing a scalar).
//! trueSubgraph represents a network subgraph that is executed when condition evaluates to True.
//! falseSubgraph represents a network subgraph that is executed when condition evaluates to False.
//!
//! The following constraints apply to If-conditionals:
//! - Both the trueSubgraph and falseSubgraph must be defined.
//! - The number of output tensors in both subgraphs is the same.
//! - Corresponding output tensors from the true/false subgraphs have the same type and rank.
//!
//! The subgraphs may directly use tensors defined outside of the IIfConditional.
class IIfConditional : public INoCopy
{
public:
    //!
    //! \brief Set the condition tensor for this If-Conditional construct.
    //!
    //! \param condition The condition tensor that will determine which subgraph to execute.
    //!
    //! \p condition tensor must be a 0D execution tensor (scalar) with type DataType::kBOOL.
    //!
    //! \see IConditionLayer
    //!
    IConditionLayer* setCondition(ITensor& condition) noexcept
    {
        return mImpl->setCondition(condition);
    }

    //!
    //! \brief Add an If-conditional output.
    //!
    //! \param trueSubgraphOutput The output of the subgraph executed when the conditional evaluates to true.
    //! \param falseSubgraphOutput The output of the subgraph executed when the conditional evaluates to false.
    //!
    //! Each output layer of an IIfConditional represents a single output of either the true-subgraph or the
    //! false-subgraph of an IIfConditional, depending on which subgraph was executed.
    //!
    //! The ranks of the two tensors must be equal unless the condition is a build-time constant.
    //!
    //! \see IIfConditionalOutputLayer
    //!
    IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
    {
        return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
    }

    //!
    //! \brief Add an If-conditional input.
    //!
    //! \param input An input to the conditional that can be used by either or both of the conditional's subgraphs.
    //!
    //! \see IIfConditionalInputLayer
    //!
    IIfConditionalInputLayer* addInput(ITensor& input) noexcept
    {
        return mImpl->addInput(input);
    }

    //!
    //! \brief Set the name of the conditional.
    //!
    //! The name is used in error diagnostics.
    //! This method copies the name string.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }

    //!
    //! \brief Return the name of the conditional.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

protected:
    virtual ~IIfConditional() noexcept = default;
    apiv::VIfConditional* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IRecurrenceLayer
//!
//! \brief A recurrence layer in a network definition.
//!
//! The recurrence layer allows a loop iteration to compute a result from a value computed in the previous iteration.
//!
class IRecurrenceLayer : public ILoopBoundaryLayer
{
public:
    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! Sets the input tensor for the given index.
    //!
    //! For a recurrence layer, the values 0 and 1 are valid.
    //! The indices are as follows:
    //!
    //! - 0: The initial value of the output tensor. The value must come from outside the loop.
    //! - 1: The next value of the output tensor. The value usually comes from inside the loop, and must have the same
    //!      dimensions as input 0.
    //!
    //! If this function is called with the value 1, then the function getNbInputs() changes
    //! from returning 1 to 2.
    //!
    using ILayer::setInput;

protected:
    virtual ~IRecurrenceLayer() noexcept = default;
    apiv::VRecurrenceLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class ILoopOutputLayer
//!
//! \brief An ILoopOutputLayer is the sole way to get output from a loop.
//!
//! The first input tensor must be defined inside the loop; the output tensor is outside the loop.
//! The second input tensor, if present, must be defined outside the loop.
//!
//! If getLoopOutput() is kLAST_VALUE, a single input must be provided,
//! and that input must be from an IRecurrenceLayer in the same loop.
//!
//! If getLoopOutput() is kCONCATENATE or kREVERSE, a second input must be provided.
//! The second input must be a 0D shape tensor, defined before the loop commences,
//! that specifies the concatenation length of the output.
//!
//! The output tensor has j more dimensions than the input tensor, where
//! j == 0 if getLoopOutput() is kLAST_VALUE
//! j == 1 if getLoopOutput() is kCONCATENATE or kREVERSE.
//!
class ILoopOutputLayer : public ILoopBoundaryLayer
{
public:
    //!
    //! \brief Get which kind a loop output has.
    //!
    LoopOutput getLoopOutput() const noexcept
    {
        return mImpl->getLoopOutput();
    }

    //!
    //! \brief Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
    //!
    //! For example, if the input tensor has dimensions [b,c,d],
    //! and getLoopOutput() is kCONCATENATE, the output has four dimensions.
    //! Let a be the value of the second input.
    //! setAxis(0) causes the output to have dimensions [a,b,c,d].
    //! setAxis(1) causes the output to have dimensions [b,a,c,d].
    //! setAxis(2) causes the output to have dimensions [b,c,a,d].
    //! setAxis(3) causes the output to have dimensions [b,c,d,a].
    //! The default axis is 0.
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get axis being concatenated over.
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! Sets the input tensor for the given index. The index must be 0 for a kLAST_VALUE loop output layer.
    //! Loop output layer is converted to a kCONCATENATE or kREVERSE loop output layer by calling setInput with an
    //! index 1. A kCONCATENATE or kREVERSE loop output layer cannot be converted back to a kLAST_VALUE loop output
    //! layer.
    //!
    //! For a kCONCATENATE or kREVERSE loop output layer, the values 0 and 1 are valid.
    //! The indices in the kCONCATENATE or kREVERSE cases are as follows:
    //!
    //! - 0: Contribution to the output tensor. The contribution must come from inside the loop.
    //! - 1: The concatenation length scalar value, must come from outside the loop, as a 0D shape tensor of type Int32
    //!      or Int64.
    //!
    //! If this function is called with the value 1, then the function getNbInputs() changes
    //! from returning 1 to 2.
    //!
    using ILayer::setInput;

protected:
    virtual ~ILoopOutputLayer() noexcept = default;
    apiv::VLoopOutputLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class ITripLimitLayer
//!
//! \brief A layer that represents a trip-count limiter.
//!
//! The trip limit layer sets the execution condition for loops, using kCOUNT to define the number of iterations or
//! kWHILE for a conditional loop. A loop can have one of each kind of limit, in which case the loop exits when
//! the trip count is reached or the condition becomes false.
//!
//! See ILoop::addTripLimit().
//!
class ITripLimitLayer : public ILoopBoundaryLayer
{
public:
    //!
    //! \brief Get a trip limiter type.
    //!
    TripLimit getTripLimit() const noexcept
    {
        return mImpl->getTripLimit();
    }

protected:
    virtual ~ITripLimitLayer() noexcept = default;
    apiv::VTripLimitLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IIteratorLayer
//!
//! \brief A layer to do iterations.
//!
//! The iterator layer iterates over a tensor along the given axis and in the given direction.
//! It enables each loop iteration to inspect a different slice of the tensor.
//!
//! \see ILoop::addIterator()
//!
class IIteratorLayer : public ILoopBoundaryLayer
{
public:
    //!
    //! \brief Set axis to iterate over.
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get axis being iterated over.
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

    //!
    //! \brief Set iteration order to be reverse.
    //!
    //! For reverse=false, the layer is equivalent to addGather(tensor, I, 0) where I is a
    //! scalar tensor containing the loop iteration number.
    //! For reverse=true, the layer is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count
    //! computed from TripLimits of kind kCOUNT.
    //! The default is reverse=false.
    //!
    void setReverse(bool reverse) noexcept
    {
        mImpl->setReverse(reverse);
    }

    //!
    //! \brief Check if the iteration order is reverse.
    //!
    //! \return True if and only if reversing input.
    //!
    bool getReverse() const noexcept
    {
        return mImpl->getReverse();
    }

protected:
    virtual ~IIteratorLayer() noexcept = default;
    apiv::VIteratorLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class ILoop
//!
//! \brief Helper for creating a recurrent subgraph.
//!
//! An ILoop defines a loop within a network. It supports the implementation of recurrences,
//! which are crucial for iterative computations, such as RNNs for natural language processing and
//! time-series analysis.
//!
//! The subgraph may directly use tensors defined outside of the ILoop.
class ILoop : public INoCopy
{
public:
    //!
    //! \brief Create a recurrence layer for this loop with initialValue as its first input.
    //!
    //! IRecurrenceLayer requires exactly two inputs. The 2nd input must be added, via method
    //! IRecurrenceLayer::setInput(1,...) before an Engine can be built.
    //!
    IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
    {
        return mImpl->addRecurrence(initialValue);
    }

    //!
    //! \brief Add a trip-count limiter, based on the given tensor.
    //!
    //! There may be at most one kCOUNT and one kWHILE limiter for a loop.
    //! When both trip limits exist, the loop exits when the
    //! count is reached or condition is falsified.
    //! It is an error to not add at least one trip limiter.
    //!
    //! For kCOUNT, the input tensor must be available before the loop starts.
    //!
    //! For kWHILE, the input tensor must be the output of a subgraph that contains
    //! only layers that are not ITripLimitLayer, IIteratorLayer or ILoopOutputLayer.
    //! Any IRecurrenceLayers in the subgraph must belong to the same loop as the
    //! ITripLimitLayer. A trivial example of this rule is that the input to the kWHILE
    //! is the output of an IRecurrenceLayer for the same loop.
    //!
    ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept
    {
        return mImpl->addTripLimit(tensor, limit);
    }

    //!
    //! \brief Return layer that subscripts tensor by loop iteration.
    //!
    //! For reverse=false, this is equivalent to addGather(tensor, I, 0) where I is a
    //! scalar tensor containing the loop iteration number.
    //! For reverse=true, this is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count
    //! computed from TripLimits of kind kCOUNT.
    //!
    IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
    {
        return mImpl->addIterator(tensor, axis, reverse);
    }

    //!
    //! \brief Make an output for this loop, based on the given tensor.
    //!
    //! axis is the axis for concatenation (if using outputKind of kCONCATENATE or kREVERSE).
    //!
    //! If outputKind is kCONCATENATE or kREVERSE, a second input specifying the
    //! concatenation dimension must be added via method ILoopOutputLayer::setInput.
    //!
    ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
    {
        return mImpl->addLoopOutput(tensor, outputKind, axis);
    }

    //!
    //! \brief Set the name of the loop.
    //!
    //! The name is used in error diagnostics.
    //! This method copies the name string.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }

    //!
    //! \brief Return the name of the loop.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

protected:
    virtual ~ILoop() noexcept = default;
    apiv::VLoop* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class ISelectLayer
//!
//! \brief Select elements from two data tensors based on a condition tensor.
//!
//! The select layer makes elementwise selections from two data tensors based on a condition tensor,
//! behaving similarly to the `numpy.where` function with three parameters.
//! The three input tensors must share the same rank. Multidirectional broadcasting is supported.
//! The output tensor has the dimensions of the inputs AFTER applying the broadcast rule.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ISelectLayer : public ILayer
{
protected:
    virtual ~ISelectLayer() noexcept = default;
    apiv::VSelectLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \class IAssertionLayer
//!
//! \brief An assertion layer in a network
//!
//! The layer has a single input and no output. The input must be a boolean shape tensor.
//! If any element of the input is provably false at build time, the network is rejected.
//! If any element of the input is false at runtime for the supplied runtime dimensions,
//! an error occurs, much the same as if any other runtime error (e.g. using IShuffleLayer
//! to change the volume of a tensor) is handled.
//!
//! Asserting equality of input dimensions may help the optimizer.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IAssertionLayer : public ILayer
{
public:
    //!
    //! \brief Set the message to print if the assertion fails.
    //!
    //! The message is used in error diagnostics.
    //! This method copies the message string.
    //!
    //! \see getMessage()
    //!
    void setMessage(char const* message) noexcept
    {
        mImpl->setMessage(message);
    }

    //!
    //! \brief Return the assertion message.
    //!
    //! \see setMessage()
    //!
    char const* getMessage() const noexcept
    {
        return mImpl->getMessage();
    }

protected:
    virtual ~IAssertionLayer() noexcept = default;

    apiv::VAssertionLayer* mImpl;
};
|
||
|
|
|
||
|
|
//!
//! \enum FillOperation
//!
//! \brief Enumerates the tensor fill operations that may be performed by a fill layer.
//!
//! \see IFillLayer
//!
enum class FillOperation : int32_t
{
    //! Compute each value via an affine function of its indices.
    //! For example, suppose the parameters for the IFillLayer are:
    //!
    //! * Dimensions = [3,4]
    //! * Alpha = 1
    //! * Beta = [100,10]
    //!
    //! Element [i,j] of the output is Alpha + Beta[0]*i + Beta[1]*j.
    //! Thus the output matrix is:
    //!
    //!       1  11  21  31
    //!     101 111 121 131
    //!     201 211 221 231
    //!
    //! A static beta b is implicitly a 1D tensor, i.e. Beta = [b].
    kLINSPACE = 0,

    //! Randomly draw values from a uniform distribution.
    kRANDOM_UNIFORM = 1,

    //! Randomly draw values from a normal distribution.
    kRANDOM_NORMAL = 2
};
|
||
|
|
|
||
|
|
//!
//! Maximum number of elements in FillOperation enum.
//!
//! \see FillOperation
//!
template <>
constexpr inline int32_t EnumMax<FillOperation>() noexcept
{
    return 3;
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IFillLayer
|
||
|
|
//!
|
||
|
|
//! \brief Generate a tensor according to a specified mode.
|
||
|
|
//!
|
||
|
|
//! The fill layer generates a tensor with values that are drawn from a random distribution
|
||
|
|
//! or an affine function of their indices, as specified by the FillMode.
|
||
|
|
//!
|
||
|
|
//! When an IFillLayer is initially added to a network, all of its parameters are static.
|
||
|
|
//! Each parameter may be changed to dynamic by setting a corresponding input.
|
||
|
|
//! A parameter is considered dynamic even if that input is the output of an IConstantLayer.
|
||
|
|
//! The inputs for each parameter are:
|
||
|
|
//!
|
||
|
|
//! - 0: Dimensions
|
||
|
|
//! - 1: Alpha
|
||
|
|
//! - 2: Beta
|
||
|
|
//!
|
||
|
|
//! The parameter Dimensions describes the shape of the output. If the Dimensions input is provided,
|
||
|
|
//! it must be a 1D tensor of type Int32 or Int64 whose length is computable by constant folding.
|
||
|
|
//!
|
||
|
|
//! The meanings of Alpha and Beta depend on the mode, as described in IFillLayer::setAlpha(),
|
||
|
|
//! IFillLayer::setBeta(), and IFillLayer::setInput(). Parameters Alpha and Beta must both be static
|
||
|
|
//! or both be dynamic.
|
||
|
|
//!
|
||
|
|
//! An IFillLayer can produce a shape tensor if the following restrictions are met:
|
||
|
|
//!
|
||
|
|
//! * The FillOperation is kLINSPACE.
|
||
|
|
//! * The output has type Int32, Int64, or Float.
|
||
|
|
//! * The volume of the output is within the volume limit imposed on shape tensors.
|
||
|
|
//! * If input 0 exists, the values of input 0 must be computable by constant folding.
|
||
|
|
//!
|
||
|
|
//! \see FillOperation
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IFillLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
    //!
    //! \brief Set the output tensor's dimensions.
    //!
    //! \param dimensions The output tensor's dimensions.
    //!
    //! If the first input had been used to create this layer, that input is reset to null by this method.
    //!
    //! \see getDimensions
    //!
    void setDimensions(Dims const& dimensions) noexcept
    {
        mImpl->setDimensions(dimensions);
    }
|
||
|
|
|
||
|
|
    //!
    //! \brief Get the output tensor's dimensions.
    //!
    //! \return The output tensor's dimensions, or an invalid Dims structure.
    //!
    //! If the first input is present and non-null,
    //! this function returns a Dims with nbDims = -1.
    //!
    //! \see setDimensions
    //!
    Dims getDimensions() const noexcept
    {
        return mImpl->getDimensions();
    }
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the fill operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see getOperation(), FillOperation
|
||
|
|
//!
|
||
|
|
void setOperation(FillOperation op) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setOperation(op);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the fill operation for the layer.
|
||
|
|
//!
|
||
|
|
//! \see setOperation(), FillOperation
|
||
|
|
//!
|
||
|
|
FillOperation getOperation() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getOperation();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the alpha parameter.
|
||
|
|
//!
|
||
|
|
//! \param alpha has different meanings for each operator:
|
||
|
|
//!
|
||
|
|
//! Operation | Usage
|
||
|
|
//! kLINSPACE | the start value, defaults to 0.0;
|
||
|
|
//! kRANDOM_UNIFORM | the minimum value, defaults to 0.0;
|
||
|
|
//! kRANDOM_NORMAL | the mean of the normal distribution, default is 0.0;
|
||
|
|
//!
|
||
|
|
//! If input 1 exists, it is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getAlpha, setAlphaInt64
|
||
|
|
//
|
||
|
|
void setAlpha(double alpha) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAlpha(alpha);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the value of alpha parameter.
|
||
|
|
//!
|
||
|
|
//! \return A double value of alpha.
|
||
|
|
//!
|
||
|
|
//! If the second input is present and non-null,
|
||
|
|
//! this function returns -1.0.
|
||
|
|
//!
|
||
|
|
//! \see setAlpha
|
||
|
|
//!
|
||
|
|
double getAlpha() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAlpha();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the beta parameter.
|
||
|
|
//!
|
||
|
|
//! \param beta has different meanings for each operator:
|
||
|
|
//!
|
||
|
|
//! Operation | Usage
|
||
|
|
//! kLINSPACE | the delta value, defaults to 1.0;
|
||
|
|
//! kRANDOM_UNIFORM | the maximal value, defaults to 1.0;
|
||
|
|
//! kRANDOM_NORMAL | the standard deviation of the normal distribution, default is 1.0;
|
||
|
|
//!
|
||
|
|
//! If input 2 exists, it is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getBeta
|
||
|
|
//!
|
||
|
|
void setBeta(double beta) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setBeta(beta);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the value of beta parameter.
|
||
|
|
//!
|
||
|
|
//! \return A double value of beta.
|
||
|
|
//!
|
||
|
|
//! If the third input is present and non-null,
|
||
|
|
//! this function returns -1.0.
|
||
|
|
//!
|
||
|
|
//! \see setBeta, setBetaInt64
|
||
|
|
//!
|
||
|
|
double getBeta() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBeta();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Replace an input of this layer with a specific tensor.
|
||
|
|
//!
|
||
|
|
//! \param index the index of the input to set.
|
||
|
|
//! \param tensor the new input tensor
|
||
|
|
//!
|
||
|
|
//! The three inputs correspond to these setters of IFillLayer:
|
||
|
|
//!
|
||
|
|
//! - 0: setDimensions
|
||
|
|
//! - 1: setAlpha
|
||
|
|
//! - 2: setBeta
|
||
|
|
//!
|
||
|
|
//! The following descriptions give more intuitive names for the inputs.
|
||
|
|
//!
|
||
|
|
//! Indices for kLINSPACE are:
|
||
|
|
//!
|
||
|
|
//! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
|
||
|
|
//! - 1: Start, a scalar, specifies the start value.
|
||
|
|
//! - 2: Delta, a 1D tensor, specifies the delta value for each dimension.
|
||
|
|
//!
|
||
|
|
//! Indices for kRANDOM_UNIFORM are:
|
||
|
|
//!
|
||
|
|
//! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
|
||
|
|
//! - 1: Minimum, a scalar, specifies the minimum random value.
|
||
|
|
//! - 2: Maximum, a scalar, specifies the maximal random value.
|
||
|
|
//!
|
||
|
|
//! Indices for kRANDOM_NORMAL are:
|
||
|
|
//!
|
||
|
|
//! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
|
||
|
|
//! - 1: Mean, a scalar, specifies the mean of the normal distribution,.
|
||
|
|
//! - 2: Scale, a scalar, specifies the standard deviation of the normal distribution.
|
||
|
|
//!
|
||
|
|
//! Using the corresponding setter resets the input to null.
|
||
|
|
//!
|
||
|
|
//! If either inputs 1 or 2 is non-null, then both must be non-null and have the same data type.
|
||
|
|
//!
|
||
|
|
//! If this function is called for an index greater or equal to getNbInputs(),
|
||
|
|
//! then afterwards getNbInputs() returns index + 1, and any missing intervening
|
||
|
|
//! inputs are set to null.
|
||
|
|
//!
|
||
|
|
using ILayer::setInput;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the alpha parameter with int64 datatype.
|
||
|
|
//!
|
||
|
|
//! \param alpha has different meanings for each operator:
|
||
|
|
//!
|
||
|
|
//! Operation | Usage
|
||
|
|
//! kLINSPACE | the start value, defaults to 0;
|
||
|
|
//! kRANDOM_UNIFORM | the minimum value, defaults to 0;
|
||
|
|
//! kRANDOM_NORMAL | the mean of the normal distribution, default is 0;
|
||
|
|
//!
|
||
|
|
//! If a third input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getAlphaInt64
|
||
|
|
//
|
||
|
|
void setAlphaInt64(int64_t alpha) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAlphaInt64(alpha);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the value of alpha parameter with int64 datatype.
|
||
|
|
//!
|
||
|
|
//! \return A int64 value of alpha.
|
||
|
|
//!
|
||
|
|
//! If the second input is present and non-null,
|
||
|
|
//! this function returns -1.
|
||
|
|
//!
|
||
|
|
//! \see setAlphaInt64
|
||
|
|
//!
|
||
|
|
int64_t getAlphaInt64() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAlphaInt64();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the beta parameter with int64 datatype.
|
||
|
|
//!
|
||
|
|
//! \param beta has different meanings for each operator:
|
||
|
|
//!
|
||
|
|
//! Operation | Usage
|
||
|
|
//! kLINSPACE | the delta value, defaults to 1;
|
||
|
|
//! kRANDOM_UNIFORM | the maximal value, defaults to 1;
|
||
|
|
//! kRANDOM_NORMAL | the standard deviation of the normal distribution, default is 1;
|
||
|
|
//!
|
||
|
|
//! If a third input had been used to create this layer, that input is reset to null by this method.
|
||
|
|
//!
|
||
|
|
//! \see getBetaInt64
|
||
|
|
//!
|
||
|
|
void setBetaInt64(int64_t beta) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setBetaInt64(beta);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the value of beta parameter with int64 datatype.
|
||
|
|
//!
|
||
|
|
//! \return A int64 value of beta.
|
||
|
|
//!
|
||
|
|
//! If the third input is present and non-null,
|
||
|
|
//! this function returns -1.0.
|
||
|
|
//!
|
||
|
|
//! \see setBetaInt64
|
||
|
|
//!
|
||
|
|
int64_t getBetaInt64() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBetaInt64();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return true if alpha/beta have type int64, false if they have type double.
|
||
|
|
//!
|
||
|
|
bool isAlphaBetaInt64() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->isAlphaBetaInt64();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the fill layer output type.
|
||
|
|
//!
|
||
|
|
//! \param toType The DataType of the output tensor.
|
||
|
|
//!
|
||
|
|
//! Set the output type of the fill layer. Valid values are DataType::kFLOAT, DataType::kINT32,
|
||
|
|
//! and DataType::kINT64.
|
||
|
|
//! If the network is strongly typed, setToType must be used to set the output type, and use of setOutputType
|
||
|
|
//! is an error. Otherwise, types passed to setOutputType and setToType must be the same.
|
||
|
|
//!
|
||
|
|
//! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
|
||
|
|
//!
|
||
|
|
void setToType(DataType toType) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setToType(toType);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the fill layer output type.
|
||
|
|
//!
|
||
|
|
//! \return toType parameter set during layer creation or by setToType().
|
||
|
|
//! The return value is the output type of the fill layer.
|
||
|
|
//! The default value is DataType::kFLOAT.
|
||
|
|
//!
|
||
|
|
DataType getToType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getToType();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IFillLayer() noexcept = default;
|
||
|
|
apiv::VFillLayer* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IQuantizeLayer
|
||
|
|
//!
|
||
|
|
//! \brief A Quantize layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! This layer accepts a floating-point data input tensor, and uses the scale and zeroPt inputs to
|
||
|
|
//! quantize the data according to:
|
||
|
|
//! \p output = clamp(round(\p input / \p scale) + \p zeroPt)
|
||
|
|
//!
|
||
|
|
//! Rounding type is rounding-to-nearest ties-to-even (https://en.wikipedia.org/wiki/Rounding#Round_half_to_even).
|
||
|
|
//! Clamping range according to data type:
|
||
|
|
//! - FP8: [-448, 448]
|
||
|
|
//! - INT4: [-8, 7]
|
||
|
|
//! - INT8: [-128, 127]
|
||
|
|
//!
|
||
|
|
//! The first input (index 0) is the tensor to be quantized.
|
||
|
|
//! The second (index 1) and third (index 2) are the scale and zero point respectively.
|
||
|
|
//! \p scale and \p zeroPt should have identical dimensions, and rank lower or equal to 2.
|
||
|
|
//!
|
||
|
|
//! The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must match the
|
||
|
|
//! output data type. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is
|
||
|
|
//! supported.
|
||
|
|
//! The \p scale value must be a scalar for per-tensor quantization, a 1D tensor for per-channel quantization, or the
|
||
|
|
//! same rank as the input tensor for block quantization. All \p scale coefficients must have strictly positive values.
|
||
|
|
//! The size of the 1D \p scale tensor must match the size of the quantization axis. For block quantization, the shape
|
||
|
|
//! of \p scale tensor must match the shape of the input, except for the blocking dimension (the last or second to last
|
||
|
|
//! dimension). The size of \p zeroPt must match the size of \p scale.
|
||
|
|
//!
|
||
|
|
//! The subgraph which terminates with the \p zeroPt tensor must be a build-time constant containing only zeros.
|
||
|
|
//! The output type, if constrained, must be constrained to DataType::kINT8, DataType::kFP8, DataType::kINT4 or
|
||
|
|
//! DataType::kFP4. The input type, if constrained, must be constrained to DataType::kFLOAT, DataType::kHALF, or
|
||
|
|
//! DataType::kBF16. The output size is the same as the input size. The quantization axis is in reference to the input
|
||
|
|
//! tensor's dimensions.
|
||
|
|
//!
|
||
|
|
//! IQuantizeLayer supports DataType::kFLOAT, DataType::kHALF, or DataType::kBF16 precision and will default to
|
||
|
|
//! DataType::kFLOAT precision during instantiation. For strongly typed networks, if the scale data type is
|
||
|
|
//! DataType::kHALF or DataType::kBF16, it must match the input data type. For MXFP8 quantization, the \p scale
|
||
|
|
//! data type must be DataType::kE8M0.
|
||
|
|
//!
|
||
|
|
//! IQuantizeLayer supports DataType::kINT8, DataType::kFP8, DataType::kINT4 or DataType::kFP4 output.
|
||
|
|
//!
|
||
|
|
//! As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a
|
||
|
|
//! single scale coefficient (referred to as per-tensor quantization):
|
||
|
|
//! For each n in N:
|
||
|
|
//! For each c in C:
|
||
|
|
//! For each h in H:
|
||
|
|
//! For each w in W:
|
||
|
|
//! output[n,c,h,w] = clamp(round(\p input[n,c,h,w] / \p scale) + \p zeroPt)
|
||
|
|
//!
|
||
|
|
//! Per-channel quantization is supported only for weight inputs. Thus, Activations cannot be quantized per-channel.
|
||
|
|
//! As an example of per-channel operation, imagine a 4D KCRS weights input and K (dimension 0) as the quantization
|
||
|
|
//! axis. The scale is an array of coefficients, and must have the same size as the quantization axis.
|
||
|
|
//! For each k in K:
|
||
|
|
//! For each c in C:
|
||
|
|
//! For each r in R:
|
||
|
|
//! For each s in S:
|
||
|
|
//! output[k,c,r,s] = clamp(round(\p input[k,c,r,s] / \p scale[k]) + \p zeroPt[k])
|
||
|
|
//!
|
||
|
|
//! Block quantization is supported for input types DataType::kFP4, DataType::kFP8 and DataType::kINT4.
|
||
|
|
//! As an example of blocked operation, imagine a 2D RS input with R (dimension 0) as the blocking axis and B as the
|
||
|
|
//! block size. The scale is a 2D array of coefficients, with dimensions (R//B, S).
|
||
|
|
//! For each r in R:
|
||
|
|
//! For each s in S:
|
||
|
|
//! output[r,s] = clamp(round(\p input[r,s] / \p scale[r//B, s]) + \p zeroPt[r//B, s])
|
||
|
|
//!
|
||
|
|
//! \note Only symmetric quantization is supported.
|
||
|
|
//! \note Currently the only allowed build-time constant \p zeroPt subgraphs are:
|
||
|
|
//! 1. Constant -> Quantize
|
||
|
|
//! 2. Constant -> Cast -> Quantize
|
||
|
|
//!
|
||
|
|
//! \note The input tensor for this layer must not be a scalar.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IQuantizeLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Get the quantization axis.
|
||
|
|
//!
|
||
|
|
//! \return axis parameter set by setAxis().
|
||
|
|
//! The return value is the index of the quantization axis in the input tensor's dimensions.
|
||
|
|
//! A value of -1 indicates per-tensor quantization.
|
||
|
|
//! The default value is -1.
|
||
|
|
//!
|
||
|
|
int32_t getAxis() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAxis();
|
||
|
|
}
|
||
|
|
//!
|
||
|
|
//! \brief Set the quantization axis.
|
||
|
|
//!
|
||
|
|
//! Set the index of the quantization axis (with reference to the input tensor's dimensions).
|
||
|
|
//! The axis must be a valid axis if the scale tensor has more than one coefficient.
|
||
|
|
//! The axis value is used only for per-axis (per-channel) quantization.
|
||
|
|
//!
|
||
|
|
void setAxis(int32_t axis) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAxis(axis);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! \see getBlockShape()
|
||
|
|
//! Allowed values are positive values and -1 which denotes a fully blocked dimension.
|
||
|
|
//! Returns true if the block shape was set successfully, false if the block shape is invalid.
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//!
|
||
|
|
bool setBlockShape(Dims const& blockShape) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setBlockShape(blockShape);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//! \see setBlockShape()
|
||
|
|
//!
|
||
|
|
TRT_NODISCARD Dims getBlockShape() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBlockShape();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the Quantize layer output type.
|
||
|
|
//!
|
||
|
|
//! \param toType The DataType of the output tensor.
|
||
|
|
//!
|
||
|
|
//! Set the output type of the quantize layer. Valid values are DataType::kINT8, DataType::kFP8, DataType::kINT4 and
|
||
|
|
//! DataType::kFP4. If the network is strongly typed, setToType must be used to set the output type, and use of
|
||
|
|
//! setOutputType is an error. Otherwise, types passed to setOutputType and setToType must be the same.
|
||
|
|
//!
|
||
|
|
//! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
|
||
|
|
//!
|
||
|
|
void setToType(DataType toType) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setToType(toType);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the Quantize layer output type.
|
||
|
|
//!
|
||
|
|
//! \return toType parameter set during layer creation or by setToType().
|
||
|
|
//! The return value is the output type of the quantize layer.
|
||
|
|
//! The default value is DataType::kINT8.
|
||
|
|
//!
|
||
|
|
DataType getToType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getToType();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IQuantizeLayer() noexcept = default;
|
||
|
|
apiv::VQuantizeLayer* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IDequantizeLayer
|
||
|
|
//!
|
||
|
|
//! \brief A Dequantize layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! This layer accepts a quantized type input tensor, and uses the configured scale and zeroPt inputs to
|
||
|
|
//! dequantize the input according to:
|
||
|
|
//! \p output = (\p input - \p zeroPt) * \p scale
|
||
|
|
//!
|
||
|
|
//! The first input (index 0) is the tensor to be dequantized.
|
||
|
|
//! The second (index 1) and third (index 2) are the scale and zero point respectively.
|
||
|
|
//! \p scale and \p zeroPt should have identical dimensions, and a rank that is lower or equal to 2.
|
||
|
|
//!
|
||
|
|
//! The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be identical to
|
||
|
|
//! the input's data type. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is
|
||
|
|
//! supported.
|
||
|
|
//! The \p scale value must be a scalar for per-tensor quantization, a 1D tensor for per-channel quantization, or the
|
||
|
|
//! same rank as the input tensor for block quantization. All \p scale coefficients must have strictly positive values.
|
||
|
|
//! The size of the 1D \p scale tensor must match the size of the quantization axis. For block quantization, the shape
|
||
|
|
//! of \p scale tensor must match the shape of the input, except for one dimension (the last or second to last
|
||
|
|
//! dimension) in which blocking occurs. The size of \p zeroPt must match the size of \p scale.
|
||
|
|
//!
|
||
|
|
//! The subgraph which terminates with the \p zeroPt tensor must be a build-time constant containing only zeros.
|
||
|
|
//! The output type, if constrained, must be constrained to DataType::kFLOAT, DataType::kHALF, or DataType::kBF16. The
|
||
|
|
//! input type, if constrained, must be constrained to DataType::kINT8, DataType::kFP8, DataType::kINT4 or
|
||
|
|
//! DataType::kFP4. The output size is the same as the input size. The quantization axis is in reference to the input
|
||
|
|
//! tensor's dimensions.
|
||
|
|
//!
|
||
|
|
//! IDequantizeLayer supports DataType::kINT8 (default), DataType::kFP8, DataType::kINT4 or DataType::kFP4. For strongly
|
||
|
|
//! typed networks, \p input data type must be the same as \p zeroPt data type.
|
||
|
|
//!
|
||
|
|
//! IDequantizeLayer supports DataType::kFLOAT, DataType::kHALF, or DataType::kBF16 output. The output data type must
|
||
|
|
//! be configured explicitly using \p setToType.
|
||
|
|
//!
|
||
|
|
//! As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a
|
||
|
|
//! single scale coefficient (referred to as per-tensor quantization):
|
||
|
|
//! For each n in N:
|
||
|
|
//! For each c in C:
|
||
|
|
//! For each h in H:
|
||
|
|
//! For each w in W:
|
||
|
|
//! output[n,c,h,w] = (\p input[n,c,h,w] - \p zeroPt) * \p scale
|
||
|
|
//!
|
||
|
|
//! Per-channel dequantization is supported only for input that is rooted at an IConstantLayer (i.e. weights).
|
||
|
|
//! Activations cannot be quantized per-channel. As an example of per-channel operation, imagine a 4D KCRS weights input
|
||
|
|
//! and K (dimension 0) as the quantization axis. The scale is an array of coefficients, which is the same size as the
|
||
|
|
//! quantization axis.
|
||
|
|
//! For each k in K:
|
||
|
|
//! For each c in C:
|
||
|
|
//! For each r in R:
|
||
|
|
//! For each s in S:
|
||
|
|
//! output[k,c,r,s] = (\p input[k,c,r,s] - \p zeroPt[k]) * \p scale[k]
|
||
|
|
//!
|
||
|
|
//! Block dequantization is supported for input types DataType::kFP4, DataType::kFP8 and DataType::kINT4.
|
||
|
|
//! As an example of blocked operation, imagine a 2D RS input with R (dimension 0) as the blocking axis and B as the
|
||
|
|
//! block size. The scale is a 2D array of coefficients, with dimensions (R//B, S).
|
||
|
|
//! For each r in R:
|
||
|
|
//! For each s in S:
|
||
|
|
//! output[r,s] = (\p input[r,s] - \p zeroPt[r//B, s]) * \p scale[r//B, s]
|
||
|
|
//!
|
||
|
|
//! \note Only symmetric quantization is supported.
|
||
|
|
//! \note Currently the only allowed build-time constant \p zeroPt subgraphs are:
|
||
|
|
//! 1. Constant -> Quantize
|
||
|
|
//! 2. Constant -> Cast -> Quantize
|
||
|
|
//!
|
||
|
|
//! \note The input tensor for this layer must not be a scalar.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IDequantizeLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Get the quantization axis.
|
||
|
|
//!
|
||
|
|
//! \return axis parameter set by setAxis().
|
||
|
|
//! The return value is the index of the quantization axis in the input tensor's dimensions.
|
||
|
|
//! A value of -1 indicates per-tensor quantization.
|
||
|
|
//! The default value is -1.
|
||
|
|
//!
|
||
|
|
int32_t getAxis() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAxis();
|
||
|
|
}
|
||
|
|
//!
|
||
|
|
//! \brief Set the quantization axis.
|
||
|
|
//!
|
||
|
|
//! Set the index of the quantization axis (with reference to the input tensor's dimensions).
|
||
|
|
//! The axis must be a valid axis if the scale tensor has more than one coefficient.
|
||
|
|
//! The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization).
|
||
|
|
//!
|
||
|
|
void setAxis(int32_t axis) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAxis(axis);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! \param blockShape The shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! Set the shape of the quantization block.
|
||
|
|
//! Allowed values are positive values and -1 which denotes a fully blocked dimension.
|
||
|
|
//! Returns true if the block shape was set successfully, false if the block shape is invalid.
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//!
|
||
|
|
//! \see getBlockShape()
|
||
|
|
//!
|
||
|
|
bool setBlockShape(Dims const& blockShape) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setBlockShape(blockShape);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//! \see setBlockShape()
|
||
|
|
//!
|
||
|
|
TRT_NODISCARD Dims getBlockShape() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBlockShape();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the Dequantize layer output type.
|
||
|
|
//!
|
||
|
|
//! \param toType The DataType of the output tensor.
|
||
|
|
//!
|
||
|
|
//! Set the output type of the dequantize layer. Valid values are DataType::kFLOAT, DataType::kHALF and
|
||
|
|
//! DataType::kBF16. If the network is strongly typed, setToType must be used to set the output type, and use of
|
||
|
|
//! setOutputType is an error. Otherwise, types passed to setOutputType and setToType must be the same.
|
||
|
|
//!
|
||
|
|
//! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
|
||
|
|
//!
|
||
|
|
void setToType(DataType toType) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setToType(toType);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the Dequantize layer output type.
|
||
|
|
//!
|
||
|
|
//! \return toType parameter set during layer creation or by setToType().
|
||
|
|
//! The return value is the output type of the quantize layer.
|
||
|
|
//! The default value is DataType::kFLOAT.
|
||
|
|
//!
|
||
|
|
DataType getToType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getToType();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IDequantizeLayer() noexcept = default;
|
||
|
|
apiv::VDequantizeLayer* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IDynamicQuantizeLayer
|
||
|
|
//!
|
||
|
|
//! \brief A network layer to perform dynamic quantization.
|
||
|
|
//!
|
||
|
|
//! This layer accepts a floating-point input tensor and computes the block scale factors needed to
|
||
|
|
//! quantize the input's data. It outputs the quantized tensor as its first output and
|
||
|
|
//! the scale factors as its second output.
|
||
|
|
//!
|
||
|
|
//! Use ILayer::setInput to add an input for the double-quantization scale factor.
|
||
|
|
//!
|
||
|
|
//! \note Only symmetric quantization is supported.
|
||
|
|
//! \note The input tensor for this layer must not be a scalar.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the
|
||
|
|
//! API and ABI.
|
||
|
|
//!
|
||
|
|
class IDynamicQuantizeLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Append or replace an input of this layer with a specific tensor
|
||
|
|
//!
|
||
|
|
//! \param index the index of the input to modify.
|
||
|
|
//! \param tensor the new input tensor
|
||
|
|
//!
|
||
|
|
//! Input 0 is the input activation tensor.
|
||
|
|
//! Input 1 is the double-quantization scale factor. This scale is used to quantize the
|
||
|
|
//! dynamically computed high-precision scale factors that are used to quantize the
|
||
|
|
//! activation data. Currently this input must be a positive scalar (a 0D tensor).
|
||
|
|
//!
|
||
|
|
using ILayer::setInput;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set DynamicQuantizeLayer's quantized output type.
|
||
|
|
//!
|
||
|
|
//! \param toType The data type of the quantized output tensor.
|
||
|
|
//!
|
||
|
|
//! Set the type of the dynamic quantization layer's quantized output.If the network is strongly typed, setToType
|
||
|
|
//! must be used to set the output type, and use of setOutputType is an error. Otherwise, types passed to
|
||
|
|
//! setOutputType and setToType must be the same.
|
||
|
|
//! Valid values for \p toType are DataType::kFP4 (NVFP4 quantization) and DataType::kFP8 (MXFP8 quantization).
|
||
|
|
//!
|
||
|
|
//! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
|
||
|
|
//!
|
||
|
|
void setToType(DataType toType) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setToType(toType);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return DynamicQuantizeLayer's quantized output type.
|
||
|
|
//!
|
||
|
|
//! \return toType parameter set during layer creation or by setToType().
|
||
|
|
//!
|
||
|
|
//! The return value is the type of the quantized output tensor.
|
||
|
|
//! The default value is DataType::kFP4.
|
||
|
|
//!
|
||
|
|
DataType getToType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getToType();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the data type of the scale factors used to quantize the data.
|
||
|
|
//!
|
||
|
|
//! \param scaleType The scale factors data type.
|
||
|
|
//!
|
||
|
|
//! Set the scale-factors type.
|
||
|
|
//! Valid values are DataType::kFP8, DataType::kE8M0 or DataType::kFLOAT.
|
||
|
|
//!
|
||
|
|
void setScaleType(DataType scaleType) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setScaleType(scaleType);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the scale factors data type.
|
||
|
|
//!
|
||
|
|
//! \return scaleType parameter set during layer creation or by setScaleType().
|
||
|
|
//!
|
||
|
|
//! The return value is the type of the scale factors used to quantize the dynamic data.
|
||
|
|
//! The default value is DataType::kFP8.
|
||
|
|
//!
|
||
|
|
DataType getScaleType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getScaleType();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the axis along which block quantization occurs.
|
||
|
|
//!
|
||
|
|
//! The axis must be the last dimension or second to last dimension.
|
||
|
|
//! The input's shape along the axis must be constant.
|
||
|
|
//!
|
||
|
|
//! \see getAxis()
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED void setAxis(int32_t axis) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setAxis(axis);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the axis along which blocking occurs.
|
||
|
|
//!
|
||
|
|
//! \see setAxis()
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED int32_t getAxis() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAxis();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the size of the quantization block.
|
||
|
|
//!
|
||
|
|
//! Note: The block size must divide the input in the blocked axis without remainder.
|
||
|
|
//! Valid values are 16 (NVFP4 quantization) and 32 (MXFP8 quantization).
|
||
|
|
//!
|
||
|
|
//! \see getBlockSize()
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setBlockSize(size);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the size of the quantization block.
|
||
|
|
//!
|
||
|
|
//! \see setBlockSize()
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED int32_t getBlockSize() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBlockSize();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! Note: The block shape rank must match the input rank.
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//!
|
||
|
|
//! \see getBlockShape()
|
||
|
|
//!
|
||
|
|
void setBlockShape(Dims const& blockShape) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setBlockShape(blockShape);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the shape of the quantization block.
|
||
|
|
//!
|
||
|
|
//! The default value is empty Dims.
|
||
|
|
//!
|
||
|
|
//! \see setBlockShape()
|
||
|
|
//!
|
||
|
|
Dims getBlockShape() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBlockShape();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IDynamicQuantizeLayer() noexcept = default;
|
||
|
|
apiv::VDynamicQuantizeLayer* mImpl;
|
||
|
|
};
|
||
|
|
|
//!
//! \class IEinsumLayer
//!
//! \brief An Einsum layer in a network
//!
//! This layer implements a summation over the elements of the inputs along dimensions specified by the equation
//! parameter, based on the Einstein summation convention.
//! The layer can have one or more inputs of rank >= 0. All the inputs must have type DataType::kFLOAT
//! or DataType::kHALF, not necessarily the same. There is one output of type DataType::kFLOAT.
//! The shape of the output tensor is determined by the equation.
//!
//! The equation specifies ASCII lower-case letters for each dimension in the inputs in the same order as the
//! dimensions, separated by comma for each input. The dimensions labeled with the same subscript must match or be
//! broadcastable. Repeated subscript labels in one input take the diagonal. Repeating a label across multiple inputs
//! means that those axes will be multiplied. Omitting a label from the output means values along those axes will be
//! summed. In implicit mode, the indices which appear once in the expression will be part of the output in increasing
//! alphabetical order. In explicit mode, the output can be controlled by specifying output subscript labels by adding
//! an arrow ('->') followed by subscripts for the output.
//! For example, "ij,jk->ik" is equivalent to "ij,jk".
//! Ellipsis ('...') can be used in place of subscripts to broadcast the dimensions.
//! See the TensorRT Developer Guide for more details on equation syntax.
//!
//! Many common operations can be expressed using the Einsum equation.
//! For example:
//! Matrix Transpose:             ij->ji
//! Sum:                          ij->
//! Matrix-Matrix Multiplication: ik,kj->ij
//! Dot Product:                  i,i->
//! Matrix-Vector Multiplication: ik,k->i
//! Batch Matrix Multiplication:  ijk,ikl->ijl
//! Batch Diagonal:               ...ii->...i
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IEinsumLayer : public ILayer
{
public:
    //!
    //! \brief Set the equation.
    //! The equation is a comma-separated list of subscript labels, where each label refers to a
    //! dimension of the corresponding tensor.
    //!
    //! \param equation The Einsum equation string, e.g. "ij,jk->ik".
    //!
    //! \return true if the equation was syntactically valid and set successfully, false otherwise.
    //!
    //! \see getEquation()
    //!
    bool setEquation(char const* equation) noexcept
    {
        return mImpl->setEquation(equation);
    }

    //!
    //! \brief Return the equation.
    //!
    //! \see setEquation()
    //!
    char const* getEquation() const noexcept
    {
        return mImpl->getEquation();
    }

protected:
    virtual ~IEinsumLayer() noexcept = default;
    apiv::VEinsumLayer* mImpl;
};
//!
//! \enum ScatterMode
//!
//! \brief Control form of IScatterLayer
//!
//! \see IScatterLayer
//!
enum class ScatterMode : int32_t
{
    kELEMENT = 0, //!< Similar to ONNX ScatterElements
    kND = 1,      //!< Similar to ONNX ScatterND
};
//!
//! Maximum number of elements in ScatterMode enum.
//!
//! \see ScatterMode
//!
template <>
constexpr inline int32_t EnumMax<ScatterMode>() noexcept
{
    // ScatterMode has exactly two enumerators: kELEMENT and kND.
    return 2;
}
//!
//! \class IScatterLayer
//!
//! \brief A scatter layer in a network definition. Supports several kinds of scattering.
//!
//! The Scatter layer has three input tensors: Data, Indices, and Updates, one output tensor
//! Output, and a scatter mode. When kELEMENT mode is used an optional axis parameter is available.
//! * Data is a tensor of rank r >= 1 that stores the values to be duplicated in Output.
//! * Indices is a tensor of rank q that determines which locations in Output to write new
//!   values to. Constraints on the rank q depend on the mode:
//!       ScatterMode::kND: q >= 1
//!       ScatterMode::kELEMENT: q must be the same as r
//! * Updates is a tensor of rank s >= 1 that provides the data
//!   to write to Output specified by its corresponding location in Indices.
//!   Constraints on the rank of Updates depend on the mode:
//!       ScatterMode::kND: s = r + q - shape(Indices)[-1] - 1
//!       ScatterMode::kELEMENT: s = q = r
//! * Output is a tensor with the same dimensions as Data that stores the resulting values of the
//!   transformation. It must not be a shape tensor.
//! The types of Data, Update, and Output shall be the same, and Indices shall be of type DataType::kINT32 or
//! DataType::kINT64.
//!
//! The output is computed by copying the data, and then updating elements of it based on indices.
//! How Indices are interpreted depends upon the ScatterMode.
//!
//! ScatterMode::kND
//!
//!     The indices are interpreted as a tensor of rank q-1 of indexing tuples.
//!     The axis parameter is ignored.
//!
//!     Given that data dims are {d_0,...,d_{r-1}} and indices dims are {i_0,...,i_{q-1}},
//!     define k = indices[q-1], it follows that updates dims are {i_0,...,i_{q-2},d_k,...,d_{r-1}}
//!     The updating can be computed by:
//!         foreach slice in indices[i_0,...,i_{q-2}]
//!             output[indices[slice]] = updates[slice]
//!
//! ScatterMode::kELEMENT
//!
//!     Here "axis" denotes the result of getAxis().
//!
//!     For each element X of indices:
//!         Let J denote a sequence for the subscripts of X
//!         Let K = sequence J with element [axis] replaced by X
//!         output[K] = updates[J]
//!
//!     For example, if indices has dimensions [N,C,H,W] and axis is 2, then the updates happen as:
//!
//!     for n in [0,N)
//!         for c in [0,C)
//!             for h in [0,H)
//!                 for w in [0,W)
//!                     output[n,c,indices[n,c,h,w],w] = updates[n,c,h,w]
//!
//! Writes to the same output element cause undefined behavior.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IScatterLayer : public ILayer
{
public:
    //!
    //! \brief Set the scatter mode.
    //!
    //! \param mode The scatter mode to use (see ScatterMode).
    //!
    //! \see getMode()
    //!
    void setMode(ScatterMode mode) noexcept
    {
        mImpl->setMode(mode);
    }

    //!
    //! \brief Get the scatter mode.
    //!
    //! \see setMode()
    //!
    ScatterMode getMode() const noexcept
    {
        return mImpl->getMode();
    }

    //!
    //! \brief Set the axis used by ScatterMode::kELEMENT.
    //!
    //! \param axis The axis along which scattering occurs in kELEMENT mode. The axis defaults to 0.
    //!
    //! \see getAxis()
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get the axis.
    //!
    //! \see setAxis()
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

protected:
    apiv::VScatterLayer* mImpl;
    virtual ~IScatterLayer() noexcept = default;
}; // class IScatterLayer
//!
//! \class IOneHotLayer
//!
//! \brief A OneHot layer in a network definition.
//!
//! The OneHot layer has three input tensors: Indices, Values, and Depth, one output tensor:
//! Output, and an axis attribute.
//! * Indices is an Int32 tensor that determines which locations in Output to set as on_value.
//! * Values is a two-element (rank=1) tensor that consists of [off_value, on_value]
//! * Depth is an 0D tensor of type Int32 or Int64, which contains the depth (number of classes) of the one-hot
//!   encoding. The depth tensor must be a positive build-time constant.
//! * Output is a tensor with rank = rank(indices)+1, where the added dimension contains the one-hot encoding.
//!   The data type of Output is equal to the Values data type.
//! * Axis is a scalar specifying to which dimension of the output one-hot encoding is added.
//!   Valid range for axis is -rank(indices)-1 <= axis <= rank(indices).
//!
//! The output is computed by copying off_value to all output elements, then setting on_value on the indices
//! specified by the indices tensor.
//! when axis = 0:
//!     output[indices[i, j, k], i, j, k] = on_value for all i, j, k and off_value otherwise.
//!
//! when axis = -1:
//!     output[i, j, k, indices[i, j, k]] = on_value for all i, j, k and off_value otherwise.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IOneHotLayer : public ILayer
{
public:
    //!
    //! \brief Set the axis parameter.
    //!
    //! \param axis The dimension of the output at which the one-hot encoding is added.
    //!
    //! \see IOneHotLayer, getAxis()
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get the value of the axis parameter.
    //!
    //! \see setAxis()
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

protected:
    apiv::VOneHotLayer* mImpl;
    virtual ~IOneHotLayer() noexcept = default;
};
//!
//! \class IGridSampleLayer
//!
//! \brief A GridSample layer in a network definition.
//!
//! This layer uses an input tensor and a grid tensor to produce an interpolated output tensor.
//! The input and grid tensors must be tensors of rank 4. The only supported SampleMode
//! values are SampleMode::kCLAMP, SampleMode::kFILL, and SampleMode::kREFLECT.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IGridSampleLayer : public ILayer
{
public:
    //!
    //! \brief Set the grid sample interpolation mode.
    //!
    //! \param mode The interpolation mode to use.
    //!
    //! \see getInterpolationMode()
    //!
    void setInterpolationMode(InterpolationMode mode) noexcept
    {
        mImpl->setInterpolationMode(mode);
    }

    //!
    //! \brief Get the grid sample interpolation mode.
    //!
    //! \see setInterpolationMode()
    //!
    //! \return The value specified by setInterpolationMode, or InterpolationMode::kLINEAR otherwise.
    //!
    InterpolationMode getInterpolationMode() const noexcept
    {
        return mImpl->getInterpolationMode();
    }

    //!
    //! \brief Set the align corners mode.
    //!
    //! \param alignCorners The align corners mode to use.
    //!
    //! \see getAlignCorners()
    //!
    void setAlignCorners(bool alignCorners) noexcept
    {
        mImpl->setAlignCorners(alignCorners);
    }

    //!
    //! \brief Get the align corners mode.
    //!
    //! \see setAlignCorners()
    //!
    //! \return The value specified by setAlignCorners(), or false otherwise.
    //!
    bool getAlignCorners() const noexcept
    {
        return mImpl->getAlignCorners();
    }

    //!
    //! \brief Set the sample mode.
    //!
    //! \param mode The sample mode to use.
    //!
    //! \see getSampleMode()
    //!
    //! \return true if layer's sample mode was set to mode, false otherwise.
    //!
    bool setSampleMode(SampleMode mode) noexcept
    {
        return mImpl->setSampleMode(mode);
    }

    //!
    //! \brief Get the sample mode.
    //!
    //! \see setSampleMode()
    //!
    //! \returns the value specified by a successful call to setSampleMode(), or SampleMode::kFILL otherwise.
    //!
    SampleMode getSampleMode() const noexcept
    {
        return mImpl->getSampleMode();
    }

protected:
    apiv::VGridSampleLayer* mImpl;
    virtual ~IGridSampleLayer() noexcept = default;
}; // class IGridSampleLayer
//!
//! \enum BoundingBoxFormat
//!
//! \brief Representation of bounding box data used for the Boxes input tensor in INMSLayer
//!
//! \see INMSLayer
//!
enum class BoundingBoxFormat : int32_t
{
    //! (x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners
    kCORNER_PAIRS = 0,
    //! (x_center, y_center, width, height) where (x_center, y_center) is the center point of the box
    kCENTER_SIZES = 1
};
//!
//! Maximum number of elements in BoundingBoxFormat enum.
//!
//! \see BoundingBoxFormat
//!
template <>
constexpr inline int32_t EnumMax<BoundingBoxFormat>() noexcept
{
    // BoundingBoxFormat has exactly two enumerators: kCORNER_PAIRS and kCENTER_SIZES.
    return 2;
}
//!
//! \class INMSLayer
//!
//! \brief A non-maximum suppression layer in a network definition.
//!
//! The NMS algorithm iterates through a set of bounding boxes and their confidence scores, in decreasing
//! order of score. Boxes are selected if their score is above a given threshold, and their
//! intersection-over-union (IoU) with previously selected boxes is less than or equal to a given threshold.
//! This layer implements NMS per batch item and per class.
//!
//! Per batch item, boxes are initially sorted by their scores without regard to class. Only boxes up to a maximum of
//! the TopK limit are considered for selection (per batch). During selection, only overlapping boxes of the same class
//! are compared, so that overlapping boxes of different classes do not suppress each other.
//!
//! For each batch item, the ordering of candidate bounding boxes with the same score is unspecified, but the ordering
//! will be consistent across different runs for the same inputs.
//!
//! The layer has the following inputs, in order of input index:
//!
//! * Boxes contains the input bounding boxes. It is a linear tensor of type kFLOAT or kHALF. It has
//!   shape [batchSize, numInputBoundingBoxes, numClasses, 4] if the boxes are per class, or
//!   [batchSize, numInputBoundingBoxes, 4] if the same boxes are to be used for each class.
//! * Scores contains the per-box scores. It is a linear tensor of the same type as Boxes. It has shape
//!   [batchSize, numInputBoundingBoxes, numClasses].
//! * MaxOutputBoxesPerClass is the maximum number of output boxes per batch item per class.
//!   It is a scalar (0D tensor) of type kINT32.
//! * IoUThreshold is the maximum IoU for selected boxes. It is a scalar (0D tensor) of type kFLOAT in the range
//!   [0.0f, 1.0f]. It is an optional input with default 0.0f.
//! * ScoreThreshold is the value that a box score must exceed in order to be selected. It is a scalar (0D tensor) of
//!   type kFLOAT. It is an optional input with default 0.0f.
//!
//! The layer has the following outputs, in order of output index:
//!
//! * SelectedIndices contains the indices of the selected boxes. It is a linear tensor of type kINT32 or kINT64.
//!   It has shape [NumOutputBoxes, 3]. Each row contains a (batchIndex, classIndex, boxIndex) tuple.
//!   The output boxes are sorted in order of increasing batchIndex and then in order of decreasing score within each
//!   batchIndex. For each batchIndex, the ordering of output boxes with the same score is unspecified. If
//!   MaxOutputBoxesPerClass is a constant input, the maximum number of output boxes is batchSize * numClasses *
//!   min(numInputBoundingBoxes, MaxOutputBoxesPerClass). Otherwise, the maximum number of output boxes is batchSize *
//!   numClasses * numInputBoundingBoxes. The maximum number of output boxes is used to determine the upper-bound on
//!   allocated memory for this output tensor.
//! * NumOutputBoxes is the number of output boxes in SelectedIndices. It is a scalar (0D tensor) of type kINT32.
//!
//! \warning There is a hardware-dependent limit K such that only the K highest scoring boxes in each batch item
//! will be considered for selection. The value of K is 2000 for SM 5.3 and 6.2 devices, and 5000 otherwise.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class INMSLayer : public ILayer
{
public:
    //!
    //! \brief Set the bounding box format parameter for the layer.
    //!
    //! The default value for the bounding box format parameter is kCORNER_PAIRS.
    //!
    //! \param fmt The bounding box format to use for the Boxes input tensor.
    //!
    //! \see BoundingBoxFormat
    //!
    //! \see getBoundingBoxFormat()
    //!
    void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
    {
        mImpl->setBoundingBoxFormat(fmt);
    }

    //!
    //! \brief Get the bounding box format parameter for the layer.
    //!
    //! \see BoundingBoxFormat
    //!
    //! \see setBoundingBoxFormat()
    //!
    BoundingBoxFormat getBoundingBoxFormat() const noexcept
    {
        return mImpl->getBoundingBoxFormat();
    }

    //!
    //! \brief Set the TopK box limit parameter for the layer.
    //!
    //! The TopK box limit is the maximum number of filtered boxes considered for selection per batch item.
    //! The default value for the TopK box limit parameter is 2000 for SM 5.3 and 6.2 devices, and 5000 otherwise.
    //! The TopK box limit must be less than or equal to {2000 for SM 5.3 and 6.2 devices, 5000 otherwise}.
    //!
    //! \param limit The maximum number of filtered boxes considered for selection per batch item.
    //!
    //! \see getTopKBoxLimit()
    //!
    void setTopKBoxLimit(int32_t limit) noexcept
    {
        mImpl->setTopKBoxLimit(limit);
    }

    //!
    //! \brief Get the TopK box limit parameter for the layer.
    //!
    //! \see setTopKBoxLimit()
    //!
    int32_t getTopKBoxLimit() const noexcept
    {
        return mImpl->getTopKBoxLimit();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! The indices are as follows:
    //!
    //! - 0: The required Boxes tensor.
    //! - 1: The required Scores tensor.
    //! - 2: The required MaxOutputBoxesPerClass tensor.
    //! - 3: The optional IoUThreshold tensor.
    //! - 4: The optional ScoreThreshold tensor.
    //!
    //! If this function is called for an index greater or equal to getNbInputs(),
    //! then afterwards getNbInputs() returns index + 1, and any missing intervening
    //! inputs are set to null. Note that only optional inputs can be missing.
    //!
    using ILayer::setInput;

    //!
    //! \brief Set the indices type for the layer.
    //!
    //! \param type The DataType of the indices tensor.
    //!
    //! \return true if set successfully, false otherwise.
    //!
    //! Set the indices (the first output) type of the NMS layer. Valid values are DataType::kINT32 and
    //! DataType::kINT64, otherwise an error occurs and the type is not updated.
    //!
    //! \see getIndicesType()
    //!
    bool setIndicesType(DataType type) noexcept
    {
        return mImpl->setIndicesType(type);
    }

    //!
    //! \brief Return the NMS layer indices type.
    //!
    //! \return indices type set during layer creation or by setIndicesType().
    //! The return value is the indices type of the NMS layer.
    //! The default value is DataType::kINT32.
    //!
    //! \see setIndicesType()
    //!
    DataType getIndicesType() const noexcept
    {
        return mImpl->getIndicesType();
    }

protected:
    apiv::VNMSLayer* mImpl;
    virtual ~INMSLayer() noexcept = default;
}; // class INMSLayer
//!
//! \class IReverseSequenceLayer
//!
//! \brief A ReverseSequence layer in a network definition.
//!
//! This layer performs batch-wise reversal, which slices the input tensor along the axis batchAxis. For the
//! i-th slice, the operation reverses the first N elements, specified by the corresponding i-th value in
//! sequenceLens, along sequenceAxis and keeps the remaining elements unchanged. The output tensor will have
//! the same shape as the input tensor.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IReverseSequenceLayer : public ILayer
{
public:
    //!
    //! \brief Set the batch axis. Default is 1.
    //!
    //! \param batchAxis The axis along which the input is sliced into batch items.
    //!
    //! batchAxis should be between zero (inclusive) and the rank of input (exclusive), and different from
    //! sequenceAxis. Otherwise, ErrorCode::kINVALID_ARGUMENT will be triggered.
    //!
    //! \see getBatchAxis()
    //!
    void setBatchAxis(int32_t batchAxis) noexcept
    {
        mImpl->setBatchAxis(batchAxis);
    }

    //!
    //! \brief Return the batch axis. Return 1 if no batch axis was set.
    //!
    //! \see setBatchAxis()
    //!
    int32_t getBatchAxis() const noexcept
    {
        return mImpl->getBatchAxis();
    }

    //!
    //! \brief Set the sequence axis. Default is 0.
    //!
    //! \param sequenceAxis The axis along which elements are reversed.
    //!
    //! sequenceAxis should be between zero (inclusive) and the rank of input (exclusive), and different from
    //! batchAxis. Otherwise, ErrorCode::kINVALID_ARGUMENT will be triggered.
    //!
    //! \see getSequenceAxis()
    //!
    void setSequenceAxis(int32_t sequenceAxis) noexcept
    {
        mImpl->setSequenceAxis(sequenceAxis);
    }

    //!
    //! \brief Return the sequence axis. Return 0 if no sequence axis was set.
    //!
    //! \see setSequenceAxis()
    //!
    int32_t getSequenceAxis() const noexcept
    {
        return mImpl->getSequenceAxis();
    }

protected:
    apiv::VReverseSequenceLayer* mImpl;
    virtual ~IReverseSequenceLayer() noexcept = default;
}; // class IReverseSequenceLayer
//!
|
||
|
|
//! \class INormalizationLayer
|
||
|
|
//!
|
||
|
|
//! \brief A normalization layer in a network definition.
|
||
|
|
//!
|
||
|
|
//! The normalization layer performs the following operation:
|
||
|
|
//!
|
||
|
|
//! X - input Tensor
|
||
|
|
//! Y - output Tensor
|
||
|
|
//! S - scale Tensor
|
||
|
|
//! B - bias Tensor
|
||
|
|
//!
|
||
|
|
//! Y = (X - Mean(X, axes)) / Sqrt(Variance(X) + epsilon) * S + B
|
||
|
|
//!
|
||
|
|
//! Where Mean(X, axes) is a reduction over a set of axes, and Variance(X) = Mean((X - Mean(X, axes)) ^ 2, axes).
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class INormalizationLayer : public ILayer
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Set the epsilon value used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! The default value of \p eps is 1e-5F.
|
||
|
|
//!
|
||
|
|
//! \param eps The epsilon value used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
void setEpsilon(float eps) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setEpsilon(eps);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the epsilon value used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! \return The epsilon value used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
float getEpsilon() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getEpsilon();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the reduction axes for the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! \param axesMask The axes used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
void setAxes(uint32_t axesMask) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setAxes(axesMask);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the axes value used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! \return The axes used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
uint32_t getAxes() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAxes();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the number of groups used to split the channels in the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! The input tensor channels are divided into \p nbGroups groups, and normalization is performed per group.
|
||
|
|
//! The channel dimension is considered to be the second dimension in a [N, C, H, W, ...] formatted tensor.
|
||
|
|
//!
|
||
|
|
//! The default \p nbGroups is 1.
|
||
|
|
//!
|
||
|
|
//! \warning It is an error to set \p nbGroups to a value that does not evenly divide into the number of channels
|
||
|
|
//! of the input tensor.
|
||
|
|
//!
|
||
|
|
//! \warning When \p nbGroups is != 1, it is expected that the provided axesMask will have all bits corresponding
|
||
|
|
//! to dimensions after the channel dimension set to 1, with all other bits set to 0.
|
||
|
|
//!
|
||
|
|
//! \param nbGroups The number of groups to split the channels into for the normalization calculation.
|
||
|
|
//!
|
||
|
|
void setNbGroups(int64_t nbGroups) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setNbGroups(nbGroups);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the number of groups used to split the channels for the normalization calculation.
|
||
|
|
//!
|
||
|
|
//! \return The number of groups used to split the channel used for the normalization calculation.
|
||
|
|
//!
|
||
|
|
int64_t getNbGroups() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getNbGroups();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the compute precision of this layer.
|
||
|
|
//!
|
||
|
|
//! \param type The datatype used for the compute precision of this layer.
|
||
|
|
//!
|
||
|
|
//! The method is used to avoid overflow errors by controlling the normalization computation in
|
||
|
|
//! mixed precision mode. The compute precision defaults to DataType::kFLOAT32.
|
||
|
|
//! To override this default, use this method to set the desired compute precision.
|
||
|
|
//!
|
||
|
|
//! For a weakly typed network:
|
||
|
|
//!
|
||
|
|
//! * Method setOutputType() can still be called to control the output data type.
|
||
|
|
//!
|
||
|
|
//! * Method setPrecision() can still be called. The input data is cast to that precision before
|
||
|
|
//! being cast to the compute precision.
|
||
|
|
//!
|
||
|
|
//! Strongly typed network rejects calls to this method since the compute precision is typically
|
||
|
|
//! controlled by casting the input tensors to the desired type.
|
||
|
|
//!
|
||
|
|
//! Only DataType::kFLOAT32 and DataType::kHALF are valid types for \p type.
|
||
|
|
//!
|
||
|
|
void setComputePrecision(DataType type) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setComputePrecision(type);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the compute precision of this layer.
|
||
|
|
//!
|
||
|
|
//! \return The datatype used for the compute precision of this layer.
|
||
|
|
//!
|
||
|
|
DataType getComputePrecision() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getComputePrecision();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Returns true if this layer was created through addNormalizationV2().
|
||
|
|
//!
|
||
|
|
//! \return Whether the layer was created through addNormalizationV2().
|
||
|
|
//!
|
||
|
|
TRT_NODISCARD bool isV2() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->isV2();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VNormalizationLayer* mImpl;
|
||
|
|
virtual ~INormalizationLayer() noexcept = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
//!
//! \class ISqueezeLayer
//!
//! \brief Layer that represents a squeeze operation, removing unit dimensions of the first input tensor
//! on a set of axes specified by the second input tensor.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ISqueezeLayer : public ILayer
{
public:
    //!
    //! \brief Append or replace an input of this layer with a specific tensor.
    //!
    //! \param index The index of the input to modify.
    //! \param tensor The new input tensor.
    //!
    //! For a Squeeze layer, the values 0-1 are valid for index.
    //! The indices are as follows:
    //!
    //! - 0: Input data tensor.
    //! - 1: The axes to remove. Must resolve to a constant Int32 or Int64 1D shape tensor.
    //!
    using ILayer::setInput;

protected:
    apiv::VSqueezeLayer* mImpl;
    virtual ~ISqueezeLayer() noexcept = default;
};
//!
//! \class IUnsqueezeLayer
//!
//! \brief Layer that represents an unsqueeze operation, which reshapes the first input tensor by inserting unit-length
//! dimensions to the output at the axes specified by the second input tensor.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IUnsqueezeLayer : public ILayer
{
public:
    //!
    //! \brief Append or replace an input of this layer with a specific tensor.
    //!
    //! \param index The index of the input to modify.
    //! \param tensor The new input tensor.
    //!
    //! For an Unsqueeze layer, the values 0-1 are valid for index.
    //! The indices are as follows:
    //!
    //! - 0: Input data tensor.
    //! - 1: The output axes at which unit-length dimensions are inserted. Must resolve to a constant Int32 or
    //!   Int64 1D shape tensor.
    //!
    using ILayer::setInput;

protected:
    apiv::VUnsqueezeLayer* mImpl;
    virtual ~IUnsqueezeLayer() noexcept = default;
};
//!
//! \enum CumulativeOperation
//!
//! \brief Enumerates the cumulative operations that may be performed by a Cumulative layer.
//!
//! The table shows the initial value of each Cumulative operation.
//!
//! Operation | kFLOAT, kHALF, kBF16 | kINT32, kINT64 |
//! --------- | -------------------- | -------------- |
//! kSUM      | +0.0                 | 0              |
//!
enum class CumulativeOperation : int32_t
{
    kSUM = 0, //!< Calculate cumulative sum.
};
namespace impl
{

//!
//! \brief Maximum number of elements in CumulativeOperation enum.
//!
//! \see CumulativeOperation
//!
template <>
struct EnumMaxImpl<CumulativeOperation>
{
    //! CumulativeOperation currently has exactly one enumerator: kSUM.
    static constexpr int32_t kVALUE = 1;
};

} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \class ICumulativeLayer
//!
//! \brief Layer that represents a cumulative operation across a tensor.
//!
//! It computes successive reductions across an axis of a tensor. The output
//! always has the same shape as the input.
//!
//! If the reduction operation is summation, then this is also known as
//! prefix-sum or cumulative sum.
//!
//! The operation has forward vs. reverse variants, and inclusive vs. exclusive variants.
//!
//! For example, let the input be a vector x of length n and the output be vector y.
//! Then y[j] = sum(x[...]) where ... denotes a sequence of indices from this table:
//!
//!           | forward | reverse
//! ----------|---------|--------
//! inclusive | 0..j    | j..n-1
//! exclusive | 0..j-1  | j+1..n-1
//!
//! For multidimensional tensors, the reductions apply across a specified axis. For
//! example, given a 2D input, a forward inclusive cumulative operation across axis 0 generates
//! cumulative sums within each column.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class ICumulativeLayer : public ILayer
{
public:
    //!
    //! \brief Set the cumulative operation for the layer.
    //!
    //! \param op The reduction operation to be performed
    //!
    //! \return Whether \p op is valid and the operation successfully set
    //!
    //! \see getOperation(), CumulativeOperation
    //!
    bool setOperation(CumulativeOperation op) noexcept
    {
        return mImpl->setOperation(op);
    }

    //!
    //! \brief Get the cumulative operation for the layer.
    //!
    //! \return The reduction operation to be performed
    //!
    //! \see setOperation(), CumulativeOperation
    //!
    CumulativeOperation getOperation() const noexcept
    {
        return mImpl->getOperation();
    }

    //!
    //! \brief Set whether it is an exclusive accumulation or inclusive accumulation.
    //!
    //! \param exclusive Whether the operation will exclude the element at the current index
    //!
    //! \see getExclusive
    //!
    void setExclusive(bool exclusive) noexcept
    {
        mImpl->setExclusive(exclusive);
    }

    //!
    //! \brief Get whether it is exclusive accumulation or inclusive accumulation.
    //!
    //! \return Whether the operation will exclude the element at the current index
    //!
    //! \see setExclusive
    //!
    bool getExclusive() const noexcept
    {
        return mImpl->getExclusive();
    }

    //!
    //! \brief Specify whether the cumulative operation should be applied backward.
    //!
    //! \param reverse Whether the cumulative will run in the reverse direction from the last element
    //!
    //! \see getReverse
    //!
    void setReverse(bool reverse) noexcept
    {
        mImpl->setReverse(reverse);
    }

    //!
    //! \brief Get the boolean that specifies whether the cumulative operation should be applied backward.
    //!
    //! \return Whether the cumulative will run in the reverse direction from the last element
    //!
    //! \see setReverse
    //!
    bool getReverse() const noexcept
    {
        return mImpl->getReverse();
    }

protected:
    apiv::VCumulativeLayer* mImpl;
    virtual ~ICumulativeLayer() noexcept = default;
};
//!
//! \enum AttentionNormalizationOp
//!
//! \brief Enumerates the operations that may be performed by the normalization in the attention subgraph.
//!
enum class AttentionNormalizationOp : int32_t
{
    kNONE
        = 0, //!< Apply no normalization on the attention scores. Must be used with decomposable=True on pre-Blackwell GPUs
    kSOFTMAX = 1, //!< Apply softmax normalization on the attention scores on the `s_kv` dimension.
};
namespace impl
{
//!
//! \brief Maximum number of elements in AttentionNormalizationOp enum.
//!
//! \see AttentionNormalizationOp
//!
template <>
struct EnumMaxImpl<AttentionNormalizationOp>
{
    //! Count of enumerators in AttentionNormalizationOp (kNONE, kSOFTMAX).
    static constexpr int32_t kVALUE = 2;
};

} // namespace impl
//!
//! \class IAttentionBoundaryLayer
//!
//! \brief This is a base class for Attention boundary layers.
//!
//! Boundary layers are used to demarcate the boundaries of IAttention.
//! Typically client code does not deal directly with the boundary layers.
//! However, they are indirectly visible via method `INetworkDefinition::getLayer(int32_t index)`.
//!
class IAttentionBoundaryLayer : public ILayer
{
public:
    //!
    //! \brief Get a pointer to the IAttention associated with this boundary layer.
    //!
    IAttention* getAttention() const noexcept
    {
        return mBoundary->getAttention();
    }

protected:
    virtual ~IAttentionBoundaryLayer() noexcept = default;
    apiv::VAttentionBoundaryLayer* mBoundary;
};
//!
//! \class IAttentionInputLayer
//!
//! \brief This layer represents an input to an attention subgraph.
//!
//! This layer is automatically created when an `IAttention` is created. Clients typically do not
//! deal with the layer directly, but instead specify its input via `addAttention` or `IAttention::setInput`.
//!
//! An IAttentionInputLayer has three to five inputs (inputs 3 and 4 are optional) and one output.
//!
class IAttentionInputLayer : public IAttentionBoundaryLayer
{
public:
    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! The indices are as follows:
    //!
    //! Input 0 is the input query tensor.
    //! Input 1 is the input key tensor.
    //! Input 2 is the input value tensor.
    //! Input 3 is the optional mask tensor. setMask should be used instead of setInput
    //! Input 4 is the optional normalizationQuantizeScale tensor. setNormalizationQuantizeScale should be used instead
    //! of setInput
    //!
    using ILayer::setInput;

protected:
    virtual ~IAttentionInputLayer() noexcept = default;
    apiv::VAttentionInputLayer* mImpl;
};
//!
//! \class IAttentionOutputLayer
//!
//! \brief This layer represents an output of an IAttention.
//!
//! This layer is automatically created when an `IAttention` is created. Clients typically do not
//! deal with the layer directly, but instead getting its output via `IAttention::getOutput`.
//!
//! An IAttentionOutputLayer has one input and one output.
//!
class IAttentionOutputLayer : public IAttentionBoundaryLayer
{
public:
protected:
    virtual ~IAttentionOutputLayer() noexcept = default;
    apiv::VAttentionOutputLayer* mImpl;
};
//!
//! \class IAttention
//!
//! \brief Helper for constructing an attention that consumes query, key and value tensors.
//!
//! An attention subgraph implicitly includes three main components, two MatrixMultiply layers
//! known as BMM1 and BMM2, and one normalization operation which defaults to be a Softmax.
//! By default, IAttention is not decomposable and TensorRT will try to use a single fused kernel, which may be more
//! efficient than if the subgraph is expressed without IAttention. Setting the IAttention to decomposable=True can
//! allow IAttention to be decomposed to use multiple kernels if no fused kernel support found.
//!
//! Query    Key      Value    Mask (optional)    NormalizationQuantizeScale (optional)
//!   |       |         |            |                            |
//!   |   Transpose     |            |                            |
//!   |       |         |            |                            |
//!   ----BMM1----      |            |                            |
//!        |            |            |                            |
//!        *--------------------------                            |
//!        |            |                                         |
//!   Normalization     |                                         |
//!        |            |                                         |
//!        *------------------------------------------------------
//!                 |       |
//!                 --BMM2---
//!                     |
//!                   Output
//!
//! The attention has the following inputs, in order of input index:
//!
//! * Query contains the input query. It is a tensor of type kFLOAT, kHALF or kBF16 with
//!   shape [batchSize, numHeadsQuery, sequenceLengthQuery, dimHead]
//! * Key contains the input key. It is a tensor of type kFLOAT, kHALF or kBF16 with
//!   shape [batchSize, numHeadsKeyValue, sequenceLengthKeyValue, dimHead]
//! * Value contains the input value. It is a tensor of type kFLOAT, kHALF or kBF16 with
//!   shape [batchSize, numHeadsKeyValue, sequenceLengthKeyValue, dimHead]
//! * Mask (optional) contains the mask value. It is a tensor of type kBOOL or the same data type of
//!   BMM1 output with shape [batchSize, numHeadsQuery, sequenceLengthQuery, sequenceLengthKeyValue]
//!   with batchSize and numHeadsQuery broadcastable. For a kBOOL mask, a True value indicates that the corresponding
//!   position is allowed to attend. For other data types, the mask values will be added to the BMM1 output, known
//!   as an add mask.
//! * NormalizationQuantizeScale (optional) contains the quantization scale for the attention normalization output.
//!   It is a tensor of type kFLOAT, kHALF or kBF16 with dimension 0 or 1.
//!
//! \see
//! https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/work-with-transformers.html#multi-head-attention-fusion
//! for the complete matrix of fused kernel support.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IAttention : public INoCopy
{
public:
    //!
    //! \brief Set the normalization operation for the attention.
    //!
    //! \see getNormalizationOperation(), AttentionNormalizationOp
    //!
    //! \return True if the normalization operation is set successfully, false otherwise.
    //!
    bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
    {
        return mImpl->setNormalizationOperation(op);
    }

    //!
    //! \brief Get the normalization operation for the attention.
    //!
    //! \see setNormalizationOperation(), AttentionNormalizationOp
    //!
    //! \return The normalization operation for the attention. Default is kSOFTMAX.
    //!
    AttentionNormalizationOp getNormalizationOperation() const noexcept
    {
        return mImpl->getNormalizationOperation();
    }

    //!
    //! \brief Set whether a mask will be used for the normalization operation.
    //!
    //! \param mask the mask tensor of type kBOOL or the same data type of BMM1 output with 4d shape broadcastable to
    //! [batchSize, numHeadsQuery, sequenceLengthQuery, sequenceLengthKeyValue]. For a kBOOL mask, a True value
    //! indicates that the corresponding position is allowed to attend. For other data types, the mask values will be
    //! added to the BMM1 output, known as an add mask.
    //!
    //! \see getMask
    //!
    //! \return True if the mask is set successfully, false otherwise.
    //!
    bool setMask(ITensor& mask) noexcept
    {
        return mImpl->setMask(mask);
    }

    //!
    //! \brief Get the optional mask in attention.
    //!
    //! \see setMask
    //!
    //! \return The optional mask in attention, nullptr if no mask is set.
    //!
    ITensor* getMask() noexcept
    {
        return mImpl->getMask();
    }

    //!
    //! \brief Set whether the attention will run a causal inference.
    //! Cannot be used together with setMask().
    //!
    //! \see getCausal
    //!
    //! \return True if the causal inference is set successfully, false otherwise.
    //!
    bool setCausal(bool isCausal) noexcept
    {
        return mImpl->setCausal(isCausal);
    }

    //!
    //! \brief Get whether the attention will run a causal inference.
    //!
    //! \see setCausal
    //!
    //! \return True if the attention will run a causal inference, false otherwise. Default is false.
    //!
    bool getCausal() const noexcept
    {
        return mImpl->getCausal();
    }

    //!
    //! \brief Set whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
    //!
    //! \see getDecomposable
    //!
    //! \return True if the decomposable attention is set successfully, false otherwise.
    //!
    bool setDecomposable(bool decomposable) noexcept
    {
        return mImpl->setDecomposable(decomposable);
    }

    //!
    //! \brief Get whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
    //!
    //! \return True if the attention can be decomposed to use multiple kernels by the compiler,
    //! false otherwise. Default is false.
    //!
    //! \see setDecomposable
    //!
    bool getDecomposable() const noexcept
    {
        return mImpl->getDecomposable();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor.
    //!
    //! \param index the index of the input to modify.
    //! \param input the new input tensor.
    //!
    //! The indices are as follows:
    //!
    //! Input 0 is the input query tensor.
    //! Input 1 is the input key tensor.
    //! Input 2 is the input value tensor.
    //!
    //! \return True if the input tensor is set successfully, false otherwise.
    //!
    bool setInput(int32_t index, ITensor& input) noexcept
    {
        return mImpl->setInput(index, input);
    }

    //!
    //! \brief Get the number of inputs of IAttention. IAttention has three inputs.
    //!
    //! NOTE(review): the class supports optional mask and normalization-quantize-scale inputs (set via
    //! setMask/setNormalizationQuantizeScale) — confirm whether they are included in this count when set.
    //!
    //! \return The number of inputs of IAttention.
    //!
    int32_t getNbInputs() const noexcept
    {
        return mImpl->getNbInputs();
    }

    //!
    //! \brief Get the IAttention input corresponding to the given index.
    //!
    //! \param index The index of the input tensor.
    //!
    //! \return The input tensor, or nullptr if the index is out of range.
    //!
    ITensor* getInput(int32_t index) const noexcept
    {
        return mImpl->getInput(index);
    }

    //!
    //! \brief Get the number of outputs of a layer. IAttention has one output.
    //!
    int32_t getNbOutputs() const noexcept
    {
        return mImpl->getNbOutputs();
    }

    //!
    //! \brief Get the IAttention output corresponding to the given index. IAttention has only one output.
    //!
    //! \param index The index of the output tensor.
    //!
    //! \return The indexed output tensor, or nullptr if the index is out of range.
    //!
    ITensor* getOutput(int32_t index) const noexcept
    {
        return mImpl->getOutput(index);
    }

    //!
    //! \brief Set the name of the attention.
    //!
    //! The name is used in error diagnostics.
    //! This method copies the name string.
    //!
    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
    //!
    //! \see getName()
    //!
    //! \return True if the name is set successfully, false otherwise.
    //!
    bool setName(char const* name) noexcept
    {
        return mImpl->setName(name);
    }

    //!
    //! \brief Return the name of the attention.
    //!
    //! \see setName()
    //!
    //! \return The name of the attention.
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }

    //!
    //! \brief Set the quantization scale for the attention normalization output.
    //!
    //! \param tensor for quantization scale. Data type must be DataType::kFLOAT, DataType::kHALF or DataType::kBF16.
    //! Must be a 0-d or 1-d.
    //!
    //! \return True if the quantization scale is set successfully, false otherwise.
    //!
    //! \warning Must be used together with setNormalizationQuantizeToType to set normalization output datatype to
    //! DataType::kFP8 or DataType::kINT8.
    //!
    bool setNormalizationQuantizeScale(ITensor& tensor) noexcept
    {
        return mImpl->setNormalizationQuantizeScale(tensor);
    }

    //!
    //! \brief Get the quantization scale for the attention normalization output.
    //!
    //! \return The quantization scale for the attention normalization output or nullptr if no quantization scale is
    //! set.
    //!
    ITensor* getNormalizationQuantizeScale() const noexcept
    {
        return mImpl->getNormalizationQuantizeScale();
    }

    //!
    //! \brief Set the datatype the attention normalization is quantized to.
    //!
    //! \param type the datatype the attention normalization is quantized to. Must be one of DataType::kFP8,
    //! DataType::kINT8.
    //!
    //! \return True if the quantization to type is set successfully, false otherwise.
    //!
    bool setNormalizationQuantizeToType(DataType type) noexcept
    {
        return mImpl->setNormalizationQuantizeToType(type);
    }

    //!
    //! \brief Get the datatype the attention normalization is quantized to.
    //!
    //! \return The datatype the attention normalization is quantized to.
    //! The default value is DataType::kFLOAT.
    //!
    //! \warning Must be used after normalization quantization to type is set by setNormalizationQuantizeToType.
    //!
    DataType getNormalizationQuantizeToType() const noexcept
    {
        return mImpl->getNormalizationQuantizeToType();
    }

    //!
    //! \brief Set the metadata for IAttention.
    //!
    //! The metadata is emitted in the JSON returned by IEngineInspector with
    //! ProfilingVerbosity set to kDETAILED.
    //!
    //! \param metadata The per-layer metadata.
    //!
    //! \warning The string name must be null-terminated and be at most 4096 bytes including the terminator.
    //!
    //! \see getMetadata()
    //! \see getLayerInformation()
    //!
    //! \return True if the metadata is set successfully, false otherwise.
    //!
    bool setMetadata(char const* metadata) noexcept
    {
        return mImpl->setMetadata(metadata);
    }

    //!
    //! \brief Get the metadata of IAttention.
    //!
    //! \return The metadata as a null-terminated C-style string. If setMetadata() has not been called,
    //! an empty string "" will be returned as a default value.
    //!
    //! \see setMetadata()
    //!
    char const* getMetadata() const noexcept
    {
        return mImpl->getMetadata();
    }

protected:
    apiv::VAttention* mImpl;
    virtual ~IAttention() noexcept = default;
};
//! \class IRotaryEmbeddingLayer
//!
//! \brief Layer that implements Rotary Position Embedding (RoPE) (https://arxiv.org/abs/2104.09864).
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IRotaryEmbeddingLayer : public ILayer
{
public:
    //!
    //! \brief Set whether the input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from
    //! adjacent 2 elements in the hidden dimension. The default value is false.
    //!
    //! \see getInterleaved
    //!
    void setInterleaved(bool interleaved) noexcept
    {
        mImpl->setInterleaved(interleaved);
    }

    //!
    //! \brief Get whether the input is in interleaved format. The default value is false.
    //!
    //! \see setInterleaved
    //!
    TRT_NODISCARD bool getInterleaved() const noexcept
    {
        return mImpl->getInterleaved();
    }

    //!
    //! \brief Set the number of hidden dimensions participating in RoPE. The default value is 0, representing H,
    //! i.e., all hidden dimensions in each head. Must be non-negative and even.
    //!
    //! \return Whether the value was successfully set — presumably false if \p rotaryEmbeddingDim violates the
    //! constraint above (negative or odd); TODO confirm against the implementation.
    //!
    //! \see getRotaryEmbeddingDim
    //!
    TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
    {
        return mImpl->setRotaryEmbeddingDim(rotaryEmbeddingDim);
    }

    //!
    //! \brief Get the number of hidden dimensions participating in RoPE. The default value is 0, representing H,
    //! i.e., all hidden dimensions in each head.
    //!
    //! \see setRotaryEmbeddingDim
    //!
    TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
    {
        return mImpl->getRotaryEmbeddingDim();
    }

    //!
    //! \brief Append or replace an input of this layer with a specific tensor
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor
    //!
    //! The indices are as follows:
    //!
    //! Input 0 is the input activation tensor.
    //! Input 1 is the cosine cache tensor.
    //! Input 2 is the sine cache tensor.
    //! Input 3 (optional) is the positionIds tensor, which is used for indexing into the cosine and sine caches.
    //!
    using ILayer::setInput;

protected:
    apiv::VRotaryEmbeddingLayer* mImpl;
    virtual ~IRotaryEmbeddingLayer() noexcept = default;
};
//!
//! \enum KVCacheMode
//!
//! \brief Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
//!
enum class KVCacheMode : int32_t
{
    kLINEAR = 0, //!< Linear mode.
};
namespace impl
{
//!
//! \brief Maximum number of elements in KVCacheMode enum.
//!
//! \see KVCacheMode
//!
template <>
struct EnumMaxImpl<KVCacheMode>
{
    //! Count of enumerators in KVCacheMode (currently only kLINEAR).
    static constexpr int32_t kVALUE = 1;
};

} // namespace impl
//! \class IKVCacheUpdateLayer
//!
//! \brief Layer that represents a KVCacheUpdate operation.
//!
//! The KVCacheUpdate layer is used to cache the key or value tensors for the attention mechanism.
//! K and V use separate KVCacheUpdate layers.
//!
//! An IKVCacheUpdateLayer has three inputs (`cache`, `update`, `writeIndices`) and one output.
//! In `kLINEAR` mode, for each batch element i, the layer copies the update tensor into the cache starting at
//! position `writeIndices[i]`. Assuming no out-of-bounds writes occur, the operation for each sequence position
//! s in [0, sequenceLength) is:
//! \code
//! output[i, :, writeIndices[i] + s, :] = update[i, :, s, :]
//! \endcode
//!
//! The output performs in-place updates on the cache tensor, so they must share the same device memory address.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IKVCacheUpdateLayer : public ILayer
{
public:
    //!
    //! \brief Append or replace an input of this layer with a specific tensor.
    //!
    //! \param index the index of the input to modify.
    //! \param tensor the new input tensor.
    //!
    //! The indices are as follows:
    //!
    //! Input 0 is the input cache tensor.
    //! Input 1 is the input update tensor.
    //! Input 2 is the input writeIndices tensor.
    //!
    using ILayer::setInput;

    //!
    //! \brief Set the mode of the KVCacheUpdate layer.
    //!
    //! \param cacheMode The mode of the KVCacheUpdate layer. For TensorRT 10.15, only `kLINEAR` mode is supported.
    //!
    //! \return True if cache mode is set successfully, false otherwise.
    //!
    bool setCacheMode(KVCacheMode cacheMode) noexcept
    {
        return mImpl->setCacheMode(cacheMode);
    }

    //!
    //! \brief Get the mode of the KVCacheUpdate layer.
    //!
    //! \return The mode of the KVCacheUpdate layer.
    //!
    KVCacheMode getCacheMode() const noexcept
    {
        return mImpl->getCacheMode();
    }

protected:
    apiv::VKVCacheUpdateLayer* mImpl;
    virtual ~IKVCacheUpdateLayer() noexcept = default;
};
//!
|
||
|
|
//! \class INetworkDefinition
|
||
|
|
//!
|
||
|
|
//! \brief A network definition for input to the builder.
|
||
|
|
//!
|
||
|
|
//! A network definition defines the structure of the network, and combined with a IBuilderConfig, is built
|
||
|
|
//! into an engine using an IBuilder. An INetworkDefinition can have all dimensions explicit, full dims mode, in the
|
||
|
|
//! network definition. The former mode, i.e. the implicit batch size mode, has been deprecated.
|
||
|
|
//!
|
||
|
|
//! A network with implicit batch dimensions returns the dimensions of a layer without the implicit dimension,
|
||
|
|
//! and instead the batch is specified at execute/enqueue time. If the network has all dimensions specified, then
|
||
|
|
//! the first dimension follows elementwise broadcast rules: if it is 1 for some inputs and is some value N for all
|
||
|
|
//! other inputs, then the first dimension of each output is N, and the inputs with 1 for the first dimension are
|
||
|
|
//! broadcast. Having divergent batch sizes across inputs to a layer is not supported.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class INetworkDefinition : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
virtual ~INetworkDefinition() noexcept = default;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Add an input tensor to the network.
|
||
|
|
//!
|
||
|
|
//! Each input and output tensor must have a unique name.
|
||
|
|
//!
|
||
|
|
//! For networks with wildcard dimensions, the volume
|
||
|
|
//! is based on the maxima specified by an IOptimizationProfile.Dimensions are normally non-negative integers. The
|
||
|
|
//! exception is that in networks with all explicit dimensions, -1 can be used as a wildcard for a dimension to
|
||
|
|
//! be specified at runtime. Input tensors with such a wildcard must have a corresponding entry in the
|
||
|
|
//! IOptimizationProfiles indicating the permitted extrema, and the input dimensions must be set by
|
||
|
|
//! IExecutionContext::setInputShape. Different IExecutionContext instances can have different dimensions.
|
||
|
|
//! Wildcard dimensions are only supported for EngineCapability::kSTANDARD. They are not
|
||
|
|
//! supported in safety contexts. DLA does not support Wildcard dimensions.
|
||
|
|
//!
|
||
|
|
//! Tensor dimensions are specified independent of format. For example, if a
|
||
|
|
//! tensor is formatted in "NHWC" or a vectorized format, the dimensions are
|
||
|
|
//! still specified in the order{N, C, H, W}. For 2D images with a channel
|
||
|
|
//! dimension, the last three dimensions are always {C,H,W}. For 3D images
|
||
|
|
//! with a channel dimension, the last four dimensions are always {C,D,H,W}.
|
||
|
|
//!
|
||
|
|
//! \param name The name of the tensor.
|
||
|
|
//! \param type The type of the data held in the tensor.
|
||
|
|
//! \param dimensions The dimensions of the tensor.
|
||
|
|
//!
|
||
|
|
//! \warning It is an error to specify a wildcard value on a dimension that is determined by trained parameters.
|
||
|
|
//!
|
||
|
|
//! \warning If run on DLA with explicit dimensions, only leading dimension can be a wildcard. And provided profile
|
||
|
|
//! must have same minimum, optimum, and maximum dimensions.
|
||
|
|
//!
|
||
|
|
//! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
|
||
|
|
//!
|
||
|
|
//! \see ITensor
|
||
|
|
//!
|
||
|
|
//! \return The new tensor or nullptr if there is an error.
|
||
|
|
//!
|
||
|
|
ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->addInput(name, type, dimensions);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Mark a tensor as a network output.
|
||
|
|
//!
|
||
|
|
//! \param tensor The tensor to mark as an output tensor.
|
||
|
|
//!
|
||
|
|
//! \warning It is an error to mark a network input as an output.
|
||
|
|
//! \warning It is an error to mark a tensor inside an ILoop or an
|
||
|
|
//! IIfConditional as an output.
|
||
|
|
//!
|
||
|
|
void markOutput(ITensor& tensor) noexcept
|
||
|
|
{
|
||
|
|
mImpl->markOutput(tensor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Mark a tensor as a debug tensor.
|
||
|
|
//!
|
||
|
|
//! A debug tensor can be optionally emitted at runtime.
|
||
|
|
//! Note that tensor names are required to specify debug
|
||
|
|
//! tensors at runtime.
|
||
|
|
//!
|
||
|
|
//! \param tensor Tensor to be marked as debug
|
||
|
|
//!
|
||
|
|
//! \return True if tensor successfully marked (or was already marked), false otherwise.
|
||
|
|
//!
|
||
|
|
//! \see unmarkDebug(), IExecutionContext::setDebugListener(), ITensor::setName()
|
||
|
|
//!
|
||
|
|
bool markDebug(ITensor& tensor) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->markDebug(tensor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Unmark a tensor as a debug tensor.
|
||
|
|
//!
|
||
|
|
//! Remove the marking of a tensor as a debug tensor.
|
||
|
|
//!
|
||
|
|
//! \param tensor Tensor to be unmarked as debug.
|
||
|
|
//!
|
||
|
|
//! \return True if tensor successfully unmarked (or was already unmarked), false otherwise.
|
||
|
|
//!
|
||
|
|
//! \see markDebug(), IExecutionContext::setDebugListener()
|
||
|
|
//!
|
||
|
|
bool unmarkDebug(ITensor& tensor) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->unmarkDebug(tensor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Check if a tensor is marked as debug tensor.
|
||
|
|
//!
|
||
|
|
//! \return true if tensor is marked as debug tensor, false otherwise.
|
||
|
|
//!
|
||
|
|
bool isDebugTensor(ITensor const& tensor) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->isDebugTensor(tensor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
//! \brief Mark unfused tensors as debug tensors.
//!
//! Debug tensors can be optionally emitted at runtime.
//! Tensors that are fused by the optimizer will not be emitted.
//! Tensors marked this way will not prevent fusion like markDebug() does, thus preserving performance.
//!
//! \warning Tensors marked this way cannot be detected by isDebugTensor().
//! \warning DebugListener can only get internal tensor names instead of the original tensor
//! names in the NetworkDefinition for tensors marked this way. But the names correspond to the
//! names obtained by IEngineInspector.
//! \warning There is no guarantee that all unfused tensors are marked.
//!
//! \return True if tensors were successfully marked (or were already marked), false otherwise.
//!
//! \see unmarkUnfusedTensorsAsDebugTensors(), markDebug(), IExecutionContext::setDebugListener()
//!
bool markUnfusedTensorsAsDebugTensors() noexcept
{
    return mImpl->markUnfusedTensorsAsDebugTensors();
}
|
||
|
|
|
||
|
|
//!
//! \brief Undo the marking of unfused tensors as debug tensors.
//!
//! This has no effect on tensors marked by markDebug().
//!
//! \return True if tensor successfully unmarked (or was already unmarked), false otherwise.
//!
//! \see markUnfusedTensorsAsDebugTensors(), IExecutionContext::setDebugListener()
//!
bool unmarkUnfusedTensorsAsDebugTensors() noexcept
{
    return mImpl->unmarkUnfusedTensorsAsDebugTensors();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an activation layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param type The type of activation function to apply.
//!
//! Note that the setAlpha() and setBeta() methods must be used on the
//! output for activations that require these parameters.
//!
//! \see IActivationLayer ActivationType
//!
//! \warning Int32 and Int64 are valid only for activation type kRELU.
//!
//! \return The new activation layer, or nullptr if it could not be created.
//!
IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept
{
    return mImpl->addActivation(input, type);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a LRN layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param window The size of the window.
//! \param alpha The alpha value for the LRN computation.
//! \param beta The beta value for the LRN computation.
//! \param k The k value for the LRN computation.
//!
//! \see ILRNLayer
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new LRN layer, or nullptr if it could not be created.
//!
ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
{
    return mImpl->addLRN(input, window, alpha, beta, k);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a Scale layer to the network.
//!
//! \param input The input tensor to the layer.
//! This tensor must have at least 4 dimensions.
//! \param mode The scaling mode.
//! \param shift The shift value.
//! \param scale The scale value.
//! \param power The power value.
//!
//! If the weights are available, then the size of weights are dependent on the ScaleMode.
//! For ScaleMode::kUNIFORM, the number of weights equals 1.
//! For ScaleMode::kCHANNEL, the number of weights equals the channel dimension.
//! For ScaleMode::kELEMENTWISE, the number of weights equals the product of the last three dimensions of the input.
//!
//! \see addScaleNd
//! \see IScaleLayer
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new Scale layer, or nullptr if it could not be created.
//!
IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
{
    return mImpl->addScale(input, mode, shift, scale, power);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a SoftMax layer to the network.
//!
//! \param input The input tensor to the layer.
//!
//! \see ISoftMaxLayer
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new SoftMax layer, or nullptr if it could not be created.
//!
ISoftMaxLayer* addSoftMax(ITensor& input) noexcept
{
    return mImpl->addSoftMax(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a concatenation layer to the network.
//!
//! \param inputs The input tensors to the layer.
//! \param nbInputs The number of input tensors.
//!
//! \see IConcatenationLayer
//!
//! \return The new concatenation layer, or nullptr if it could not be created.
//!
//! \warning All tensors must have the same dimensions except along the concatenation axis.
//!
IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
{
    return mImpl->addConcatenation(inputs, nbInputs);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an elementwise layer to the network.
//!
//! \param input1 The first input tensor to the layer.
//! \param input2 The second input tensor to the layer.
//! \param op The binary operation that the layer applies.
//!
//! The input tensors must have the same rank and compatible type.
//! Two types are compatible if they are the same type or are both in the set {kFLOAT, kHALF}.
//! For each dimension, their lengths must match, or one of them must be one.
//! In the latter case, the tensor is broadcast along that axis.
//!
//! The output tensor has the same rank as the inputs.
//! For each dimension, its length is the maximum of the lengths of the
//! corresponding input dimension.
//!
//! The inputs are shape tensors if the output is a shape tensor.
//!
//! \see IElementWiseLayer
//!
//! \return The new elementwise layer, or nullptr if it could not be created.
//!
IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept
{
    return mImpl->addElementWise(input1, input2, op);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a unary layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param operation The operation to apply.
//!
//! \see IUnaryLayer
//!
//! Generally the input must have a floating-point type (or kINT8 as a quantized float),
//! except for the following operations:
//! * kSIGN accepts a floating-point or Int32 tensor.
//! * kNOT requires a Bool tensor.
//!
//! The input is a shape tensor if the output is a shape tensor.
//!
//! \return The new unary layer, or nullptr if it could not be created
//!
IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
{
    return mImpl->addUnary(input, operation);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a shuffle layer to the network.
//!
//! \param input The input tensor to the layer.
//!
//! \see IShuffleLayer
//!
//! \return The new shuffle layer, or nullptr if it could not be created.
//!
IShuffleLayer* addShuffle(ITensor& input) noexcept
{
    return mImpl->addShuffle(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a OneHot layer to the network.
//!
//! \param indices - tensor containing indices where on_value should be set.
//! \param values - a 2-element tensor, consisting of [off_value, on_value].
//! \param depth - a shape tensor containing the width of the added one-hot dimension.
//! \param axis - the axis to add the one-hot encoding to.
//!
//! \see IOneHotLayer
//!
//! \return The new OneHot layer, or nullptr if it could not be created.
//!
IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
{
    return mImpl->addOneHot(indices, values, depth, axis);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of layers in the network.
//!
//! \return The number of layers in the network.
//!
//! \see getLayer()
//!
int32_t getNbLayers() const noexcept
{
    return mImpl->getNbLayers();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the layer specified by the given index.
//!
//! \param index The index of the layer.
//!
//! \return The layer, or nullptr if the index is out of range.
//!
//! \see getNbLayers()
//!
ILayer* getLayer(int32_t index) const noexcept
{
    return mImpl->getLayer(index);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of inputs in the network.
//!
//! \return The number of inputs in the network.
//!
//! \see getInput()
//!
int32_t getNbInputs() const noexcept
{
    return mImpl->getNbInputs();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the input tensor specified by the given index.
//!
//! \param index The index of the input tensor.
//!
//! \return The input tensor, or nullptr if the index is out of range.
//!
//! \note adding inputs invalidates indexing here
//!
//! \see getNbInputs()
//!
ITensor* getInput(int32_t index) const noexcept
{
    return mImpl->getInput(index);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of outputs in the network.
//!
//! The outputs include those marked by markOutput or markOutputForShapes.
//!
//! \return The number of outputs in the network.
//!
//! \see getOutput()
//!
int32_t getNbOutputs() const noexcept
{
    return mImpl->getNbOutputs();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the output tensor specified by the given index.
//!
//! \param index The index of the output tensor.
//!
//! \return The output tensor, or nullptr if the index is out of range.
//!
//! \note adding inputs invalidates indexing here
//!
//! \see getNbOutputs()
//!
ITensor* getOutput(int32_t index) const noexcept
{
    return mImpl->getOutput(index);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a reduce layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param operation The reduction operation to perform.
//! \param reduceAxes The reduction dimensions.
//!        The bit in position i of bitmask reduceAxes corresponds to explicit dimension i if result.
//!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
//!        significant bit corresponds to the second explicit dimension.
//! \param keepDimensions The boolean that specifies whether or not to keep the reduced dimensions in the
//!        output of the layer.
//!
//! The reduce layer works by performing an operation specified by \p operation to reduce the tensor \p input
//! across the axes specified by \p reduceAxes.
//!
//! \see IReduceLayer
//!
//! \warning If output is an Int32 or Int64 shape tensor, ReduceOperation::kAVG is unsupported.
//!
//! \return The new reduce layer, or nullptr if it could not be created.
//!
IReduceLayer* addReduce(
    ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
{
    return mImpl->addReduce(input, operation, reduceAxes, keepDimensions);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a TopK layer to the network.
//!
//! The TopK layer has two outputs of the same dimensions. The first contains data values,
//! the second contains index positions for the values. Output values are sorted, largest first
//! for operation kMAX and smallest first for operation kMIN.
//!
//! Currently only values of K up to 3840 are supported.
//!
//! The default indices tensor (the second output) data type is DataType::kINT32.
//!
//! \param input The input tensor to the layer.
//!
//! \param op Operation to perform.
//!
//! \param k The number of elements to keep. For dynamic k, use the setInput() method to pass in k as a tensor
//!        instead, which will override the static k value passed here in calculations.
//!
//! \param reduceAxes The reduction dimensions.
//!        The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result.
//!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
//!        significant bit corresponds to the second explicit dimension. Currently reduceAxes must specify
//!        exactly one dimension, and it must be one of the last four dimensions.
//!
//! \see ITopKLayer
//!
//! \return The new TopK layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 10.14. Superseded by five-argument addTopK.
//!
TRT_DEPRECATED ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
{
    return mImpl->addTopK(input, op, k, reduceAxes);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a TopK layer to the network.
//!
//! The TopK layer has two outputs of the same dimensions. The first contains data values,
//! the second contains index positions for the values. Output values are sorted, largest first
//! for operation kMAX and smallest first for operation kMIN.
//!
//! Currently only values of K up to 3840 are supported.
//!
//! \param input The input tensor to the layer.
//!
//! \param op Operation to perform.
//!
//! \param k The number of elements to keep. For dynamic k, use the setInput() method to pass in k as a tensor
//!        instead, which will override the static k value passed here in calculations.
//!
//! \param reduceAxes The reduction dimensions.
//!        The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result.
//!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
//!        significant bit corresponds to the second explicit dimension. Currently reduceAxes must specify
//!        exactly one dimension, and it must be one of the last four dimensions.
//!
//! \param indicesType Indices tensor (the second output) data type, must be DataType::kINT32 or DataType::kINT64.
//!
//! \see ITopKLayer
//!
//! \return The new TopK layer, or nullptr if it could not be created.
//!
ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
{
    // Forwards to the V2 implementation entry point, which accepts the indices data type.
    return mImpl->addTopKV2(input, op, k, reduceAxes, indicesType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
//!
//! \param data The tensor to gather values from.
//! \param indices The tensor to get indices from to populate the output tensor.
//! \param axis The axis in the data tensor to gather on.
//!
//! \see IGatherLayer
//!
//! \return The new gather layer, or nullptr if it could not be created.
//!
IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
{
    return mImpl->addGather(data, indices, axis);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add gather with specified mode, axis=0 and nbElementWiseDims=0.
//!
//! \param data The tensor to gather values from.
//! \param indices The tensor to get indices from to populate the output tensor.
//! \param mode The gather mode.
//!
//! \see IGatherLayer
//!
//! \return The new gather layer, or nullptr if it could not be created.
//!
IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
{
    return mImpl->addGatherV2(data, indices, mode);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a RaggedSoftMax layer to the network.
//!
//! \param input The ZxS input tensor.
//! \param bounds The Zx1 bounds tensor.
//!
//! \see IRaggedSoftMaxLayer
//!
//! \warning The bounds tensor cannot have the last dimension be the wildcard character.
//! \warning Int32 tensors are not valid input tensors.
//! \warning The input and bounds tensors should be 3D tensors.
//!
//! \return The new RaggedSoftMax layer, or nullptr if it could not be created.
//!
IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept
{
    return mImpl->addRaggedSoftMax(input, bounds);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a MatrixMultiply layer to the network.
//!
//! \param input0 The first input tensor (commonly A).
//! \param op0 The operation to apply to input0.
//! \param input1 The second input tensor (commonly B).
//! \param op1 The operation to apply to input1.
//!
//! The inputs are shape tensors if the output is a shape tensor.
//!
//! \see IMatrixMultiplyLayer
//!
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new matrix multiply layer, or nullptr if it could not be created.
//!
IMatrixMultiplyLayer* addMatrixMultiply(
    ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
{
    return mImpl->addMatrixMultiply(input0, op0, input1, op1);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a nonzero layer to the network.
//!
//! The default indices tensor (the first output) data type is DataType::kINT32.
//!
//! \param input The input tensor to the layer.
//!
//! \see INonZeroLayer
//!
//! \return The new nonzero layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 10.14. Superseded by two-argument addNonZero.
//!
TRT_DEPRECATED INonZeroLayer* addNonZero(ITensor& input) noexcept
{
    return mImpl->addNonZero(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a nonzero layer to the network.
//!
//! \param input The input tensor to the layer.
//!
//! \param indicesType Indices tensor (the first output) data type, must be DataType::kINT32 or DataType::kINT64.
//!
//! \see INonZeroLayer
//!
//! \return The new nonzero layer, or nullptr if it could not be created.
//!
INonZeroLayer* addNonZero(ITensor& input, DataType indicesType) noexcept
{
    // Forwards to the V2 implementation entry point, which accepts the indices data type.
    return mImpl->addNonZeroV2(input, indicesType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a constant layer to the network.
//!
//! \param dimensions The dimensions of the constant.
//! \param weights The constant value, represented as weights.
//!
//! \see IConstantLayer
//!
//! \return The new constant layer, or nullptr if it could not be created.
//!
//! If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices.
//! Otherwise the output is a tensor of real values and the output type will be
//! follow TensorRT's normal precision rules.
//!
//! If a wildcard dimension is used, the volume of the runtime dimensions must equal
//! the number of weights specified.
//!
//! \warning DataType::kUINT8 not supported.
//!
IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
{
    return mImpl->addConstant(dimensions, weights);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an identity layer.
//!
//! \param input The input tensor to the layer.
//!
//! \see IIdentityLayer
//!
//! \return The new identity layer, or nullptr if it could not be created.
//!
IIdentityLayer* addIdentity(ITensor& input) noexcept
{
    return mImpl->addIdentity(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a cast layer.
//!
//! \param input The input tensor to the layer.
//! \param toType The DataType of the output tensor
//!
//! \see ICastLayer
//!
//! \return The new cast layer, or nullptr if it could not be created.
//!
ICastLayer* addCast(ITensor& input, DataType toType) noexcept
{
    return mImpl->addCast(input, toType);
}
|
||
|
|
|
||
|
|
//!
//! \brief remove a tensor from the network definition.
//!
//! \param tensor the tensor to remove
//!
//! It is illegal to remove a tensor that is the input or output of a layer.
//! if this method is called with such a tensor, a warning will be emitted on the log
//! and the call will be ignored. Its intended use is to remove detached tensors after
//! e.g. concatenating two networks with Layer::setInput().
//!
void removeTensor(ITensor& tensor) noexcept
{
    mImpl->removeTensor(tensor);
}
|
||
|
|
|
||
|
|
//!
//! \brief unmark a tensor as a network output.
//!
//! \param tensor The tensor to unmark as an output tensor.
//!
//! \see markOutput()
//!
void unmarkOutput(ITensor& tensor) noexcept
{
    mImpl->unmarkOutput(tensor);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a plugin layer to the network using the IPluginV2 interface.
//!
//! \param inputs The input tensors to the layer.
//! \param nbInputs The number of input tensors.
//! \param plugin The layer plugin.
//!
//! \see IPluginV2Layer
//!
//! \warning Dimension wildcard are only supported with IPluginV2DynamicExt or IPluginV2IOExt plugins.
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new plugin layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 10.8. Superseded by addPluginV3.
//!
TRT_DEPRECATED IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept
{
    return mImpl->addPluginV2(inputs, nbInputs, plugin);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a plugin layer implementing the IPluginV3 interface to the network.
//!
//! \param inputs The input tensors to the layer.
//! \param nbInputs The number of input tensors.
//! \param shapeInputs Shape tensor inputs to the layer.
//! \param nbShapeInputs The number of shape tensor inputs.
//! \param plugin The layer plugin.
//!
//! \see IPluginV3Layer
//!
//! \return The new plugin layer, or nullptr if it could not be created.
//!
IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
    int32_t nbShapeInputs, IPluginV3& plugin) noexcept
{
    return mImpl->addPluginV3(inputs, nbInputs, shapeInputs, nbShapeInputs, plugin);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a slice layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param start The start offset
//! \param size The output dimension
//! \param stride The slicing stride
//!
//! Positive, negative, zero stride values, and combinations of them in different dimensions are allowed.
//!
//! \see ISliceLayer
//!
//! \return The new slice layer, or nullptr if it could not be created.
//!
ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
{
    return mImpl->addSlice(input, start, size, stride);
}
|
||
|
|
|
||
|
|
//!
//! \brief Sets the name of the network.
//!
//! \param name The name to assign to this network.
//!
//! Set the name of the network so that it can be associated with a built
//! engine. The \p name must be a null-terminated C-style string.
//! TensorRT makes no use of this string except storing it as part of the engine
//! so that it may be retrieved at runtime.
//! A name unique to the builder will be generated by default.
//!
//! This method copies the name string.
//!
//! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
//!
//! \see INetworkDefinition::getName(), ISafeCudaEngine::getName()
//!
//! \return none
//!
void setName(char const* name) noexcept
{
    mImpl->setName(name);
}
|
||
|
|
|
||
|
|
//!
//! \brief Returns the name associated with the network.
//!
//! The memory pointed to by getName() is owned by the INetworkDefinition object.
//!
//! \see INetworkDefinition::setName()
//!
//! \return A null-terminated C-style string representing the name of the network.
//!
char const* getName() const noexcept
{
    return mImpl->getName();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a shape layer to the network.
//!
//! \param input The input tensor to the layer.
//!
//! \see IShapeLayer
//!
//! \warning addShape is only supported when hasImplicitBatchDimensions is false.
//!
//! \return The new shape layer, or nullptr if it could not be created.
//!
IShapeLayer* addShape(ITensor& input) noexcept
{
    return mImpl->addShape(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Query whether the network was created with an implicit batch dimension.
//!
//! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
//!
//! \see createNetworkV2
//!
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
//!
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
{
    return mImpl->hasImplicitBatchDimension();
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the network definition creation flags for this network definition object. Defaults to 0.
//!
//! \return The network definition creation options as a bitmask.
//!
NetworkDefinitionCreationFlags getFlags() const noexcept
{
    return mImpl->getFlags();
}
|
||
|
|
|
||
|
|
//!
//! \brief Returns true if the network definition creation flag is set
//!
//! \param networkDefinitionCreationFlag The flag to query.
//!
//! \see getFlags()
//!
//! \return True if flag is set, false if unset.
//!
bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
{
    return mImpl->getFlag(networkDefinitionCreationFlag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Enable tensor's value to be computed by IExecutionContext::getShapeBinding.
//!
//! \param tensor The tensor to mark as a shape output.
//!
//! \return True if successful, false if tensor is already marked as an output.
//!
//! The tensor must be of type DataType::kINT32 and have no more than one dimension.
//!
//! \warning The tensor must have dimensions that can be determined to be constants at build time.
//!
//! \warning It is an error to mark a network input as a shape output.
//!
bool markOutputForShapes(ITensor& tensor) noexcept
{
    return mImpl->markOutputForShapes(tensor);
}
|
||
|
|
|
||
|
|
//!
//! \brief Undo markOutputForShapes.
//!
//! \param tensor The tensor to unmark as a shape output.
//!
//! \warning inputs to addShape cannot contain wildcard dimension values.
//!
//! \return True if successful, false if tensor is not marked as an output.
//!
bool unmarkOutputForShapes(ITensor& tensor) noexcept
{
    return mImpl->unmarkOutputForShapes(tensor);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a parametric ReLU layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param slope The slope tensor to the layer. This tensor should be unidirectionally broadcastable
//!        to the input tensor.
//!
//! \see IParametricReLULayer
//!
//! \warning Tensors of type Int32, Int64, Bool, or UInt8 are not allowed as inputs.
//!
//! \return The new parametric ReLU layer, or nullptr if it could not be created.
//!
IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept
{
    return mImpl->addParametricReLU(input, slope);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a multi-dimension convolution layer to the network.
//!
//! \param input The input tensor to the convolution.
//! \param nbOutputMaps The number of output feature maps for the convolution.
//! \param kernelSize The multi-dimensions of the convolution kernel.
//! \param kernelWeights The kernel weights for the convolution.
//! \param biasWeights The bias weights for the convolution. Weights{} represents no bias.
//!
//! \see IConvolutionLayer
//!
//! \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor.
//! \warning Int32 tensors are not valid input tensors.
//! \warning Only 2D or 3D convolution is supported.
//!
//! \return The new convolution layer, or nullptr if it could not be created.
//!
IConvolutionLayer* addConvolutionNd(
    ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
{
    return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a multi-dimension pooling layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param type The type of pooling to apply.
//! \param windowSize The size of the pooling window.
//!
//! \see IPoolingLayer PoolingType
//!
//! \warning Int32 tensors are not valid input tensors.
//! \warning Only 2D or 3D pooling is supported.
//!
//! \return The new pooling layer, or nullptr if it could not be created.
//!
IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
{
    return mImpl->addPoolingNd(input, type, windowSize);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a multi-dimension deconvolution layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param nbOutputMaps The number of output feature maps.
//! \param kernelSize The multi-dimensions of the deconvolution kernel.
//! \param kernelWeights The kernel weights for the deconvolution.
//! \param biasWeights The bias weights for the deconvolution. Weights{} represents no bias.
//!
//! \see IDeconvolutionLayer
//!
//! \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor.
//! \warning Int32 tensors are not valid input tensors.
//! \warning Only 2D or 3D deconvolution is supported.
//!
//! \return The new deconvolution layer, or nullptr if it could not be created.
//!
IDeconvolutionLayer* addDeconvolutionNd(
    ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
{
    return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a multi-dimension scale layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param mode The scaling mode.
//! \param shift The shift value.
//! \param scale The scale value.
//! \param power The power value.
//! \param channelAxis The channel axis.
//!
//! If the weights are available, then the size of weights are dependent on the ScaleMode.
//! For ScaleMode::kUNIFORM, the number of weights equals 1.
//! For ScaleMode::kCHANNEL, the number of weights equals the channel dimension.
//! For ScaleMode::kELEMENTWISE, the number of weights equals the product of all input dimensions at channelAxis and
//! beyond.
//!
//! For example, if the inputs dimensions are [A,B,C,D,E,F], and channelAxis=2:
//! For ScaleMode::kUNIFORM, the number of weights is equal to 1.
//! For ScaleMode::kCHANNEL, the number of weights is C.
//! For ScaleMode::kELEMENTWISE, the number of weights is C*D*E*F.
//!
//! channelAxis can also be set explicitly using setChannelAxis().
//!
//! \see IScaleLayer
//! \see setChannelAxis()
//!
//! \warning Int32 tensors are not valid input tensors.
//! \warning Only 2D or 3D scale is supported.
//!
//! \return The new Scale layer, or nullptr if it could not be created.
//!
IScaleLayer* addScaleNd(
    ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
{
    return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a resize layer to the network.
//!
//! \param input The input tensor to the layer.
//!
//! \see IResizeLayer
//!
//! \warning Int32 tensors are not valid input tensors.
//!
//! \return The new resize layer, or nullptr if it could not be created.
//!
IResizeLayer* addResize(ITensor& input) noexcept
{
    return mImpl->addResize(input);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a loop to the network.
//!
//! An ILoop provides a way to specify a recurrent subgraph.
//!
//! \return Pointer to ILoop that can be used to add loop-boundary layers for the loop.
//!
//! \see ILoop
//!
ILoop* addLoop() noexcept
{
    return mImpl->addLoop();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an if-then-else to the network.
//!
//! An IIfConditional provides a way to conditionally execute parts of the network.
//!
//! \return Pointer to the IIfConditional that can be used to add conditional-boundary layers
//!         for the if-then-else.
//!
//! \see IIfConditional
//!
IIfConditional* addIfConditional() noexcept
{
    return mImpl->addIfConditional();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a select layer to the network.
//!
//! \param condition The condition tensor to the layer. Must have type DataType::kBOOL.
//! \param thenInput The "then" input tensor to the layer.
//! \param elseInput The "else" input tensor to the layer.
//!
//! All three input tensors must have the same rank, and along each axis
//! must have the same length or a length of one. If the length is one, the tensor
//! is broadcast along that axis. The output tensor has the dimensions of the inputs AFTER
//! the broadcast rule is applied. For example, given:
//!
//!    dimensions of condition:  [1,1,5,9]
//!    dimensions of thenInput:  [1,1,5,9]
//!    dimensions of elseInput:  [1,3,1,9]
//!
//! the output dimensions are [1,3,5,9], and the output contents are defined by:
//!
//!     output[0,i,j,k] = condition[0,0,j,k] ? thenInput[0,0,j,k] : elseInput[0,i,0,k]
//!
//! The output dimensions are not necessarily the max of the input dimensions if any input
//! is an empty tensor. For example, if in the preceding example, 5 is changed to 0:
//!
//!    dimensions of condition:  [1,1,0,9]
//!    dimensions of thenInput:  [1,1,0,9]
//!    dimensions of elseInput:  [1,3,1,9]
//!
//! then the output dimensions are [1,3,0,9].
//!
//! The inputs are shape tensors if the output is a shape tensor.
//!
//! \see ISelectLayer
//!
//! \return The new select layer, or nullptr if it could not be created.
//!
ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
{
    return mImpl->addSelect(condition, thenInput, elseInput);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an assertion layer to the network.
//!
//! \param condition The input tensor to the layer.
//! \param message A message to print if the assertion fails.
//!
//! \see IAssertionLayer
//!
//! \return The new assertion layer, or nullptr if it could not be created.
//!
//! The input tensor must be a boolean shape tensor.
//!
IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
{
    return mImpl->addAssertion(condition, message);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a fill layer to the network.
//!
//! \param dimensions The output tensor dimensions if input 0 is missing.
//! \param op The fill operation that the layer applies.
//!
//! \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1 for static start/delta. If delta is provided
//! as a 1D tensor, the length of delta must match dimensions.nbDims.
//!
//! This layer is non-deterministic across subsequent calls as the same inputs will produce different
//! output tensors if \p op is either FillOperation::kRANDOM_UNIFORM or FillOperation::kRANDOM_NORMAL
//! due to random state being shared across calls. The output tensors generated are deterministic when
//! starting from the same initial state.
//!
//! \see IFillLayer
//!
//! \return The new fill layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addFill.
//!
TRT_DEPRECATED IFillLayer* addFill(Dims const& dimensions, FillOperation op) noexcept
{
    return mImpl->addFill(dimensions, op);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a fill layer to the network.
//!
//! \param dimensions The output tensor dimensions if input 0 is missing.
//! \param op The fill operation that the layer applies.
//! \param outputType Optional output tensor data type, must be DataType::kFLOAT, DataType::kHALF, DataType::kINT32,
//! or DataType::kINT64. This parameter is only used for static alpha/beta. Future calls to set output type using
//! setToType or setOutputType must be consistent.
//!
//! \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1 for static start/delta. If delta is provided
//! as a 1D tensor, the length of delta must match dimensions.nbDims.
//!
//! This layer is non-deterministic across subsequent calls as the same inputs will produce different
//! output tensors if \p op is either FillOperation::kRANDOM_UNIFORM or FillOperation::kRANDOM_NORMAL
//! due to random state being shared across calls. The output tensors generated are deterministic when
//! starting from the same initial state.
//!
//! \see IFillLayer
//!
//! \return The new fill layer, or nullptr if it could not be created.
//!
IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
{
    // Routes to the V2 entry point; the deprecated two-argument overload uses addFill.
    return mImpl->addFillV2(dimensions, op, outputType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a padding layer to the network. Only 2D padding is currently supported.
//!
//! \param input The input tensor to the layer.
//! \param prePadding The padding to apply to the start of the tensor.
//! \param postPadding The padding to apply to the end of the tensor.
//!
//! \see IPaddingLayer
//!
//! \return The new padding layer, or nullptr if it could not be created.
//!
IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
{
    return mImpl->addPaddingNd(input, prePadding, postPadding);
}
|
||
|
|
|
||
|
|
//!
//! \brief Associate a name with all current uses of the given weights.
//!
//! The name must be set after the Weights are used in the network.
//! Lookup is associative. The name applies to all Weights with matching
//! type, value pointer, and count. If Weights with a matching value
//! pointer, but different type or count exists in the network, an
//! error message is issued, the name is rejected, and return false.
//! If the name has already been used for other weights,
//! return false. A nullptr causes the weights to become unnamed,
//! i.e. clears any previous name.
//!
//! \param weights The weights to be named.
//! \param name The name to associate with the weights.
//!
//! \return true on success.
//!
//! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
//!
bool setWeightsName(Weights weights, char const* name) noexcept
{
    return mImpl->setWeightsName(weights, name);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the ErrorRecorder for this interface
//!
//! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
//! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
//! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
//! a recorder has been registered.
//!
//! If an error recorder is not set, messages will be sent to the global log stream.
//!
//! \param recorder The error recorder to register with this interface.
//!
//! \see getErrorRecorder()
//!
void setErrorRecorder(IErrorRecorder* recorder) noexcept
{
    mImpl->setErrorRecorder(recorder);
}
|
||
|
|
|
||
|
|
//!
//! \brief get the ErrorRecorder assigned to this interface.
//!
//! Retrieves the assigned error recorder object for the given class.
//! A nullptr will be returned if setErrorRecorder has not been called.
//!
//! \return A pointer to the IErrorRecorder object that has been registered.
//!
//! \see setErrorRecorder()
//!
IErrorRecorder* getErrorRecorder() const noexcept
{
    return mImpl->getErrorRecorder();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a dequantization layer to the network.
//!
//! \param input The input tensor to be dequantized.
//! \param scale A tensor with the scale value.
//!
//! \see IDequantizeLayer
//!
//! \p input tensor data type must be DataType::kINT8 or DataType::kFP8.
//! \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must
//! be a build-time constant.
//!
//! \return The new dequantization layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addDequantize.
//!
TRT_DEPRECATED IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale) noexcept
{
    return mImpl->addDequantize(input, scale);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a dequantization layer to the network.
//!
//! \param input The input tensor to be dequantized.
//! \param scale A tensor with the scale value.
//! \param outputType Output tensor data type.
//!
//! \see IDequantizeLayer
//!
//! \p input tensor data type must be DataType::kINT8, DataType::kFP8, DataType::kINT4 or DataType::kFP4.
//! \p scale tensor data type must be one of the following: DataType::kFLOAT (default), DataType::kHALF,
//! DataType::kBF16 or DataType::kE8M0 (for MXFP8 quantization).
//! \p outputType output tensor data type must be DataType::kFLOAT (default), DataType::kHALF or DataType::kBF16.
//! Future calls to set output type using setToType or setOutputType must be consistent. For strongly typed
//! networks, if the scale type is DataType::kHALF or DataType::kBF16 the output type must match.
//!
//! \return The new dequantization layer, or nullptr if it could not be created.
//!
IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
{
    // Routes to the V2 entry point; the deprecated two-argument overload uses addDequantize.
    return mImpl->addDequantizeV2(input, scale, outputType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a Scatter layer to the network with specified mode and axis=0.
//!
//! \param data The input tensor to be updated with additional values.
//! \param indices indices of the elements to be updated.
//! \param updates values to be used for updates.
//! \param mode scatter mode.
//!
//! \see IScatterLayer
//!
//! \p indices tensor data type must be DataType::kINT32.
//! \p updates tensor data type must be the same as \p data
//!
//! \return The new Scatter layer, or nullptr if it could not be created.
//!
IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
{
    return mImpl->addScatter(data, indices, updates, mode);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a quantization layer to the network.
//!
//! \param input The input tensor to be quantized.
//! \param scale A tensor with the scale value.
//!
//! \see IQuantizeLayer
//!
//! \p input tensor data type must be DataType::kFLOAT or DataType::kHALF.
//! \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must
//! be a build-time constant.
//!
//! \return The new quantization layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addQuantize.
//!
TRT_DEPRECATED IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale) noexcept
{
    return mImpl->addQuantize(input, scale);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a quantization layer to the network.
//!
//! \param input The input tensor to be quantized.
//! \param scale A tensor with the scale value.
//! \param outputType Output tensor data type.
//!
//! \see IQuantizeLayer
//!
//! \p input tensor data type must be DataType::kFLOAT, DataType::kHALF or DataType::kBF16.
//! \p scale tensor data type must be one of the following: DataType::kFLOAT (default), DataType::kHALF,
//! DataType::kBF16 or DataType::kE8M0 (for MXFP8 quantization).
//! \p outputType output tensor data type must be DataType::kINT8 (default), DataType::kFP8, DataType::kINT4 or
//! DataType::kFP4.
//! Future calls to set output type using setToType or setOutputType must be consistent. For strongly typed
//! networks, if the scale type is DataType::kHALF or DataType::kBF16 the output type must match.
//!
//! \return The new quantization layer, or nullptr if it could not be created.
//!
IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
{
    // Routes to the V2 entry point; the deprecated two-argument overload uses addQuantize.
    return mImpl->addQuantizeV2(input, scale, outputType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a dynamic quantization layer to the network.
//!
//! This layer performs dynamic block quantization of its input tensor and outputs the
//! quantized data and the computed block scale-factors.
//! The blocked axis dimension size must be divisible by the block size.
//!
//! \param input The input tensor to be quantized. Its data type must be one of DataType::kFLOAT,
//! DataType::kHALF, or DataType::kBF16. Currently only 2D and 3D inputs are supported.
//! \param axis The axis that is sliced into blocks. The axis must be the last or second to last dimension.
//! \param blockSize The number of elements that are quantized using a shared scale factor.
//! Valid values are 16 (NVFP4 quantization) and 32 (MXFP8 quantization).
//! \param outputType The data type of the quantized output tensor, must be DataType::kFP4 (NVFP4 quantization) or
//! DataType::kFP8 (MXFP8 quantization). Future calls to set output type using setToType or setOutputType must be
//! consistent.
//! \param scaleType The data type of the scale factor used for quantizing the input data, must be DataType::kFP8
//! (NVFP4 quantization) or DataType::kE8M0 (MXFP8 quantization).
//!
//! \return The new dynamic quantization layer, or nullptr if it could not be created.
//!
//! \see IDynamicQuantizeLayer
//!
//! \deprecated Marked TRT_DEPRECATED; superseded by addDynamicQuantizeV2, which takes a block shape instead of
//! an axis/blockSize pair. NOTE(review): the deprecating TensorRT version is not stated here — confirm and add it.
//!
TRT_DEPRECATED IDynamicQuantizeLayer* addDynamicQuantize(
    ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
{
    return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a dynamic quantization layer to the network.
//!
//! This layer performs dynamic block quantization of its input tensor and outputs the
//! quantized data and the computed block scale factors.
//!
//! \param input The input tensor to be quantized. Its data type must be one of DataType::kFLOAT,
//! DataType::kHALF, or DataType::kBF16.
//! \param blockShape Defines the block shape for the quantization. Must match the input tensor rank.
//! \param outputType The data type of the quantized output tensor, must be DataType::kFP4, DataType::kFP8 or
//! DataType::kINT8. Future calls to set output type using setToType or setOutputType must be consistent.
//! \param scaleType The data type of the scale factor used for quantizing the input data, must be DataType::kFP8,
//! DataType::kE8M0 or DataType::kFLOAT.
//!
//! \return The new dynamic quantization layer, or nullptr if it could not be created.
//!
//! \see IDynamicQuantizeLayer
//!
IDynamicQuantizeLayer* addDynamicQuantizeV2(
    ITensor& input, Dims const& blockShape, DataType outputType, DataType scaleType) noexcept
{
    return mImpl->addDynamicQuantizeV2(input, blockShape, outputType, scaleType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an Einsum layer to the network.
//!
//! \param inputs The input tensors to the layer.
//! \param nbInputs The number of input tensors.
//! \param equation The equation of the layer
//!
//! \see IEinsumLayer
//!
//! \return The new Einsum layer, or nullptr if it could not be created.
//!
IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
{
    return mImpl->addEinsum(inputs, nbInputs, equation);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a GridSample layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param grid The grid tensor to the layer.
//!
//! \see IGridSampleLayer
//!
//! Creates a GridSample layer with a InterpolationMode::kLINEAR, unaligned corners,
//! and SampleMode::kFILL for 4d-shape input tensors.
//!
//! \return The new GridSample layer, or nullptr if it could not be created.
//!
IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept
{
    return mImpl->addGridSample(input, grid);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a non-maximum suppression layer to the network.
//!
//! The default indices tensor (the first output) data type is DataType::kINT32.
//!
//! \param boxes The input boxes tensor to the layer.
//!
//! \param scores The input scores tensor to the layer.
//!
//! \param maxOutputBoxesPerClass The input maxOutputBoxesPerClass tensor to the layer.
//!
//! \see INMSLayer
//!
//! \return The new NMS layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 10.14. Superseded by four-argument addNMS.
//!
TRT_DEPRECATED INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
{
    return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a non-maximum suppression layer to the network.
//!
//! \param boxes The input boxes tensor to the layer.
//!
//! \param scores The input scores tensor to the layer.
//!
//! \param maxOutputBoxesPerClass The input maxOutputBoxesPerClass tensor to the layer.
//!
//! \param indicesType Indices tensor (the first output) data type, must be DataType::kINT32 or DataType::kINT64.
//!
//! \see INMSLayer
//!
//! \return The new NMS layer, or nullptr if it could not be created.
//!
INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept
{
    // Routes to the V2 entry point; the deprecated three-argument overload uses addNMS.
    return mImpl->addNMSV2(boxes, scores, maxOutputBoxesPerClass, indicesType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a ReverseSequence layer to the network.
//!
//! \param input The input tensor to the layer. Must have rank >= 2.
//!
//! \param sequenceLens 1D tensor specifying lengths of sequences to reverse in a batch. The length of the
//! sequenceLens tensor must be equal to the size of the dimension in input tensor specified by batchAxis.
//!
//! \see IReverseSequenceLayer
//!
//! \return The new ReverseSequence layer, or nullptr if it could not be created.
//!
IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept
{
    return mImpl->addReverseSequence(input, sequenceLens);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a normalization layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param scale The scale tensor used to scale the normalized output.
//! \param bias The bias tensor used to scale the normalized output.
//! \param axesMask The axes on which to perform mean calculations.
//! The bit in position i of bitmask axesMask corresponds to explicit dimension i of the result.
//! E.g., the least significant bit corresponds to the first explicit dimension and the next to least
//! significant bit corresponds to the second explicit dimension.
//!
//! The normalization layer works by performing normalization of the tensor \p input on the specified \p axesMask.
//! The result is then scaled by multiplying with \p scale and adding \p bias.
//!
//! The shapes of \p scale and \p bias must be the same, and must have the same rank and be
//! unidirectionally broadcastable to the shape of \p input. Given a 4D NCHW input tensor, the expected shapes
//! for \p scale and \p bias are:
//! * [1, C, 1, 1] for InstanceNormalization
//! * [1, G, 1, 1] for GroupNormalization. Use addNormalizationV2() instead if [1, C, 1, 1] shapes for \p scale
//!   and \p bias are required.
//!
//! \see INormalizationLayer
//!
//! \return The new normalization layer, or nullptr if it could not be created.
//!
//! \deprecated Deprecated in TensorRT 10.15. Superseded by addNormalizationV2().
//!
TRT_DEPRECATED INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
{
    return mImpl->addNormalization(input, scale, bias, axesMask);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a cumulative layer to the network.
//!
//! \param input The input tensor to the layer.
//! \param axis The axis tensor to apply the cumulative operation on. Currently, it must be a build-time constant 0D
//! shape tensor and must be in the range [-rank(input), rank(input)-1]. Negative value means counting dimensions
//! from the back.
//! \param operation The reduction operation to perform.
//! \param exclusive The boolean that specifies whether it is an exclusive cumulative or inclusive cumulative.
//! \param reverse The boolean that specifies whether the cumulative operation should be applied backward.
//!
//! The cumulative layer works by performing the specified cumulative \p operation to the tensor \p input
//! on the axis specified by \p axis.
//!
//! \see ICumulativeLayer
//!
//! \return The new cumulative layer, or nullptr if it could not be created.
//!
ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
{
    return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an attention to the network.
//!
//! \param query A 4d input query tensor to the layer.
//! \param key A 4d input key tensor to the layer.
//! \param value A 4d input value tensor to the layer.
//! \param normOp The normalization operation to perform.
//! \param causal Whether to use causal inference.
//!
//! query must have shape [batchSize, numHeadsQuery, sequenceLengthQuery, dimHead].
//! key and value must have shape [batchSize, numHeadsKeyValue, sequenceLengthKeyValue, dimHead].
//! pastKey and pastValue must have shape [batchSize, numHeadsKeyValue, sequenceLengthKeyValue, dimHead].
//! normOp defaults to kSOFTMAX, isCausal defaults to false.
//!
//! By default, IAttention is not decomposable and TensorRT will try to use a single fused kernel, which may be more
//! efficient than if the subgraph is expressed without IAttention. Setting the IAttention to decomposable allows
//! IAttention to use multiple kernels if no fused kernel support is found.
//!
//! \see IAttention
//!
//! \return The new attention, or nullptr if it could not be created.
//!
IAttention* addAttention(
    ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool causal) noexcept
{
    return mImpl->addAttention(query, key, value, normOp, causal);
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a Rotary Position Embedding (RoPE) layer to the network.
//!
//! \param input The input activation tensor to the layer. The shape must be (batchSize, numHeads, sequenceLength, headSize).
//! \param cosCache The cosine cache tensor for use in RoPE computation. See the following explanation for the shape requirement.
//! \param sinCache The sine cache tensor for use in RoPE computation. See the following explanation for the shape requirement.
//! \param interleaved Whether the \p input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from adjacent 2 elements in the hidden dimension.
//! \param rotaryEmbeddingDim The hidden dimension that participates in RoPE.
//!
//! The RotaryEmbedding layer applies RoPE to the \p input, using \p cosCache and \p sinCache.
//! An optional input, positionIds, can be provided using setInput with index 3. If provided, it is used to index into \p cosCache and \p sinCache.
//!
//! If \p positionIds is not provided, \p cosCache and \p sinCache must have shape (batchSize, sequenceLength, headSize / 2) if \p rotaryEmbeddingDim is 0, or (batchSize, sequenceLength, rotaryEmbeddingDim / 2) otherwise.
//! If \p positionIds is provided, \p cosCache and \p sinCache must have shape (maxPositionId+1, headSize / 2) if \p rotaryEmbeddingDim is 0, or (maxPositionId+1, rotaryEmbeddingDim / 2) otherwise.
//! \p positionIds, if provided, must have shape (batchSize, sequenceLength).
//!
//! \see IRotaryEmbeddingLayer
//!
//! \return The new RotaryEmbedding layer, or nullptr if it could not be created.
//!
IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
{
    return mImpl->addRotaryEmbedding(input, cosCache, sinCache, interleaved, rotaryEmbeddingDim);
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Add a KVCacheUpdate layer to the network.
|
||
|
|
//!
|
||
|
|
//! \param cache The key/value cache tensor for the layer. The user is responsible for properly allocating
|
||
|
|
//! and binding the tensor memory.
|
||
|
|
//! \param update The newly updated key/value tensor for the layer.
|
||
|
|
//! \param writeIndices The write indices tensor for key/value cache updates.
|
||
|
|
//! \param cacheMode The mode of the KVCacheUpdate layer. For TensorRT 10.15, only `kLINEAR` mode is supported.
|
||
|
|
//!
|
||
|
|
//! The expected tensor shapes are as follows:
|
||
|
|
//! - `cache`: [batchSize, numHeads, maxSequenceLength, headSize]
|
||
|
|
//! - `update`: [batchSize, numHeads, sequenceLength, headSize]
|
||
|
|
//! - `writeIndices`: [batchSize]
|
||
|
|
//!
|
||
|
|
//! The `cache` and `update` tensors must have the same data type, which can be DataType::kFLOAT,
|
||
|
|
//! DataType::kHALF, or DataType::kBF16. Quantized data types are not supported.
|
||
|
|
//! The `writeIndices` tensor must be DataType::kINT32 or DataType::kINT64.
|
||
|
|
//!
|
||
|
|
//! The layer performs in-place updates on the cache tensor. Therefore, the user must ensure that
|
||
|
|
//! the `cache` tensor and the corresponding output tensor share the same device memory address
|
||
|
|
//! before execution.
|
||
|
|
//!
|
||
|
|
//! \warning In `kLINEAR` mode, each update must satisfy the condition
|
||
|
|
//! `writeIndices[i] + sequenceLength <= maxSequenceLength`. Out-of-bound updates will be ignored silently.
|
||
|
|
//!
|
||
|
|
//! \see IKVCacheUpdateLayer
|
||
|
|
//!
|
||
|
|
//! \return The new KVCacheUpdate layer, or nullptr if it could not be created.
|
||
|
|
//!
|
||
|
|
IKVCacheUpdateLayer* addKVCacheUpdate(
|
||
|
|
ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->addKVCacheUpdate(cache, update, writeIndices, cacheMode);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the builder from which this INetworkDefinition was created.
|
||
|
|
//!
|
||
|
|
//! \see IBuilder::createNetworkV2
|
||
|
|
//!
|
||
|
|
//! \return the builder
|
||
|
|
virtual IBuilder& getBuilder() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getBuilder();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
|
||
|
|
//!
|
||
|
|
//! \param name The name of the weights.
|
||
|
|
//!
|
||
|
|
//! \return True if the weights were successfully marked as refittable, false if the weights do not exist or cannot
|
||
|
|
//! be refitted.
|
||
|
|
//!
|
||
|
|
bool markWeightsRefittable(char const* name) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->markWeightsRefittable(name);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
|
||
|
|
//!
|
||
|
|
//! \param name The name of the weights.
|
||
|
|
//!
|
||
|
|
//! \return True if the weights were successfully marked as unrefittable, false if the weights do not exist.
|
||
|
|
//!
|
||
|
|
bool unmarkWeightsRefittable(char const* name) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->unmarkWeightsRefittable(name);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Whether the weight has been marked as refittable.
|
||
|
|
//!
|
||
|
|
//! \param name The name of the weights to check.
|
||
|
|
//!
|
||
|
|
//! \return True if the weights are marked as refittable, false if the weights do not exist or are marked as
|
||
|
|
//! non-refittable.
|
||
|
|
//!
|
||
|
|
bool areWeightsMarkedRefittable(char const* name) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->areWeightsMarkedRefittable(name);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Add a squeeze layer to the network.
|
||
|
|
//!
|
||
|
|
//! \param input The input tensor to the layer.
|
||
|
|
//! \param axes The axes to remove unit dimensions on.
|
||
|
|
//!
|
||
|
|
//! \see ISqueezeLayer
|
||
|
|
//!
|
||
|
|
//! Axes must be resolvable to a constant Int32 or Int64 1D shape tensor.
|
||
|
|
//! Values in axes must be unique and in the range of [-r, r-1], where r is the rank of the input tensor.
|
||
|
|
//! For each axis value, the corresponding dimension in the input tensor must be one.
|
||
|
|
//!
|
||
|
|
//! \return The new Squeeze layer, or nullptr if it could not be created.
|
||
|
|
//!
|
||
|
|
ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->addSqueeze(input, axes);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Add an unsqueeze layer to the network.
|
||
|
|
//!
|
||
|
|
//! \param input The input tensor to the layer.
|
||
|
|
//! \param axes The axes to add unit dimensions.
|
||
|
|
//!
|
||
|
|
//! \see IUnsqueezeLayer
|
||
|
|
//!
|
||
|
|
//! Axes must be resolvable to a constant Int32 or Int64 shape tensor.
|
||
|
|
//! Values in axes must be unique and in the range of [-r_final, r_final-1], where r_final
|
||
|
|
//! is the sum of rank(input) and len(axes).
|
||
|
|
//!
|
||
|
|
//! r_final must be less than Dims::MAX_DIMS.
|
||
|
|
//!
|
||
|
|
//! \return The new Unsqueeze layer, or nullptr if it could not be created
|
||
|
|
//!
|
||
|
|
IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->addUnsqueeze(input, axes);
|
||
|
|
}
|
||
|
|
|
||
|
|
//! \brief Add a normalization layer to the network.
|
||
|
|
//!
|
||
|
|
//! \param input The input tensor to the layer.
|
||
|
|
//! \param scale The scale tensor used to scale the normalized output.
|
||
|
|
//! \param bias The bias tensor used to scale the normalized output.
|
||
|
|
//! \param axesMask The axes on which to perform mean calculations.
|
||
|
|
//! The bit in position i of bitmask axesMask corresponds to explicit dimension i of the result.
|
||
|
|
//! E.g., the least significant bit corresponds to the first explicit dimension and the next to least
|
||
|
|
//! significant bit corresponds to the second explicit dimension.
|
||
|
|
//!
|
||
|
|
//! The normalization layer works by performing normalization of the tensor \p input on the specified \p axesMask.
|
||
|
|
//! The result is then scaled by multiplying with \p scale and adding \p bias.
|
||
|
|
//!
|
||
|
|
//! The shapes of \p scale and \p bias are expected the be the same, and must have the same rank and be
|
||
|
|
//! unidirectionally broadcastable to the shape of \p input. In the case of InstanceNorm or GroupNorm,
|
||
|
|
//! the shapes of \p scale and \p bias are expected to be [1, C, 1, 1] in the case of a 4D NCHW input tensor.
|
||
|
|
//!
|
||
|
|
//! \see INormalizationLayer
|
||
|
|
//!
|
||
|
|
//! \return The new normalization layer, or nullptr if it could not be created.
|
||
|
|
//!
|
||
|
|
TRT_NODISCARD INormalizationLayer* addNormalizationV2(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->addNormalizationV2(input, scale, bias, axesMask);
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VNetworkDefinition* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum CalibrationAlgoType
|
||
|
|
//!
|
||
|
|
//! \brief Version of calibration algorithm to use.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
enum class CalibrationAlgoType : int32_t
|
||
|
|
{
|
||
|
|
kLEGACY_CALIBRATION TRT_DEPRECATED_ENUM = 0, //!< Legacy calibration
|
||
|
|
kENTROPY_CALIBRATION TRT_DEPRECATED_ENUM = 1, //!< Legacy entropy calibration
|
||
|
|
kENTROPY_CALIBRATION_2 TRT_DEPRECATED_ENUM = 2, //!< Entropy calibration
|
||
|
|
kMINMAX_CALIBRATION TRT_DEPRECATED_ENUM = 3, //!< Minmax calibration
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in CalibrationAlgoType enum.
|
||
|
|
//!
|
||
|
|
//! \see DataType
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<CalibrationAlgoType>() noexcept
|
||
|
|
{
|
||
|
|
return 4;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IInt8Calibrator
|
||
|
|
//!
|
||
|
|
//! \brief Application-implemented interface for calibration.
|
||
|
|
//!
|
||
|
|
//! Calibration is a step performed by the builder when deciding suitable scale factors for 8-bit inference.
|
||
|
|
//!
|
||
|
|
//! It must also provide a method for retrieving representative images which the calibration process can use to examine
|
||
|
|
//! the distribution of activations. It may optionally implement a method for caching the calibration result for reuse
|
||
|
|
//! on subsequent runs.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IInt8Calibrator : public IVersionedInterface
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Get the batch size used for calibration batches.
|
||
|
|
//!
|
||
|
|
//! \return The batch size.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED virtual int32_t getBatchSize() const noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get a batch of input for calibration.
|
||
|
|
//!
|
||
|
|
//! The batch size of the input must match the batch size returned by getBatchSize().
|
||
|
|
//!
|
||
|
|
//! \param bindings An array of pointers to device memory that must be updated to point to device memory
|
||
|
|
//! containing each network input data.
|
||
|
|
//! \param names The names of the network input for each pointer in the binding array.
|
||
|
|
//! \param nbBindings The number of pointers in the bindings array.
|
||
|
|
//!
|
||
|
|
//! \return False if there are no more batches for calibration.
|
||
|
|
//!
|
||
|
|
//! \see getBatchSize()
|
||
|
|
//!
|
||
|
|
virtual bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Load a calibration cache.
|
||
|
|
//!
|
||
|
|
//! Calibration is potentially expensive, so it can be useful to generate the calibration data once, then use it on
|
||
|
|
//! subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate
|
||
|
|
//! it, and will not be used if these do not batch the settings of the current calibrator. However, the network
|
||
|
|
//! should also be recalibrated if its structure changes, or the input data set changes, and it is the
|
||
|
|
//! responsibility of the application to ensure this.
|
||
|
|
//!
|
||
|
|
//! \param length The length of the cached data, that should be set by the called function. If there is no data,
|
||
|
|
//! this should be zero.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to the cache, or nullptr if there is no data.
|
||
|
|
//!
|
||
|
|
virtual void const* readCalibrationCache(std::size_t& length) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Save a calibration cache.
|
||
|
|
//!
|
||
|
|
//! \param ptr A pointer to the data to cache.
|
||
|
|
//! \param length The length in bytes of the data to cache.
|
||
|
|
//!
|
||
|
|
//! \see readCalibrationCache()
|
||
|
|
//!
|
||
|
|
virtual void writeCalibrationCache(void const* ptr, std::size_t length) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the algorithm used by this calibrator.
|
||
|
|
//!
|
||
|
|
//! \return The algorithm used by the calibrator.
|
||
|
|
//!
|
||
|
|
virtual CalibrationAlgoType getAlgorithm() noexcept = 0;
|
||
|
|
|
||
|
|
~IInt8Calibrator() noexcept override = default;
|
||
|
|
};
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class TRT_DEPRECATED IInt8EntropyCalibrator : public IInt8Calibrator
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IInt8EntropyCalibrator", 1, 0};
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Signal that this is the entropy calibrator.
|
||
|
|
//!
|
||
|
|
CalibrationAlgoType getAlgorithm() noexcept override
|
||
|
|
{
|
||
|
|
return CalibrationAlgoType::kENTROPY_CALIBRATION;
|
||
|
|
}
|
||
|
|
|
||
|
|
~IInt8EntropyCalibrator() noexcept override = default;
|
||
|
|
};
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IInt8EntropyCalibrator
|
||
|
|
//!
|
||
|
|
//! \brief Entropy calibrator.
|
||
|
|
//!
|
||
|
|
//! This is the Legacy Entropy calibrator. It is less complicated than the legacy calibrator and
|
||
|
|
//! produces better results.
|
||
|
|
//!
|
||
|
|
//! \note To ensure compatibility of source code with future versions of TensorRT, use IEntropyCalibrator, not
|
||
|
|
//! v_1_0::IEntropyCalibrator
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
using IInt8EntropyCalibrator = v_1_0::IInt8EntropyCalibrator;
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class TRT_DEPRECATED IInt8EntropyCalibrator2 : public IInt8Calibrator
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IInt8EntropyCalibrator2", 1, 0};
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Signal that this is the entropy calibrator 2.
|
||
|
|
//!
|
||
|
|
CalibrationAlgoType getAlgorithm() noexcept override
|
||
|
|
{
|
||
|
|
return CalibrationAlgoType::kENTROPY_CALIBRATION_2;
|
||
|
|
}
|
||
|
|
|
||
|
|
~IInt8EntropyCalibrator2() noexcept override = default;
|
||
|
|
};
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IInt8EntropyCalibrator2
|
||
|
|
//!
|
||
|
|
//! \brief Entropy calibrator 2.
|
||
|
|
//!
|
||
|
|
//! This is the preferred calibrator. This is the required calibrator for DLA, as it supports per
|
||
|
|
//! activation tensor scaling.
|
||
|
|
//!
|
||
|
|
//! \note To ensure compatibility of source code with future versions of TensorRT, use IEntropyCalibrator2, not
|
||
|
|
//! v_1_0::IEntropyCalibrator2
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
using IInt8EntropyCalibrator2 = v_1_0::IInt8EntropyCalibrator2;
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class TRT_DEPRECATED IInt8MinMaxCalibrator : public IInt8Calibrator
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IInt8MinMaxCalibrator", 1, 0};
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Signal that this is the MinMax Calibrator.
|
||
|
|
//!
|
||
|
|
CalibrationAlgoType getAlgorithm() noexcept override
|
||
|
|
{
|
||
|
|
return CalibrationAlgoType::kMINMAX_CALIBRATION;
|
||
|
|
}
|
||
|
|
|
||
|
|
~IInt8MinMaxCalibrator() noexcept override = default;
|
||
|
|
};
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IInt8MinMaxCalibrator
|
||
|
|
//!
|
||
|
|
//! \brief MinMax Calibrator.
|
||
|
|
//!
|
||
|
|
//! It supports per activation tensor scaling.
|
||
|
|
//!
|
||
|
|
//! \note To ensure compatibility of source code with future versions of TensorRT, use IMinMaxCalibrator>, not
|
||
|
|
//! v_1_0::IMinMaxCalibrator
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
using IInt8MinMaxCalibrator = v_1_0::IInt8MinMaxCalibrator;
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class TRT_DEPRECATED IInt8LegacyCalibrator : public IInt8Calibrator
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IInt8Calibrator", 1, 0};
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Signal that this is the legacy calibrator.
|
||
|
|
//!
|
||
|
|
CalibrationAlgoType getAlgorithm() noexcept override
|
||
|
|
{
|
||
|
|
return CalibrationAlgoType::kLEGACY_CALIBRATION;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The quantile (between 0 and 1) that will be used to select the region maximum when the quantile method
|
||
|
|
//! is in use.
|
||
|
|
//!
|
||
|
|
//! See the user guide for more details on how the quantile is used.
|
||
|
|
//!
|
||
|
|
virtual double getQuantile() const noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regression
|
||
|
|
//! to determine the region maximum.
|
||
|
|
//!
|
||
|
|
//! See the user guide for more details on how the regression cutoff is used
|
||
|
|
//!
|
||
|
|
virtual double getRegressionCutoff() const noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Load a histogram.
|
||
|
|
//!
|
||
|
|
//! Histogram generation is potentially expensive, so it can be useful to generate the histograms once, then use
|
||
|
|
//! them when exploring the space of calibrations. The histograms should be regenerated if the network structure
|
||
|
|
//! changes, or the input data set changes, and it is the responsibility of the application to ensure this.
|
||
|
|
//!
|
||
|
|
//! \param length The length of the cached data, that should be set by the called function. If there is no data,
|
||
|
|
//! this should be zero.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to the cache, or nullptr if there is no data.
|
||
|
|
//!
|
||
|
|
virtual void const* readHistogramCache(std::size_t& length) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Save a histogram cache.
|
||
|
|
//!
|
||
|
|
//! \param ptr A pointer to the data to cache.
|
||
|
|
//! \param length The length in bytes of the data to cache.
|
||
|
|
//!
|
||
|
|
//! \see readHistogramCache()
|
||
|
|
//!
|
||
|
|
virtual void writeHistogramCache(void const* ptr, std::size_t length) noexcept = 0;
|
||
|
|
|
||
|
|
~IInt8LegacyCalibrator() noexcept override = default;
|
||
|
|
};
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IInt8LegacyCalibrator
|
||
|
|
//!
|
||
|
|
//! \brief Legacy calibrator.
|
||
|
|
//!
|
||
|
|
//! This calibrator requires user parameterization,
|
||
|
|
//! and is provided as a fallback option if the other calibrators yield poor results.
|
||
|
|
//!
|
||
|
|
//! \note To ensure compatibility of source code with future versions of TensorRT, use ILegacyCalibrator, not
|
||
|
|
//! v_1_0::ILegacyCalibrator
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
using IInt8LegacyCalibrator = v_1_0::IInt8LegacyCalibrator;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IAlgorithmIOInfo
|
||
|
|
//!
|
||
|
|
//! \brief Carries information about input or output of the algorithm.
|
||
|
|
//! IAlgorithmIOInfo for all the input and output along with IAlgorithmVariant denotes the variation of algorithm
|
||
|
|
//! and can be used to select or reproduce an algorithm using IAlgorithmSelector::selectAlgorithms().
|
||
|
|
//! \see IAlgorithmVariant, IAlgorithm, IAlgorithmSelector::selectAlgorithms()
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IAlgorithmIOInfo : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return DataType of the input/output of algorithm.
|
||
|
|
//!
|
||
|
|
//! \return the data type.
|
||
|
|
//!
|
||
|
|
DataType getDataType() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getDataType();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return strides of the input/output tensor of algorithm.
|
||
|
|
//! For vectorized formats, strides are given in units of vectors.
|
||
|
|
//!
|
||
|
|
//! \return the strides of the tensor.
|
||
|
|
//!
|
||
|
|
Dims getStrides() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getStrides();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the index of the vectorized dimension or -1 for non-vectorized formats.
|
||
|
|
//!
|
||
|
|
//! \return the index of the vectorized dimension.
|
||
|
|
//!
|
||
|
|
int64_t getVectorizedDim() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getVectorizedDim();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the number of components per element.
|
||
|
|
//! This is always 1 for non-vectorized formats.
|
||
|
|
//!
|
||
|
|
//! \return the number of components per element.
|
||
|
|
//!
|
||
|
|
int64_t getComponentsPerElement() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getComponentsPerElement();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IAlgorithmIOInfo() noexcept = default;
|
||
|
|
apiv::VAlgorithmIOInfo* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IAlgorithmVariant
|
||
|
|
//!
|
||
|
|
//! \brief provides a unique 128-bit identifier, which along with the input and output information
|
||
|
|
//! denotes the variation of algorithm and can be used to select or reproduce an algorithm,
|
||
|
|
//! using IAlgorithmSelector::selectAlgorithms()
|
||
|
|
//! \see IAlgorithmIOInfo, IAlgorithm, IAlgorithmSelector::selectAlgorithms()
|
||
|
|
//! \note A single implementation can have multiple tactics.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IAlgorithmVariant : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return implementation of the algorithm.
|
||
|
|
//!
|
||
|
|
int64_t getImplementation() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getImplementation();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return tactic of the algorithm.
|
||
|
|
//!
|
||
|
|
int64_t getTactic() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getTactic();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IAlgorithmVariant() noexcept = default;
|
||
|
|
apiv::VAlgorithmVariant* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IAlgorithmContext
|
||
|
|
//!
|
||
|
|
//! \brief Describes the context and requirements, that could be fulfilled by one or more instances of IAlgorithm.
|
||
|
|
//! \see IAlgorithm
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IAlgorithmContext : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return name of the algorithm node.
|
||
|
|
//!
|
||
|
|
//! This is a unique identifier for the IAlgorithmContext.
|
||
|
|
//!
|
||
|
|
char const* getName() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getName();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the minimum / optimum / maximum dimensions for input or output tensor.
|
||
|
|
//!
|
||
|
|
//! \param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs
|
||
|
|
//! and the outputs.
|
||
|
|
//! \param select Which of the minimum, optimum, or maximum dimensions to be queried.
|
||
|
|
//!
|
||
|
|
Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getDimensions(index, select);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return number of inputs of the algorithm.
|
||
|
|
//!
|
||
|
|
int32_t getNbInputs() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getNbInputs();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return number of outputs of the algorithm.
|
||
|
|
//!
|
||
|
|
int32_t getNbOutputs() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getNbOutputs();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IAlgorithmContext() noexcept = default;
|
||
|
|
apiv::VAlgorithmContext* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IAlgorithm
|
||
|
|
//!
|
||
|
|
//! \brief Describes a variation of execution of a layer.
|
||
|
|
//! An algorithm is represented by IAlgorithmVariant and the IAlgorithmIOInfo for each of its inputs and outputs.
|
||
|
|
//! An algorithm can be selected or reproduced using AlgorithmSelector::selectAlgorithms().
|
||
|
|
//!
|
||
|
|
//! \see IAlgorithmIOInfo, IAlgorithmVariant, IAlgorithmSelector::selectAlgorithms()
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
|
||
|
|
//!
|
||
|
|
class TRT_DEPRECATED IAlgorithm : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Returns the algorithm variant.
|
||
|
|
//!
|
||
|
|
IAlgorithmVariant const& getAlgorithmVariant() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAlgorithmVariant();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The time in milliseconds to execute the algorithm.
|
||
|
|
//!
|
||
|
|
float getTimingMSec() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getTimingMSec();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The size of the GPU temporary memory in bytes which the algorithm uses at execution time.
|
||
|
|
//!
|
||
|
|
std::size_t getWorkspaceSize() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getWorkspaceSize();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first,
|
||
|
|
//! followed by algorithm outputs.
|
||
|
|
//!
|
||
|
|
//! \param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs
|
||
|
|
//! and the outputs.
|
||
|
|
//!
|
||
|
|
//! \return a pointer to a IAlgorithmIOInfo interface or nullptr if index is out of range.
|
||
|
|
//!
|
||
|
|
IAlgorithmIOInfo const* getAlgorithmIOInfoByIndex(int32_t index) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getAlgorithmIOInfoByIndex(index);
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
virtual ~IAlgorithm() noexcept = default;
|
||
|
|
apiv::VAlgorithm* mImpl;
|
||
|
|
}; // IAlgorithm
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class TRT_DEPRECATED IAlgorithmSelector : public IVersionedInterface
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IAlgorithmSelector", 1, 0};
|
||
|
|
}
|
||
|
|
//!
|
||
|
|
//! \brief Select Algorithms for a layer from the given list of algorithm choices.
|
||
|
|
//!
|
||
|
|
//! \return The number of choices selected from [0, nbChoices-1].
|
||
|
|
//! \param context The context for which the algorithm choices are valid.
|
||
|
|
//! \param choices The list of algorithm choices to select for implementation of this layer.
|
||
|
|
//! \param nbChoices Number of algorithm choices.
|
||
|
|
//! \param selection The user writes indices of selected choices in to selection buffer which is of size nbChoices.
|
||
|
|
//!
|
||
|
|
//! \note TensorRT uses its default algorithm selection to choose from the list provided.
|
||
|
|
//! If return value is 0, TensorRT's default algorithm selection is used unless
|
||
|
|
//! BuilderFlag::kREJECT_EMPTY_ALGORITHMS is set.
|
||
|
|
//! The list of choices is valid only for this specific algorithm context.
|
||
|
|
//!
|
||
|
|
virtual int32_t selectAlgorithms(IAlgorithmContext const& context, IAlgorithm const* const* choices,
|
||
|
|
int32_t nbChoices, int32_t* selection) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Called by TensorRT to report choices it made.
|
||
|
|
//!
|
||
|
|
//! \note For a given optimization profile, this call comes after all calls to selectAlgorithms.
|
||
|
|
//! algoChoices[i] is the choice that TensorRT made for algoContexts[i], for i in [0, nbAlgorithms-1]
|
||
|
|
//!
|
||
|
|
//! \param algoContexts The list of all algorithm contexts.
|
||
|
|
//! \param algoChoices The list of algorithm choices made by TensorRT
|
||
|
|
//! \param nbAlgorithms The size of algoContexts as well as algoChoices.
|
||
|
|
//!
|
||
|
|
virtual void reportAlgorithms(IAlgorithmContext const* const* algoContexts, IAlgorithm const* const* algoChoices,
|
||
|
|
int32_t nbAlgorithms) noexcept = 0;
|
||
|
|
|
||
|
|
virtual ~IAlgorithmSelector() noexcept = default;
|
||
|
|
};
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IAlgorithmSelector
|
||
|
|
//!
|
||
|
|
//! \brief Interface implemented by application for selecting and reporting algorithms of a layer provided by the
|
||
|
|
//! builder.
|
||
|
|
//! \note A layer in context of algorithm selection may be different from ILayer in INetworkDefinition.
|
||
|
|
//! For example, an algorithm might be implementing a conglomeration of multiple ILayers in INetworkDefinition.
|
||
|
|
//! \note To ensure compatibility of source code with future versions of TensorRT, use IAlgorithmSelector, not
|
||
|
|
//! v_1_0::IAlgorithmSelector
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
|
||
|
|
//!
|
||
|
|
using IAlgorithmSelector = v_1_0::IAlgorithmSelector;
|
||
|
|
|
||
|
|
//!
//! \brief Represents one or more QuantizationFlag values using binary OR
//! operations.
//!
//! \see IBuilderConfig::getQuantizationFlags(), IBuilderConfig::setQuantizationFlags()
//!
using QuantizationFlags = uint32_t;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum QuantizationFlag
|
||
|
|
//!
|
||
|
|
//! \brief List of valid flags for quantizing the network to int8
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::setQuantizationFlag(), IBuilderConfig::getQuantizationFlag()
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
|
||
|
|
//!
|
||
|
|
enum class QuantizationFlag : int32_t
|
||
|
|
{
|
||
|
|
//! Run int8 calibration pass before layer fusion. Only valid for IInt8LegacyCalibrator and
|
||
|
|
//! IInt8EntropyCalibrator. The builder always runs the int8 calibration pass before layer fusion for
|
||
|
|
//! IInt8MinMaxCalibrator and IInt8EntropyCalibrator2. Disabled by default.
|
||
|
|
kCALIBRATE_BEFORE_FUSION TRT_DEPRECATED_ENUM = 0
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of quantization flags in QuantizationFlag enum.
|
||
|
|
//!
|
||
|
|
//! \see QuantizationFlag
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<QuantizationFlag>() noexcept
|
||
|
|
{
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
//! \enum RuntimePlatform
//!
//! \brief Describes the intended runtime platform (operating system and CPU architecture) for the execution of the
//! TensorRT engine. TensorRT provides support for cross-platform engine compatibility when the target runtime
//! platform is different from the build platform.
//!
//! \note The cross-platform engine will not be able to run on the host platform it was built on.
//!
//! \note When building a cross-platform engine that also requires version forward compatibility,
//! kEXCLUDE_LEAN_RUNTIME must be set to exclude the target platform lean runtime.
//!
//! \note The cross-platform engine might have performance differences compared to the natively built engine on the
//! target platform.
//!
//! \see IBuilderConfig::setRuntimePlatform(), IBuilderConfig::getRuntimePlatform()
//!
enum class RuntimePlatform : int32_t
{
    //! No requirement for cross-platform compatibility. The engine constructed by TensorRT can only run on the
    //! identical platform it was built on.
    kSAME_AS_BUILD = 0,

    //! Designates the target platform for engine execution as Windows AMD64 system. Currently this flag can only be
    //! enabled when building engines on Linux AMD64 platforms.
    kWINDOWS_AMD64 = 1,
};
namespace impl
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in RuntimePlatform enum.
|
||
|
|
//!
|
||
|
|
//! \see RuntimePlatform
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
struct EnumMaxImpl<RuntimePlatform>
|
||
|
|
{
|
||
|
|
static constexpr int32_t kVALUE = 2;
|
||
|
|
};
|
||
|
|
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \brief Represents one or more BuilderFlag values using binary OR
//! operations, e.g., 1U << BuilderFlag::kFP16 | 1U << BuilderFlag::kDEBUG.
//!
//! \see IBuilderConfig::setFlags(), IBuilderConfig::getFlags()
//!
using BuilderFlags = uint32_t;
//!
|
||
|
|
//! \enum BuilderFlag
|
||
|
|
//!
|
||
|
|
//! \brief List of valid modes that the builder can enable when creating an engine from a network definition.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::setFlags(), IBuilderConfig::getFlags()
|
||
|
|
//!
|
||
|
|
enum class BuilderFlag : int32_t
|
||
|
|
{
|
||
|
|
//! Enable FP16 layer selection, with FP32 fallback.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kFP16 TRT_DEPRECATED_ENUM = 0,
|
||
|
|
|
||
|
|
//! Enable Int8 layer selection, with FP32 fallback with FP16 fallback if kFP16 also specified.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kINT8 TRT_DEPRECATED_ENUM = 1,
|
||
|
|
|
||
|
|
//! Enable debugging of layers via synchronizing after every layer.
|
||
|
|
kDEBUG = 2,
|
||
|
|
|
||
|
|
//! Enable layers marked to execute on GPU if layer cannot execute on DLA.
|
||
|
|
kGPU_FALLBACK = 3,
|
||
|
|
|
||
|
|
//! Enable building a refittable engine.
|
||
|
|
kREFIT = 4,
|
||
|
|
|
||
|
|
//! Disable reuse of timing information across identical layers.
|
||
|
|
kDISABLE_TIMING_CACHE = 5,
|
||
|
|
|
||
|
|
//! Allow (but not require) computations on tensors of type DataType::kFLOAT to use TF32.
|
||
|
|
//! TF32 computes inner products by rounding the inputs to 10-bit mantissas before
|
||
|
|
//! multiplying, but accumulates the sum using 23-bit mantissas. Enabled by default.
|
||
|
|
kTF32 = 6,
|
||
|
|
|
||
|
|
//! Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
|
||
|
|
kSPARSE_WEIGHTS = 7,
|
||
|
|
|
||
|
|
//! Change the allowed parameters in the EngineCapability::kSTANDARD flow to
|
||
|
|
//! match the restrictions that EngineCapability::kSAFETY check against for DeviceType::kGPU
|
||
|
|
//! and EngineCapability::kDLA_STANDALONE check against the DeviceType::kDLA case. This flag
|
||
|
|
//! is forced to true if EngineCapability::kSAFETY at build time if it is unset.
|
||
|
|
//!
|
||
|
|
//! This flag is only supported in NVIDIA Drive(R) products.
|
||
|
|
kSAFETY_SCOPE = 8,
|
||
|
|
|
||
|
|
//! Require that layers execute in specified precisions. Build fails otherwise.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kOBEY_PRECISION_CONSTRAINTS TRT_DEPRECATED_ENUM = 9,
|
||
|
|
|
||
|
|
//! Prefer that layers execute in specified precisions.
|
||
|
|
//! Fall back (with warning) to another precision if build would otherwise fail.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kPREFER_PRECISION_CONSTRAINTS TRT_DEPRECATED_ENUM = 10,
|
||
|
|
|
||
|
|
//! Require that no reformats be inserted between a layer and a network I/O tensor
|
||
|
|
//! for which ITensor::setAllowedFormats was called.
|
||
|
|
//! Build fails if a reformat is required for functional correctness.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.7. Unneeded API.
|
||
|
|
kDIRECT_IO TRT_DEPRECATED_ENUM = 11,
|
||
|
|
|
||
|
|
//! Fail if IAlgorithmSelector::selectAlgorithms returns an empty set of algorithms.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.10. Unneeded API due to IAlgorithmSelector deprecation.
|
||
|
|
kREJECT_EMPTY_ALGORITHMS TRT_DEPRECATED_ENUM = 12,
|
||
|
|
|
||
|
|
//! Restrict to lean runtime operators to provide version forward compatibility
|
||
|
|
//! for the plan.
|
||
|
|
//!
|
||
|
|
//! This flag is only supported by NVIDIA Volta and later GPUs.
|
||
|
|
//! This flag is not supported in NVIDIA Drive(R) products.
|
||
|
|
kVERSION_COMPATIBLE = 13,
|
||
|
|
|
||
|
|
//! Exclude lean runtime from the plan when version forward compatability is enabled.
|
||
|
|
//! By default, this flag is unset, so the lean runtime will be included in the plan.
|
||
|
|
//!
|
||
|
|
//! If BuilderFlag::kVERSION_COMPATIBLE is not set then the value of this flag will be ignored.
|
||
|
|
kEXCLUDE_LEAN_RUNTIME = 14,
|
||
|
|
|
||
|
|
//! Enable plugins with FP8 input/output.
|
||
|
|
//!
|
||
|
|
//! This flag is not supported when HardwareCompatibilityLevel::kAMPERE_PLUS is enabled.
|
||
|
|
//!
|
||
|
|
//! \see HardwareCompatibilityLevel
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kFP8 TRT_DEPRECATED_ENUM = 15,
|
||
|
|
|
||
|
|
//! Emit error when a tactic being timed is not present in the timing cache.
|
||
|
|
//! This flag has an effect only when IBuilderConfig has an associated ITimingCache.
|
||
|
|
kERROR_ON_TIMING_CACHE_MISS = 16,
|
||
|
|
|
||
|
|
//! Enable DataType::kBF16 layer selection, with FP32 fallback.
|
||
|
|
//! This flag is only supported by NVIDIA Ampere and later GPUs.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kBF16 TRT_DEPRECATED_ENUM = 17,
|
||
|
|
|
||
|
|
//! Disable caching of JIT-compilation results during engine build.
|
||
|
|
//! By default, JIT-compiled code will be serialized as part of the timing cache, which may significantly increase
|
||
|
|
//! the cache size. Setting this flag prevents the code from being serialized. This flag has an effect only when
|
||
|
|
//! BuilderFlag::DISABLE_TIMING_CACHE is not set.
|
||
|
|
kDISABLE_COMPILATION_CACHE = 18,
|
||
|
|
|
||
|
|
//! Strip the refittable weights from the engine plan file.
|
||
|
|
kSTRIP_PLAN = 19,
|
||
|
|
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. Superseded by kSTRIP_PLAN.
|
||
|
|
kWEIGHTLESS TRT_DEPRECATED_ENUM = kSTRIP_PLAN,
|
||
|
|
|
||
|
|
//! Create a refittable engine under the assumption that the refit weights will be identical to those provided at
|
||
|
|
//! build time. The resulting engine will have the same performance as a non-refittable one. All refittable weights
|
||
|
|
//! can be refitted through the refit API, but if the refit weights are not identical to the build-time weights,
|
||
|
|
//! behavior is undefined. When used alongside 'kSTRIP_PLAN', this flag will result in a small plan file for which
|
||
|
|
//! weights are later supplied via refitting. This enables use of a single set of weights with different inference
|
||
|
|
//! backends, or with TensorRT plans for multiple GPU architectures.
|
||
|
|
kREFIT_IDENTICAL = 20,
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Enable weight streaming for the current engine.
|
||
|
|
//!
|
||
|
|
//! Weight streaming from the host enables execution of models that do not fit
|
||
|
|
//! in GPU memory by allowing TensorRT to intelligently stream network weights
|
||
|
|
//! from the CPU DRAM. Please see ICudaEngine::getMinimumWeightStreamingBudget
|
||
|
|
//! for the default memory budget when this flag is enabled.
|
||
|
|
//!
|
||
|
|
//! Enabling this feature changes the behavior of
|
||
|
|
//! IRuntime::deserializeCudaEngine to allocate the entire network's weights
|
||
|
|
//! on the CPU DRAM instead of GPU memory. Then,
|
||
|
|
//! ICudaEngine::createExecutionContext will determine the optimal split of
|
||
|
|
//! weights between the CPU and GPU and place weights accordingly.
|
||
|
|
//!
|
||
|
|
//! Future TensorRT versions may enable this flag by default.
|
||
|
|
//!
|
||
|
|
//! \warning Enabling this flag may marginally increase build time.
|
||
|
|
//!
|
||
|
|
//! \warning Enabling this feature will significantly increase the latency of
|
||
|
|
//! ICudaEngine::createExecutionContext.
|
||
|
|
//!
|
||
|
|
//! \see IRuntime::deserializeCudaEngine,
|
||
|
|
//! ICudaEngine::getMinimumWeightStreamingBudget,
|
||
|
|
//! ICudaEngine::setWeightStreamingBudget
|
||
|
|
//!
|
||
|
|
kWEIGHT_STREAMING = 21,
|
||
|
|
|
||
|
|
//! Enable plugins with INT4 input/output.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kINT4 TRT_DEPRECATED_ENUM = 22,
|
||
|
|
|
||
|
|
//! Enable building a refittable engine and provide fine-grained control. This allows
|
||
|
|
//! control over which weights are refittable or not using INetworkDefinition::markWeightsRefittable and
|
||
|
|
//! INetworkDefinition::unmarkWeightsRefittable. By default, all weights are non-refittable when this flag is
|
||
|
|
//! enabled. This flag cannot be used together with kREFIT or kREFIT_IDENTICAL.
|
||
|
|
kREFIT_INDIVIDUAL = 23,
|
||
|
|
|
||
|
|
//! Disable floating-point optimizations: 0*x => 0, x-x => 0, or x/x => 1. These identities are
|
||
|
|
//! not true when x is a NaN or Inf, and thus might hide propagation or generation of NaNs. This flag is typically
|
||
|
|
//! used in combination with kSPARSE_WEIGHTS.
|
||
|
|
//! There are three valid sparsity configurations.
|
||
|
|
//! 1. Disable all sparsity. Both kSPARSE_WEIGHTS and kSTRICT_NANS are unset
|
||
|
|
//! 2. Enable sparsity only where it does not affect propagation/generation of NaNs. Both kSPARSE_WEIGHTS and
|
||
|
|
//! kSTRICT_NANS are set
|
||
|
|
//! 3. Enable all sparsity. kSPARSE_WEIGHTS is set and kSTRICT_NANS is unset
|
||
|
|
kSTRICT_NANS = 24,
|
||
|
|
|
||
|
|
//! Enable memory monitor during build time.
|
||
|
|
kMONITOR_MEMORY = 25,
|
||
|
|
|
||
|
|
//! Enable plugins with FP4 input/output.
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
|
||
|
|
kFP4 TRT_DEPRECATED_ENUM = 26,
|
||
|
|
|
||
|
|
//! Enable editable timing cache.
|
||
|
|
kEDITABLE_TIMING_CACHE = 27,
|
||
|
|
|
||
|
|
//! Enable distributive independence.
|
||
|
|
//! When BuilderFlag::kDISTRIBUTIVE_INDEPENDENCE is set and a layer documents axis i of an output as a distributive
|
||
|
|
//! axis, then the layer behaves exactly as if each evaluation across axis i was done using identical operations.
|
||
|
|
//! The definition of distributive axis is as follows:
|
||
|
|
//! For IMatrixMultiplyLayer:
|
||
|
|
//! All axes that are not one of the vector or matrix dimensions are distributive axes.
|
||
|
|
//! For layers that perform reduction:
|
||
|
|
//! All non-reduction axes are distributive axes.
|
||
|
|
//! For layers that perform einsum:
|
||
|
|
//! Let n be the leftmost reduction axis. The axes to the left of n are distributive axes.
|
||
|
|
kDISTRIBUTIVE_INDEPENDENCE = 28,
|
||
|
|
|
||
|
|
#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
|
||
|
|
//! Build an engine that requires user allocation when creating an execution context.
|
||
|
|
//! This means that runtime allocation will not be enabled even when the tensor dimensions
|
||
|
|
//! exceed the limits for static allocation, and ensures that inference will support graph
|
||
|
|
//! capture unless the network includes operations such as data-dependent dynamic shapes
|
||
|
|
//! (INonZeroLayer, ITripLimitLayer, etc.) that require runtime allocation. If such operations
|
||
|
|
//! are present, the engine build will fail with an error message.
|
||
|
|
kREQUIRE_USER_ALLOCATION = 29,
|
||
|
|
#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of builder flags in BuilderFlag enum.
|
||
|
|
//!
|
||
|
|
//! \see BuilderFlag
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<BuilderFlag>() noexcept
|
||
|
|
{
|
||
|
|
#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
|
||
|
|
return 30;
|
||
|
|
#else
|
||
|
|
return 29;
|
||
|
|
#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
|
||
|
|
}
|
||
|
|
|
||
|
|
namespace v_1_0
{
//!
//! \struct TimingCacheKey
//!
//! \brief The key to retrieve timing cache entries.
//!
//! TimingCacheKey has two types of representation: binary and string. The conversion rule from binary to string is:
//! 1) Convert each uint8_t element in binary key into two hexadecimal ascii chars, e.g. 0xab -> "ab"
//! 2) Concat the ascii chars of all elements in sequence. The result should have exact 32 chars
//! 3) Add prefix "0x" to the string produced in step 2.
//!
//! \see ITimingCache::query(), ITimingCache::update()
//!
struct TimingCacheKey
{
    uint8_t data[16];
};

//!
//! \struct TimingCacheValue
//!
//! \brief The values in the cache entry.
//!
//! \see ITimingCache::query(), ITimingCache::update()
//!
struct TimingCacheValue
{
    //! Hash of the selected tactic.
    uint64_t tacticHash;
    //! Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
    float timingMSec;
    //! UINT64_MAX represents the invalid tactic hash.
    static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
};
} // namespace v_1_0
//!
|
||
|
|
//! \class ITimingCache
|
||
|
|
//!
|
||
|
|
//! \brief Class to handle tactic timing info collected from builder.
|
||
|
|
//!
|
||
|
|
//! The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances
|
||
|
|
//! to reduce the builder wallclock time.
|
||
|
|
//!
|
||
|
|
//! \warning Rebuilding the same engine multiple times using the same timing cache will always yield a correct
|
||
|
|
//! engine but the selected tactics and formats may vary between generated engine instances, if weak typing is used.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig
|
||
|
|
//!
|
||
|
|
class ITimingCache : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
virtual ~ITimingCache() noexcept = default;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Serialize a timing cache to IHostMemory object.
|
||
|
|
//!
|
||
|
|
//! This function allows serialization of current timing cache.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to a IHostMemory object that contains a serialized timing cache.
|
||
|
|
//!
|
||
|
|
//! \see IHostMemory
|
||
|
|
//!
|
||
|
|
nvinfer1::IHostMemory* serialize() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->serialize();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Combine input timing cache into local instance.
|
||
|
|
//!
|
||
|
|
//! This function allows combining entries in the input timing cache to local cache object.
|
||
|
|
//!
|
||
|
|
//! \param inputCache The input timing cache.
|
||
|
|
//! \param ignoreMismatch Whether or not to allow cache verification header mismatch.
|
||
|
|
//!
|
||
|
|
//! \return True if combined successfully, false otherwise.
|
||
|
|
//!
|
||
|
|
//! Append entries in input cache to local cache. Conflicting entries will be skipped
|
||
|
|
//! The input cache must be generated by a TensorRT build of exact same version, otherwise
|
||
|
|
//! combine will be skipped and return false.
|
||
|
|
//! ignoreMismatch must be set to true if combining a timing cache created from a
|
||
|
|
//! different device.
|
||
|
|
//!
|
||
|
|
//! \warning Combining caches generated from devices with different device properties may
|
||
|
|
//! lead to functional/performance bugs!
|
||
|
|
//!
|
||
|
|
bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->combine(inputCache, ignoreMismatch);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Empty the timing cache
|
||
|
|
//!
|
||
|
|
//! \return True if reset successfully, false otherwise.
|
||
|
|
//!
|
||
|
|
bool reset() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->reset();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Query cache keys from Timing Cache.
|
||
|
|
//!
|
||
|
|
//! This function queries the entry count and writes the keys out.
|
||
|
|
//!
|
||
|
|
//! \param keyBuffer The buffer to store keys.
|
||
|
|
//! \param capacity The capacity of the buffer.
|
||
|
|
//!
|
||
|
|
//! \return The count of entries in the cache and fill keys if keyBuffer is non-null.
|
||
|
|
//! If an error occurs, -1 will be returned.
|
||
|
|
//!
|
||
|
|
//! Query the count of entries in the cache and write out cache keys if keyBuffer is provided.
|
||
|
|
//! Any key entries exceeding the capacity of the keyBuffer will not be copied.
|
||
|
|
//!
|
||
|
|
int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->queryKeys(keyBuffer, capacity);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Query value in a cache entry.
|
||
|
|
//!
|
||
|
|
//! The function queries the value in a specific cache entry.
|
||
|
|
//!
|
||
|
|
//! \param key The query key.
|
||
|
|
//!
|
||
|
|
//! \return Cache value if the key exists, otherwise an invalid value.
|
||
|
|
//!
|
||
|
|
//! Query the value of the given cache key. If the key exists, write the value out,
|
||
|
|
//! otherwise return an invalid value.
|
||
|
|
//!
|
||
|
|
TimingCacheValue query(TimingCacheKey const& key) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->query(key);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Update values in a cache entry.
|
||
|
|
//!
|
||
|
|
//! The function updates the value in a specific cache entry.
|
||
|
|
//!
|
||
|
|
//! \param key The key to the entry to be updated.
|
||
|
|
//! \param value New cache value.
|
||
|
|
//!
|
||
|
|
//! \return True if update succeeds, otherwise false.
|
||
|
|
//!
|
||
|
|
//! Update the value of the given cache key. If the key does not exist, return false.
|
||
|
|
//! If the key exists and the new tactic timing is NaN, delete the cache entry and
|
||
|
|
//! return true. If tactic timing is not NaN and the new value is valid, override the
|
||
|
|
//! cache value and return true. False is returned when the new value is invalid.
|
||
|
|
//! If this layer cannot use the new tactic, build errors will be reported when
|
||
|
|
//! building the next engine.
|
||
|
|
//!
|
||
|
|
bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->update(key, value);
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VTimingCache* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \enum MemoryPoolType
//!
//! \brief The type for memory pools used by TensorRT.
//!
//! \see IBuilderConfig::setMemoryPoolLimit, IBuilderConfig::getMemoryPoolLimit
//!
enum class MemoryPoolType : int32_t
{
    //!
    //! kWORKSPACE is used by TensorRT to store intermediate buffers within an operation.
    //! This defaults to max device memory. Set to a smaller value to restrict tactics that use over the
    //! threshold en masse. For more targeted removal of tactics use the IAlgorithmSelector
    //! interface.
    //!
    kWORKSPACE = 0,

    //!
    //! kDLA_MANAGED_SRAM is a fast software managed RAM used by DLA to communicate within a layer.
    //! The size of this pool must be at least 4 KiB and must be a power of 2.
    //! This defaults to 1 MiB.
    //! Orin has capacity of 1 MiB per core.
    //!
    kDLA_MANAGED_SRAM = 1,

    //!
    //! kDLA_LOCAL_DRAM is host RAM used by DLA to share intermediate tensor data across operations.
    //! The size of this pool must be at least 4 KiB and must be a power of 2.
    //! This defaults to 1 GiB.
    //!
    kDLA_LOCAL_DRAM = 2,

    //!
    //! kDLA_GLOBAL_DRAM is host RAM used by DLA to store weights and metadata for execution.
    //! The size of this pool must be at least 4 KiB and must be a power of 2.
    //! This defaults to 512 MiB.
    //!
    kDLA_GLOBAL_DRAM = 3,

    //!
    //! kTACTIC_DRAM is the device DRAM used by the optimizer to
    //! run tactics. On embedded devices, where host and device memory are unified, this includes all host
    //! memory required by TensorRT to build the network up to the point of each memory allocation.
    //! This defaults to 75% of totalGlobalMem as reported by cudaGetDeviceProperties when
    //! cudaGetDeviceProperties.embedded is true, and 100% otherwise.
    //!
    kTACTIC_DRAM = 4,

    //!
    //! kTACTIC_SHARED_MEMORY defines the maximum sum of shared memory reserved by the driver and
    //! used for executing CUDA kernels. Adjust this value to restrict tactics that exceed the
    //! specified threshold en masse. The default value is device max capability. This value must
    //! be less than 1GiB.
    //!
    //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem,
    //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK).
    //!
    //! Updating this flag will override the shared memory limit set by \ref HardwareCompatibilityLevel,
    //! which defaults to 48KiB - reservedShmem.
    //!
    kTACTIC_SHARED_MEMORY = 5,
};
//!
|
||
|
|
//! Maximum number of memory pool types in the MemoryPoolType enum.
|
||
|
|
//!
|
||
|
|
//! \see MemoryPoolType
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<MemoryPoolType>() noexcept
|
||
|
|
{
|
||
|
|
return 6;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum PreviewFeature
|
||
|
|
//!
|
||
|
|
//! \brief Define preview features
|
||
|
|
//!
|
||
|
|
//! Preview Features have been fully tested but are not yet as stable as other features in TensorRT.
|
||
|
|
//! They are provided as opt-in features for at least one release.
|
||
|
|
//!
|
||
|
|
enum class PreviewFeature : int32_t
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! Allows optimization profiles to be shared across execution contexts.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. The default value for this flag is on and can not be changed.
|
||
|
|
//!
|
||
|
|
kPROFILE_SHARING_0806 TRT_DEPRECATED_ENUM = 0,
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Allows plugin I/O to be aliased when using IPluginV3OneBuildV2
|
||
|
|
//!
|
||
|
|
kALIASED_PLUGIN_IO_10_03 = 1,
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Allows IExecutionContext::updateDeviceMemorySizeForShapes to resize runner internal activation memory.
|
||
|
|
//! Using this feature can reduce runtime memory requirement when the actual input tensor shapes are smaller than
|
||
|
|
//! the maximum input tensor dimensions.
|
||
|
|
//!
|
||
|
|
kRUNTIME_ACTIVATION_RESIZE_10_10 = 2
|
||
|
|
};
|
||
|
|
|
||
|
|
namespace impl
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in PreviewFeature enum.
|
||
|
|
//!
|
||
|
|
//! \see PreviewFeature
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
struct EnumMaxImpl<PreviewFeature>
|
||
|
|
{
|
||
|
|
static constexpr int32_t kVALUE = 3;
|
||
|
|
};
|
||
|
|
} // namespace impl
|
||
|
|
|
||
|
|
//!
//! \enum HardwareCompatibilityLevel
//!
//! \brief Describes requirements of compatibility with GPU architectures other than that of the GPU on which the engine
//! was built.
//!
//! \warning Note that compatibility with future hardware depends on CUDA forward compatibility support.
//!
enum class HardwareCompatibilityLevel : int32_t
{
    //! Do not require hardware compatibility with GPU architectures other than that of the GPU on which the engine was
    //! built.
    kNONE = 0,

    //! Require that the engine is compatible with Ampere and newer GPUs. This will limit the combined usage of driver
    //! reserved and backend kernel max shared memory to 48KiB, may reduce the number of available tactics for each
    //! layer, and may prevent some fusions from occurring. Thus this can decrease the performance, especially for tf32
    //! models.
    //! This option will disable cuDNN, cuBLAS, and cuBLASLt as tactic sources.
    //!
    //! This option is only supported for engines built on NVIDIA Ampere and later GPUs.
    //!
    //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem,
    //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK).
    //!
    kAMPERE_PLUS = 1,

    //! Require that the engine is compatible with GPUs that have the same Compute Capability
    //! (https://developer.nvidia.com/cuda-gpus) as the one it was built on. This may decrease the performance compared
    //! to an engine with no compatibility.
    //!
    //! This option will disable cuDNN, cuBLAS, and cuBLASLt as tactic sources.
    //!
    //! This option is only supported for engines built on NVIDIA Turing and later GPUs.
    //!
    kSAME_COMPUTE_CAPABILITY = 2,
};
namespace impl
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in HardwareCompatibilityLevel enum.
|
||
|
|
//!
|
||
|
|
//! \see HardwareCompatibilityLevel
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
struct EnumMaxImpl<HardwareCompatibilityLevel>
|
||
|
|
{
|
||
|
|
static constexpr int32_t kVALUE = 3;
|
||
|
|
};
|
||
|
|
} // namespace impl
|
||
|
|
|
||
|
|
|
||
|
|
//!
//! \enum TilingOptimizationLevel
//!
//! \brief Define the optimization levels for Tiling
//!
//! TensorRT will try tiling optimization for on-chip caching if non-zero level is set.
//! This level determines how much effort TensorRT would take to find a better solution for performance.
//!
enum class TilingOptimizationLevel : int32_t
{
    //! Do not apply any tiling strategy.
    kNONE = 0,

    //! Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
    kFAST = 1,

    //! Increase search space and use a mixed heuristic/profiling strategy.
    //! Moderately increases engine build time.
    kMODERATE = 2,

    //! Increase search space even wider. Significantly increases engine build time.
    kFULL = 3

};
namespace impl
|
||
|
|
{
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in TilingOptimizationLevel enum.
|
||
|
|
//!
|
||
|
|
//! \see TilingOptimizationLevel
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
struct EnumMaxImpl<TilingOptimizationLevel>
|
||
|
|
{
|
||
|
|
static constexpr int32_t kVALUE = 4;
|
||
|
|
};
|
||
|
|
} // namespace impl
|
||
|
|
|
||
|
|
namespace v_1_0
|
||
|
|
{
|
||
|
|
class IProgressMonitor : public IVersionedInterface
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
IProgressMonitor() = default;
|
||
|
|
virtual ~IProgressMonitor() noexcept = default;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return version information associated with this interface. Applications must not override this method.
|
||
|
|
//!
|
||
|
|
InterfaceInfo getInterfaceInfo() const noexcept override
|
||
|
|
{
|
||
|
|
return InterfaceInfo{"IProgressMonitor", 1, 0};
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Signal that a phase of the optimizer has started.
|
||
|
|
//!
|
||
|
|
//! \param phaseName The name of this phase for tracking purposes.
|
||
|
|
//! \param parentPhase The parent phase that this phase belongs to, or nullptr if there is no parent.
|
||
|
|
//! \param nbSteps The number of steps that are involved in this phase.
|
||
|
|
//!
|
||
|
|
//! The phaseStart function signals to the application that the current phase is beginning, and that it has a
|
||
|
|
//! certain number of steps to perform. If \p phaseParent is nullptr, then the phaseStart is beginning an
|
||
|
|
//! independent phase, and if \p phaseParent is specified, then the current phase, specified by \p phaseName, is
|
||
|
|
//! within the scope of the parent phase. \p nbSteps will always be a positive number. The phaseStart function
|
||
|
|
//! implies that the first step is being executed. TensorRT will signal when each step is complete.
|
||
|
|
//!
|
||
|
|
//! Phase names are human readable English strings which are unique within a single phase hierarchy but which can be
|
||
|
|
//! reused once the previous instance has completed. Phase names and their hierarchies may change between versions
|
||
|
|
//! of TensorRT.
|
||
|
|
//!
|
||
|
|
//! \see phaseFinish
|
||
|
|
//!
|
||
|
|
virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Signal that a step of an optimizer phase has finished.
|
||
|
|
//!
|
||
|
|
//! \param phaseName The name of the innermost phase being executed.
|
||
|
|
//! \param step The step number that was completed.
|
||
|
|
//!
|
||
|
|
//! The stepComplete function signals to the application that TensorRT has finished the current \p step for the
|
||
|
|
//! phase \p phaseName, and will move onto the next step if there is one. The application can return false for
|
||
|
|
//! TensorRT to exit the build early. The step value will increase on subsequent calls in the range [0, nbSteps).
|
||
|
|
//!
|
||
|
|
//! \return true to continue to the next step or false to stop the build.
|
||
|
|
//!
|
||
|
|
virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
|
||
|
|
|
||
|
|
//!
//! \brief Signal that a phase of the optimizer has finished.
//!
//! \param phaseName The name of the phase that has finished.
//!
//! The phaseFinish function signals to the application that the phase is complete. This function may be called
//! before all steps in the range [0, nbSteps) have been reported to stepComplete. This scenario can be triggered by
//! error handling, internal optimizations, or when stepComplete returns false to request cancellation of the build.
//!
//! \see phaseStart
//!
virtual void phaseFinish(char const* phaseName) noexcept = 0;
|
||
|
|
|
||
|
|
}; // class IProgressMonitor
|
||
|
|
} // namespace v_1_0
|
||
|
|
|
||
|
|
//!
//! \class IProgressMonitor
//!
//! \brief Application-implemented progress reporting interface for TensorRT.
//!
//! The IProgressMonitor is a user-defined object that TensorRT uses to report back when an internal algorithm has
//! started or finished a phase to help provide feedback on the progress of the optimizer.
//!
//! The IProgressMonitor will trigger its start function when a phase is entered and will trigger its finish function
//! when that phase is exited. Each phase consists of one or more steps. When each step is completed, the stepComplete
//! function is triggered. This will allow an application using the builder to communicate progress relative to when the
//! optimization step is expected to complete.
//!
//! The implementation of IProgressMonitor must be thread-safe so that it can be called from multiple internal threads.
//! The lifetime of the IProgressMonitor must exceed the lifetime of all TensorRT objects that use it.
//!
//! \note To ensure compatibility of source code with future versions of TensorRT, use IProgressMonitor, not
//! v_1_0::IProgressMonitor
//!
// Versioned-interface alias: application code should only ever name the unversioned type.
using IProgressMonitor = v_1_0::IProgressMonitor;
|
||
|
|
|
||
|
|
//!
//! \class IBuilderConfig
//!
//! \brief Holds properties for configuring a builder to produce an engine.
//!
//! \see BuilderFlags
//!
class IBuilderConfig : public INoCopy
{
public:
    //! Virtual defaulted destructor: deleting through a base-class pointer is safe.
    virtual ~IBuilderConfig() noexcept = default;
|
||
|
|
|
||
|
|
//!
//! \brief Set the number of averaging iterations used when timing layers.
//!
//! When timing layers, the builder minimizes over a set of average times for layer execution. This parameter
//! controls the number of iterations used in averaging.
//!
//! \param avgTiming The number of iterations to average over when timing each layer candidate.
//!
//! \see getAvgTimingIterations()
//!
virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setAvgTimingIterations(avgTiming);
}
|
||
|
|
|
||
|
|
//!
//! \brief Query the number of averaging iterations.
//!
//! By default the number of averaging iterations is 1.
//!
//! \return The number of iterations used when averaging layer timings.
//!
//! \see setAvgTimingIterations()
//!
int32_t getAvgTimingIterations() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getAvgTimingIterations();
}
|
||
|
|
|
||
|
|
//!
//! \brief Configure the builder to target specified EngineCapability flow.
//!
//! The flow means a sequence of API calls that allow an application to set up a runtime, engine,
//! and execution context in order to run inference.
//!
//! The supported flows are specified in the EngineCapability enum.
//!
//! \param capability The target EngineCapability flow.
//!
void setEngineCapability(EngineCapability capability) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setEngineCapability(capability);
}
|
||
|
|
|
||
|
|
//!
//! \brief Query EngineCapability flow configured for the builder.
//!
//! By default it returns EngineCapability::kSTANDARD.
//!
//! \return The currently configured EngineCapability.
//!
//! \see setEngineCapability()
//!
EngineCapability getEngineCapability() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getEngineCapability();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set Int8 Calibration interface.
//!
//! The calibrator is to minimize the information loss during the INT8 quantization process.
//!
//! \param calibrator The application-implemented calibrator; ownership is not transferred.
//!
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
//!
TRT_DEPRECATED void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setInt8Calibrator(calibrator);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get Int8 Calibration interface.
//!
//! \return The calibrator previously set with setInt8Calibrator().
//!
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
//!
TRT_DEPRECATED IInt8Calibrator* getInt8Calibrator() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getInt8Calibrator();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the build mode flags to turn on builder options for this network.
//!
//! The flags are listed in the BuilderFlags enum.
//! The flags set configuration options to build the network.
//!
//! \param builderFlags The build option for an engine.
//!
//! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
//!
//! \see getFlags()
//!
void setFlags(BuilderFlags builderFlags) noexcept
{
    // Replaces (does not OR into) the current flag bitmask; forwarded to the implementation.
    mImpl->setFlags(builderFlags);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the build mode flags for this builder config. Defaults to 0.
//!
//! \return The build options as a bitmask.
//!
//! \see setFlags()
//!
BuilderFlags getFlags() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getFlags();
}
|
||
|
|
|
||
|
|
//!
//! \brief clear a single build mode flag.
//!
//! clears the builder mode flag from the enabled flags.
//!
//! \param builderFlag The single flag to clear from the current flag bitmask.
//!
//! \see setFlags()
//!
void clearFlag(BuilderFlag builderFlag) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->clearFlag(builderFlag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set a single build mode flag.
//!
//! Add the input builder mode flag to the already enabled flags.
//!
//! \param builderFlag The single flag to enable in addition to those already set.
//!
//! \see setFlags()
//!
void setFlag(BuilderFlag builderFlag) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setFlag(builderFlag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Returns true if the build mode flag is set
//!
//! \param builderFlag The flag to query.
//!
//! \see getFlags()
//!
//! \return True if flag is set, false if unset.
//!
bool getFlag(BuilderFlag builderFlag) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getFlag(builderFlag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the device that this layer must execute on.
//!
//! \param layer which layer to execute.
//! \param deviceType that this layer must execute on.
//! If DeviceType is not set or is reset, TensorRT will use the default DeviceType set in the builder.
//!
//! \note The device type for a layer must be compatible with the safety flow (if specified).
//! For example a layer cannot be marked for DLA execution while the builder is configured for kSAFETY.
//!
//! \see getDeviceType()
//!
void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setDeviceType(layer, deviceType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the device that this layer executes on.
//!
//! \param layer The layer to query.
//!
//! \return Returns DeviceType of the layer.
//!
DeviceType getDeviceType(ILayer const* layer) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getDeviceType(layer);
}
|
||
|
|
|
||
|
|
//!
//! \brief whether the DeviceType has been explicitly set for this layer
//!
//! \param layer The layer to query.
//!
//! \return true if device type is not default
//!
//! \see setDeviceType() getDeviceType() resetDeviceType()
//!
bool isDeviceTypeSet(ILayer const* layer) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->isDeviceTypeSet(layer);
}
|
||
|
|
|
||
|
|
//!
//! \brief reset the DeviceType for this layer
//!
//! \param layer The layer whose DeviceType assignment is reverted to the builder default.
//!
//! \see setDeviceType() getDeviceType() isDeviceTypeSet()
//!
void resetDeviceType(ILayer const* layer) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->resetDeviceType(layer);
}
|
||
|
|
|
||
|
|
//!
//! \brief Checks if a layer can run on DLA.
//!
//! \param layer The layer to check.
//!
//! \return status true if the layer can on DLA else returns false.
//!
bool canRunOnDLA(ILayer const* layer) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->canRunOnDLA(layer);
}
|
||
|
|
|
||
|
|
//!
//! \brief Sets the DLA core used by the network. Defaults to -1.
//!
//! \param dlaCore The DLA core to execute the engine on, in the range [0,getNbDlaCores()).
//!
//! This function is used to specify which DLA core to use via indexing, if multiple DLA cores are available.
//!
//! \warning if getNbDLACores() returns 0, then this function does nothing.
//!
//! \see IRuntime::setDLACore() getDLACore()
//!
void setDLACore(int32_t dlaCore) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setDLACore(dlaCore);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the DLA core that the engine executes on.
//!
//! \return assigned DLA core or -1 for DLA not present or unset.
//!
int32_t getDLACore() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getDLACore();
}
|
||
|
|
|
||
|
|
//!
//! \brief Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on
//! this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer.
//!
//! \param deviceType The device type that becomes the builder-wide default.
//!
//! \see getDefaultDeviceType()
//!
void setDefaultDeviceType(DeviceType deviceType) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setDefaultDeviceType(deviceType);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the default DeviceType which was set by setDefaultDeviceType.
//!
//! By default it returns DeviceType::kGPU.
//!
//! \return The builder-wide default DeviceType.
//!
DeviceType getDefaultDeviceType() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getDefaultDeviceType();
}
|
||
|
|
|
||
|
|
//!
//! \brief Resets the builder configuration to defaults.
//!
//! Useful for initializing a builder config object to its original state.
//!
void reset() noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->reset();
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the CUDA stream that is used to profile this network.
|
||
|
|
//!
|
||
|
|
//! \param stream The CUDA stream used for profiling by the builder.
|
||
|
|
//!
|
||
|
|
//! \see getProfileStream()
|
||
|
|
//!
|
||
|
|
void setProfileStream(const cudaStream_t stream) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setProfileStream(stream);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the CUDA stream that is used to profile this network.
//!
//! \return The CUDA stream set by setProfileStream, nullptr if setProfileStream has not been called.
//!
//! \see setProfileStream()
//!
cudaStream_t getProfileStream() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getProfileStream();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add an optimization profile.
//!
//! This function must be called at least once if the network has dynamic or shape input tensors.
//! This function may be called at most once when building a refittable engine, as more than
//! a single optimization profile are not supported for refittable engines.
//!
//! \param profile The new optimization profile, which must satisfy profile->isValid() == true
//!
//! \return The index of the optimization profile (starting from 0) if the input is valid, or -1 if the input is
//! not valid.
//!
int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->addOptimizationProfile(profile);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get number of optimization profiles.
//!
//! This is one higher than the index of the last optimization profile that has be defined (or
//! zero, if none has been defined yet).
//!
//! \return The number of the optimization profiles.
//!
int32_t getNbOptimizationProfiles() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getNbOptimizationProfiles();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
//!
//! Control how much layer information will be exposed in NVTX annotations and IEngineInspector.
//!
//! \param verbosity The desired ProfilingVerbosity level.
//!
//! \see ProfilingVerbosity, getProfilingVerbosity(), IEngineInspector
//!
void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setProfilingVerbosity(verbosity);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
//!
//! Get the current setting of verbosity level of layer information exposed in
//! NVTX annotations and IEngineInspector. Default value is ProfilingVerbosity::kLAYER_NAMES_ONLY.
//!
//! \return The current ProfilingVerbosity level.
//!
//! \see ProfilingVerbosity, setProfilingVerbosity(), IEngineInspector
//!
ProfilingVerbosity getProfilingVerbosity() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getProfilingVerbosity();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set Algorithm Selector.
//!
//! \param selector The algorithm selector to be set in the build config.
//!
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
//!
TRT_DEPRECATED void setAlgorithmSelector(IAlgorithmSelector* selector) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setAlgorithmSelector(selector);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get Algorithm Selector.
//!
//! \return The selector previously set with setAlgorithmSelector().
//!
//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
//!
TRT_DEPRECATED IAlgorithmSelector* getAlgorithmSelector() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getAlgorithmSelector();
}
|
||
|
|
|
||
|
|
//!
//! \brief Add a calibration profile.
//!
//! Calibration optimization profile must be set if int8 calibration is used to set scales for a network with
//! runtime dimensions.
//!
//! \param profile The new calibration profile, which must satisfy profile->isValid() == true or be nullptr.
//! MIN and MAX values will be overwritten by kOPT.
//!
//! \return True if the calibration profile was set correctly.
//!
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
//!
TRT_DEPRECATED bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->setCalibrationProfile(profile);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the current calibration profile.
//!
//! \return A pointer to the current calibration profile or nullptr if calibration profile is unset.
//!
//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
//!
TRT_DEPRECATED IOptimizationProfile const* getCalibrationProfile() noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getCalibrationProfile();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the quantization flags.
//!
//! The flags are listed in the QuantizationFlag enum.
//! The flags set configuration options to quantize the network in int8.
//!
//! \param flags The quantization flags.
//!
//! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
//!
//! \see getQuantizationFlags()
//!
//! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
//!
TRT_DEPRECATED void setQuantizationFlags(QuantizationFlags flags) noexcept
{
    // Replaces (does not OR into) the current quantization flag bitmask.
    mImpl->setQuantizationFlags(flags);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the quantization flags.
//!
//! \return The quantization flags as a bitmask.
//!
//! \see setQuantizationFlag()
//!
//! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
//!
TRT_DEPRECATED QuantizationFlags getQuantizationFlags() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getQuantizationFlags();
}
|
||
|
|
|
||
|
|
//!
//! \brief clear a quantization flag.
//!
//! Clears the quantization flag from the enabled quantization flags.
//!
//! \param flag The single quantization flag to clear.
//!
//! \see setQuantizationFlags()
//!
//! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
//!
TRT_DEPRECATED void clearQuantizationFlag(QuantizationFlag flag) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->clearQuantizationFlag(flag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set a single quantization flag.
//!
//! Add the input quantization flag to the already enabled quantization flags.
//!
//! \param flag The single quantization flag to enable.
//!
//! \see setQuantizationFlags()
//!
//! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
//!
TRT_DEPRECATED void setQuantizationFlag(QuantizationFlag flag) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setQuantizationFlag(flag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Returns true if the quantization flag is set.
//!
//! \param flag The quantization flag to query.
//!
//! \see getQuantizationFlags()
//!
//! \return True if quantization flag is set, false if unset.
//!
//! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
//!
TRT_DEPRECATED bool getQuantizationFlag(QuantizationFlag flag) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getQuantizationFlag(flag);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set tactic sources.
//!
//! This bitset controls which tactic sources TensorRT is allowed to use for tactic
//! selection.
//!
//! Multiple tactic sources may be combined with a bitwise OR operation. For example,
//! to enable cublas and cublasLt as tactic sources, use a value of:
//!
//! 1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U <<
//! static_cast<uint32_t>(TacticSource::kCUBLAS_LT)
//!
//! \param tacticSources The bitmask of allowed tactic sources.
//!
//! \see getTacticSources
//!
//! \return true if the tactic sources in the build configuration were updated.
//! The tactic sources in the build configuration will not be updated if the provided value is invalid.
//!
bool setTacticSources(TacticSources tacticSources) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->setTacticSources(tacticSources);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get tactic sources.
//!
//! Get the tactic sources currently set in the engine build
//! configuration.
//!
//! \see setTacticSources()
//!
//! \return tactic sources
//!
TacticSources getTacticSources() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getTacticSources();
}
|
||
|
|
|
||
|
|
//!
//! \brief Create timing cache
//!
//! Create ITimingCache instance from serialized raw data. The created timing cache doesn't belong to
//! a specific IBuilderConfig. It can be shared by multiple builder instances. Call setTimingCache()
//! before launching a builder to attach cache to builder instance.
//! The lifetime of the ITimingCache must exceed the lifetime of all builders that use it.
//!
//! \param blob A pointer to the raw data that contains serialized timing cache
//! \param size The size in bytes of the serialized timing cache. Size 0 means create a new cache from scratch
//!
//! \see setTimingCache
//!
//! \return the pointer to ITimingCache created
//!
nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
{
    // Forwarded to the underlying builder-config implementation; the caller owns the returned cache.
    return mImpl->createTimingCache(blob, size);
}
|
||
|
|
|
||
|
|
//!
//! \brief Attach a timing cache to IBuilderConfig
//!
//! The timing cache has verification header to make sure the provided cache can be used in current environment.
//! A failure will be reported if the CUDA device property in the provided cache is different from current
//! environment. ignoreMismatch = true skips strict verification and allows loading cache created from a different
//! device.
//!
//! The cache must not be destroyed until after the engine is built.
//!
//! \param cache the timing cache to be used
//! \param ignoreMismatch whether or not allow using a cache that contains different CUDA device property
//!
//! \return true if set successfully, false otherwise
//!
//! \warning Using cache generated from devices with different CUDA device properties may lead to
//! functional/performance bugs.
//!
bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->setTimingCache(cache, ignoreMismatch);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the pointer to the timing cache from current IBuilderConfig
//!
//! \return pointer to the timing cache used in current IBuilderConfig
//!
nvinfer1::ITimingCache const* getTimingCache() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getTimingCache();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the memory size for the memory pool.
//!
//! TensorRT layers access different memory pools depending on the operation.
//! This function sets in the IBuilderConfig the size limit, specified by \p poolSize,
//! for the corresponding memory pool, specified by \p pool.
//! TensorRT will build a plan file that is constrained by these limits or report
//! which constraint caused the failure.
//!
//! If the size of the pool, specified by \p poolSize, fails to meet the size requirements
//! for the pool, this function does nothing and emits the recoverable error,
//! ErrorCode::kINVALID_ARGUMENT, to the registered IErrorRecorder.
//!
//! If the size of the pool is larger than the maximum possible value for the
//! configuration, this function does nothing and emits ErrorCode::kUNSUPPORTED_STATE.
//!
//! If the pool does not exist on the requested device type when building
//! the network, a warning is emitted to the logger, and the memory pool
//! value is ignored.
//!
//! Refer to MemoryPoolType to see the size requirements for each pool.
//!
//! \param pool The memory pool to limit the available memory for.
//! \param poolSize The size of the pool in bytes.
//!
//! \see getMemoryPoolLimit, MemoryPoolType
//!
void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setMemoryPoolLimit(pool, poolSize);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the memory size limit of the memory pool.
//!
//! Retrieve the memory size limit of the corresponding pool in bytes.
//! If setMemoryPoolLimit for the pool has not been called, this returns the default
//! value used by TensorRT. This default value is not necessarily the maximum possible
//! value for that configuration.
//!
//! \param pool The memory pool to get the limit for.
//!
//! \returns The size of the memory limit, in bytes, for the corresponding pool.
//!
//! \see setMemoryPoolLimit
//!
std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getMemoryPoolLimit(pool);
}
|
||
|
|
|
||
|
|
//!
//! \brief Enable or disable a specific preview feature
//!
//! Allows enabling or disabling experimental features, which are not enabled by default in the
//! current release.
//!
//! Refer to PreviewFeature for additional information, and a list of the available features.
//!
//! \param feature the feature to enable / disable
//! \param enable true for enable, false for disable
//!
//! \see PreviewFeature, getPreviewFeature
//!
void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setPreviewFeature(feature, enable);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get status of preview feature
//!
//! \param feature the feature to query
//!
//! \returns true if the \p feature is enabled, false otherwise
//!
//! \see PreviewFeature, setPreviewFeature
//!
bool getPreviewFeature(PreviewFeature feature) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getPreviewFeature(feature);
}
|
||
|
|
|
||
|
|
//!
//! \brief Set builder optimization level
//!
//! Set the builder optimization level. Setting a higher optimization
//! level allows the optimizer to spend more time searching for optimization opportunities. The
//! resulting engine may have better performance compared to an engine built with a lower optimization level.
//!
//! The default optimization level is 3. Valid values include integers from 0 to the maximum optimization level,
//! which is currently 5. Setting it to greater than the maximum level results in behavior identical to the
//! maximum level.
//!
//! Below are the descriptions about each builder optimization level:
//!
//! - Level 0: This enables the fastest compilation by disabling dynamic kernel generation and selecting the first
//! tactic that succeeds in execution. This will also not respect a timing cache.
//! - Level 1: Available tactics are sorted by heuristics, but only the top are tested to select the best. If a
//! dynamic kernel is generated its compile optimization is low.
//! - Level 2: Available tactics are sorted by heuristics, but only the fastest tactics are tested to select the
//! best.
//! - Level 3: Apply heuristics to see if a static precompiled kernel is applicable or if a new one has to be
//! compiled dynamically.
//! - Level 4: Always compiles a dynamic kernel.
//! - Level 5: Always compiles a dynamic kernel and compares it to static kernels.
//!
//! \param level The optimization level to set to. Must be non-negative.
//!
//! \see getBuilderOptimizationLevel
//!
void setBuilderOptimizationLevel(int32_t level) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setBuilderOptimizationLevel(level);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get builder optimization level
//!
//! \returns the current builder optimization level
//!
//! \see setBuilderOptimizationLevel
//!
// NOTE(review): unlike the other getters in this class this accessor is not
// declared const — presumably intentional for ABI stability; confirm upstream
// before changing.
int32_t getBuilderOptimizationLevel() noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getBuilderOptimizationLevel();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the hardware compatibility level.
//!
//! Hardware compatibility allows an engine to run on GPU
//! architectures other than that of the GPU where the engine was
//! built.
//!
//! The default hardware compatibility level is HardwareCompatibilityLevel::kNONE.
//!
//! \param hardwareCompatibilityLevel The level of hardware
//! compatibility.
//!
void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the hardware compatibility level.
//!
//! \return hardwareCompatibilityLevel The level of hardware
//! compatibility.
//!
//! \see setHardwareCompatibilityLevel()
//!
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getHardwareCompatibilityLevel();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the plugin libraries to be serialized with version-compatible engines.
//!
//! Each entry in the list of libraries must be unique.
//!
//! \param paths The paths of plugin libraries.
//! \param nbPaths The number of paths.
//!
void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setPluginsToSerialize(paths, nbPaths);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the plugin library path to be serialized with version-compatible engines.
//!
//! \param index Index of the plugin library path in the list. Should be in the range `[0,
//! getNbPluginsToSerialize())`.
//!
//! \return The path to the plugin library.
//!
char const* getPluginToSerialize(int32_t index) const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getPluginToSerialize(index);
}
|
||
|
|
|
||
|
|
//!
//! \brief Get the number of plugin library paths to be serialized with version-compatible engines.
//!
//! \return The number of paths.
//!
int32_t getNbPluginsToSerialize() const noexcept
{
    // Forwarded to the underlying builder-config implementation.
    return mImpl->getNbPluginsToSerialize();
}
|
||
|
|
|
||
|
|
//!
//! \brief Set the maximum number of auxiliary streams that TRT is allowed to use.
//!
//! If the network contains operators that can run in parallel, TRT can execute them using auxiliary streams
//! in addition to the one provided to the IExecutionContext::enqueueV3() call.
//!
//! The default maximum number of auxiliary streams is determined by the heuristics in TensorRT on whether enabling
//! multi-stream would improve the performance. This behavior can be overridden by calling this API to set the
//! maximum number of auxiliary streams explicitly. Set this to 0 to enforce single-stream inference.
//!
//! The resulting engine may use fewer auxiliary streams than the maximum if the network does not contain enough
//! parallelism or if TensorRT determines that using more auxiliary streams does not help improve the performance.
//!
//! \note Allowing more auxiliary streams does not always give better performance since there will be
//! synchronizations overhead between streams. Using CUDA graphs at runtime can help reduce the overhead caused by
//! cross-stream synchronizations.
//!
//! \note Using more auxiliary streams leads to more memory usage at runtime since some activation memory blocks
//! will not be able to be reused.
//!
//! \param nbStreams The maximum number of auxiliary streams that TRT is allowed to use.
//!
//! \see getMaxAuxStreams(), ICudaEngine::getNbAuxStreams(), IExecutionContext::setAuxStreams()
//!
void setMaxAuxStreams(int32_t nbStreams) noexcept
{
    // Forwarded to the underlying builder-config implementation.
    mImpl->setMaxAuxStreams(nbStreams);
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the maximum number of auxiliary streams that TRT is allowed to use.
|
||
|
|
//!
|
||
|
|
//! \see setMaxAuxStreams()
|
||
|
|
//!
|
||
|
|
int32_t getMaxAuxStreams() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMaxAuxStreams();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Sets the progress monitor for building a network.
|
||
|
|
//!
|
||
|
|
//! \param monitor The progress monitor to assign to the IBuilderConfig.
|
||
|
|
//!
|
||
|
|
//! The progress monitor signals to the application when different phases of
|
||
|
|
//! the compiler are being executed. Setting to nullptr unsets the monitor so
|
||
|
|
//! that the application is not signaled.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::getProgressMonitor
|
||
|
|
//!
|
||
|
|
void setProgressMonitor(IProgressMonitor* monitor) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setProgressMonitor(monitor);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \return The progress monitor set by the application or nullptr.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::setProgressMonitor
|
||
|
|
//!
|
||
|
|
IProgressMonitor* getProgressMonitor() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getProgressMonitor();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the target platform for runtime execution.
|
||
|
|
//!
|
||
|
|
//! Cross-platform compatibility allows an engine to be built and executed on different platforms.
|
||
|
|
//!
|
||
|
|
//! The default cross-platform target is RuntimePlatform::kSAME_AS_BUILD.
|
||
|
|
//!
|
||
|
|
//! \param runtimePlatform The target platform for runtime execution.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::getRuntimePlatform()
|
||
|
|
//!
|
||
|
|
void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setRuntimePlatform(runtimePlatform);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the target platform for runtime execution.
|
||
|
|
//!
|
||
|
|
//! \return The target platform for runtime execution.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig::setRuntimePlatform()
|
||
|
|
//!
|
||
|
|
RuntimePlatform getRuntimePlatform() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getRuntimePlatform();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the maximum number of tactics to time when there is a choice of tactics.
|
||
|
|
//!
|
||
|
|
//! This function controls the number of tactics timed when there are multiple tactics to choose from.
|
||
|
|
//!
|
||
|
|
//! \see getMaxNbTactics()
|
||
|
|
//!
|
||
|
|
void setMaxNbTactics(int32_t maxNbTactics) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setMaxNbTactics(maxNbTactics);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Query the maximum number of tactics timed when there is a choice.
|
||
|
|
//!
|
||
|
|
//! By default the value is -1, indicating TensorRT can determine the number of tactics based on its own heuristic.
|
||
|
|
//!
|
||
|
|
//! \see setMaxNbTactics()
|
||
|
|
//!
|
||
|
|
int32_t getMaxNbTactics() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMaxNbTactics();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the Tiling optimization level.
|
||
|
|
//!
|
||
|
|
//! Tiling allows TensorRT to try an on-chip caching strategy.
|
||
|
|
//!
|
||
|
|
//! The default getTilingOptimizationLevel is TilingOptimizationLevel::kNONE.
|
||
|
|
//!
|
||
|
|
//! \param level The level of Tiling optimization.
|
||
|
|
//!
|
||
|
|
//! \return True if successful, false otherwise
|
||
|
|
//!
|
||
|
|
bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setTilingOptimizationLevel(level);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the Tiling optimization level.
|
||
|
|
//!
|
||
|
|
//! \return TilingOptimizationLevel The level of Tiling optimization.
|
||
|
|
//!
|
||
|
|
//! \see setTilingOptimizationLevel()
|
||
|
|
//!
|
||
|
|
TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getTilingOptimizationLevel();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the L2 cache usage limit for Tiling optimization.
|
||
|
|
//!
|
||
|
|
//! Parameter for tiling optimization. This API only takes effect when TilingOptimizationLevel is not kNONE.
|
||
|
|
//! \note If setL2LimitForTiling() has not been called, TensorRT would choose a default value between 0 and L2
|
||
|
|
//! capacity size.
|
||
|
|
//!
|
||
|
|
//! \param size The size of the L2 cache usage limit for Tiling optimization.
|
||
|
|
//!
|
||
|
|
//! \return True if successful, false otherwise
|
||
|
|
//!
|
||
|
|
bool setL2LimitForTiling(int64_t size) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setL2LimitForTiling(size);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the L2 cache usage limit for tiling optimization.
|
||
|
|
//!
|
||
|
|
//! \return L2 cache usage limit for tiling optimization.
|
||
|
|
//!
|
||
|
|
//! \see setL2LimitForTiling()
|
||
|
|
//!
|
||
|
|
int64_t getL2LimitForTiling() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getL2LimitForTiling();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set a config string for remote auto tuning.
|
||
|
|
//!
|
||
|
|
//! Remote auto-tuning is supported only for engines built with EngineCapability::kSAFETY.
|
||
|
|
//!
|
||
|
|
//! \param config The config string to be used during remote auto tuning.
|
||
|
|
//!
|
||
|
|
//! \return True if successful, false otherwise
|
||
|
|
//!
|
||
|
|
bool setRemoteAutoTuningConfig(char const* config) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setRemoteAutoTuningConfig(config);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get a config string for remote auto tuning.
|
||
|
|
//!
|
||
|
|
//! \return The current string for remote auto tuning, or nullptr if not set.
|
||
|
|
//!
|
||
|
|
char const* getRemoteAutoTuningConfig() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getRemoteAutoTuningConfig();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VBuilderConfig* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \brief Represents one or more NetworkDefinitionCreationFlag flags
//! using binary OR operations.
//! e.g., 1U << NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
//!
//! \see IBuilder::createNetworkV2
//!
using NetworkDefinitionCreationFlags = uint32_t;
//!
|
||
|
|
//! \enum NetworkDefinitionCreationFlag
|
||
|
|
//!
|
||
|
|
//! \brief List of immutable network properties expressed at network creation time.
|
||
|
|
//! NetworkDefinitionCreationFlag is used with createNetworkV2() to specify immutable properties of the network.
|
||
|
|
//!
|
||
|
|
//! \see IBuilder::createNetworkV2
|
||
|
|
//!
|
||
|
|
enum class NetworkDefinitionCreationFlag : int32_t
|
||
|
|
{
|
||
|
|
//! Ignored because networks are always "explicit batch" in TensorRT 10.0.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0.
|
||
|
|
kEXPLICIT_BATCH TRT_DEPRECATED_ENUM = 0,
|
||
|
|
|
||
|
|
//! Mark the network to be strongly typed.
|
||
|
|
//! Every tensor in the network has a data type defined in the network following only type inference rules and the
|
||
|
|
//! inputs/operator annotations. Setting layer precision and layer output types is not allowed, and the network
|
||
|
|
//! output types will be inferred based on the input types and the type inference rules.
|
||
|
|
kSTRONGLY_TYPED = 1,
|
||
|
|
//! If set, for a Python plugin with both AOT and JIT implementations, the JIT implementation will be used.
|
||
|
|
//! Any plugin-specific JIT/AOT specification may override this.
|
||
|
|
//! Cannot be used in conjunction with NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS.
|
||
|
|
kPREFER_JIT_PYTHON_PLUGINS = 2,
|
||
|
|
|
||
|
|
//! If set, for a Python plugin with both AOT and JIT implementations, the AOT implementation will be used.
|
||
|
|
//! Any plugin-specific JIT/AOT specification may override this.
|
||
|
|
//! Cannot be used in conjunction with NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS.
|
||
|
|
kPREFER_AOT_PYTHON_PLUGINS = 3,
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Maximum number of elements in NetworkDefinitionCreationFlag enum.
|
||
|
|
//!
|
||
|
|
//! \see NetworkDefinitionCreationFlag
|
||
|
|
//!
|
||
|
|
template <>
|
||
|
|
constexpr inline int32_t EnumMax<NetworkDefinitionCreationFlag>() noexcept
|
||
|
|
{
|
||
|
|
return 4;
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \class IBuilder
|
||
|
|
//!
|
||
|
|
//! \brief Builds an engine from a network definition.
|
||
|
|
//!
|
||
|
|
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
|
||
|
|
//!
|
||
|
|
class IBuilder : public INoCopy
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
virtual ~IBuilder() noexcept = default;
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Determine whether the platform has fast native fp16.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED bool platformHasFastFp16() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->platformHasFastFp16();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Determine whether the platform has fast native int8.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED bool platformHasFastInt8() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->platformHasFastInt8();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Get the maximum batch size DLA can support.
|
||
|
|
//! For any tensor the total volume of index dimensions combined(dimensions other than CHW) with the requested
|
||
|
|
//! batch size should not exceed the value returned by this function.
|
||
|
|
//!
|
||
|
|
//! \warning getMaxDLABatchSize does not work with dynamic shapes.
|
||
|
|
//!
|
||
|
|
int32_t getMaxDLABatchSize() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMaxDLABatchSize();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the number of DLA engines available to this builder.
|
||
|
|
//!
|
||
|
|
int32_t getNbDLACores() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getNbDLACores();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the GPU allocator.
|
||
|
|
//!
|
||
|
|
//! \param allocator Set the GPU allocator to be used by the builder. All GPU memory acquired will use this
|
||
|
|
//! allocator. If NULL is passed, the default allocator will be used.
|
||
|
|
//!
|
||
|
|
//! Default: allocateAsync uses cudaMallocAsync if cudaDevAttrMemoryPoolsSupported returns true, otherwise falls
|
||
|
|
//! back to cudaMalloc. allocate always uses cudaMalloc.
|
||
|
|
//!
|
||
|
|
//! \note This allocator will be passed to any engines created via the builder; thus the lifetime of the allocator
|
||
|
|
//! must span the lifetime of those engines as
|
||
|
|
//! well as that of the builder. If nullptr is passed, the default allocator will be used.
|
||
|
|
//!
|
||
|
|
void setGpuAllocator(IGpuAllocator* allocator) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setGpuAllocator(allocator);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Create a builder configuration object.
|
||
|
|
//!
|
||
|
|
//! The caller owns the new IBuilderConfig, which must be destroyed with operator delete
|
||
|
|
//! before this IBuilder is destroyed. Destroying this IBuilder before destroying the
|
||
|
|
//! IBuilderConfig causes undefined behavior.
|
||
|
|
//!
|
||
|
|
//! \see IBuilderConfig
|
||
|
|
//!
|
||
|
|
nvinfer1::IBuilderConfig* createBuilderConfig() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->createBuilderConfig();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Create a network definition object
|
||
|
|
//!
|
||
|
|
//! Creates a network definition object with immutable properties specified using the flags parameter.
|
||
|
|
//!
|
||
|
|
//! createNetworkV2 supports creating network with properties from NetworkDefinitionCreationFlags.
|
||
|
|
//!
|
||
|
|
//! CreateNetworkV2 supports dynamic shapes and explicit batch dimensions by default.
|
||
|
|
//!
|
||
|
|
//! createNetworkV2 with NetworkDefinitionCreationFlag::kSTRONGLY_TYPED flag supports creating a strongly typed plan
|
||
|
|
//! where tensor data types are inferred from network input types and operator type specification.
|
||
|
|
//!
|
||
|
|
//! The caller owns the new INetworkDefinition, which must be destroyed with operator delete
|
||
|
|
//! before this IBuilder is destroyed. Destroying this IBuilder before destroying the
|
||
|
|
//! INetworkDefinition causes undefined behavior.
|
||
|
|
//!
|
||
|
|
//! \param flags Bitset of NetworkDefinitionCreationFlags specifying network properties combined with bitwise OR,
|
||
|
|
//! e.g., 1U << NetworkDefinitionCreationFlag::kSTRONGLY_TYPED.
|
||
|
|
//!
|
||
|
|
//! \see INetworkDefinition, NetworkDefinitionCreationFlags
|
||
|
|
//!
|
||
|
|
nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->createNetworkV2(flags);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Create a new optimization profile.
|
||
|
|
//!
|
||
|
|
//! If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made.
|
||
|
|
//! Likewise, if there are any shape input tensors, the appropriate calls to setShapeValues() are required.
|
||
|
|
//! The builder retains ownership of the created optimization profile and returns a raw pointer, i.e. the users
|
||
|
|
//! must not attempt to delete the returned pointer.
|
||
|
|
//!
|
||
|
|
//! \see IOptimizationProfile
|
||
|
|
//!
|
||
|
|
nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->createOptimizationProfile();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the ErrorRecorder for this interface
|
||
|
|
//!
|
||
|
|
//! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
|
||
|
|
//! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
|
||
|
|
//! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
|
||
|
|
//! a recorder has been registered.
|
||
|
|
//!
|
||
|
|
//! If an error recorder is not set, messages will be sent to the global log stream.
|
||
|
|
//!
|
||
|
|
//! \param recorder The error recorder to register with this interface.
|
||
|
|
//!
|
||
|
|
//! \see getErrorRecorder()
|
||
|
|
//!
|
||
|
|
void setErrorRecorder(IErrorRecorder* recorder) noexcept
|
||
|
|
{
|
||
|
|
mImpl->setErrorRecorder(recorder);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief get the ErrorRecorder assigned to this interface.
|
||
|
|
//!
|
||
|
|
//! Retrieves the assigned error recorder object for the given class.
|
||
|
|
//! A nullptr will be returned if setErrorRecorder has not been called.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to the IErrorRecorder object that has been registered.
|
||
|
|
//!
|
||
|
|
//! \see setErrorRecorder()
|
||
|
|
//!
|
||
|
|
IErrorRecorder* getErrorRecorder() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getErrorRecorder();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Resets the builder state to default values.
|
||
|
|
//!
|
||
|
|
void reset() noexcept
|
||
|
|
{
|
||
|
|
mImpl->reset();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Determine whether the platform has TF32 support.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
|
||
|
|
//!
|
||
|
|
TRT_DEPRECATED bool platformHasTf32() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->platformHasTf32();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
|
||
|
|
//!
|
||
|
|
//! This function allows building and serialization of a network without creating an engine.
|
||
|
|
//!
|
||
|
|
//! \param network Network definition.
|
||
|
|
//! \param config Builder configuration.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to a IHostMemory object that contains a serialized network.
|
||
|
|
//!
|
||
|
|
//! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
|
||
|
|
//!
|
||
|
|
//! \see INetworkDefinition, IBuilderConfig, IHostMemory
|
||
|
|
//!
|
||
|
|
nvinfer1::IHostMemory* buildSerializedNetwork(INetworkDefinition& network, IBuilderConfig& config) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->buildSerializedNetwork(network, config);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
|
||
|
|
//!
|
||
|
|
//! This function allows building and serialization of a network without creating an engine. The engine is
|
||
|
|
//! finally serialized into the writer stream.
|
||
|
|
//!
|
||
|
|
//! \param network Network definition.
|
||
|
|
//! \param config Builder configuration.
|
||
|
|
//! \param writer Output writer stream.
|
||
|
|
//!
|
||
|
|
//! \return true if build succeed, otherwise false.
|
||
|
|
//!
|
||
|
|
//! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
|
||
|
|
//!
|
||
|
|
//! \see INetworkDefinition, IBuilderConfig, IStreamWriter
|
||
|
|
//!
|
||
|
|
bool buildSerializedNetworkToStream(
|
||
|
|
INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->buildSerializedNetworkToStream(network, config, writer);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Extended form of buildSerializedNetwork that optionally permits getting the kernelText.
|
||
|
|
//!
|
||
|
|
//! Similar to two-argument form, except that if an engine with safe capability is successfully built
|
||
|
|
//! and there are kernels, sets kernelText to ..... Otherwise sets kernelText=nullptr.
|
||
|
|
//!
|
||
|
|
//! This function allows building and serialization of a network without creating an engine.
|
||
|
|
//!
|
||
|
|
//! \param network Network definition.
|
||
|
|
//! \param config Builder configuration.
|
||
|
|
//! \param kernelText A reference to a pointer to a IHostMemory object that will be set to the kernel CPP code text
|
||
|
|
//!
|
||
|
|
//! \return A pointer to a IHostMemory object that contains a serialized network.
|
||
|
|
//!
|
||
|
|
//! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
|
||
|
|
//!
|
||
|
|
//! \see INetworkDefinition, IBuilderConfig, IHostMemory
|
||
|
|
//!
|
||
|
|
nvinfer1::IHostMemory* buildSerializedNetwork(
|
||
|
|
INetworkDefinition& network, IBuilderConfig& config, IHostMemory*& kernelText) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->buildSerializedNetworkWithKernelText(network, config, kernelText);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Builds a network for the given INetworkDefinition and IBuilderConfig.
|
||
|
|
//!
|
||
|
|
//! \param network Network definition.
|
||
|
|
//! \param config Builder configuration.
|
||
|
|
//!
|
||
|
|
//! \return A pointer to a ICudaEngine object that contains an engine.
|
||
|
|
//!
|
||
|
|
//! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
|
||
|
|
//!
|
||
|
|
//! \note This function does not support \p BuilderFlag::kVERSION_COMPATIBLE.
|
||
|
|
//! Please use \p buildSerializedNetwork to get a version compatible engine.
|
||
|
|
//!
|
||
|
|
//! \see INetworkDefinition, IBuilderConfig, ICudaEngine
|
||
|
|
//!
|
||
|
|
nvinfer1::ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->buildEngineWithConfig(network, config);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Checks that a network is within the scope of the IBuilderConfig settings.
|
||
|
|
//!
|
||
|
|
//! \param network The network definition to check for configuration compliance.
|
||
|
|
//! \param config The configuration of the builder to use when checking \p network.
|
||
|
|
//!
|
||
|
|
//! Given an INetworkDefinition, \p network, and an IBuilderConfig, \p config, check if
|
||
|
|
//! the network falls within the constraints of the builder configuration based on the
|
||
|
|
//! EngineCapability, BuilderFlag, and DeviceType. If the network is within the constraints,
|
||
|
|
//! then the function returns true, and false if a violation occurs. This function reports
|
||
|
|
//! the conditions that are violated to the registered ErrorRecorder.
|
||
|
|
//!
|
||
|
|
//! \return True if network is within the scope of the restrictions specified by the builder config,
|
||
|
|
//! false otherwise.
|
||
|
|
//!
|
||
|
|
//! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
|
||
|
|
//!
|
||
|
|
bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->isNetworkSupported(network, config);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief get the logger with which the builder was created
|
||
|
|
//!
|
||
|
|
//! \return the logger
|
||
|
|
//!
|
||
|
|
ILogger* getLogger() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getLogger();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Set the maximum number of threads.
|
||
|
|
//!
|
||
|
|
//! \param maxThreads The maximum number of threads that can be used by the builder.
|
||
|
|
//!
|
||
|
|
//! \return True if successful, false otherwise.
|
||
|
|
//!
|
||
|
|
//! The default value is 1 and includes the current thread.
|
||
|
|
//! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
|
||
|
|
//! A value less than 1 triggers a kINVALID_ARGUMENT error.
|
||
|
|
//!
|
||
|
|
bool setMaxThreads(int32_t maxThreads) noexcept
|
||
|
|
{
|
||
|
|
return mImpl->setMaxThreads(maxThreads);
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief get the maximum number of threads that can be used by the builder.
|
||
|
|
//!
|
||
|
|
//! Retrieves the maximum number of threads that can be used by the builder.
|
||
|
|
//!
|
||
|
|
//! \return The maximum number of threads that can be used by the builder.
|
||
|
|
//!
|
||
|
|
//! \see setMaxThreads()
|
||
|
|
//!
|
||
|
|
int32_t getMaxThreads() const noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getMaxThreads();
|
||
|
|
}
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief get the local plugin registry that can be used by the builder.
|
||
|
|
//!
|
||
|
|
//! \return The local plugin registry that can be used by the builder.
|
||
|
|
//!
|
||
|
|
IPluginRegistry& getPluginRegistry() noexcept
|
||
|
|
{
|
||
|
|
return mImpl->getPluginRegistry();
|
||
|
|
}
|
||
|
|
|
||
|
|
protected:
|
||
|
|
apiv::VBuilder* mImpl;
|
||
|
|
};
|
||
|
|
|
||
|
|
} // namespace nvinfer1
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! Internal C entry point for creating IBuilder.
|
||
|
|
//! @private
|
||
|
|
//!
|
||
|
|
extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
|
||
|
|
|
||
|
|
namespace nvinfer1
|
||
|
|
{
|
||
|
|
namespace
|
||
|
|
{
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Create an instance of an IBuilder class.
|
||
|
|
//!
|
||
|
|
//! \param logger The logging class for the builder.
|
||
|
|
//!
|
||
|
|
//! unnamed namespace avoids linkage surprises when linking objects built with different versions of this header.
|
||
|
|
//!
|
||
|
|
inline IBuilder* createInferBuilder(ILogger& logger) noexcept
|
||
|
|
{
|
||
|
|
return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
|
||
|
|
//!
|
||
|
|
//! Also return nullptr if the input argument is not EngineCapability::kSTANDARD.
|
||
|
|
//! Engine capabilities EngineCapability::kSTANDARD and EngineCapability::kSAFETY have distinct plugin registries.
|
||
|
|
//! Use IPluginRegistry::registerCreator from the registry to register plugins.
|
||
|
|
//! Plugins registered in a registry associated with a specific engine capability are only available when
|
||
|
|
//! building engines with that engine capability.
|
||
|
|
//!
|
||
|
|
//! There is no plugin registry for EngineCapability::kDLA_STANDALONE.
|
||
|
|
//!
|
||
|
|
extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getBuilderPluginRegistry(
|
||
|
|
nvinfer1::EngineCapability capability) noexcept;
|
||
|
|
|
||
|
|
namespace safe
{
//! Forward declaration
class IPluginRegistry;
} // namespace safe
//!
|
||
|
|
//! \brief Return the plugin registry for building a Safety engine, or nullptr if no registry exists.
|
||
|
|
//!
|
||
|
|
//! Also return nullptr if the input argument is not EngineCapability::kSAFETY.
|
||
|
|
//! When building a Standard engine, use nvinfer1::getBuilderPluginRegistry().
|
||
|
|
//! Use safe::IPluginRegistry::registerCreator from the registry to register plugins.
|
||
|
|
//!
|
||
|
|
extern "C" TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry* getBuilderSafePluginRegistry(
|
||
|
|
nvinfer1::EngineCapability capability) noexcept;
|
||
|
|
|
||
|
|
} // namespace nvinfer1
|
||
|
|
|
||
|
|
#endif // NV_INFER_H
|