/* * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NV_ONNX_PARSER_H #define NV_ONNX_PARSER_H #include "NvInfer.h" #include #include #include //! //! \file NvOnnxParser.h //! //! This is the API for the ONNX Parser //! #define NV_ONNX_PARSER_MAJOR 0 #define NV_ONNX_PARSER_MINOR 1 #define NV_ONNX_PARSER_PATCH 0 static constexpr int32_t NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); //! //! \typedef SubGraph_t //! //! \brief The data structure containing the parsing capability of //! a set of nodes in an ONNX graph. //! typedef std::pair, bool> SubGraph_t; //! //! \typedef SubGraphCollection_t //! //! \brief The data structure containing all SubGraph_t partitioned //! out of an ONNX graph. //! typedef std::vector SubGraphCollection_t; //! //! \namespace nvonnxparser //! //! \brief The TensorRT ONNX parser API namespace //! namespace nvonnxparser { template constexpr inline int32_t EnumMax() noexcept; //! //! \enum ErrorCode //! //! \brief The type of error that the parser or refitter may return //! enum class ErrorCode : int { kSUCCESS = 0, kINTERNAL_ERROR = 1, kMEM_ALLOC_FAILED = 2, kMODEL_DESERIALIZE_FAILED = 3, kINVALID_VALUE = 4, kINVALID_GRAPH = 5, kINVALID_NODE = 6, kUNSUPPORTED_GRAPH = 7, kUNSUPPORTED_NODE = 8, kUNSUPPORTED_NODE_ATTR = 9, kUNSUPPORTED_NODE_INPUT = 10, kUNSUPPORTED_NODE_DATATYPE = 11, kUNSUPPORTED_NODE_DYNAMIC = 12, kUNSUPPORTED_NODE_SHAPE = 13, kREFIT_FAILED = 14 }; //! //! Maximum number of flags in the ErrorCode enum. //! //! \see ErrorCode //! template <> constexpr inline int32_t EnumMax() noexcept { return 14; } //! //! \brief Represents one or more OnnxParserFlag values using binary OR //! operations, e.g., 1U << OnnxParserFlag::kNATIVE_INSTANCENORM //! //! \see IParser::setFlags() and IParser::getFlags() //! using OnnxParserFlags = uint32_t; enum class OnnxParserFlag : int32_t { //! Parse the ONNX model into the INetworkDefinition with the intention of using TensorRT's native layer //! implementation over the plugin implementation for InstanceNormalization nodes. //! This flag is required when building version-compatible or hardware-compatible engines. //! This flag is set to be ON by default. kNATIVE_INSTANCENORM = 0, //! Enable UINT8 as a quantization data type and asymmetric quantization with non-zero zero-point values //! in Quantize and Dequantize nodes. This flag is set to be OFF by default. //! The resulting engine must be built targeting DLA version >= 3.16. kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA = 1, //! Parse the ONNX model with per-node validation for DLA. If the model is not fully supported by DLA, then //! parsing will fail. If this flag is set, isSubGraphSupported() will also return capability in the context of DLA //! support. When this flag is set, a valid IBuilderConfig must be provided to the parser via setBuilderConfig(). // This flag is set to be OFF by default. kREPORT_CAPABILITY_DLA = 2, //! Allow a loaded plugin with the same name as an ONNX operator type to override the default ONNX implementation, //! even if the plugin namespace attribute is not set. //! Useful for custom plugins that replace standard ONNX operators, such as alternative implementations for better //! performance. This flag is set to be OFF by default. kENABLE_PLUGIN_OVERRIDE = 3 }; //! //! Maximum number of flags in the OnnxParserFlag enum. //! //! \see OnnxParserFlag //! template <> constexpr inline int32_t EnumMax() noexcept { return 3; } //! //! \class IParserError //! //! \brief an object containing information about an error //! class IParserError { public: //! //!\brief the error code. //! virtual ErrorCode code() const = 0; //! //!\brief description of the error. //! virtual char const* desc() const = 0; //! //!\brief source file in which the error occurred. //! virtual char const* file() const = 0; //! //!\brief source line at which the error occurred. //! virtual int line() const = 0; //! //!\brief source function in which the error occurred. //! virtual char const* func() const = 0; //! //!\brief index of the ONNX model node in which the error occurred. //! virtual int node() const = 0; //! //!\brief name of the node in which the error occurred. //! virtual char const* nodeName() const = 0; //! //!\brief name of the node operation in which the error occurred. //! virtual char const* nodeOperator() const = 0; //! //!\brief A list of the local function names, from the top level down, constituting the current //! stack trace in which the error occurred. A top-level node that is not inside any //! local function would return a nullptr. //! virtual char const* const* localFunctionStack() const = 0; //! //!\brief The size of the stack of local functions at the point where the error occurred. //! A top-level node that is not inside any local function would correspond to // a stack size of 0. //! virtual int32_t localFunctionStackSize() const = 0; protected: virtual ~IParserError() {} }; //! //! \class IParser //! //! \brief an object for parsing ONNX models into a TensorRT network definition //! //! \warning If the ONNX model has a graph output with the same name as a graph input, //! the output will be renamed by prepending "__". //! //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. //! class IParser { public: //! //! \brief Parse a serialized ONNX model into the TensorRT network. //! This method has very limited diagnostics. If parsing the serialized model //! fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) //! it the user responsibility to intercept and report the error. //! To obtain a better diagnostic, use the parseFromFile method below. //! //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serialized_onnx_model_size Size of the serialized ONNX model //! in bytes //! \param model_path Absolute path to the model file for loading external weights if required //! \return true if the model was parsed successfully //! \see getNbErrors() getError() //! virtual bool parse( void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) noexcept = 0; //! //! \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model //! calls parse method inside. //! //! \param onnxModelFile name //! \param verbosity Level //! //! \return true if the model was parsed successfully //! //! virtual bool parseFromFile(const char* onnxModelFile, int verbosity) noexcept = 0; //! //! [DEPRECATED] Deprecated in TensorRT 10.1. See supportsModelV2. //! //! \brief Check whether TensorRT supports a particular ONNX model. //! If the function returns True, one can proceed to engine building //! without having to call \p parse or \p parseFromFile. //! //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serialized_onnx_model_size Size of the serialized ONNX model //! in bytes //! \param sub_graph_collection Container to hold supported subgraphs //! \param model_path Absolute path to the model file for loading external weights if required //! \return true if the model is supported //! TRT_DEPRECATED virtual bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) noexcept = 0; //! //! [DEPRECATED] Deprecated in TensorRT 10.13. See loadInitializer(). //! //!\brief Parse a serialized ONNX model into the TensorRT network //! with consideration of user provided weights //! //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serialized_onnx_model_size Size of the serialized ONNX model //! in bytes //! \return true if the model was parsed successfully //! \see getNbErrors() getError() //! TRT_DEPRECATED virtual bool parseWithWeightDescriptors( void const* serialized_onnx_model, size_t serialized_onnx_model_size) noexcept = 0; //! //!\brief Returns whether the specified operator may be supported by the //! parser. //! //! Note that a result of true does not guarantee that the operator will be //! supported in all cases (i.e., this function may return false-positives). //! //! \param op_name The name of the ONNX operator to check for support //! virtual bool supportsOperator(const char* op_name) const noexcept = 0; //! //!\brief Get the number of errors that occurred during prior calls to //! \p parse //! //! \see getError() clearErrors() IParserError //! virtual int getNbErrors() const noexcept = 0; //! //!\brief Get an error that occurred during prior calls to \p parse //! //! \see getNbErrors() clearErrors() IParserError //! virtual IParserError const* getError(int index) const noexcept = 0; //! //!\brief Clear errors from prior calls to \p parse //! //! \see getNbErrors() getError() IParserError //! virtual void clearErrors() noexcept = 0; virtual ~IParser() noexcept = default; //! //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible //! engine. //! //! This provides a list of plugin libraries on the filesystem needed to implement operations //! in the parsed network. If you are building a version-compatible engine using this network, //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along //! with the version-compatible engine, or, if you want to ship these plugin libraries externally //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the //! appropriate runtime before deserializing the corresponding engine. //! //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error. //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0, //! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors(). //! virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0; //! //! \brief Set the parser flags. //! //! The flags are listed in the OnnxParserFlag enum. //! //! \param OnnxParserFlags The flags used when parsing an ONNX model. //! //! \note This function will override the previous set flags, rather than bitwise ORing the new flag. //! //! \see getFlags() //! virtual void setFlags(OnnxParserFlags onnxParserFlags) noexcept = 0; //! //! \brief Get the parser flags. Defaults to 0. //! //! \return The parser flags as a bitmask. //! //! \see setFlags() //! virtual OnnxParserFlags getFlags() const noexcept = 0; //! //! \brief clear a parser flag. //! //! clears the parser flag from the enabled flags. //! //! \see setFlags() //! virtual void clearFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; //! //! \brief Set a single parser flag. //! //! Add the input parser flag to the already enabled flags. //! //! \see setFlags() //! virtual void setFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; //! //! \brief Returns true if the parser flag is set //! //! \see getFlags() //! //! \return True if flag is set, false if unset. //! virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0; //! //!\brief Return the i-th output ITensor object for the ONNX layer "name". //! //! Return the i-th output ITensor object for the ONNX layer "name". //! If "name" is not found or i is out of range, return nullptr. //! In the case of multiple nodes sharing the same name this function will return //! the output tensors of the first instance of the node in the ONNX graph. //! //! \param name The name of the ONNX layer. //! //! \param i The index of the output. i must be in range [0, layer.num_outputs). //! virtual nvinfer1::ITensor const* getLayerOutputTensor(char const* name, int64_t i) noexcept = 0; //! //! \brief Check whether TensorRT supports a particular ONNX model. //! If the function returns True, one can proceed to engine building //! without having to call \p parse or \p parseFromFile. //! Results can be queried through \p getNbSubgraphs, \p isSubgraphSupported, //! \p getSubgraphNodes. //! //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes //! \param modelPath Absolute path to the model file for loading external weights if required //! \return true if the model is supported //! virtual bool supportsModelV2( void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0; //! //! \brief Get the number of subgraphs. Calling this function before calling \p supportsModelV2 results in undefined //! behavior. //! //! //! \return Number of subgraphs. //! virtual int64_t getNbSubgraphs() noexcept = 0; //! //! \brief Returns whether the subgraph is supported. Calling this function before calling \p supportsModelV2 //! results in undefined behavior. //! //! //! \param index Index of the subgraph. //! \return Whether the subgraph is supported. //! virtual bool isSubgraphSupported(int64_t const index) noexcept = 0; //! //! \brief Get the nodes of the specified subgraph. Calling this function before calling \p supportsModelV2 results //! in undefined behavior. //! //! //! \param index Index of the subgraph. //! \param subgraphLength Returns the length of the subgraph as reference. //! //! \return Pointer to the subgraph nodes array. This pointer is owned by the Parser. //! virtual int64_t* getSubgraphNodes(int64_t const index, int64_t& subgraphLength) noexcept = 0; //! //! \brief Load a serialized ONNX model into the parser. Unlike the parse(), parseFromFile(), or //! parseWithWeightDescriptors() functions, this function does not immediately convert the model into a TensorRT //! INetworkDefinition. Using this function allows users to provide their own initializers for the ONNX model //! through the loadInitializer() function. //! //! Only one model can be loaded at a time. Subsequent calls to loadModelProto() will result in an error. //! //! To begin the conversion of the model into a TensorRT INetworkDefinition, use parseModelProto(). //! //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes. //! \param modelPath Absolute path to the model file for loading external weights if required. //! \return true if the model was loaded successfully //! \see getNbErrors() getError() //! virtual bool loadModelProto( void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0; //! //! \brief Prompt the ONNX parser to load an initializer with user-provided binary data. //! The lifetime of the data must exceed the lifetime of the parser. //! //! All user-provided initializers must be provided prior to calling refitModelProto(). //! //! This function can be called multiple times to specify the names of multiple initializers. //! //! Calling this function with an initializer previously specified will overwrite the previous instance. //! //! //! This function will return false if initializer validation fails. Possible validation errors are: //! * This function was called prior to loadModelProto(). //! * The requested initializer was not found in the model. //! * The size of the data provided is different from the corresponding initializer in the model. //! //! \param name Name of the initializer. //! \param data Binary data containing the values of the initializer. //! \param size Size of the initializer in bytes. //! \return true if the initializer was loaded successfully //! \see loadModelProto() //! virtual bool loadInitializer(char const* name, void const* data, size_t size) noexcept = 0; //! \brief Begin the parsing and conversion process of the loaded ONNX model into a TensorRT INetworkDefinition. //! //! \return true if conversion was successful //! \see getNbErrors() getError() loadModelProto() loadModelProtoFromFile() //! virtual bool parseModelProto() noexcept = 0; //! //! \brief Set the BuilderConfig for the parser. //! //! \return true if the IBuilderConfig was set successfully, false otherwise. //! virtual bool setBuilderConfig(const nvinfer1::IBuilderConfig* const builderConfig) noexcept = 0; }; //! //! \class IParserRefitter //! //! \brief An interface designed to refit weights from an ONNX model. //! //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. //! class IParserRefitter { public: //! //! \brief Load a serialized ONNX model from memory and perform weight refit. //! //! \param serializedOnnxModel Pointer to the serialized ONNX model //! \param serializedOnnxModelSize Size of the serialized ONNX model //! in bytes //! \param modelPath Absolute path to the model file for loading external weights if required //! \return true if all the weights in the engine were refit successfully. //! //! The serialized ONNX model must be identical to the one used to generate the engine //! that will be refit. //! virtual bool refitFromBytes( void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0; //! //! \brief Load and parse a ONNX model from disk and perform weight refit. //! //! \param onnxModelFile Path to the ONNX model to load from disk. //! //! \return true if the model was loaded successfully, and if all the weights in the engine were refit successfully. //! //! The provided ONNX model must be identical to the one used to generate the engine //! that will be refit. //! virtual bool refitFromFile(char const* onnxModelFile) noexcept = 0; //! //!\brief Get the number of errors that occurred during prior calls to \p refitFromBytes or \p refitFromFile //! //! \see getError() IParserError //! virtual int32_t getNbErrors() const noexcept = 0; //! //!\brief Get an error that occurred during prior calls to \p refitFromBytes or \p refitFromFile //! //! \see getNbErrors() IParserError //! virtual IParserError const* getError(int32_t index) const noexcept = 0; //! //!\brief Clear errors from prior calls to \p refitFromBytes or \p refitFromFile //! //! \see getNbErrors() getError() IParserError //! virtual void clearErrors() = 0; virtual ~IParserRefitter() noexcept = default; //! //! \brief Load a serialized ONNX model into the parser. Unlike the refit(), or refitFromFile() //! functions, this function does not immediately begin the refit process. Using this function //! allows users to provide their own initializers for the ONNX model through the loadInitializer() function. //! //! Only one model can be loaded at a time. Subsequent calls to loadModelProto() will result in an error. //! //! To begin the refit process, use refitModelProto(). //! //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns. //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes. //! \param modelPath Absolute path to the model file for loading external weights if required. //! \return true if the model was loaded successfully //! \see getNbErrors() getError() //! virtual bool loadModelProto( void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0; //! //! \brief Prompt the ONNX refitter to load an initializer with user-provided binary data. //! The lifetime of the data must exceed the lifetime of the refitter. //! //! All user-provided initializers must be provided prior to calling refitModelProto(). //! //! This function can be called multiple times to specify the names of multiple initializers. //! //! Calling this function with an initializer previously specified will overwrite the previous instance. //! //! This function will return false if initializer validation fails. Possible validation errors are: //! * This function was called prior to loadModelProto() //! * The requested initializer was not found in the model. //! * The size of the data provided is different from the corresponding initializer in the model. //! //! \param name Name of the initializer. //! \param data Binary data containing the values of the initializer. //! \param size Size of the initializer in bytes. //! \return true if the initializer was loaded successfully //! \see loadModelProto() //! virtual bool loadInitializer(char const* name, void const* data, size_t size) noexcept = 0; //! \brief Begin the refit process from the loaded ONNX model. //! //! \return true if refit was successful //! \see getNbErrors() getError() loadModelProto() //! virtual bool refitModelProto() noexcept = 0; }; } // namespace nvonnxparser extern "C" TENSORRTAPI void* createNvOnnxParser_INTERNAL(void* network, void* logger, int version) noexcept; extern "C" TENSORRTAPI void* createNvOnnxParserRefitter_INTERNAL( void* refitter, void* logger, int32_t version) noexcept; extern "C" TENSORRTAPI int getNvOnnxParserVersion() noexcept; namespace nvonnxparser { namespace { //! //! \brief Create a new parser object //! //! \param network The network definition that the parser will write to //! \param logger The logger to use //! \return a new parser object or NULL if an error occurred //! //! Any input dimensions that are constant should not be changed after parsing, //! because correctness of the translation may rely on those constants. //! Changing a dynamic input dimension, i.e. one that translates to -1 in //! TensorRT, to a constant is okay if the constant is consistent with the model. //! Each instance of the parser is designed to only parse one ONNX model once. //! //! \see IParser //! inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger) noexcept { return static_cast(createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION)); } //! //! \brief Create a new ONNX refitter object //! //! \param refitter The Refitter object used to refit the model //! \param logger The logger to use //! \return a new ParserRefitter object or NULL if an error occurred //! //! \see IParserRefitter //! inline IParserRefitter* createParserRefitter(nvinfer1::IRefitter& refitter, nvinfer1::ILogger& logger) noexcept { return static_cast( createNvOnnxParserRefitter_INTERNAL(&refitter, &logger, NV_ONNX_PARSER_VERSION)); } } // namespace } // namespace nvonnxparser #endif // NV_ONNX_PARSER_H