/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
||
|
|
|
||
|
|
#ifndef NV_INFER_PLUGIN_UTILS_H
|
||
|
|
#define NV_INFER_PLUGIN_UTILS_H
|
||
|
|
|
||
|
|
#include "NvInferRuntimeCommon.h"
|
||
|
|
|
||
|
|
//!
//! \file NvInferPluginUtils.h
//!
//! This is the API for the NVIDIA-provided TensorRT plugin utilities.
//! It lists all the parameters utilized by the TensorRT plugins.
//!
|
||
|
|
|
||
|
|
namespace nvinfer1
|
||
|
|
{
|
||
|
|
namespace plugin
|
||
|
|
{
|
||
|
|
|
||
|
|
//!
//! \struct PriorBoxParameters
//!
//! \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all
//! dimensions (H x W).
//!
//! PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer.
//!
struct PriorBoxParameters
{
    float* minSize;          //!< Minimum box size in pixels. Cannot be nullptr.
    float* maxSize;          //!< Maximum box size in pixels. Can be nullptr.
    float* aspectRatios;     //!< Aspect ratios of the boxes. Can be nullptr.
    int32_t numMinSize;      //!< Number of elements in minSize. Must be larger than 0.
    int32_t numMaxSize;      //!< Number of elements in maxSize. Can be 0 or same as numMinSize.
    int32_t numAspectRatios; //!< Number of elements in aspectRatios. Can be 0.
    bool flip;               //!< If true, will flip each aspect ratio. For example,
                             //!< if there is an aspect ratio "r", the aspect ratio "1.0/r" will be generated as well.
    bool clip;               //!< If true, will clip the prior so that it is within [0,1].
    float variance[4];       //!< Variance for adjusting the prior boxes.
    int32_t imgH;            //!< Image height. If 0, then the H dimension of the data tensor will be used.
    int32_t imgW;            //!< Image width. If 0, then the W dimension of the data tensor will be used.
    float stepH;             //!< Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the
                             //!< 1st input tensor.
    float stepW;             //!< Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the
                             //!< 1st input tensor.
    float offset;            //!< Offset to the top left corner of each cell.
};
|
||
|
|
|
||
|
|
//!
//! \struct RPROIParams
//!
//! \brief RPROIParams is used to create the RPROIPlugin instance.
//!
struct RPROIParams
{
    int32_t poolingH;          //!< Height of the output in pixels after ROI pooling on feature map.
    int32_t poolingW;          //!< Width of the output in pixels after ROI pooling on feature map.
    int32_t featureStride;     //!< Feature stride; ratio of input image size to feature map size.
                               //!< Assuming that max pooling layers in the neural network use square filters.
    int32_t preNmsTop;         //!< Number of proposals to keep before applying NMS.
    int32_t nmsMaxOut;         //!< Number of remaining proposals after applying NMS.
    int32_t anchorsRatioCount; //!< Number of anchor box ratios.
    int32_t anchorsScaleCount; //!< Number of anchor box scales.
    float iouThreshold;        //!< IoU (Intersection over Union) threshold used for the NMS step.
    float minBoxSize;          //!< Minimum allowed bounding box size before scaling, used for anchor box calculation.
    float spatialScale;        //!< Spatial scale between the input image and the last feature map.
};
|
||
|
|
|
||
|
|
//!
//! \struct GridAnchorParameters
//!
//! \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across
//! all dimensions (H x W).
//!
//! GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps.
//!
struct GridAnchorParameters
{
    float minSize;           //!< Scale of anchors corresponding to finest resolution.
    float maxSize;           //!< Scale of anchors corresponding to coarsest resolution.
    float* aspectRatios;     //!< List of aspect ratios to place on each grid point.
    int32_t numAspectRatios; //!< Number of elements in aspectRatios.
    int32_t H;               //!< Height of feature map to generate anchors for.
    int32_t W;               //!< Width of feature map to generate anchors for.
    float variance[4];       //!< Variance for adjusting the prior boxes.
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \enum CodeTypeSSD
|
||
|
|
//!
|
||
|
|
//! \brief The type of encoding used for decoding the bounding boxes and loc_data.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
|
||
|
|
//!
|
||
|
|
enum class CodeTypeSSD : int32_t
|
||
|
|
{
|
||
|
|
CORNER TRT_DEPRECATED_ENUM = 0, //!< Use box corners.
|
||
|
|
CENTER_SIZE TRT_DEPRECATED_ENUM = 1, //!< Use box centers and size.
|
||
|
|
CORNER_SIZE TRT_DEPRECATED_ENUM = 2, //!< Use box centers and size.
|
||
|
|
TF_CENTER TRT_DEPRECATED_ENUM = 3 //!< Use box centers and size but flip x and y coordinates.
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \struct DetectionOutputParameters
|
||
|
|
//!
|
||
|
|
//! \brief The DetectionOutput plugin layer generates the detection output
|
||
|
|
//! based on location and confidence predictions by doing non maximum suppression.
|
||
|
|
//!
|
||
|
|
//! This plugin first decodes the bounding boxes based on the anchors generated.
|
||
|
|
//! It then performs non_max_suppression on the decoded bounding boxes.
|
||
|
|
//! DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
|
||
|
|
//!
|
||
|
|
struct TRT_DEPRECATED DetectionOutputParameters
|
||
|
|
{
|
||
|
|
bool shareLocation; //!< If true, bounding box are shared among different classes.
|
||
|
|
bool varianceEncodedInTarget; //!< If true, variance is encoded in target.
|
||
|
|
//!< Otherwise we need to adjust the predicted offset accordingly.
|
||
|
|
int32_t backgroundLabelId; //!< Background label ID. If there is no background class, set it as -1.
|
||
|
|
int32_t numClasses; //!< Number of classes to be predicted.
|
||
|
|
int32_t topK; //!< Number of boxes per image with top confidence scores that are fed
|
||
|
|
//!< into the NMS algorithm.
|
||
|
|
int32_t keepTopK; //!< Number of total bounding boxes to be kept per image after NMS step.
|
||
|
|
float confidenceThreshold; //!< Only consider detections whose confidences are larger than a threshold.
|
||
|
|
float nmsThreshold; //!< Threshold to be used in NMS.
|
||
|
|
CodeTypeSSD codeType; //!< Type of coding method for bbox.
|
||
|
|
int32_t inputOrder[3]; //!< Specifies the order of inputs {loc_data, conf_data, priorbox_data}.
|
||
|
|
bool confSigmoid; //!< Set to true to calculate sigmoid of confidence scores.
|
||
|
|
bool isNormalized; //!< Set to true if bounding box data is normalized by the network.
|
||
|
|
bool isBatchAgnostic{true}; //!< Defaults to true. Set to false if prior boxes are unique per batch.
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
//! \brief When performing yolo9000, softmaxTree is helping to do softmax on confidence scores,
//! for element to get the precise classification through word-tree structured classification definition.
//!
//! \note The individual members are not documented in this header. NOTE(review): the layout presumably
//! mirrors the word-tree structure of the reference YOLO9000 implementation; confirm the meaning of each
//! member against the Region plugin before relying on it.
//!
struct softmaxTree
{
    int32_t* leaf;
    int32_t n;
    int32_t* parent;
    int32_t* child;
    int32_t* group;
    char** name;
    int32_t groups;
    int32_t* groupSize;
    int32_t* groupOffset;
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The Region plugin layer performs region proposal calculation.
|
||
|
|
//!
|
||
|
|
//! Generate 5 bounding boxes per cell (for yolo9000, generate 3 bounding boxes per cell).
|
||
|
|
//! For each box, calculating its probabilities of objects detections from 80 pre-defined classifications
|
||
|
|
//! (yolo9000 has 9418 pre-defined classifications, and these 9418 items are organized as work-tree structure).
|
||
|
|
//! RegionParameters defines a set of parameters for creating the Region plugin layer.
|
||
|
|
//!
|
||
|
|
struct RegionParameters
|
||
|
|
{
|
||
|
|
int32_t num; //!< Number of predicted bounding box for each grid cell.
|
||
|
|
int32_t coords; //!< Number of coordinates for a bounding box.
|
||
|
|
int32_t classes; //!< Number of classifications to be predicted.
|
||
|
|
softmaxTree* smTree; //!< Helping structure to do softmax on confidence scores.
|
||
|
|
};
|
||
|
|
|
||
|
|
//!
|
||
|
|
//! \brief The NMSParameters are used by the BatchedNMSPlugin for performing
|
||
|
|
//! the non_max_suppression operation over boxes for object detection networks.
|
||
|
|
//!
|
||
|
|
//! \deprecated Deprecated in TensorRT 10.0. BatchedNMSPlugin plugin is deprecated.
|
||
|
|
//!
|
||
|
|
struct TRT_DEPRECATED NMSParameters
|
||
|
|
{
|
||
|
|
bool shareLocation; //!< If set to true, the boxes inputs are shared across all classes.
|
||
|
|
//!< If set to false, the boxes input should account for per class box data.
|
||
|
|
int32_t backgroundLabelId; //!< Label ID for the background class.
|
||
|
|
//!< If there is no background class, set it as -1
|
||
|
|
int32_t numClasses; //!< Number of classes in the network.
|
||
|
|
int32_t topK; //!< Number of bounding boxes to be fed into the NMS step.
|
||
|
|
int32_t keepTopK; //!< Number of total bounding boxes to be kept per image after NMS step.
|
||
|
|
//!< Should be less than or equal to the topK value.
|
||
|
|
float scoreThreshold; //!< Scalar threshold for score (low scoring boxes are removed).
|
||
|
|
float iouThreshold; //!< A scalar threshold for IOU (new boxes that have high IOU overlap
|
||
|
|
//!< with previously selected boxes are removed).
|
||
|
|
bool isNormalized; //!< Set to false, if the box coordinates are not normalized,
|
||
|
|
//!< i.e. not in the range [0,1]. Defaults to false.
|
||
|
|
};
|
||
|
|
|
||
|
|
} // namespace plugin
|
||
|
|
} // namespace nvinfer1
|
||
|
|
|
||
|
|
#endif // NV_INFER_PLUGIN_UTILS_H
|