Files
ANSCORE/modules/ANSODEngine/SCRFDFaceDetector.cpp

1316 lines
44 KiB
C++
Raw Permalink Normal View History

2026-03-28 16:54:11 +11:00
#include "SCRFDFaceDetector.h"
#include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
#include "Utility.h"
#include <chrono>
//#define FNS_DEBUG
namespace ANSCENTER {
// Initialize the SCRFD face detector: runs base-class initialization, then
// acquires a pooled TensorRT engine built from scrfdface.onnx. Keeps an
// already-loaded engine alive across re-initialization to avoid a rebuild.
bool ANSSCRFDFD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath,
    const std::string& modelZipPassword, std::string& labelMap) {
    // Clean up existing resources before reinitialization — but only when no
    // live engine is held (Destroy() would release the pooled engine slot).
    const bool engineAlreadyLoaded = _isInitialized && m_trtEngine != nullptr;
    if (!engineAlreadyLoaded) Destroy();
    // Call base class Initialize
    bool result = ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    if (!result) return false;
    labelMap = "Face"; // single-class detector
    _licenseValid = true;
    try {
        // SCRFD uses a fixed 640x640 network input.
        _modelConfig = modelConfig;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;
        std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");
        if (!std::filesystem::exists(onnxfile)) {
            this->_logger.LogError("ANSSCRFDFD::Initialize. Model scrfdface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
            return false;
        }
        _modelFilePath = onnxfile;
        // Initialize TensorRT via shared pool (only when no engine is held yet).
        if (!m_trtEngine) {
            m_options.precision = ANSCENTER::Precision::FP16;
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.calibrationBatchSize = 1;
            // Pool key: engines are shared per (model file, precision, max batch).
            m_poolKey = { _modelFilePath,
                          static_cast<int>(m_options.precision),
                          m_options.maxBatchSize };
            m_trtEngine = EnginePoolManager<float>::instance().acquire(
                m_poolKey, m_options, _modelFilePath,
                SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            m_usingSharedPool = (m_trtEngine != nullptr);
            if (!m_trtEngine) {
                this->_logger.LogError("ANSSCRFDFD::Initialize. Unable to build or load TensorRT engine.", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        // SCRFD head configuration: 3 FPN levels (strides 8/16/32), 2 anchors
        // per grid cell, 5-point facial keypoints enabled.
        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Load the SCRFD model from a (password-protected) zip: base class extracts
// the archive and sets _modelFolder, then a pooled TensorRT engine is
// acquired for scrfdface.onnx.
// Fixes: error-log tag previously said "Initialize" in this function;
// removed an unused local (`engineAlreadyLoaded` was computed but never read).
bool ANSSCRFDFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    try {
        // We need the base class to populate _modelFolder.
        bool result = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        // SCRFD uses a fixed 640x640 network input.
        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");
        if (!std::filesystem::exists(onnxfile)) {
            this->_logger.LogError("ANSSCRFDFD::LoadModel. Model scrfdface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
            return false;
        }
        _modelFilePath = onnxfile;
        if (!m_trtEngine) {
            m_options.precision = ANSCENTER::Precision::FP16;
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            // NOTE(review): Initialize() also sets m_options.calibrationBatchSize = 1
            // before acquiring — confirm whether the omission here is intentional.
            m_poolKey = { _modelFilePath,
                          static_cast<int>(m_options.precision),
                          m_options.maxBatchSize };
            m_trtEngine = EnginePoolManager<float>::instance().acquire(
                m_poolKey, m_options, _modelFilePath,
                SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            m_usingSharedPool = (m_trtEngine != nullptr);
            if (!m_trtEngine) {
                this->_logger.LogError("ANSSCRFDFD::LoadModel. Unable to build or load TensorRT engine.", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        // SCRFD head configuration: 3 FPN levels (strides 8/16/32), 2 anchors
        // per grid cell, 5-point facial keypoints enabled.
        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        _isInitialized = true;
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Load the SCRFD model directly from a folder. Falls back to the default
// model name "scrfdface" when none is supplied; only records the ONNX path
// (the engine itself is built elsewhere).
// Fixes: log tags previously said "LoadModel" for this function; the
// missing-file branch logged the stale _modelFilePath instead of the path
// that was actually probed; grammar in the error message.
bool ANSSCRFDFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
    try {
        // The base class resolves and stores _modelFolder.
        bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
        if (!result) return false;
        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "scrfdface"; // default model base name
        }
        // SCRFD uses a fixed 640x640 network input.
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        std::string modelFullName = _modelName + ".onnx";
        std::string onnxfile = CreateFilePath(_modelFolder, modelFullName);
        if (std::filesystem::exists(onnxfile)) {
            _modelFilePath = onnxfile;
            this->_logger.LogDebug("ANSSCRFDFD::LoadModelFromFolder. Loading scrfdface weight", _modelFilePath, __FILE__, __LINE__);
        }
        else {
            // Log the path we actually looked for, not the stale member value.
            this->_logger.LogError("ANSSCRFDFD::LoadModelFromFolder. Model scrfdface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
            return false;
        }
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::LoadModelFromFolder", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Build (optimize) the TensorRT engine for the current ONNX model and report
// the folder the serialized engine is written to.
// Fix: the `fp16` flag was documented as selecting precision but the code
// unconditionally hard-coded FP16 — it now honours the flag.
bool ANSSCRFDFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!FileExist(_modelFilePath)) {
        optimizedModelFolder = "";
        return false;
    }
    optimizedModelFolder = GetParentFolder(_modelFilePath);
    // Check if the engine already exists to avoid reinitializing
    if (!m_trtEngine) {
        m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
        m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.maxInputHeight = _modelConfig.maxInputHeight;
        m_options.minInputHeight = _modelConfig.minInputHeight;
        m_options.optInputHeight = _modelConfig.optInputHeight;
        m_options.maxInputWidth = _modelConfig.maxInputWidth;
        m_options.minInputWidth = _modelConfig.minInputWidth;
        m_options.optInputWidth = _modelConfig.optInputWidth;
        m_options.engineFileDir = optimizedModelFolder;
        // Use FP16 or FP32 precision based on the input flag.
        m_options.precision = fp16 ? Precision::FP16 : Precision::FP32;
        // Create the TensorRT inference engine
        m_trtEngine = std::make_shared<Engine<float>>(m_options);
    }
    // Build the TensorRT engine
    auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
    if (!succ) {
        const std::string errMsg =
            "Error: Unable to build the TensorRT engine. "
            "Try increasing TensorRT log severity to kVERBOSE.";
        this->_logger.LogError("ANSSCRFDFD::OptimizeModel", errMsg, __FILE__, __LINE__);
        return false;
    }
    // Also optimize the downstream face-attribute model via the base class.
    std::string optimizedFaceAttributeModelFolder;
    bool result = ANSFDBase::OptimizeModel(fp16, optimizedFaceAttributeModelFolder);
    return result;
}
// Convenience overload without a camera id: forwards to the camera-id
// overload with the default id "CustomCam".
// Fix: removed duplicated detection/liveness logic by delegating.
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    return RunInference(input, "CustomCam", useDynamicImage, validateFace, facelivenessCheck);
}
// Detect faces on a frame; when facelivenessCheck is set, run an additional
// liveness-validation pass over the raw detections before returning.
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    std::vector<Object> faces = Inference(input, camera_id, useDynamicImage, validateFace);
    if (!facelivenessCheck) {
        return faces;
    }
    return ValidateLivenessFaces(input, faces, camera_id);
}
// Full face-detection pipeline for one frame:
//   Phase 1 (brief lock): validate state/input, optionally replicate-pad a
//     tight face crop so SCRFD has context.
//   Phase 2 (lock released): run Detect() — it manages its own locking
//     around GPU inference.
//   Phase 3 (local data only): filter by score/validity, align each face to
//     112x112 (NV12 GPU affine warp when available, CPU fallback otherwise),
//     and undo the padding offset on boxes/keypoints.
// Fix: removed stray VCS-timestamp lines that had been pasted into the body
// and broke compilation.
std::vector<Object> ANSSCRFDFD::Inference(const cv::Mat& input,
                                          const std::string& camera_id,
                                          bool useDynamicImage,
                                          bool validateFace)
{
    // Bail out immediately while another thread is (re)loading the model.
    if (_modelLoading.load()) return {};

    // Phase 1: Validation + image preprocessing (brief lock)
    cv::Mat im;
    bool croppedFace;
    float scoreThreshold;
    {
        auto lock = TryLockWithTimeout("ANSSCRFDFD::Inference");
        if (!lock.owns_lock()) return {};
        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return {};
        }
        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return {};
        }
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return {};
        }
        croppedFace = !useDynamicImage;
        scoreThreshold = _modelConfig.detectionScoreThreshold;
        if (croppedFace) {
            // Replicate-pad tight crops so the detector sees surrounding context.
            constexpr int border = 200;
            cv::copyMakeBorder(input, im, border, border, border, border, cv::BORDER_REPLICATE);
            if (im.rows > 1280) {
                // Clamp very tall padded crops to 1280px height, keeping aspect.
                const float aspectRatio = static_cast<float>(im.cols) / static_cast<float>(im.rows);
                constexpr int newHeight = 1280;
                const int newWidth = static_cast<int>(newHeight * aspectRatio);
                cv::resize(im, im, cv::Size(newWidth, newHeight));
            }
        }
        else {
            im = input;
        }
    }
    // Phase 2: Detect faces (mutex released — Detect manages its own brief locks around GPU inference)
    std::vector<Object> detectedFaces;
    try {
        detectedFaces = Detect(im);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::Inference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        _logger.LogFatal("ANSSCRFDFD::Inference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
    if (detectedFaces.empty()) {
        return {};
    }
    // Phase 3: Process detected faces (operates on per-call local data — no shared state)
    const int originalWidth = croppedFace ? input.cols : 0;
    const int originalHeight = croppedFace ? input.rows : 0;
    constexpr int border = 200;
    constexpr float borderF = 200.0f;
    // NV12 affine warp: precompute scale factors (display-res -> full-res NV12)
    float nv12ScaleX = 1.f, nv12ScaleY = 1.f;
    int nv12FullW = 0, nv12FullH = 0;
    bool nv12AffineAvailable = false;
    const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
    if (!croppedFace && m_nv12Helper.isCudaContextHealthy(_logger, "SCRFD")) {
        auto* gpuData = tl_currentGpuFrame();
        // pixelFormat 23 — presumably NV12; confirm against ANSGpuFrameRegistry.
        if (gpuData && gpuData->pixelFormat == 23 && gpuData->width > 0 && gpuData->height > 0) {
            nv12ScaleX = static_cast<float>(gpuData->width) / im.cols;
            nv12ScaleY = static_cast<float>(gpuData->height) / im.rows;
            nv12FullW = gpuData->width;
            nv12FullH = gpuData->height;
            nv12AffineAvailable = true;
        }
    }
    std::vector<Object> output;
    output.reserve(detectedFaces.size());
    for (auto& face : detectedFaces) {
        if (face.confidence <= scoreThreshold) {
            continue;
        }
        if (validateFace && !isValidFace(face.polygon, face.box, 27)) {
            continue;
        }
        // Get face mask — try NV12 affine warp first, fall back to CPU warpAffine
        cv::Mat mask;
        cv::cuda::GpuMat gpuMask;
        if (nv12AffineAvailable && face.polygon.size() == 5) {
            // Canonical 5-point template of a 112x112 aligned face, scaled once.
            static const std::vector<cv::Point2f> kTemplate112 = []() {
                const std::vector<cv::Point2f> face_template = {
                    {0.34191607f, 0.46157411f}, {0.65653393f, 0.45983393f},
                    {0.50022500f, 0.64050536f}, {0.37097589f, 0.82469196f},
                    {0.63151696f, 0.82325089f}
                };
                std::vector<cv::Point2f> tpl;
                tpl.reserve(5);
                for (const auto& pt : face_template)
                    tpl.emplace_back(pt.x * 112.0f, pt.y * 112.0f);
                return tpl;
            }();
            // Compute affine matrix on CPU (fast ~0.01ms)
            cv::Mat affineMatrix = cv::estimateAffinePartial2D(
                face.polygon, kTemplate112);
            if (!affineMatrix.empty()) {
                auto nv12Face = m_nv12Helper.tryNV12AffineWarp(
                    im, inferenceGpu, affineMatrix, 112, 112,
                    nv12ScaleX, nv12ScaleY, _logger, "SCRFD");
                if (nv12Face.succeeded) {
                    // Log first successful NV12 affine warp (once per process).
                    static bool s_nv12AffineLogged = false;
                    if (!s_nv12AffineLogged) {
                        s_nv12AffineLogged = true;
                        _logger.LogInfo("ANSSCRFDFD::Inference",
                            "NV12 affine warp ACTIVE: face aligned from " +
                            std::to_string(nv12FullW) + "x" + std::to_string(nv12FullH) +
                            " NV12 -> 112x112 BGR (display=" +
                            std::to_string(im.cols) + "x" + std::to_string(im.rows) +
                            " scaleX=" + std::to_string(nv12ScaleX) +
                            " scaleY=" + std::to_string(nv12ScaleY) + ")",
                            __FILE__, __LINE__);
                    }
                    mask = std::move(nv12Face.alignedFaceBGR);
                    gpuMask = std::move(nv12Face.gpuAlignedFace);
                }
            }
        }
        // CPU fallback
        if (mask.empty()) {
            mask = Preprocess(im, face.polygon, im);
        }
        if (mask.empty()) {
            _logger.LogError("ANSSCRFDFD::Inference", "Cannot get mask image", __FILE__, __LINE__);
            continue;
        }
        // Build result object
        Object result;
        result.classId = 0;
        result.className = "Face";
        result.confidence = face.confidence;
        result.cameraId = camera_id;
        // NOTE(review): polygon landmarks are not shifted by the border offset
        // in the croppedFace branch below (kps are) — confirm intentional.
        result.polygon = std::move(face.polygon);
        result.mask = std::move(mask);
        result.gpuMask = std::move(gpuMask);
        if (croppedFace) {
            // Undo the replicate-padding offset and clamp to the original image.
            const int x1_new = std::max(0, face.box.x - border);
            const int y1_new = std::max(0, face.box.y - border);
            const int x2_new = std::min(originalWidth, face.box.x + face.box.width - border);
            const int y2_new = std::min(originalHeight, face.box.y + face.box.height - border);
            result.box = cv::Rect(x1_new, y1_new,
                                  std::max(0, x2_new - x1_new),
                                  std::max(0, y2_new - y1_new));
            result.kps.reserve(face.kps.size());
            for (const auto& pt : face.kps) {
                result.kps.emplace_back(pt - borderF);
            }
        }
        else {
            result.box = face.box;
            result.kps = std::move(face.kps);
        }
        output.push_back(std::move(result));
    }
    return output;
}
// Sliding-window face detection: scans the frame one priority region per call
// (cycling through regions across calls), detects faces in the active region,
// squares-up each box around its centre and attaches a 112px face crop.
// Fix: removed stray VCS-timestamp lines that had been pasted into the body
// and broke compilation.
std::vector<Object> ANSSCRFDFD::InferenceDynamic(const cv::Mat& input, const std::string& camera_id) {
    // Bail out while another thread is (re)loading the model.
    if (_modelLoading.load()) return {};
    auto lock = TryLockWithTimeout("ANSSCRFDFD::InferenceDynamic");
    if (!lock.owns_lock()) return {};
    std::vector<Object> output;
    try {
        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return output;
        }
        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return output;
        }
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return output;
        }
        // Small inputs are treated as tight face crops and replicate-padded.
        bool croppedFace = (input.cols <= 300 || input.rows <= 300);
        cv::Mat im;
        try {
            if (croppedFace) {
                cv::copyMakeBorder(input, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
            }
            else {
                im = input.clone();
            }
        }
        catch (const std::exception& e) {
            _logger.LogError("ANSSCRFDFD::Inference", std::string("copyMakeBorder failed: ") + e.what(), __FILE__, __LINE__);
            return output;
        }
        const int originalWidth = input.cols;
        const int originalHeight = input.rows;
        // Advance the region cursor: wrap to the highest-priority region once
        // past the lowest (or on first call).
        std::vector<ImageSection> sections = createSlideScreens(im);
        int lowestPriority = getLowestPriorityRegion();
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        }
        else {
            _currentPriority++;
        }
        cv::Rect regionByPriority = getRegionByPriority(_currentPriority);
        _detectedArea = regionByPriority;
#ifdef FNS_DEBUG
        cv::Mat draw = input.clone();
        cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2);
#endif
        std::vector<Object> filteredFaceObjects;
        if (_detectedArea.width > 50 && _detectedArea.height > 50) {
            try {
                // Detect inside the active region, then map boxes back to frame coords.
                cv::Mat activeFrame = im(_detectedArea).clone();
                std::vector<Object> rawDetections = Detect(activeFrame);
                filteredFaceObjects = AdjustDetectedBoundingBoxes(rawDetections, _detectedArea, im.size(), 0.9);
#ifdef FNS_DEBUG
                cv::imshow("Active Area", activeFrame);
                cv::waitKey(1);
#endif
            }
            catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference", std::string("Detect() failed: ") + e.what(), __FILE__, __LINE__);
                return output;
            }
        }
        for (const auto& face : filteredFaceObjects) {
            try {
                if (face.confidence < _modelConfig.detectionScoreThreshold)
                    continue;
#ifdef FNS_DEBUG
                // draw landmarks
                for (cv::Point2f point : face.polygon)
                {
                    cv::circle(draw, cv::Point(point.x + _detectedArea.x, point.y + _detectedArea.y), 2, cv::Scalar(0, 255, 0), -1);
                }
#endif
                if (!isValidFace(face.polygon, face.box, 27, _detectedArea.x, _detectedArea.y))
                    continue;
                Object result;
                int x_min = face.box.x;
                int y_min = face.box.y;
                int x_max = x_min + face.box.width;
                int y_max = y_min + face.box.height;
                if (croppedFace) {
                    // Undo the 200px replicate-padding offset.
                    x_min = std::max(0, x_min - 200);
                    y_min = std::max(0, y_min - 200);
                    x_max = std::min(originalWidth, x_max - 200);
                    y_max = std::min(originalHeight, y_max - 200);
                }
                // Square up the box around its centre, clamped to the image.
                int width_half = std::abs((x_max - x_min) / 2);
                int height_half = std::abs((y_max - y_min) / 2);
                int xc = x_min + width_half;
                int yc = y_min + height_half;
                int c = std::max(width_half, height_half);
                int x1_new = std::max(0, xc - c);
                int y1_new = std::max(0, yc - c);
                int x2_new = std::min(originalWidth, xc + c);
                int y2_new = std::min(originalHeight, yc + c);
                result.classId = 0;
                result.className = "Face";
                result.confidence = face.confidence;
                result.box = cv::Rect(x1_new, y1_new, x2_new - x1_new, y2_new - y1_new);
                result.kps = face.kps;
                result.cameraId = camera_id;
#ifdef FNS_DEBUG
                cv::rectangle(draw, result.box, cv::Scalar(0, 0, 255), 2);
#endif
                try {
                    result.mask = GetCroppedFaceScale(im, x1_new, y1_new, x2_new, y2_new, 112);
                }
                catch (const std::exception& e) {
                    _logger.LogError("ANSSCRFDFD::Inference", std::string("GetCroppedFaceScale failed: ") + e.what(), __FILE__, __LINE__);
                    continue;
                }
                if (!result.mask.empty()) {
                    output.push_back(result);
                }
            }
            catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference", std::string("Processing one face failed: ") + e.what(), __FILE__, __LINE__);
                continue;
            }
        }
#ifdef FNS_DEBUG
        cv::resize(draw, draw, cv::Size(1920, 1080));
        cv::imshow("Detected Areas", draw);
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return output;
}
// Core SCRFD detection on one image:
//   Phase 1 (brief lock): validate input/engine, read network input dims.
//   Letterbox math is computed lock-free (pure local arithmetic).
//   Phase 2 (lock released): GPU preprocessing — NV12 fused fast path when a
//     registered GPU frame is available, otherwise CPU letterbox + upload —
//     then TensorRT inference via the shared pool.
//   Phase 3 (brief lock): decode boxes/keypoints and run NMS.
// Fix: removed stray VCS-timestamp lines that had been pasted mid-statement
// into the CPU-fallback path and broke compilation.
std::vector<Object> ANSSCRFDFD::Detect(const cv::Mat& input)
{
    // Phase 1: Validation + engine dims (brief lock)
    int net_h, net_w;
    float imgHeight, imgWidth;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        if (input.empty() || input.cols < 10 || input.rows < 10) {
            this->_logger.LogError("ANSSCRFDFD::Detect", "Invalid input image", __FILE__, __LINE__);
            return {};
        }
        if (!m_trtEngine) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "TensorRT engine not initialized", __FILE__, __LINE__);
            return {};
        }
        imgHeight = static_cast<float>(input.rows);
        imgWidth = static_cast<float>(input.cols);
        // Get and validate expected input dims
        auto inputDims = m_trtEngine->getInputDims();
        if (inputDims.empty() || inputDims[0].nbDims < 3) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Invalid input dimensions", __FILE__, __LINE__);
            return {};
        }
        net_h = inputDims[0].d[1];
        net_w = inputDims[0].d[2];
        // The engine must match the compiled-in INPUT_H/INPUT_W used by the
        // anchor-grid generation in generate_points().
        if (net_h != INPUT_H || net_w != INPUT_W) {
            this->_logger.LogFatal(
                "ANSSCRFDFD::Detect",
                "Engine input dims mismatch with configured INPUT_H/INPUT_W",
                __FILE__, __LINE__
            );
            return {};
        }
    }
    // Compute letterbox scale and symmetric padding (fully local math — no lock needed)
    const float w_r = static_cast<float>(net_w) / imgWidth;
    const float h_r = static_cast<float>(net_h) / imgHeight;
    const float r = std::min(w_r, h_r);
    const int new_unpad_w = static_cast<int>(imgWidth * r);
    const int new_unpad_h = static_cast<int>(imgHeight * r);
    const int pad_w = net_w - new_unpad_w; // >= 0
    const int pad_h = net_h - new_unpad_h; // >= 0
    const int dw = pad_w / 2;
    const int dh = pad_h / 2;
    SCRFDScaleParams scale_params;
    scale_params.ratio = r;
    scale_params.dw = dw;
    scale_params.dh = dh;
    scale_params.flag = true;
    // Phase 2: CUDA preprocessing + inference (mutex released — pool dispatches to idle GPU slot)
    std::vector<std::vector<cv::cuda::GpuMat>> inputs;
    bool usedNV12 = false;
    try {
        // Clear any sticky CUDA error from transient graph-capture failures
        cudaError_t priorErr = cudaGetLastError();
        if (priorErr != cudaSuccess) {
            this->_logger.LogWarn(
                "ANSSCRFDFD::Detect",
                std::string("Cleared prior CUDA error before SCRFD preprocessing: ")
                + cudaGetErrorString(priorErr),
                __FILE__, __LINE__);
        }
        // Try NV12 fast path first (fused NV12->RGB + center-padded letterbox)
        const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
        auto nv12 = m_nv12Helper.tryNV12(input, inferenceGpu, net_w, net_h,
            NV12PreprocessHelper::scrfdCenterLetterboxLauncher(dw, dh),
            _logger, "SCRFD");
        if (nv12.succeeded) {
            inputs = {{ std::move(nv12.gpuRGB) }};
            usedNV12 = true;
        }
        else if (nv12.useBgrFullRes) {
            // BGR full-res path — preprocess the full-res image instead
            // (fall through to standard BGR path with nv12.bgrFullResImg)
            // For simplicity, use the standard BGR path below with the original input
        }
        if (!usedNV12) {
            // CPU center-padded letterbox + BGR->RGB, then upload small image
            cv::Mat srcImg;
            if (input.channels() == 1) {
                cv::cvtColor(input, srcImg, cv::COLOR_GRAY2BGR);
            } else if (input.channels() == 3) {
                srcImg = input;
            } else {
                this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__);
                return {};
            }
            // CPU resize to unpadded size
            cv::Mat cpuResized;
            if (srcImg.rows != new_unpad_h || srcImg.cols != new_unpad_w) {
                cv::resize(srcImg, cpuResized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR);
            } else {
                cpuResized = srcImg;
            }
            // CPU center-pad to net_w x net_h
            cv::Mat cpuPadded(net_h, net_w, CV_8UC3, cv::Scalar(0, 0, 0));
            cpuResized.copyTo(cpuPadded(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
            // CPU BGR -> RGB
            cv::Mat cpuRGB;
            cv::cvtColor(cpuPadded, cpuRGB, cv::COLOR_BGR2RGB);
            // Upload small padded image to GPU
            cv::cuda::Stream stream;
            cv::cuda::GpuMat d_padded;
            d_padded.upload(cpuRGB, stream);
            stream.waitForCompletion();
            std::vector<cv::cuda::GpuMat> inputVec;
            inputVec.emplace_back(std::move(d_padded));
            inputs.emplace_back(std::move(inputVec));
        }
        m_nv12Helper.tickInference();
    }
    catch (const std::exception& e) {
        this->_logger.LogError(
            "ANSSCRFDFD::Detect",
            std::string("CUDA preprocessing failed: ") + e.what(),
            __FILE__, __LINE__
        );
        return {};
    }
    std::vector<std::vector<std::vector<float>>> featureVectors;
    try {
        if (!m_trtEngine->runInference(inputs, featureVectors)) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Inference failed", __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal(
            "ANSSCRFDFD::Detect",
            std::string("runInference exception: ") + e.what(),
            __FILE__, __LINE__
        );
        return {};
    }
    // Phase 3: Postprocessing (brief lock — generate_bboxes_kps uses center_points)
    std::vector<Object> filteredFaceObjects;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try {
            std::vector<Object> proposedFaceObjects;
            this->generate_bboxes_kps(
                scale_params,
                proposedFaceObjects,
                featureVectors[0],
                _modelConfig.detectionScoreThreshold,
                imgHeight,
                imgWidth
            );
            // NMS with a hard cap of 400 retained detections.
            this->nms_bboxes_kps(
                proposedFaceObjects,
                filteredFaceObjects,
                _modelConfig.modelMNSThreshold,
                400
            );
        }
        catch (const std::exception& e) {
            this->_logger.LogError(
                "ANSSCRFDFD::Detect",
                std::string("Post-processing failed: ") + e.what(),
                __FILE__, __LINE__
            );
            return {};
        }
    }
    return filteredFaceObjects;
}
// Legacy full-frame inference path: optionally derives ROIs from motion
// detection (useDynamicImage), runs Detect() on each ROI, squares up each
// valid face box and attaches a 112px crop.
// Fixes:
//  - The face-validity check used the comma operator
//    (`if (isValidFace(poly, box), 27)`), making it unconditionally true;
//    the max-NME argument 27 is now passed to isValidFace as intended
//    (matching the other call sites).
//  - ROI clamping ignored the x/y offset, so an ROI extending past the image
//    edge would make `im(activeROI)` throw; width/height are now clamped
//    relative to the ROI origin.
//  - Collapsed the redundant `size() <= 0 || empty()` check.
std::vector<Object> ANSSCRFDFD::TensorRTInferene(const cv::Mat& inputImage, const std::string& camera_id, bool useDynamicImage) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<Object> output;
    if (!_licenseValid) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Invalid license", __FILE__, __LINE__);
        return output;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Model is not initialized", __FILE__, __LINE__);
        return output;
    }
    try
    {
        // 0. Validate and (for small crops) replicate-pad the input
        if (inputImage.empty()) return output;
        if ((inputImage.cols < 10) || (inputImage.rows < 10)) return output;
        bool croppedFace = false; // true when the image is a tight face crop
        cv::Mat im = inputImage.clone();
        int orginalHeight = im.rows;
        int orginalWidth = im.cols;
        if ((inputImage.size[0] <= 300) || (inputImage.size[1] <= 300)) croppedFace = true;
        if (croppedFace) cv::copyMakeBorder(inputImage, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
        // 1. Build the list of ROIs to scan
        std::vector<cv::Rect> activeROIs;
        if (useDynamicImage) {
            // Merge fresh motion detections with previously retained ones;
            // too many fresh detections (>= 12) are treated as noise.
            std::vector<Object> movementResults = DetectMovement(im, camera_id);
            std::vector<Object> movementObjects;
            if ((!movementResults.empty()) && ((movementResults.size() < 12)))
            {
                movementObjects.insert(movementObjects.end(), movementResults.begin(), movementResults.end());
                if (!_movementObjects.empty()) movementObjects.insert(movementObjects.end(), _movementObjects.begin(), _movementObjects.end());
            }
            else {
                if (!_movementObjects.empty()) movementObjects.insert(movementObjects.end(), _movementObjects.begin(), _movementObjects.end());
            }
            activeROIs.clear();
            if (!movementObjects.empty()) {
                std::vector<cv::Rect> localActiveROIs = GenerateFixedROIs(movementObjects, _modelConfig.inpHeight, _modelConfig.inpWidth, im.cols, im.rows);
                activeROIs.insert(activeROIs.end(), localActiveROIs.begin(), localActiveROIs.end());
            }
            else {
                activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // Use the original image
            }
            if (activeROIs.empty())
            {
                return output;
            }
            UpdateAndFilterDetectionObjects(_movementObjects, 80);
        }
        else {
            activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // Use the original image
        }
#ifdef FACEDEBUG
        cv::Mat draw = im.clone();
        for (int i = 0; i < movementObjects.size(); i++) {
            cv::rectangle(draw, movementObjects[i].box, cv::Scalar(0, 255, 255), 2);
        }
        for (int i = 0; i < activeROIs.size(); i++) {
            cv::rectangle(draw, activeROIs[i], cv::Scalar(0, 0, 255), 2);
        }
#endif
        // 2. Detect faces inside each ROI and map results back to full-frame coords
        for (int j = 0; j < activeROIs.size(); j++) {
            cv::Rect activeROI = activeROIs[j];
            activeROI.x = std::max(0, activeROI.x);
            activeROI.y = std::max(0, activeROI.y);
            // Clamp relative to the ROI origin so im(activeROI) never exceeds
            // the image bounds (cv::Mat ROI would throw otherwise).
            activeROI.width = std::min(im.cols - activeROI.x, activeROI.width);
            activeROI.height = std::min(im.rows - activeROI.y, activeROI.height);
            cv::Mat frame = im(activeROI).clone();
            std::vector<Object> filteredFaceObjects = Detect(frame);
            for (int i = 0; i < filteredFaceObjects.size(); i++)
            {
                if (filteredFaceObjects[i].confidence > _modelConfig.detectionScoreThreshold) {
#ifdef FACEDEBUG
                    cv::Rect faceRect;
                    faceRect.x = filteredFaceObjects[i].box.x + activeROI.x;
                    faceRect.y = filteredFaceObjects[i].box.y + activeROI.y;
                    faceRect.width = filteredFaceObjects[i].box.width;
                    faceRect.height = filteredFaceObjects[i].box.height;
                    cv::rectangle(draw, faceRect, cv::Scalar(225, 255, 0), 2);
#endif
                    // Check if the face is valid (fixed: 27 is now an argument,
                    // previously it was sequenced with the comma operator and
                    // the check was a no-op).
                    if (isValidFace(filteredFaceObjects[i].polygon, filteredFaceObjects[i].box, 27))
                    {
                        Object result;
                        // 0. Face bounding box in full-frame coordinates
                        int x_min = filteredFaceObjects[i].box.x + activeROI.x;
                        int y_min = filteredFaceObjects[i].box.y + activeROI.y;
                        int x_max = filteredFaceObjects[i].box.width + filteredFaceObjects[i].box.x + activeROI.x;
                        int y_max = filteredFaceObjects[i].box.height + filteredFaceObjects[i].box.y + activeROI.y;
#ifdef FACEDEBUG
                        // draw landmarks
                        for (cv::Point2f point : filteredFaceObjects[i].polygon)
                        {
                            cv::circle(draw, cv::Point(point.x + activeROI.x, point.y + activeROI.y), 2, cv::Scalar(0, 255, 0), -1);
                        }
#endif
                        if (croppedFace)
                        {
                            // Undo the 200px replicate-padding offset
                            x_min = std::max(0, x_min - 200);
                            y_min = std::max(0, y_min - 200);
                            x_max = std::min(orginalWidth, x_max - 200);
                            y_max = std::min(orginalHeight, y_max - 200);
                        }
                        // 1. Centered coordinates and half-dimensions
                        int width_half = abs((x_max - x_min) / 2);
                        int height_half = abs((y_max - y_min) / 2);
                        int xc = x_min + width_half;
                        int yc = y_min + height_half;
                        int c = std::max(width_half, height_half);
                        // 2. Square box centred at (xc, yc), clamped to the image
                        int x1_new = std::max(0, xc - c);
                        int y1_new = std::max(0, yc - c);
                        int x2_new = std::min(orginalWidth, xc + c);
                        int y2_new = std::min(orginalHeight, yc + c);
                        // 3. Fill in the result object
                        result.classId = 0;
                        result.className = "Face";
                        result.confidence = filteredFaceObjects[i].confidence;
                        result.box.x = x1_new;
                        result.box.y = y1_new;
                        result.box.width = x2_new - x1_new;
                        result.box.height = y2_new - y1_new;
                        result.mask = GetCroppedFaceScale(inputImage, x1_new, y1_new, x2_new, y2_new, 112);
                        result.kps = filteredFaceObjects[i].kps; // landmarks as array of x,y,x,y...
                        result.cameraId = camera_id;
                        if (!result.mask.empty())
                        {
                            output.push_back(result);
                            if (useDynamicImage) {
                                // Retain the detection so motion ROIs keep covering it
                                result.extraInfo = "0";
                                // NOTE(review): this predicate ignores existingObj and
                                // re-checks the whole list each time, so any intersection
                                // matches the FIRST element — confirm intended semantics.
                                auto it = std::find_if(_movementObjects.begin(), _movementObjects.end(),
                                    [&](Object& existingObj) {
                                        return ContainsIntersectingObject(_movementObjects, result);
                                    });
                                if (it != _movementObjects.end()) {
                                    *it = result; // Replace existing object with the new one
                                }
                                else {
                                    // If not found, add the new object to the list
                                    _movementObjects.push_back(result);
                                }
                            }
                        }
                    }
                }
            }
            frame.release();
        }
        im.release();
#ifdef FACEDEBUG
        cv::imshow("Combined Detected Areas", draw);
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    }
    catch (std::exception& e)
    {
        this->_logger.LogFatal("ANSSCRFDFD::TensorRTInferene", e.what(), __FILE__, __LINE__);
        return output;
    }
}
// Destructor: release all detector resources; never let an exception escape.
ANSSCRFDFD::~ANSSCRFDFD() {
    try {
        Destroy();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Destroy", e.what(), __FILE__, __LINE__);
    }
}
// Tear down detector state: clears flags and the model path, destroys the
// NV12 helper, and returns the TensorRT engine — either back to the shared
// pool (when pooled) or by dropping the locally owned reference.
bool ANSSCRFDFD::Destroy() {
    try {
        _isInitialized = false;
        _licenseValid = false;
        _modelFilePath.clear();
        m_nv12Helper.destroy();
        if (m_usingSharedPool) {
            // Hand the slot back to the pool before dropping our reference.
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_trtEngine.reset();
            m_usingSharedPool = false;
        }
        else if (m_trtEngine) {
            m_trtEngine.reset();
        }
        return true;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// SCRFDFD implementation (private)
// Letterbox resize: scale `mat` uniformly to fit inside target_width x
// target_height, centre it on a black canvas, and record the scale ratio and
// padding offsets in `scale_params` for later coordinate de-mapping.
void ANSSCRFDFD::resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
                                int target_height, int target_width,
                                SCRFDScaleParams& scale_params)
{
    if (mat.empty()) return;

    const int srcH = mat.rows;
    const int srcW = mat.cols;

    // Start from a black canvas of the target size.
    mat_rs = cv::Mat(target_height, target_width, CV_8UC3, cv::Scalar(0, 0, 0));

    // Uniform scale (new / old) that fits the source inside the target.
    const float scale = std::min(
        static_cast<float>(target_width) / static_cast<float>(srcW),
        static_cast<float>(target_height) / static_cast<float>(srcH));

    // Scaled (unpadded) size — truncation matches the original floor behavior.
    const int unpadW = static_cast<int>(static_cast<float>(srcW) * scale);
    const int unpadH = static_cast<int>(static_cast<float>(srcH) * scale);

    // Symmetric padding offsets (both >= 0).
    const int offX = (target_width - unpadW) / 2;
    const int offY = (target_height - unpadH) / 2;

    // Resize and paste into the centre of the canvas.
    cv::Mat scaled;
    cv::resize(mat, scaled, cv::Size(unpadW, unpadH));
    scaled.copyTo(mat_rs(cv::Rect(offX, offY, unpadW, unpadH)));

    // Record the parameters needed to map detections back to source coords.
    scale_params.ratio = scale;
    scale_params.dw = offX;
    scale_params.dh = offY;
    scale_params.flag = true;
}
void ANSSCRFDFD::generate_points(const int target_height, const int target_width)
{
if (center_points_is_update) return;
// 8, 16, 32
for (auto stride : feat_stride_fpn)
{
unsigned int num_grid_w = target_width / stride;
unsigned int num_grid_h = target_height / stride;
// y
for (unsigned int i = 0; i < num_grid_h; ++i)
{
// x
for (unsigned int j = 0; j < num_grid_w; ++j)
{
// num_anchors, col major
for (unsigned int k = 0; k < num_anchors; ++k)
{
SCRFDPoint point;
point.cx = (float)j;
point.cy = (float)i;
point.stride = (float)stride;
center_points[stride].push_back(point);
}
}
}
}
center_points_is_update = true;
}
void ANSSCRFDFD::generate_bboxes_kps(const SCRFDScaleParams& scale_params,
	std::vector<Object>& bbox_kps_collection,
	std::vector<std::vector<float>>& output_tensors,
	float score_threshold,
	float img_height,
	float img_width)
{
	// Decodes the raw SCRFD output tensors into face detections (with
	// landmarks when use_kps is set). Expected tensor order:
	// score_8, score_16, score_32, bbox_8, bbox_16, bbox_32
	// [, kps_8, kps_16, kps_32].
	// Bind by reference: these tensors are large (e.g. [1,12800,4]) and the
	// previous by-value initialisation copied every one of them per frame.
	auto& score_8 = output_tensors.at(0);  // e.g [1,12800,1]
	auto& score_16 = output_tensors.at(1); // e.g [1,3200,1]
	auto& score_32 = output_tensors.at(2); // e.g [1,800,1]
	auto& bbox_8 = output_tensors.at(3);   // e.g [1,12800,4]
	auto& bbox_16 = output_tensors.at(4);  // e.g [1,3200,4]
	auto& bbox_32 = output_tensors.at(5);  // e.g [1,800,4]
	// Make sure the cached anchor-centre grid exists for the fixed input size.
	const float input_height = INPUT_H; // e.g 640
	const float input_width = INPUT_W;  // e.g 640
	this->generate_points(input_height, input_width);
	bbox_kps_collection.clear();
	if (use_kps)
	{
		auto& kps_8 = output_tensors.at(6);  // e.g [1,12800,10]
		auto& kps_16 = output_tensors.at(7); // e.g [1,3200,10]
		auto& kps_32 = output_tensors.at(8); // e.g [1,800,10]
		// Decode each FPN level with landmarks.
		this->generate_bboxes_kps_single_stride(scale_params, score_8, bbox_8, kps_8, 8, score_threshold,
			img_height, img_width, bbox_kps_collection);
		this->generate_bboxes_kps_single_stride(scale_params, score_16, bbox_16, kps_16, 16, score_threshold,
			img_height, img_width, bbox_kps_collection);
		this->generate_bboxes_kps_single_stride(scale_params, score_32, bbox_32, kps_32, 32, score_threshold,
			img_height, img_width, bbox_kps_collection);
	}
	else
	{
		// Decode each FPN level, boxes only.
		this->generate_bboxes_single_stride(scale_params, score_8, bbox_8, 8, score_threshold,
			img_height, img_width, bbox_kps_collection);
		this->generate_bboxes_single_stride(scale_params, score_16, bbox_16, 16, score_threshold,
			img_height, img_width, bbox_kps_collection);
		this->generate_bboxes_single_stride(scale_params, score_32, bbox_32, 32, score_threshold,
			img_height, img_width, bbox_kps_collection);
	}
}
void ANSSCRFDFD::generate_bboxes_single_stride(
const SCRFDScaleParams& scale_params, std::vector<float>& score_pred, std::vector<float>& bbox_pred,
unsigned int stride, float score_threshold, float img_height, float img_width,
std::vector<Object>& bbox_kps_collection)
{
unsigned int nms_pre_ = (stride / 8) * nms_pre; // 1 * 1000,2*1000,...
nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre;
const unsigned int num_points = score_pred.size();// stride_dims.at(1); // 12800
const float* score_ptr = score_pred.data(); // [1,12800,1]
const float* bbox_ptr = bbox_pred.data(); // [1,12800,4]
float ratio = scale_params.ratio;
int dw = scale_params.dw;
int dh = scale_params.dh;
unsigned int count = 0;
auto& stride_points = center_points[stride];
for (unsigned int i = 0; i < num_points; ++i)
{
const float cls_conf = score_ptr[i];
if (cls_conf < score_threshold) continue; // filter
auto& point = stride_points.at(i);
const float cx = point.cx; // cx
const float cy = point.cy; // cy
const float s = point.stride; // stride
// bbox
const float* offsets = bbox_ptr + i * 4;
float l = offsets[0]; // left
float t = offsets[1]; // top
float r = offsets[2]; // right
float b = offsets[3]; // bottom
Object box_kps;
float x1 = ((cx - l) * s - (float)dw) / ratio; // cx - l x1
float y1 = ((cy - t) * s - (float)dh) / ratio; // cy - t y1
float x2 = ((cx + r) * s - (float)dw) / ratio; // cx + r x2
float y2 = ((cy + b) * s - (float)dh) / ratio; // cy + b y2
box_kps.box.x = std::max(0.f, x1);
box_kps.box.y = std::max(0.f, y1);
box_kps.box.width = std::min(img_width - 1.f, x2 - x1);
box_kps.box.height = std::min(img_height - 1.f, y2 - y1);
box_kps.confidence = cls_conf;
box_kps.classId = 0;
box_kps.className = "face";
bbox_kps_collection.push_back(box_kps);
count += 1; // limit boxes for nms.
if (count > max_nms)
break;
}
if (bbox_kps_collection.size() > nms_pre_)
{
std::sort(
bbox_kps_collection.begin(), bbox_kps_collection.end(),
[](const Object& a, const Object& b)
{ return a.confidence > b.confidence; }
); // sort inplace
// trunc
bbox_kps_collection.resize(nms_pre_);
}
}
void ANSSCRFDFD::generate_bboxes_kps_single_stride(
	const SCRFDScaleParams& scale_params, std::vector<float>& score_pred, std::vector<float>& bbox_pred,
	std::vector<float>& kps_pred, unsigned int stride, float score_threshold, float img_height,
	float img_width, std::vector<Object>& bbox_kps_collection)
{
	// Decodes one FPN level WITH 5-point landmarks: converts per-anchor
	// distance offsets into image-space boxes and keypoints (undoing the
	// letterbox pad/scale) and appends detections above score_threshold
	// to bbox_kps_collection.
	// Per-level pre-NMS cap scales with the stride but never drops below nms_pre.
	unsigned int nms_pre_ = (stride / 8) * nms_pre; // 1 * 1000,2*1000,...
	nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre;
	const unsigned int num_points = score_pred.size(); // anchors at this stride, e.g. 12800
	const float* score_ptr = score_pred.data(); // [1,12800,1]
	const float* bbox_ptr = bbox_pred.data(); // [1,12800,4]
	const float* kps_ptr = kps_pred.data(); // [1,12800,10] = 5 (x,y) landmark pairs
	// Letterbox parameters recorded by resize_unscale.
	float ratio = scale_params.ratio;
	int dw = scale_params.dw;
	int dh = scale_params.dh;
	unsigned int count = 0;
	auto& stride_points = center_points[stride];
	for (unsigned int i = 0; i < num_points; ++i)
	{
		const float cls_conf = score_ptr[i];
		if (cls_conf < score_threshold) continue; // filter out low-confidence anchors
		auto& point = stride_points.at(i);
		const float cx = point.cx; // anchor grid x
		const float cy = point.cy; // anchor grid y
		const float s = point.stride; // stride in pixels
		// bbox offsets are distances (in stride units) from the anchor centre.
		const float* offsets = bbox_ptr + i * 4;
		float l = offsets[0]; // left
		float t = offsets[1]; // top
		float r = offsets[2]; // right
		float b = offsets[3]; // bottom
		Object box_kps;
		// Back-project to original image coordinates: scale by stride,
		// subtract the letterbox padding, divide by the resize ratio.
		float x1 = ((cx - l) * s - (float)dw) / ratio; // cx - l x1
		float y1 = ((cy - t) * s - (float)dh) / ratio; // cy - t y1
		float x2 = ((cx + r) * s - (float)dw) / ratio; // cx + r x2
		float y2 = ((cy + b) * s - (float)dh) / ratio; // cy + b y2
		box_kps.box.x = (int)std::max(0.f, x1);
		box_kps.box.y = (int)std::max(0.f, y1);
		box_kps.box.width = (int)std::min(img_width - 1.f, x2 - x1);
		box_kps.box.height = (int)std::min(img_height - 1.f, y2 - y1);
		box_kps.confidence = cls_conf;
		box_kps.classId = 0;
		box_kps.className = "face";
		// Landmarks: 5 (x,y) pairs, decoded the same way as the box corners
		// and clamped to the image bounds.
		const float* kps_offsets = kps_ptr + i * 10;
		for (unsigned int j = 0; j < 10; j += 2)
		{
			cv::Point2f kps;
			float kps_l = kps_offsets[j];
			float kps_t = kps_offsets[j + 1];
			float kps_x = ((cx + kps_l) * s - (float)dw) / ratio; // cx + l x
			float kps_y = ((cy + kps_t) * s - (float)dh) / ratio; // cy + t y
			kps.x = std::min(std::max(0.f, kps_x), img_width - 1.f);
			kps.y = std::min(std::max(0.f, kps_y), img_height - 1.f);
			box_kps.kps.push_back(kps.x);
			box_kps.kps.push_back(kps.y);
			box_kps.polygon.push_back(kps); // landmarks also exposed as polygon points
		}
		bbox_kps_collection.push_back(box_kps);
		// Hard cap on the number of candidates handed to NMS.
		count += 1;
		if (count > max_nms)
			break;
	}
	if (bbox_kps_collection.size() > nms_pre_)
	{
		// Keep only the highest-confidence candidates for NMS.
		std::sort(
			bbox_kps_collection.begin(), bbox_kps_collection.end(),
			[](const Object& a, const Object& b)
			{ return a.confidence > b.confidence; }
		); // sort inplace
		// trunc
		bbox_kps_collection.resize(nms_pre_);
	}
}
float ANSSCRFDFD::getIouOfObjects(const Object& a, const Object& b) {
// Retrieve the bounding boxes
const cv::Rect& boxA = a.box;
const cv::Rect& boxB = b.box;
// Compute the coordinates of the intersection rectangle
int inner_x1 = std::max(boxA.x, boxB.x);
int inner_y1 = std::max(boxA.y, boxB.y);
int inner_x2 = std::min(boxA.x + boxA.width, boxB.x + boxB.width);
int inner_y2 = std::min(boxA.y + boxA.height, boxB.y + boxB.height);
// Calculate width and height of the intersection
int inner_w = inner_x2 - inner_x1;
int inner_h = inner_y2 - inner_y1;
// If there's no overlap, return 0
if (inner_w <= 0 || inner_h <= 0) {
return 0.0f;
}
// Calculate the area of the intersection
float inner_area = static_cast<float>(inner_w * inner_h);
// Calculate the areas of the two boxes
float areaA = static_cast<float>(boxA.width * boxA.height);
float areaB = static_cast<float>(boxB.width * boxB.height);
// Calculate the union area
float union_area = areaA + areaB - inner_area;
// Avoid division by zero and return IoU
if (union_area <= 0.0f) {
return 0.0f;
}
return inner_area / union_area;
}
void ANSSCRFDFD::nms_bboxes_kps(std::vector<Object>& input,
	std::vector<Object>& output,
	float iou_threshold, unsigned int topk)
{
	// Greedy hard-NMS: sort candidates by confidence (in place), keep the
	// best box of each overlapping cluster, and stop after topk survivors.
	// The previous version copied every suppressed box into a scratch
	// vector per kept detection but only ever used the representative;
	// that buffer (and its allocations) is removed here.
	if (input.empty()) return;
	std::sort(
		input.begin(), input.end(),
		[](const Object& a, const Object& b)
		{ return a.confidence > b.confidence; }
	);
	const unsigned int box_num = static_cast<unsigned int>(input.size());
	std::vector<char> suppressed(box_num, 0);
	unsigned int kept = 0;
	for (unsigned int i = 0; i < box_num; ++i)
	{
		if (suppressed[i]) continue;
		suppressed[i] = 1;
		// Suppress every remaining box that overlaps this one too much.
		for (unsigned int j = i + 1; j < box_num; ++j)
		{
			if (suppressed[j]) continue;
			if (getIouOfObjects(input[i], input[j]) > iou_threshold)
				suppressed[j] = 1;
		}
		output.push_back(input[i]);
		// Keep at most topk detections.
		if (++kept >= topk)
			break;
	}
}
}