// SCRFDFaceDetector.cpp — ANSSCRFDFD (SCRFD face detector) implementation.
#include "SCRFDFaceDetector.h"
|
|
#include "ANSGpuFrameRegistry.h"
|
|
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
|
#include "Utility.h"
|
|
#include <chrono>
|
|
//#define FNS_DEBUG
|
|
namespace ANSCENTER {
|
|
// Initialization function with memory leak handling
|
|
bool ANSSCRFDFD::Initialize(std::string licenseKey, ModelConfig modelConfig, const std::string& modelZipFilePath,
    const std::string& modelZipPassword, std::string& labelMap) {
    // Purpose: (re)initialize the SCRFD face detector — base-class setup, model-file
    // resolution, TensorRT engine acquisition via the shared engine pool, and SCRFD
    // head configuration (FPN strides / anchors / landmarks).
    //
    // Clean up existing resources before reinitialization. When an engine is already
    // loaded, Destroy() is skipped so the pooled engine slot is not released and
    // re-acquired on every re-Initialize call.
    const bool engineAlreadyLoaded = _isInitialized && m_trtEngine != nullptr;
    if (!engineAlreadyLoaded) Destroy();
    // Call base class Initialize
    bool result = ANSFDBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    if (!result) return false;
    labelMap = "Face"; // single-class detector: the label map is just "Face"
    _licenseValid = true;
    try {
        // SCRFD runs on a fixed 640x640 network input regardless of caller config.
        _modelConfig = modelConfig;
        _modelConfig.inpHeight = 640;
        _modelConfig.inpWidth = 640;
        _modelConfig.modelType = ModelType::FACEDETECT;
        _modelConfig.detectionType = DetectionType::FACEDETECTOR;
        std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");

        if (!std::filesystem::exists(onnxfile)) {
            this->_logger.LogError("ANSSCRFDFD::Initialize. Model scrfdface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
            return false;
        }
        _modelFilePath = onnxfile;
        // Initialize TensorRT via shared pool
        if (!m_trtEngine) {
            // Engine build/runtime options mirror the model configuration.
            m_options.precision = ANSCENTER::Precision::FP16;
            m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
            m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
            m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
            m_options.maxInputHeight = _modelConfig.maxInputHeight;
            m_options.minInputHeight = _modelConfig.minInputHeight;
            m_options.optInputHeight = _modelConfig.optInputHeight;
            m_options.maxInputWidth = _modelConfig.maxInputWidth;
            m_options.minInputWidth = _modelConfig.minInputWidth;
            m_options.optInputWidth = _modelConfig.optInputWidth;
            m_options.calibrationBatchSize = 1;

            // Pool key: engines are shared across detector instances with the same
            // model path / precision / max batch size.
            m_poolKey = { _modelFilePath,
                          static_cast<int>(m_options.precision),
                          m_options.maxBatchSize };
            m_trtEngine = EnginePoolManager<float>::instance().acquire(
                m_poolKey, m_options, _modelFilePath,
                SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
            m_usingSharedPool = (m_trtEngine != nullptr);

            if (!m_trtEngine) {
                this->_logger.LogError("ANSSCRFDFD::Initialize. Unable to build or load TensorRT engine.", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        // SCRFD head layout: 3 FPN levels (strides 8/16/32), 2 anchors per
        // location, 5-point landmark (kps) output enabled.
        fmc = 3;
        feat_stride_fpn = { 8, 16, 32 };
        num_anchors = 2;
        use_kps = true;
        _movementObjects.clear();
        _retainDetectedFaces = 0;
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSSCRFDFD::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
|
|
bool ANSSCRFDFD::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
|
|
try {
|
|
// We need to get the _modelFolder
|
|
bool result = ANSFDBase::LoadModel(modelZipFilePath, modelZipPassword);
|
|
if (!result) return false;
|
|
const bool engineAlreadyLoaded = _isInitialized && m_trtEngine != nullptr;
|
|
_modelConfig.modelType = ModelType::FACEDETECT;
|
|
_modelConfig.detectionType = DetectionType::FACEDETECTOR;
|
|
_modelConfig.inpHeight = 640;
|
|
_modelConfig.inpWidth = 640;
|
|
_movementObjects.clear();
|
|
_retainDetectedFaces = 0;
|
|
std::string onnxfile = CreateFilePath(_modelFolder, "scrfdface.onnx");
|
|
|
|
if (!std::filesystem::exists(onnxfile)) {
|
|
this->_logger.LogError("ANSSCRFDFD::Initialize. Model scrfdface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
_modelFilePath = onnxfile;
|
|
if (!m_trtEngine) {
|
|
m_options.precision = ANSCENTER::Precision::FP16;
|
|
m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
|
|
m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
|
|
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
|
|
m_options.maxInputHeight = _modelConfig.maxInputHeight;
|
|
m_options.minInputHeight = _modelConfig.minInputHeight;
|
|
m_options.optInputHeight = _modelConfig.optInputHeight;
|
|
m_options.maxInputWidth = _modelConfig.maxInputWidth;
|
|
m_options.minInputWidth = _modelConfig.minInputWidth;
|
|
m_options.optInputWidth = _modelConfig.optInputWidth;
|
|
|
|
m_poolKey = { _modelFilePath,
|
|
static_cast<int>(m_options.precision),
|
|
m_options.maxBatchSize };
|
|
m_trtEngine = EnginePoolManager<float>::instance().acquire(
|
|
m_poolKey, m_options, _modelFilePath,
|
|
SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
|
|
m_usingSharedPool = (m_trtEngine != nullptr);
|
|
|
|
if (!m_trtEngine) {
|
|
this->_logger.LogError("ANSSCRFDFD::LoadModel. Unable to build or load TensorRT engine.", _modelFilePath, __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
fmc = 3;
|
|
feat_stride_fpn = { 8, 16, 32 };
|
|
num_anchors = 2;
|
|
use_kps = true;
|
|
_movementObjects.clear();
|
|
_retainDetectedFaces = 0;
|
|
_isInitialized = true;
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
bool ANSSCRFDFD::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig, std::string modelName, std::string className, const std::string& modelFolder, std::string& labelMap) {
|
|
try {
|
|
// We need to get the _modelFolder
|
|
bool result = ANSFDBase::LoadModelFromFolder(licenseKey, modelConfig, modelName, className, modelFolder, labelMap);
|
|
if (!result) return false;
|
|
std::string _modelName = modelName;
|
|
if (_modelName.empty()) {
|
|
_modelName = "scrfdface";
|
|
}
|
|
_modelConfig.inpHeight = 640;
|
|
_modelConfig.inpWidth = 640;
|
|
_movementObjects.clear();
|
|
_retainDetectedFaces = 0;
|
|
std::string modelFullName = _modelName + ".onnx";
|
|
std::string onnxfile = CreateFilePath(_modelFolder, modelFullName);
|
|
if (std::filesystem::exists(onnxfile)) {
|
|
_modelFilePath = onnxfile;
|
|
this->_logger.LogDebug("ANSSCRFDFD::LoadModel. Loading scrfdface weight", _modelFilePath, __FILE__, __LINE__);
|
|
}
|
|
else {
|
|
this->_logger.LogError("ANSSCRFDFD::LoadModel. Model scrfdface.onnx file is not exist", _modelFilePath, __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSSCRFDFD::LoadModel", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
bool ANSSCRFDFD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
|
if (!FileExist(_modelFilePath)) {
|
|
optimizedModelFolder = "";
|
|
return false;
|
|
}
|
|
optimizedModelFolder = GetParentFolder(_modelFilePath);
|
|
// Check if the engine already exists to avoid reinitializing
|
|
if (!m_trtEngine) {
|
|
// Fixed batch size of 1 for this model
|
|
m_options.optBatchSize = _modelConfig.gpuOptBatchSize;
|
|
m_options.maxBatchSize = _modelConfig.gpuMaxBatchSize;
|
|
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
|
|
m_options.maxInputHeight = _modelConfig.maxInputHeight;
|
|
m_options.minInputHeight = _modelConfig.minInputHeight;
|
|
m_options.optInputHeight = _modelConfig.optInputHeight;
|
|
m_options.maxInputWidth = _modelConfig.maxInputWidth;
|
|
m_options.minInputWidth = _modelConfig.minInputWidth;
|
|
m_options.optInputWidth = _modelConfig.optInputWidth;
|
|
m_options.engineFileDir = optimizedModelFolder;
|
|
// Use FP16 or FP32 precision based on the input flag
|
|
m_options.precision = Precision::FP16;
|
|
// Create the TensorRT inference engine
|
|
m_trtEngine = std::make_shared<Engine<float>>(m_options);
|
|
}
|
|
// Build the TensorRT engine
|
|
auto succ = m_trtEngine->buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
|
|
if (!succ) {
|
|
const std::string errMsg =
|
|
"Error: Unable to build the TensorRT engine. "
|
|
"Try increasing TensorRT log severity to kVERBOSE.";
|
|
this->_logger.LogError("ANSSCRFDFD::OptimizeModel", errMsg, __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
std::string optimizedFaceAttributeModelFolder;
|
|
bool result = ANSFDBase::OptimizeModel(fp16, optimizedFaceAttributeModelFolder);
|
|
return result;
|
|
}
|
|
|
|
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    // Purpose: camera-less convenience overload. Previously duplicated the
    // camera_id overload's body verbatim; now delegates to it with the same
    // synthetic "CustomCam" id, so the two paths cannot drift apart.
    return RunInference(input, "CustomCam", useDynamicImage, validateFace, facelivenessCheck);
}
|
|
std::vector<Object> ANSSCRFDFD::RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
    // Detect faces first; when liveness checking is requested, run the liveness
    // validator over the raw detections and return the validated set instead.
    std::vector<Object> faces = Inference(input, camera_id, useDynamicImage, validateFace);
    if (!facelivenessCheck) {
        return faces;
    }
    return ValidateLivenessFaces(input, faces, camera_id);
}
|
|
std::vector<Object> ANSSCRFDFD::Inference(const cv::Mat& input,
    const std::string& camera_id,
    bool useDynamicImage,
    bool validateFace)
{
    // Purpose: detect faces in `input` and produce, for each accepted face, a
    // 112x112 aligned face image ("mask") — via GPU NV12 affine warp when the
    // current frame is available as NV12 on the GPU, otherwise via CPU warp.
    // The recursive mutex is held only for short validation/preprocessing
    // phases so GPU inference from multiple callers can overlap.
    //
    // Phase 1: Validation + image preprocessing (brief lock)
    cv::Mat im;
    bool croppedFace;
    float scoreThreshold;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);

        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return {};
        }

        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return {};
        }

        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return {};
        }

        // useDynamicImage == false means the caller passed a pre-cropped face.
        croppedFace = !useDynamicImage;
        scoreThreshold = _modelConfig.detectionScoreThreshold;

        if (croppedFace) {
            // Pad the crop with replicated context so the detector sees margin
            // around the face, then cap the padded image height at 1280.
            constexpr int border = 200;
            cv::copyMakeBorder(input, im, border, border, border, border, cv::BORDER_REPLICATE);

            if (im.rows > 1280) {
                const float aspectRatio = static_cast<float>(im.cols) / static_cast<float>(im.rows);
                constexpr int newHeight = 1280;
                const int newWidth = static_cast<int>(newHeight * aspectRatio);
                cv::resize(im, im, cv::Size(newWidth, newHeight));
            }
        }
        else {
            // Shallow cv::Mat header copy — no pixel copy; `input` outlives `im`
            // for the duration of this call.
            im = input;
        }
    }

    // Phase 2: Detect faces (mutex released — Detect manages its own brief locks around GPU inference)
    std::vector<Object> detectedFaces;
    try {
        detectedFaces = Detect(im);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::Inference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        _logger.LogFatal("ANSSCRFDFD::Inference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }

    if (detectedFaces.empty()) {
        return {};
    }

    // Phase 3: Process detected faces (operates on per-call local data — no shared state)
    const int originalWidth = croppedFace ? input.cols : 0;
    const int originalHeight = croppedFace ? input.rows : 0;
    constexpr int border = 200;       // must match the Phase-1 padding above
    constexpr float borderF = 200.0f; // same offset for landmark adjustment

    // NV12 affine warp: precompute scale factors (display-res → full-res NV12)
    float nv12ScaleX = 1.f, nv12ScaleY = 1.f;
    int nv12FullW = 0, nv12FullH = 0;
    bool nv12AffineAvailable = false;
    const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
    if (!croppedFace && m_nv12Helper.isCudaContextHealthy(_logger, "SCRFD")) {
        // pixelFormat 23 — presumably the NV12 pixel-format enum used by the
        // frame producer; confirm against ANSGpuFrameRegistry's producer.
        auto* gpuData = tl_currentGpuFrame();
        if (gpuData && gpuData->pixelFormat == 23 && gpuData->width > 0 && gpuData->height > 0) {
            nv12ScaleX = static_cast<float>(gpuData->width) / im.cols;
            nv12ScaleY = static_cast<float>(gpuData->height) / im.rows;
            nv12FullW = gpuData->width;
            nv12FullH = gpuData->height;
            nv12AffineAvailable = true;
        }
    }

    std::vector<Object> output;
    output.reserve(detectedFaces.size());

    for (auto& face : detectedFaces) {
        if (face.confidence <= scoreThreshold) {
            continue;
        }

        if (validateFace && !isValidFace(face.polygon, face.box, 27)) {
            continue;
        }

        // Get face mask — try NV12 affine warp first, fall back to CPU warpAffine
        cv::Mat mask;
        cv::cuda::GpuMat gpuMask;
        if (nv12AffineAvailable && face.polygon.size() == 5) {
            // Compute affine matrix on CPU (fast ~0.01ms).
            // Canonical 5-point landmark template scaled to 112x112 — built once.
            static const std::vector<cv::Point2f> kTemplate112 = []() {
                const std::vector<cv::Point2f> face_template = {
                    {0.34191607f, 0.46157411f}, {0.65653393f, 0.45983393f},
                    {0.50022500f, 0.64050536f}, {0.37097589f, 0.82469196f},
                    {0.63151696f, 0.82325089f}
                };
                std::vector<cv::Point2f> tpl;
                tpl.reserve(5);
                for (const auto& pt : face_template)
                    tpl.emplace_back(pt.x * 112.0f, pt.y * 112.0f);
                return tpl;
            }();

            cv::Mat affineMatrix = cv::estimateAffinePartial2D(
                face.polygon, kTemplate112);

            if (!affineMatrix.empty()) {
                auto nv12Face = m_nv12Helper.tryNV12AffineWarp(
                    im, inferenceGpu, affineMatrix, 112, 112,
                    nv12ScaleX, nv12ScaleY, _logger, "SCRFD");
                if (nv12Face.succeeded) {
                    // Log first successful NV12 affine warp.
                    // (Function-local static — effectively once per process,
                    // not once per instance as the original note implied.)
                    static bool s_nv12AffineLogged = false;
                    if (!s_nv12AffineLogged) {
                        s_nv12AffineLogged = true;
                        _logger.LogInfo("ANSSCRFDFD::Inference",
                            "NV12 affine warp ACTIVE: face aligned from " +
                            std::to_string(nv12FullW) + "x" + std::to_string(nv12FullH) +
                            " NV12 -> 112x112 BGR (display=" +
                            std::to_string(im.cols) + "x" + std::to_string(im.rows) +
                            " scaleX=" + std::to_string(nv12ScaleX) +
                            " scaleY=" + std::to_string(nv12ScaleY) + ")",
                            __FILE__, __LINE__);
                    }
                    mask = std::move(nv12Face.alignedFaceBGR);
                    gpuMask = std::move(nv12Face.gpuAlignedFace);
                }
            }
        }

        // CPU fallback
        if (mask.empty()) {
            mask = Preprocess(im, face.polygon, im);
        }

        if (mask.empty()) {
            _logger.LogError("ANSSCRFDFD::Inference", "Cannot get mask image", __FILE__, __LINE__);
            continue;
        }

        // Build result object
        Object result;
        result.classId = 0;
        result.className = "Face";
        result.confidence = face.confidence;
        result.cameraId = camera_id;
        result.polygon = std::move(face.polygon);
        result.mask = std::move(mask);
        result.gpuMask = std::move(gpuMask);

        if (croppedFace) {
            // Adjust coordinates for border offset (undo the Phase-1 padding),
            // clamping back into the original image.
            const int x1_new = std::max(0, face.box.x - border);
            const int y1_new = std::max(0, face.box.y - border);
            const int x2_new = std::min(originalWidth, face.box.x + face.box.width - border);
            const int y2_new = std::min(originalHeight, face.box.y + face.box.height - border);

            result.box = cv::Rect(x1_new, y1_new,
                std::max(0, x2_new - x1_new),
                std::max(0, y2_new - y1_new));

            // Shift landmarks by the same border offset.
            result.kps.reserve(face.kps.size());
            for (const auto& pt : face.kps) {
                result.kps.emplace_back(pt - borderF);
            }
        }
        else {
            result.box = face.box;
            result.kps = std::move(face.kps);
        }

        output.push_back(std::move(result));
    }

    return output;
}
|
|
|
|
|
|
|
|
std::vector<Object> ANSSCRFDFD::InferenceDynamic(const cv::Mat& input, const std::string& camera_id) {
    // Purpose: round-robin face detection over prioritized screen regions —
    // each call scans only the region selected by _currentPriority, so the full
    // frame is covered across several successive calls. Holds the mutex for the
    // entire call (this path mutates _currentPriority/_detectedArea).
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<Object> output;

    try {
        if (!_licenseValid) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
            return output;
        }

        if (!_isInitialized) {
            _logger.LogError("ANSSCRFDFD::Inference", "Model is not initialized", __FILE__, __LINE__);
            return output;
        }

        if (input.empty() || input.cols < 10 || input.rows < 10) {
            _logger.LogError("ANSSCRFDFD::Inference", "Invalid input image", __FILE__, __LINE__);
            return output;
        }

        // Small frames are treated as pre-cropped faces and padded with context.
        bool croppedFace = (input.cols <= 300 || input.rows <= 300);
        cv::Mat im;

        try {
            if (croppedFace) {
                cv::copyMakeBorder(input, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
            }
            else {
                im = input.clone();
            }
        }
        catch (const std::exception& e) {
            _logger.LogError("ANSSCRFDFD::Inference", std::string("copyMakeBorder failed: ") + e.what(), __FILE__, __LINE__);
            return output;
        }

        const int originalWidth = input.cols;
        const int originalHeight = input.rows;

        // createSlideScreens presumably (re)builds the prioritized region state
        // consumed by the get*PriorityRegion helpers below — confirm; the
        // returned `sections` value itself is not read here.
        std::vector<ImageSection> sections = createSlideScreens(im);
        int lowestPriority = getLowestPriorityRegion();
        // Advance the round-robin cursor; wrap back to the highest priority
        // after the last region (or on the first call when priority is 0).
        if ((_currentPriority > lowestPriority) || (_currentPriority == 0)) {
            _currentPriority = getHighestPriorityRegion();
        }
        else {
            _currentPriority++;
        }

        cv::Rect regionByPriority = getRegionByPriority(_currentPriority);
        _detectedArea = regionByPriority;

#ifdef FNS_DEBUG
        cv::Mat draw = input.clone();
        cv::rectangle(draw, _detectedArea, cv::Scalar(0, 0, 255), 2);
#endif

        std::vector<Object> filteredFaceObjects;

        // Skip degenerate regions (<= 50px on a side).
        if (_detectedArea.width > 50 && _detectedArea.height > 50) {
            try {
                cv::Mat activeFrame = im(_detectedArea).clone();
                std::vector<Object> rawDetections = Detect(activeFrame);
                // Map region-local detections back into full-image coordinates.
                filteredFaceObjects = AdjustDetectedBoundingBoxes(rawDetections, _detectedArea, im.size(), 0.9);

#ifdef FNS_DEBUG
                cv::imshow("Active Area", activeFrame);
                cv::waitKey(1);
#endif
            }
            catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference", std::string("Detect() failed: ") + e.what(), __FILE__, __LINE__);
                return output;
            }
        }

        for (const auto& face : filteredFaceObjects) {
            try {
                if (face.confidence < _modelConfig.detectionScoreThreshold)
                    continue;
#ifdef FNS_DEBUG
                // draw landmarks
                for (cv::Point2f point : face.polygon)
                {
                    cv::circle(draw, cv::Point(point.x + _detectedArea.x, point.y + _detectedArea.y), 2, cv::Scalar(0, 255, 0), -1);

                }
#endif
                if (!isValidFace(face.polygon, face.box, 27, _detectedArea.x, _detectedArea.y))
                    continue;

                Object result;
                int x_min = face.box.x;
                int y_min = face.box.y;
                int x_max = x_min + face.box.width;
                int y_max = y_min + face.box.height;

                if (croppedFace) {
                    // Undo the 200px context border added above.
                    x_min = std::max(0, x_min - 200);
                    y_min = std::max(0, y_min - 200);
                    x_max = std::min(originalWidth, x_max - 200);
                    y_max = std::min(originalHeight, y_max - 200);
                }

                // Square the box around its center: side = 2 * max half-extent,
                // then clamp into the original image bounds.
                int width_half = std::abs((x_max - x_min) / 2);
                int height_half = std::abs((y_max - y_min) / 2);
                int xc = x_min + width_half;
                int yc = y_min + height_half;
                int c = std::max(width_half, height_half);

                int x1_new = std::max(0, xc - c);
                int y1_new = std::max(0, yc - c);
                int x2_new = std::min(originalWidth, xc + c);
                int y2_new = std::min(originalHeight, yc + c);

                result.classId = 0;
                result.className = "Face";
                result.confidence = face.confidence;
                result.box = cv::Rect(x1_new, y1_new, x2_new - x1_new, y2_new - y1_new);
                result.kps = face.kps;
                result.cameraId = camera_id;

#ifdef FNS_DEBUG
                cv::rectangle(draw, result.box, cv::Scalar(0, 0, 255), 2);
#endif

                try {
                    // 112x112 face crop consumed downstream as the face mask.
                    result.mask = GetCroppedFaceScale(im, x1_new, y1_new, x2_new, y2_new, 112);
                }
                catch (const std::exception& e) {
                    _logger.LogError("ANSSCRFDFD::Inference", std::string("GetCroppedFaceScale failed: ") + e.what(), __FILE__, __LINE__);
                    continue;
                }

                if (!result.mask.empty()) {
                    output.push_back(result);
                }
            }
            catch (const std::exception& e) {
                _logger.LogError("ANSSCRFDFD::Inference", std::string("Processing one face failed: ") + e.what(), __FILE__, __LINE__);
                continue;
            }
        }

#ifdef FNS_DEBUG
        cv::resize(draw, draw, cv::Size(1920, 1080));
        cv::imshow("Detected Areas", draw);
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        _logger.LogFatal("ANSSCRFDFD::TensorRTInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return output;
}
|
|
|
|
|
|
std::vector<Object> ANSSCRFDFD::Detect(const cv::Mat& input)
{
    // Purpose: run the SCRFD TensorRT engine on one image and return NMS-filtered
    // face objects (boxes + landmarks) mapped back to `input` pixel coordinates.
    // Locking strategy: the shared mutex is held only for the validation and
    // post-processing phases; GPU preprocessing and inference run unlocked so the
    // engine pool can dispatch work to an idle GPU slot.
    //
    // Phase 1: Validation + engine dims (brief lock)
    int net_h, net_w;
    float imgHeight, imgWidth;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);

        if (input.empty() || input.cols < 10 || input.rows < 10) {
            this->_logger.LogError("ANSSCRFDFD::Detect", "Invalid input image", __FILE__, __LINE__);
            return {};
        }

        if (!m_trtEngine) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "TensorRT engine not initialized", __FILE__, __LINE__);
            return {};
        }

        imgHeight = static_cast<float>(input.rows);
        imgWidth = static_cast<float>(input.cols);

        // Get and validate expected input dims
        auto inputDims = m_trtEngine->getInputDims();
        if (inputDims.empty() || inputDims[0].nbDims < 3) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Invalid input dimensions", __FILE__, __LINE__);
            return {};
        }

        // d[1]/d[2] read as H/W — assumes a CHW binding; confirm against the
        // Engine<float>::getInputDims contract.
        net_h = inputDims[0].d[1];
        net_w = inputDims[0].d[2];

        // Optional check against configured INPUT_H/INPUT_W
        if (net_h != INPUT_H || net_w != INPUT_W) {
            this->_logger.LogFatal(
                "ANSSCRFDFD::Detect",
                "Engine input dims mismatch with configured INPUT_H/INPUT_W",
                __FILE__, __LINE__
            );
            return {};
        }
    }

    // Compute scale and padding (fully local math — no lock needed).
    // Letterbox: single uniform scale `r` preserving aspect ratio, then
    // center-pad to the network resolution.
    const float w_r = static_cast<float>(net_w) / imgWidth;
    const float h_r = static_cast<float>(net_h) / imgHeight;
    const float r = std::min(w_r, h_r);

    const int new_unpad_w = static_cast<int>(imgWidth * r);
    const int new_unpad_h = static_cast<int>(imgHeight * r);

    const int pad_w = net_w - new_unpad_w; // >= 0
    const int pad_h = net_h - new_unpad_h; // >= 0

    const int dw = pad_w / 2;
    const int dh = pad_h / 2;

    // Recorded so generate_bboxes_kps can undo the letterbox when mapping
    // detections back to image coordinates.
    SCRFDScaleParams scale_params;
    scale_params.ratio = r;
    scale_params.dw = dw;
    scale_params.dh = dh;
    scale_params.flag = true;

    // Phase 2: CUDA preprocessing + inference (mutex released — pool dispatches to idle GPU slot)
    std::vector<std::vector<cv::cuda::GpuMat>> inputs;
    bool usedNV12 = false;
    try {
        // Clear any sticky CUDA error from transient graph-capture failures
        cudaError_t priorErr = cudaGetLastError();
        if (priorErr != cudaSuccess) {
            this->_logger.LogWarn(
                "ANSSCRFDFD::Detect",
                std::string("Cleared prior CUDA error before SCRFD preprocessing: ")
                + cudaGetErrorString(priorErr),
                __FILE__, __LINE__);
        }

        // Try NV12 fast path first (fused NV12→RGB + center-padded letterbox)
        const int inferenceGpu = m_trtEngine ? m_trtEngine->getPreferredDeviceIndex() : 0;
        auto nv12 = m_nv12Helper.tryNV12(input, inferenceGpu, net_w, net_h,
            NV12PreprocessHelper::scrfdCenterLetterboxLauncher(dw, dh),
            _logger, "SCRFD");

        if (nv12.succeeded) {
            inputs = {{ std::move(nv12.gpuRGB) }};
            usedNV12 = true;
        }
        else if (nv12.useBgrFullRes) {
            // BGR full-res path — preprocess the full-res image instead
            // (fall through to standard BGR path with nv12.bgrFullResImg)
            // For simplicity, use the standard BGR path below with the original input
        }

        if (!usedNV12) {
            // Standard BGR upload + resize + center-pad path
            cv::cuda::Stream stream;
            cv::cuda::GpuMat d_bgr;

            if (input.channels() == 1) {
                // Grayscale input: expand to 3 channels before upload.
                cv::Mat img3Channel;
                cv::cvtColor(input, img3Channel, cv::COLOR_GRAY2BGR);
                d_bgr.upload(img3Channel, stream);
            }
            else if (input.channels() == 3) {
                d_bgr.upload(input, stream);
            }
            else {
                this->_logger.LogError("ANSSCRFDFD::Detect", "Unsupported channel count", __FILE__, __LINE__);
                return {};
            }

            cv::cuda::GpuMat d_rgb;
            cv::cuda::GpuMat d_resized;
            cv::cuda::GpuMat d_padded;

            cv::cuda::cvtColor(d_bgr, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
            cv::cuda::resize(d_rgb, d_resized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR, stream);

            // Black letterbox canvas at network resolution.
            d_padded.create(net_h, net_w, d_resized.type());
            d_padded.setTo(cv::Scalar(0, 0, 0), stream);

            // ROI of the resized image inside the padded canvas. The width /
            // height re-assignments below are redundant with the constructor
            // arguments (kept as-is).
            cv::Rect roi(dw, dh, new_unpad_w, new_unpad_h > 0 ? new_unpad_h : 0);
            roi.width = new_unpad_w;
            roi.height = new_unpad_h;
            d_resized.copyTo(d_padded(roi), stream);

            stream.waitForCompletion();

            std::vector<cv::cuda::GpuMat> inputVec;
            inputVec.emplace_back(std::move(d_padded));
            inputs.emplace_back(std::move(inputVec));
        }
        m_nv12Helper.tickInference();
    }
    catch (const std::exception& e) {
        this->_logger.LogError(
            "ANSSCRFDFD::Detect",
            std::string("CUDA preprocessing failed: ") + e.what(),
            __FILE__, __LINE__
        );
        return {};
    }

    // Raw engine outputs; presumably indexed [batch][output][values] — only
    // batch 0 is consumed below.
    std::vector<std::vector<std::vector<float>>> featureVectors;
    try {
        if (!m_trtEngine->runInference(inputs, featureVectors)) {
            this->_logger.LogFatal("ANSSCRFDFD::Detect", "Inference failed", __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal(
            "ANSSCRFDFD::Detect",
            std::string("runInference exception: ") + e.what(),
            __FILE__, __LINE__
        );
        return {};
    }

    // Phase 3: Postprocessing (brief lock — generate_bboxes_kps uses center_points)
    std::vector<Object> filteredFaceObjects;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        try {
            // Decode raw head outputs into proposals, then apply NMS
            // (400 — presumably the proposal cap; see nms_bboxes_kps).
            std::vector<Object> proposedFaceObjects;
            this->generate_bboxes_kps(
                scale_params,
                proposedFaceObjects,
                featureVectors[0],
                _modelConfig.detectionScoreThreshold,
                imgHeight,
                imgWidth
            );
            this->nms_bboxes_kps(
                proposedFaceObjects,
                filteredFaceObjects,
                _modelConfig.modelMNSThreshold,
                400
            );
        }
        catch (const std::exception& e) {
            this->_logger.LogError(
                "ANSSCRFDFD::Detect",
                std::string("Post-processing failed: ") + e.what(),
                __FILE__, __LINE__
            );
            return {};
        }
    }

    return filteredFaceObjects;
}
|
|
|
|
std::vector<Object> ANSSCRFDFD::TensorRTInferene(const cv::Mat& inputImage, const std::string& camera_id, bool useDynamicImage) {
    // Purpose: legacy full-frame / motion-ROI face detection path. Optionally
    // restricts inference to ROIs with recent movement, runs Detect() per ROI,
    // then squares and clamps each accepted face box and extracts its 112x112 mask.
    //
    // @param inputImage       BGR frame (or a small, pre-cropped face image)
    // @param camera_id        camera identifier stamped on each result
    // @param useDynamicImage  when true, only movement-derived ROIs are scanned
    // @return accepted faces with square boxes, masks, and landmarks
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<Object> output;
    if (!_licenseValid) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Invalid license", __FILE__, __LINE__);
        return output;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSSCRFDFD::TensorRTInferene", "Model is not initialized", __FILE__, __LINE__);
        return output;
    }
    try
    {
        // 0. Validate input
        if (inputImage.empty()) return output;
        if ((inputImage.cols < 10) || (inputImage.rows < 10)) return output;
        bool croppedFace = false; // Check if the image is a cropped face image
        cv::Mat im = inputImage.clone();
        int orginalHeight = im.rows;
        int orginalWidth = im.cols;
        // Small frames are treated as pre-cropped faces and padded with a
        // replicated 200px border so the detector has context around the face.
        if ((inputImage.size[0] <= 300) || (inputImage.size[1] <= 300)) croppedFace = true;
        if (croppedFace) cv::copyMakeBorder(inputImage, im, 200, 200, 200, 200, cv::BORDER_REPLICATE);
        std::vector<cv::Rect> activeROIs;
        if (useDynamicImage) {
            // Merge fresh movement detections with retained ones from earlier
            // frames; a burst of >= 12 results is treated as noise and only the
            // retained objects are used.
            std::vector<Object> movementResults = DetectMovement(im, camera_id);
            std::vector<Object> movementObjects;
            if ((!movementResults.empty()) && ((movementResults.size() < 12)))
            {
                movementObjects.insert(movementObjects.end(), movementResults.begin(), movementResults.end());
                if (!_movementObjects.empty()) movementObjects.insert(movementObjects.end(), _movementObjects.begin(), _movementObjects.end());
            }
            else {
                if (!_movementObjects.empty()) movementObjects.insert(movementObjects.end(), _movementObjects.begin(), _movementObjects.end());
            }
            activeROIs.clear();
            if (!movementObjects.empty()) {
                std::vector<cv::Rect> localActiveROIs = GenerateFixedROIs(movementObjects, _modelConfig.inpHeight, _modelConfig.inpWidth, im.cols, im.rows);
                activeROIs.insert(activeROIs.end(), localActiveROIs.begin(), localActiveROIs.end());
            }
            else {
                activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // Use the original image
            }
            if (activeROIs.empty())
            {
                return output;
            }
            UpdateAndFilterDetectionObjects(_movementObjects, 80);
        }
        else {
            activeROIs.push_back(cv::Rect(0, 0, im.cols, im.rows)); // Use the original image
        }

#ifdef FACEDEBUG
        cv::Mat draw = im.clone();
        for (int i = 0; i < movementObjects.size(); i++) {
            cv::rectangle(draw, movementObjects[i].box, cv::Scalar(0, 255, 255), 2);
        }
        for (int i = 0; i < activeROIs.size(); i++) {
            cv::rectangle(draw, activeROIs[i], cv::Scalar(0, 0, 255), 2);
        }
#endif
        for (size_t j = 0; j < activeROIs.size(); j++) {
            cv::Rect activeROI = activeROIs[j];
            // Clamp ROI origin/size into the frame.
            // NOTE(review): this clamps width/height to the full frame size but
            // not to (im.cols - x)/(im.rows - y); GenerateFixedROIs presumably
            // guarantees in-bounds ROIs — confirm before relying on it.
            activeROI.x = std::max(0, activeROI.x);
            activeROI.y = std::max(0, activeROI.y);
            activeROI.width = std::min(im.cols, activeROI.width);
            activeROI.height = std::min(im.rows, activeROI.height);
            cv::Mat frame = im(activeROI).clone();
            std::vector<Object> filteredFaceObjects = Detect(frame);
            // Process the detected objects of this ROI
            for (size_t i = 0; i < filteredFaceObjects.size(); i++)
            {
                if (filteredFaceObjects[i].confidence > _modelConfig.detectionScoreThreshold) {
#ifdef FACEDEBUG
                    cv::Rect faceRect;
                    faceRect.x = filteredFaceObjects[i].box.x + activeROI.x;
                    faceRect.y = filteredFaceObjects[i].box.y + activeROI.y;
                    faceRect.width = filteredFaceObjects[i].box.width;
                    faceRect.height = filteredFaceObjects[i].box.height;
                    cv::rectangle(draw, faceRect, cv::Scalar(225, 255, 0), 2);
#endif
                    // Check if the face is valid.
                    // FIX: was `if (isValidFace(polygon, box), 27)` — the comma
                    // operator made the condition always true (27), bypassing
                    // validation entirely. The corrected 3-argument call matches
                    // the Inference()/InferenceDynamic() call sites.
                    if (isValidFace(filteredFaceObjects[i].polygon, filteredFaceObjects[i].box, 27))
                    {
                        Object result;
                        // 0. Face bounding box: ROI-local -> full-image coordinates
                        int x_min = filteredFaceObjects[i].box.x + activeROI.x;
                        int y_min = filteredFaceObjects[i].box.y + activeROI.y;
                        int x_max = filteredFaceObjects[i].box.width + filteredFaceObjects[i].box.x + activeROI.x;
                        int y_max = filteredFaceObjects[i].box.height + filteredFaceObjects[i].box.y + activeROI.y;
#ifdef FACEDEBUG
                        // draw landmarks
                        for (cv::Point2f point : filteredFaceObjects[i].polygon)
                        {
                            cv::circle(draw, cv::Point(point.x + activeROI.x, point.y + activeROI.y), 2, cv::Scalar(0, 255, 0), -1);
                        }
#endif
                        if (croppedFace)
                        {
                            // Undo the 200px context border added above.
                            x_min = std::max(0, x_min - 200);
                            y_min = std::max(0, y_min - 200);
                            x_max = std::min(orginalWidth, x_max - 200);
                            y_max = std::min(orginalHeight, y_max - 200);
                        }
                        // 1. Calculate the centered coordinates and dimensions
                        int width_half = std::abs((x_max - x_min) / 2);
                        int height_half = std::abs((y_max - y_min) / 2);
                        int xc = x_min + width_half;
                        int yc = y_min + height_half;
                        int c = std::max(width_half, height_half);

                        // 2. New bounding box coordinates (square centered at xc, yc)
                        int x1_new = std::max(0, xc - c);
                        int y1_new = std::max(0, yc - c);
                        int x2_new = std::min(orginalWidth, xc + c);
                        int y2_new = std::min(orginalHeight, yc + c);

                        // 3. Fill in the result object
                        result.classId = 0;
                        result.className = "Face";
                        result.confidence = filteredFaceObjects[i].confidence;
                        result.box.x = x1_new;
                        result.box.y = y1_new;
                        result.box.width = x2_new - x1_new;
                        result.box.height = y2_new - y1_new;
                        result.mask = GetCroppedFaceScale(inputImage, x1_new, y1_new, x2_new, y2_new, 112);
                        result.kps = filteredFaceObjects[i].kps; // landmarks as array of x,y,x,y...
                        result.cameraId = camera_id;
                        if (!result.mask.empty())
                        {
                            output.push_back(result);
                            if (useDynamicImage) {
                                // Retain this detection as a movement seed for
                                // the next frame.
                                result.extraInfo = "0";
                                // NOTE(review): the predicate ignores existingObj
                                // and re-checks the whole list, so any
                                // intersection replaces the FIRST element.
                                // Confirm whether a per-element intersection test
                                // was intended before changing it.
                                auto it = std::find_if(_movementObjects.begin(), _movementObjects.end(),
                                    [&](Object& existingObj) {
                                        return ContainsIntersectingObject(_movementObjects, result);
                                    });

                                if (it != _movementObjects.end()) {
                                    *it = result; // Replace existing object with the new one
                                }
                                else {
                                    // If not found, add the new object to the list
                                    _movementObjects.push_back(result);
                                }
                            }
                        }
                    }
                }
            }
            frame.release();
        }
        im.release();
#ifdef FACEDEBUG
        cv::imshow("Combined Detected Areas", draw); // Debug: show combined detected areas
        cv::waitKey(1);
        draw.release();
#endif
        return output;
    }
    catch (const std::exception& e)
    {
        this->_logger.LogFatal("ANSSCRFDFD::TensorRTInferene", e.what(), __FILE__, __LINE__);
        return output;
    }
}
|
|
|
|
// Destructor: releases all detector resources via Destroy(). Exceptions are
// logged and swallowed — a destructor must never let one escape.
ANSSCRFDFD::~ANSSCRFDFD() {
    try {
        Destroy();
    }
    catch (const std::exception& ex) {
        _logger.LogFatal("ANSSCRFDFD::Destroy", ex.what(), __FILE__, __LINE__);
    }
}
|
|
bool ANSSCRFDFD::Destroy() {
|
|
try {
|
|
_isInitialized = false;
|
|
_licenseValid = false;
|
|
_modelFilePath.clear();
|
|
m_nv12Helper.destroy();
|
|
if (m_usingSharedPool) {
|
|
EnginePoolManager<float>::instance().release(m_poolKey);
|
|
m_trtEngine.reset();
|
|
m_usingSharedPool = false;
|
|
}
|
|
else if (m_trtEngine) {
|
|
m_trtEngine.reset();
|
|
}
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSSCRFDFD::Destroy", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// SCRFDFD implementation (private)
|
|
// Letterbox-resizes `mat` into a target_width x target_height canvas:
// the image is scaled uniformly (aspect ratio preserved) and centred on
// black padding. `scale_params` receives the ratio and x/y offsets needed
// to map detections back into source-image coordinates. No-op on empty input.
void ANSSCRFDFD::resize_unscale(const cv::Mat& mat, cv::Mat& mat_rs,
    int target_height, int target_width,
    SCRFDScaleParams& scale_params)
{
    if (mat.empty()) return;

    const int src_h = mat.rows;
    const int src_w = mat.cols;

    // Start from an all-black canvas of the target size.
    mat_rs = cv::Mat(target_height, target_width, CV_8UC3, cv::Scalar(0, 0, 0));

    // Uniform scale: the tighter of the two per-axis ratios (new / old).
    const float scale = std::min((float)target_width / (float)src_w,
                                 (float)target_height / (float)src_h);

    // Scaled content size (floored) and the centring offsets (>= 0).
    const int content_w = static_cast<int>((float)src_w * scale);
    const int content_h = static_cast<int>((float)src_h * scale);
    const int offset_x = (target_width - content_w) / 2;
    const int offset_y = (target_height - content_h) / 2;

    // Resize and paste into the centre of the canvas.
    cv::Mat resized;
    cv::resize(mat, resized, cv::Size(content_w, content_h));
    resized.copyTo(mat_rs(cv::Rect(offset_x, offset_y, content_w, content_h)));

    // Record the mapping back to source coordinates for the decode step.
    scale_params.ratio = scale;
    scale_params.dw = offset_x;
    scale_params.dh = offset_y;
    scale_params.flag = true;
}
|
|
void ANSSCRFDFD::generate_points(const int target_height, const int target_width)
|
|
{
|
|
if (center_points_is_update) return;
|
|
// 8, 16, 32
|
|
for (auto stride : feat_stride_fpn)
|
|
{
|
|
unsigned int num_grid_w = target_width / stride;
|
|
unsigned int num_grid_h = target_height / stride;
|
|
// y
|
|
for (unsigned int i = 0; i < num_grid_h; ++i)
|
|
{
|
|
// x
|
|
for (unsigned int j = 0; j < num_grid_w; ++j)
|
|
{
|
|
// num_anchors, col major
|
|
for (unsigned int k = 0; k < num_anchors; ++k)
|
|
{
|
|
SCRFDPoint point;
|
|
point.cx = (float)j;
|
|
point.cy = (float)i;
|
|
point.stride = (float)stride;
|
|
center_points[stride].push_back(point);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
center_points_is_update = true;
|
|
}
|
|
|
|
void ANSSCRFDFD::generate_bboxes_kps(const SCRFDScaleParams& scale_params,
|
|
std::vector<Object>& bbox_kps_collection,
|
|
std::vector<std::vector<float>>& output_tensors,
|
|
float score_threshold,
|
|
float img_height,
|
|
float img_width)
|
|
{
|
|
// score_8,score_16,score_32,bbox_8,bbox_16,bbox_32
|
|
std::vector<float> score_8 = output_tensors.at(0); // e.g [1,12800,1]
|
|
std::vector<float> score_16 = output_tensors.at(1); // e.g [1,3200,1]
|
|
std::vector<float> score_32 = output_tensors.at(2); // e.g [1,800,1]
|
|
std::vector<float> bbox_8 = output_tensors.at(3); // e.g [1,12800,4]
|
|
std::vector<float> bbox_16 = output_tensors.at(4); // e.g [1,3200,4]
|
|
std::vector<float> bbox_32 = output_tensors.at(5); // e.g [1,800,4]
|
|
// generate center points.
|
|
const float input_height = INPUT_H;// static_cast<float>(input_node_dims.at(2)); // e.g 640
|
|
const float input_width = INPUT_W;// static_cast<float>(input_node_dims.at(3)); // e.g 640
|
|
this->generate_points(input_height, input_width);
|
|
bbox_kps_collection.clear();
|
|
if (use_kps)
|
|
{
|
|
std::vector<float> kps_8 = output_tensors.at(6); // e.g [1,12800,10]
|
|
std::vector<float> kps_16 = output_tensors.at(7); // e.g [1,3200,10]
|
|
std::vector<float> kps_32 = output_tensors.at(8); // e.g [1,800,10]
|
|
|
|
// level 8 & 16 & 32 with kps
|
|
this->generate_bboxes_kps_single_stride(scale_params, score_8, bbox_8, kps_8, 8, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
this->generate_bboxes_kps_single_stride(scale_params, score_16, bbox_16, kps_16, 16, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
this->generate_bboxes_kps_single_stride(scale_params, score_32, bbox_32, kps_32, 32, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
} // no kps
|
|
else
|
|
{
|
|
// level 8 & 16 & 32
|
|
this->generate_bboxes_single_stride(scale_params, score_8, bbox_8, 8, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
this->generate_bboxes_single_stride(scale_params, score_16, bbox_16, 16, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
this->generate_bboxes_single_stride(scale_params, score_32, bbox_32, 32, score_threshold,
|
|
img_height, img_width, bbox_kps_collection);
|
|
}
|
|
}
|
|
void ANSSCRFDFD::generate_bboxes_single_stride(
|
|
const SCRFDScaleParams& scale_params, std::vector<float>& score_pred, std::vector<float>& bbox_pred,
|
|
unsigned int stride, float score_threshold, float img_height, float img_width,
|
|
std::vector<Object>& bbox_kps_collection)
|
|
{
|
|
unsigned int nms_pre_ = (stride / 8) * nms_pre; // 1 * 1000,2*1000,...
|
|
nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre;
|
|
|
|
const unsigned int num_points = score_pred.size();// stride_dims.at(1); // 12800
|
|
const float* score_ptr = score_pred.data(); // [1,12800,1]
|
|
const float* bbox_ptr = bbox_pred.data(); // [1,12800,4]
|
|
|
|
float ratio = scale_params.ratio;
|
|
int dw = scale_params.dw;
|
|
int dh = scale_params.dh;
|
|
|
|
unsigned int count = 0;
|
|
auto& stride_points = center_points[stride];
|
|
|
|
for (unsigned int i = 0; i < num_points; ++i)
|
|
{
|
|
const float cls_conf = score_ptr[i];
|
|
if (cls_conf < score_threshold) continue; // filter
|
|
auto& point = stride_points.at(i);
|
|
const float cx = point.cx; // cx
|
|
const float cy = point.cy; // cy
|
|
const float s = point.stride; // stride
|
|
|
|
// bbox
|
|
const float* offsets = bbox_ptr + i * 4;
|
|
float l = offsets[0]; // left
|
|
float t = offsets[1]; // top
|
|
float r = offsets[2]; // right
|
|
float b = offsets[3]; // bottom
|
|
|
|
Object box_kps;
|
|
float x1 = ((cx - l) * s - (float)dw) / ratio; // cx - l x1
|
|
float y1 = ((cy - t) * s - (float)dh) / ratio; // cy - t y1
|
|
float x2 = ((cx + r) * s - (float)dw) / ratio; // cx + r x2
|
|
float y2 = ((cy + b) * s - (float)dh) / ratio; // cy + b y2
|
|
box_kps.box.x = std::max(0.f, x1);
|
|
box_kps.box.y = std::max(0.f, y1);
|
|
box_kps.box.width = std::min(img_width - 1.f, x2 - x1);
|
|
box_kps.box.height = std::min(img_height - 1.f, y2 - y1);
|
|
box_kps.confidence = cls_conf;
|
|
box_kps.classId = 0;
|
|
box_kps.className = "face";
|
|
|
|
bbox_kps_collection.push_back(box_kps);
|
|
|
|
count += 1; // limit boxes for nms.
|
|
if (count > max_nms)
|
|
break;
|
|
}
|
|
|
|
if (bbox_kps_collection.size() > nms_pre_)
|
|
{
|
|
std::sort(
|
|
bbox_kps_collection.begin(), bbox_kps_collection.end(),
|
|
[](const Object& a, const Object& b)
|
|
{ return a.confidence > b.confidence; }
|
|
); // sort inplace
|
|
// trunc
|
|
bbox_kps_collection.resize(nms_pre_);
|
|
}
|
|
}
|
|
|
|
// Decodes one FPN level including the 5 facial landmarks: per-anchor distance
// offsets become image-space boxes, landmark offsets become points clamped to
// the image, candidates below score_threshold are dropped, and the running
// collection is truncated to the per-stride pre-NMS budget by confidence.
//
// score_pred / bbox_pred / kps_pred are flattened tensors for this stride
// (shapes noted inline); results are appended to bbox_kps_collection.
void ANSSCRFDFD::generate_bboxes_kps_single_stride(
    const SCRFDScaleParams& scale_params, std::vector<float>& score_pred, std::vector<float>& bbox_pred,
    std::vector<float>& kps_pred, unsigned int stride, float score_threshold, float img_height,
    float img_width, std::vector<Object>& bbox_kps_collection)
{
    // Pre-NMS budget scales with stride: 1 * 1000, 2 * 1000, ... (never below nms_pre).
    unsigned int nms_pre_ = (stride / 8) * nms_pre;
    nms_pre_ = nms_pre_ >= nms_pre ? nms_pre_ : nms_pre;

    const unsigned int num_points = score_pred.size(); // e.g. 12800 for stride 8
    const float* score_ptr = score_pred.data(); // [1,12800,1]
    const float* bbox_ptr = bbox_pred.data();   // [1,12800,4]
    const float* kps_ptr = kps_pred.data();     // [1,12800,10] — 5 (x, y) landmarks

    // Letterbox mapping recorded by resize_unscale(); used to project
    // network-space coordinates back onto the source image.
    float ratio = scale_params.ratio;
    int dw = scale_params.dw;
    int dh = scale_params.dh;

    unsigned int count = 0;
    auto& stride_points = center_points[stride]; // anchor centres for this stride

    for (unsigned int i = 0; i < num_points; ++i)
    {
        const float cls_conf = score_ptr[i];
        if (cls_conf < score_threshold) continue; // filter low-confidence points

        auto& point = stride_points.at(i);
        const float cx = point.cx;       // anchor cell x index
        const float cy = point.cy;       // anchor cell y index
        const float s = point.stride;    // stride in pixels

        // bbox: offsets are distances from the anchor centre, in stride units
        const float* offsets = bbox_ptr + i * 4;
        float l = offsets[0]; // left
        float t = offsets[1]; // top
        float r = offsets[2]; // right
        float b = offsets[3]; // bottom

        Object box_kps;
        // Scale by stride, subtract the letterbox padding, divide by the
        // resize ratio — yields source-image coordinates.
        float x1 = ((cx - l) * s - (float)dw) / ratio; // cx - l -> x1
        float y1 = ((cy - t) * s - (float)dh) / ratio; // cy - t -> y1
        float x2 = ((cx + r) * s - (float)dw) / ratio; // cx + r -> x2
        float y2 = ((cy + b) * s - (float)dh) / ratio; // cy + b -> y2

        box_kps.box.x = (int)std::max(0.f, x1);
        box_kps.box.y = (int)std::max(0.f, y1);
        box_kps.box.width = (int)std::min(img_width - 1.f, x2 - x1);
        box_kps.box.height = (int)std::min(img_height - 1.f, y2 - y1);

        box_kps.confidence = cls_conf;
        box_kps.classId = 0;
        box_kps.className = "face";

        // landmarks: 5 (x, y) pairs, decoded with the same letterbox undo and
        // clamped to the image bounds
        const float* kps_offsets = kps_ptr + i * 10;
        for (unsigned int j = 0; j < 10; j += 2)
        {
            cv::Point2f kps;
            float kps_l = kps_offsets[j];
            float kps_t = kps_offsets[j + 1];
            float kps_x = ((cx + kps_l) * s - (float)dw) / ratio; // cx + l -> x
            float kps_y = ((cy + kps_t) * s - (float)dh) / ratio; // cy + t -> y
            kps.x = std::min(std::max(0.f, kps_x), img_width - 1.f);
            kps.y = std::min(std::max(0.f, kps_y), img_height - 1.f);
            box_kps.kps.push_back(kps.x);
            box_kps.kps.push_back(kps.y);
            box_kps.polygon.push_back(kps); // landmarks also kept as polygon points
        }
        bbox_kps_collection.push_back(box_kps);

        count += 1; // limit boxes fed into NMS
        if (count > max_nms)
            break;
    }

    // Keep only the highest-confidence candidates gathered so far.
    if (bbox_kps_collection.size() > nms_pre_)
    {
        std::sort(
            bbox_kps_collection.begin(), bbox_kps_collection.end(),
            [](const Object& a, const Object& b)
            { return a.confidence > b.confidence; }
        ); // sort inplace
        // trunc
        bbox_kps_collection.resize(nms_pre_);
    }

}
|
|
float ANSSCRFDFD::getIouOfObjects(const Object& a, const Object& b) {
|
|
// Retrieve the bounding boxes
|
|
const cv::Rect& boxA = a.box;
|
|
const cv::Rect& boxB = b.box;
|
|
|
|
// Compute the coordinates of the intersection rectangle
|
|
int inner_x1 = std::max(boxA.x, boxB.x);
|
|
int inner_y1 = std::max(boxA.y, boxB.y);
|
|
int inner_x2 = std::min(boxA.x + boxA.width, boxB.x + boxB.width);
|
|
int inner_y2 = std::min(boxA.y + boxA.height, boxB.y + boxB.height);
|
|
|
|
// Calculate width and height of the intersection
|
|
int inner_w = inner_x2 - inner_x1;
|
|
int inner_h = inner_y2 - inner_y1;
|
|
|
|
// If there's no overlap, return 0
|
|
if (inner_w <= 0 || inner_h <= 0) {
|
|
return 0.0f;
|
|
}
|
|
|
|
// Calculate the area of the intersection
|
|
float inner_area = static_cast<float>(inner_w * inner_h);
|
|
|
|
// Calculate the areas of the two boxes
|
|
float areaA = static_cast<float>(boxA.width * boxA.height);
|
|
float areaB = static_cast<float>(boxB.width * boxB.height);
|
|
|
|
// Calculate the union area
|
|
float union_area = areaA + areaB - inner_area;
|
|
|
|
// Avoid division by zero and return IoU
|
|
if (union_area <= 0.0f) {
|
|
return 0.0f;
|
|
}
|
|
return inner_area / union_area;
|
|
}
|
|
|
|
void ANSSCRFDFD::nms_bboxes_kps(std::vector<Object>& input,
|
|
std::vector<Object>& output,
|
|
float iou_threshold, unsigned int topk)
|
|
{
|
|
if (input.empty()) return;
|
|
std::sort(
|
|
input.begin(), input.end(),
|
|
[](const Object& a, const Object& b)
|
|
{ return a.confidence > b.confidence; }
|
|
);
|
|
const unsigned int box_num = input.size();
|
|
std::vector<int> merged(box_num, 0);
|
|
|
|
unsigned int count = 0;
|
|
for (unsigned int i = 0; i < box_num; ++i)
|
|
{
|
|
if (merged[i]) continue;
|
|
std::vector<Object> buf;
|
|
|
|
buf.push_back(input[i]);
|
|
merged[i] = 1;
|
|
|
|
for (unsigned int j = i + 1; j < box_num; ++j)
|
|
{
|
|
if (merged[j]) continue;
|
|
|
|
float iou = getIouOfObjects(input[i], input[j]); //static_cast<float>(input[i].box.iou_of(input[j].box));
|
|
|
|
if (iou > iou_threshold)
|
|
{
|
|
merged[j] = 1;
|
|
buf.push_back(input[j]);
|
|
}
|
|
}
|
|
output.push_back(buf[0]);
|
|
|
|
// keep top k
|
|
count += 1;
|
|
if (count >= topk)
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|