Files
ANSCORE/modules/ANSFR/ANSFaceRecognizer.cpp

2177 lines
72 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "ANSFaceRecognizer.h"
#include "engine.h"
//#define DEBUG_TIME
#ifdef DEBUG_TIME
#include <chrono>
#include <iostream>
#define FR_START_TIMER(name) auto timer_##name = std::chrono::high_resolution_clock::now()
#define FR_END_TIMER(name, label) \
do { \
auto timer_##name##_end = std::chrono::high_resolution_clock::now(); \
auto duration_us = std::chrono::duration_cast<std::chrono::microseconds>(timer_##name##_end - timer_##name).count(); \
std::cout << " [FR] " << label << ": " << (duration_us / 1000.0) << " ms" << std::endl; \
} while(0)
#else
#define FR_START_TIMER(name)
#define FR_END_TIMER(name, label)
#endif
namespace ANSCENTER {
std::string ANSFaceRecognizer::GetOpenVINODevice() {
    // Enumerate every device the OpenVINO runtime reports, log the list,
    // then return the highest-priority device that is present (GPU is
    // preferred over CPU). Defaults to "CPU" when neither is reported.
    ov::Core core;
    const auto devices = core.get_available_devices();
    ANS_DBG("FaceRecognizer", "OpenVINO available devices: %zu", devices.size());
    for (const auto& name : devices) {
        ANS_DBG("FaceRecognizer", " OpenVINO device: %s", name.c_str());
    }
    static const char* kPreferred[] = { "GPU", "CPU" };
    for (const char* wanted : kPreferred) {
        const bool present =
            std::find(devices.begin(), devices.end(), wanted) != devices.end();
        if (present) {
            ANS_DBG("FaceRecognizer", "OpenVINO selected device: %s", wanted);
            return wanted;
        }
    }
    // Nothing from the priority list was found — fall back to CPU.
    ANS_DBG("FaceRecognizer", "OpenVINO fallback to CPU");
    return "CPU";
}
// Initializes the recognizer end-to-end:
//   1. Base-class init (license check + model-package extraction).
//   2. Engine detection (forced CPU under CPU_MODE, else HW probe).
//   3. Back-end specific engine construction:
//        - NVIDIA_GPU: TensorRT engine from ansfacerecognizer.onnx via the
//          shared EnginePoolManager, plus a CUDA stream for preprocessing.
//        - non-NVIDIA + USE_ONNX_ENGINE: ONNX Runtime GlintArcFace from
//          ansfacerecognizer.onnx.
//        - non-NVIDIA otherwise: OpenVINO VectorCNN from ansfacenet.xml.
//   4. Init() builds a fresh FAISS index and sets _isInitialized.
// Returns false on any failure (missing model file, engine build failure,
// exception); logs the reason via _logger / ANS_DBG.
bool ANSFaceRecognizer::Initialize(std::string licenseKey,
ModelConfig modelConfig,
const std::string& modelZipFilePath,
const std::string& modelZipPassword,
std::string& labelMap)
{
ANS_DBG("FaceRecognizer", "Initialize: modelZip=%s", modelZipFilePath.c_str());
bool result = ANSFRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
if (!result) {
ANS_DBG("FaceRecognizer", "ANSFRBase::Initialize FAILED");
return false;
}
#ifdef CPU_MODE
engineType = EngineType::CPU;
ANS_DBG("FaceRecognizer", "CPU_MODE forced: engineType=CPU");
#else
engineType = ANSLicenseHelper::CheckHardwareInformation();
ANS_DBG("FaceRecognizer", "HW detection: engineType=%d", static_cast<int>(engineType));
#endif
try {
// Stamp the config with this module's fixed model/detection type.
_modelConfig = modelConfig;
_modelConfig.modelType = ModelType::FACERECOGNIZE;
_modelConfig.detectionType = DetectionType::FACERECOGNIZER;
m_knownPersonThresh = _modelConfig.unknownPersonThreshold;
// A zero threshold from config means "unset" — fall back to 0.35.
if (m_knownPersonThresh == 0.0f) m_knownPersonThresh = 0.35f;
ANS_DBG("FaceRecognizer", "engineType=%d (NVIDIA=%d, OPENVINO=%d, AMD=%d, CPU=%d)",
static_cast<int>(engineType),
static_cast<int>(EngineType::NVIDIA_GPU),
static_cast<int>(EngineType::OPENVINO_GPU),
static_cast<int>(EngineType::AMD_GPU),
static_cast<int>(EngineType::CPU));
if (engineType == EngineType::NVIDIA_GPU) {
// 1. Load ONNX model
std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
if (std::filesystem::exists(onnxfile)) {
_modelFilePath = onnxfile;
_logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ansfacerecognizer weight",
_modelFilePath, __FILE__, __LINE__);
}
else {
_logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacerecognizer.onnx does not exist",
_modelFilePath, __FILE__, __LINE__);
return false;
}
if (!m_trtEngine) {
// Enable batch support
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
// Fixed-size input profile: faces are always warped to
// GPU_FACE_WIDTH x GPU_FACE_HEIGHT before inference.
m_options.maxInputHeight = GPU_FACE_HEIGHT;
m_options.minInputHeight = GPU_FACE_HEIGHT;
m_options.optInputHeight = GPU_FACE_HEIGHT;
m_options.maxInputWidth = GPU_FACE_WIDTH;
m_options.minInputWidth = GPU_FACE_WIDTH;
m_options.optInputWidth = GPU_FACE_WIDTH;
m_options.calibrationBatchSize = 8;
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
m_options.engineFileDir = _modelFolder;
m_options.precision = ANSCENTER::Precision::FP16;
// Pool key = (model path, precision, max batch) so instances with the
// same configuration share one TensorRT engine.
m_poolKey = { _modelFilePath,
static_cast<int>(m_options.precision),
m_options.maxBatchSize };
m_trtEngine = EnginePoolManager<float>::instance().acquire(
m_poolKey, m_options, _modelFilePath,
SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
m_usingSharedPool = (m_trtEngine != nullptr);
if (!m_trtEngine) {
_logger.LogError("ANSFaceRecognizer::Initialize. Unable to build/load TensorRT engine.",
_modelFilePath, __FILE__, __LINE__);
return false;
}
// Create CUDA stream for GPU preprocessing (lazy init)
if (!m_gpuStream) {
m_gpuStream = std::make_unique<cv::cuda::Stream>();
}
}
}
else {
#ifdef USE_ONNX_ENGINE
std::string faceidModel = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
if (std::filesystem::exists(faceidModel)) {
_modelFilePath = faceidModel;
_logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ANSFaceRecognizer weight",
_modelFilePath, __FILE__, __LINE__);
}
else {
_logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacerecognizer.onnx does not exist",
_modelFilePath, __FILE__, __LINE__);
return false;
}
ANS_DBG("FaceRecognizer", "Creating GlintArcFace with engineType=%d model=%s",
static_cast<int>(engineType), faceidModel.c_str());
faceRecognizer = std::make_unique<GlintArcFace>(faceidModel, engineType);
// NOTE(review): std::make_unique never returns nullptr (it throws on
// failure), so this check is defensive dead code; kept for safety.
if (!faceRecognizer) {
_logger.LogFatal("ANSFaceRecognizer::Initialize",
"Failed to initialize ONNX face recognizer", __FILE__, __LINE__);
ANS_DBG("FaceRecognizer", "FAILED: GlintArcFace returned null");
return false;
}
#else
std::string faceidModel = CreateFilePath(_modelFolder, "ansfacenet.xml");
if (std::filesystem::exists(faceidModel)) {
_modelFilePath = faceidModel;
_logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ANSFaceRecognizer weight",
_modelFilePath, __FILE__, __LINE__);
}
else {
_logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacenet.xml does not exist",
_modelFilePath, __FILE__, __LINE__);
return false;
}
std::string deviceName = GetOpenVINODevice();
ov::Core core;
CnnConfig reid_config(_modelFilePath, "Face Re-Identification");
reid_config.m_deviceName = deviceName;
reid_config.m_max_batch_size = 1; // can be increased if you want batching here too
reid_config.m_core = core;
faceRecognizer = std::make_unique<VectorCNN>(reid_config);
// NOTE(review): as above, make_unique cannot return nullptr.
if (!faceRecognizer) {
_logger.LogFatal("ANSFaceRecognizer::Initialize",
"Failed to initialize OpenVINO face recognizer", __FILE__, __LINE__);
return false;
}
#endif
}
// Build the FAISS search index and flip _isInitialized.
Init();
_isInitialized = true;
return true;
}
catch (const std::exception& e) {
ANS_DBG("FaceRecognizer", "Initialize EXCEPTION: %s", e.what());
_logger.LogFatal("ANSFaceRecognizer::Initialize", e.what(), __FILE__, __LINE__);
return false;
}
}
// Re-extracts/validates the model package via the base class, then resolves
// the engine-specific weight file and stores it in _modelFilePath:
//   - NVIDIA_GPU / USE_ONNX_ENGINE: ansfacerecognizer.onnx
//   - OpenVINO (non-ONNX build):    ansfacenet.xml (matches Initialize()).
// Returns false when the base load fails or the expected weight file is
// missing from the extracted model folder.
//
// BUG FIX: the OpenVINO branch previously tested for ansfacerecognizer.onnx
// (the filename was computed before the #ifdef) while its log and error
// messages referred to ansfacenet.xml — the file Initialize() actually
// loads. The filename is now selected inside each preprocessor branch.
bool ANSFaceRecognizer::LoadModel(const std::string& modelZipFilePath,
const std::string& modelZipPassword) {
    try {
        if (!ANSFRBase::LoadModel(modelZipFilePath, modelZipPassword)) {
            return false;
        }
        if (engineType == EngineType::NVIDIA_GPU) {
            // TensorRT path consumes the raw ONNX weights.
            std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (!std::filesystem::exists(onnxfile)) {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacerecognizer.onnx not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            _modelFilePath = onnxfile;
            _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacerecognizer weight",
                _modelFilePath, __FILE__, __LINE__);
        }
        else {
#ifdef USE_ONNX_ENGINE
            // ONNX Runtime path also uses the .onnx weights.
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (!std::filesystem::exists(faceidModel)) {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacerecognizer.onnx not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            _modelFilePath = faceidModel;
            _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacerecognizer weight",
                _modelFilePath, __FILE__, __LINE__);
#else
            // OpenVINO path loads the IR model, consistent with Initialize().
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacenet.xml");
            if (!std::filesystem::exists(faceidModel)) {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacenet.xml not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            _modelFilePath = faceidModel;
            _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacenet weight",
                _modelFilePath, __FILE__, __LINE__);
#endif
        }
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSFaceRecognizer::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
if (!FileExist(_modelFilePath)) {
optimizedModelFolder = "";
return false;
}
if (engineType == EngineType::NVIDIA_GPU) {
optimizedModelFolder = GetParentFolder(_modelFilePath);
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
m_options.engineFileDir = optimizedModelFolder;
m_options.precision = fp16 ? Precision::FP16 : Precision::FP32;
Engine<float> engine(m_options);
auto succ = engine.buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
if (!succ) {
const std::string errMsg =
"Error: Unable to build the TensorRT engine. Try increasing TensorRT log severity to kVERBOSE.";
_logger.LogError("ANSFaceRecognizer::OptimizeModel", errMsg, __FILE__, __LINE__);
return false;
}
return true;
}
return true;
}
// Extracts a single face embedding from the pre-cropped/aligned face stored
// in bBox.mask. The `image` parameter is only used for validity checks here
// — the actual pixels fed to the network come from bBox.mask.
// Returns an empty vector on invalid input or inference failure.
// Serializes on _mutex around the inference call.
std::vector<float> ANSFaceRecognizer::Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) {
std::vector<float> embeddingResult;
// Early validation before locking
if (image.empty()) {
_logger.LogError("ANSFaceRecognizer::Feature",
"Input image is empty", __FILE__, __LINE__);
return embeddingResult;
}
// Silently reject degenerate frames too small to contain a face.
if (image.cols < 10 || image.rows < 10) {
return embeddingResult;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (engineType == EngineType::NVIDIA_GPU) {
// TensorRT path: single-image inference on the aligned crop.
return RunArcFace(bBox.mask);
}
else {
// CPU path
#ifdef USE_ONNX_ENGINE
types::FaceContent face_content;
faceRecognizer->detect(bBox.mask, face_content);
// Explicitly move and let face_content destructor clean up
embeddingResult = std::move(face_content.embedding);
return embeddingResult;
#else
// OpenVINO path: Compute() returns the embedding as a cv::Mat that we
// convert into a flat float vector.
cv::Mat embedding;
faceRecognizer->Compute(bBox.mask, &embedding);
// Check if embedding is valid
if (embedding.empty() || embedding.total() == 0) {
return embeddingResult;
}
// Reserve space first to avoid reallocations
embeddingResult.reserve(embedding.total());
// Optimized conversion
if (embedding.isContinuous()) {
const float* dataPtr = embedding.ptr<float>(0);
embeddingResult.assign(dataPtr, dataPtr + embedding.total());
}
else {
embeddingResult.assign(embedding.begin<float>(), embedding.end<float>());
}
// Explicitly release OpenCV Mat
embedding.release();
return embeddingResult;
#endif
}
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Feature", e.what(), __FILE__, __LINE__);
return std::vector<float>();
}
}
// Recognizes every face in `bBox` against the FAISS gallery:
//   1. ForwardUnlocked() computes one embedding per face crop.
//   2. SearchForFacesUnlocked() returns, per embedding, the best-match user
//      id and a distance value (per that function's comment: L2^2/2 =
//      1 - cosine, so SMALLER means more similar).
//   3. Results above m_knownPersonThresh are labeled "Unknown"; otherwise
//      the user name is resolved via `userDict` (id -> display name).
// Bounding boxes are clamped to the image and copied into the results.
// Returns an empty vector on invalid input, no matches, or exception.
//
// NOTE(review): ForwardUnlocked skips objects whose masks are empty, so
// `names` can be shorter than `bBox`; the loop below pairs names[i] with
// bBox[i] by index. Confirm upstream guarantees every bBox has a mask,
// otherwise results could be associated with the wrong box.
std::vector<FaceResultObject> ANSFaceRecognizer::Match(const cv::Mat& input,const std::vector<ANSCENTER::Object>& bBox,const std::map<std::string, std::string>& userDict) {
std::vector<FaceResultObject> resultObjects;
// Early validation before locking
if (input.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"Input image is empty", __FILE__, __LINE__);
return resultObjects;
}
// Silently reject degenerate frames too small to contain a face.
if (input.cols < 10 || input.rows < 10) {
return resultObjects;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::Match",
"Model is not initialized", __FILE__, __LINE__);
return resultObjects;
}
try {
// Get embeddings (Fix #5: call unlocked — we already hold _mutex)
FR_START_TIMER(forward);
std::vector<std::vector<float>> detectedEmbeddings = ForwardUnlocked(input, bBox);
FR_END_TIMER(forward, "ForwardUnlocked (GPU embeddings)");
// Search for matches (Fix #5: call unlocked — we already hold _mutex)
FR_START_TIMER(search);
std::vector<std::string> names;
std::vector<float> sims;
std::tie(names, sims) = SearchForFacesUnlocked(detectedEmbeddings);
FR_END_TIMER(search, "SearchForFacesUnlocked (FAISS)");
if (names.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"No face is match", __FILE__, __LINE__);
return resultObjects;
}
// Pre-reserve result space
const size_t resultCount = std::min(names.size(), bBox.size());
resultObjects.reserve(resultCount);
// Build result objects
for (size_t i = 0; i < resultCount; ++i) {
FaceResultObject resultObject;
// Determine if face is known or unknown
// sims[i] is a distance: values ABOVE the threshold mean "no match".
const bool isUnknown = (sims[i] > m_knownPersonThresh);
if (isUnknown) {
resultObject.isUnknown = true;
resultObject.userId = "0";
resultObject.userName = "Unknown";
resultObject.confidence = 1.0f;
}
else {
resultObject.isUnknown = false;
resultObject.userId = names[i];
// Safe map lookup
auto it = userDict.find(names[i]);
resultObject.userName = (it != userDict.end()) ? it->second : names[i];
// Map distance in [0, 2] to confidence in [0, 1] (0 distance -> 1.0).
resultObject.confidence = std::clamp((2.0f - sims[i]) / 2.0f, 0.0f, 1.0f);
}
resultObject.similarity = sims[i];
// Clamp bounding box to image boundaries
const float x = MAX(0.0f, bBox[i].box.x);
const float y = MAX(0.0f, bBox[i].box.y);
const float w = MIN(bBox[i].box.width, static_cast<float>(input.cols) - x);
const float h = MIN(bBox[i].box.height, static_cast<float>(input.rows) - y);
resultObject.box.x = x;
resultObject.box.y = y;
resultObject.box.width = w;
resultObject.box.height = h;
// Copy additional data
resultObject.mask = bBox[i].mask;
resultObject.cameraId = bBox[i].cameraId;
resultObject.trackId = bBox[i].trackId;
resultObject.polygon = bBox[i].polygon;
resultObject.kps = bBox[i].kps;
resultObject.extraInformation = bBox[i].extraInfo;
resultObjects.push_back(std::move(resultObject));
}
return resultObjects;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Match", e.what(), __FILE__, __LINE__);
return resultObjects;
}
}
cv::Mat ANSFaceRecognizer::GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) {
    // Crops a single face from `input` at the engine's native input size:
    // TensorRT (NVIDIA) uses the GPU face dimensions, every other back-end
    // the CPU dimensions. Returns an empty Mat when cropping fails.
    try {
        const bool useGpuSize = (engineType == EngineType::NVIDIA_GPU);
        const int cropWidth = useGpuSize ? GPU_FACE_WIDTH : CPU_FACE_WIDTH;
        const int cropHeight = useGpuSize ? GPU_FACE_HEIGHT : CPU_FACE_HEIGHT;
        // The helper operates on a batch; wrap the single box in a vector.
        std::vector<ANSCENTER::Object> boxes;
        boxes.reserve(1);
        boxes.push_back(bBox);
        std::vector<CroppedFace> faces;
        ANSFRHelper::GetCroppedFaces(input, boxes, cropWidth, cropHeight, faces);
        return faces.empty() ? cv::Mat() : faces.front().faceMat;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::GetCropFace", e.what(), __FILE__, __LINE__);
        return cv::Mat();
    }
}
bool ANSFaceRecognizer::LoadEngine(const std::string& xmlModelPath,bool engineOptimisation) {
    // Acquires (building on demand) the shared TensorRT engine for the given
    // model file. Non-NVIDIA back-ends have nothing to load here and report
    // success. Returns false when the model file is missing or the engine
    // cannot be built/loaded.
    try {
        if (!FileExist(xmlModelPath)) {
            _logger.LogError("ANSFaceRecognizer::LoadEngine",
                "Cannot find the raw XML/ONNX model file.", __FILE__, __LINE__);
            return false;
        }
        if (engineType != EngineType::NVIDIA_GPU) {
            return true;  // nothing to do for non-NVIDIA engines
        }
        if (m_trtEngine) {
            return true;  // engine already acquired earlier
        }
        // Configure build options and fetch the engine from the shared pool
        // (pool key = model path + precision + max batch size).
        m_options.optBatchSize = 8;
        m_options.maxBatchSize = 32;
        m_options.calibrationBatchSize = 8;
        m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
        m_options.engineFileDir = _modelFolder;
        m_options.precision = ANSCENTER::Precision::FP16;
        m_poolKey = { xmlModelPath,
                      static_cast<int>(m_options.precision),
                      m_options.maxBatchSize };
        m_trtEngine = EnginePoolManager<float>::instance().acquire(
            m_poolKey, m_options, xmlModelPath,
            SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
        m_usingSharedPool = (m_trtEngine != nullptr);
        if (!m_trtEngine) {
            _logger.LogError("ANSFaceRecognizer::LoadEngine. Unable to build/load TensorRT engine.",
                xmlModelPath, __FILE__, __LINE__);
            return false;
        }
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::LoadEngine", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Computes one embedding per face crop in `outputBbox`, dispatching to the
// active back-end:
//   - NVIDIA_GPU: batched TensorRT via RunArcFaceBatch, preferring
//     GPU-resident masks (obj.gpuMask) to avoid re-uploading.
//   - USE_ONNX_ENGINE: ONNX Runtime, batch or per-face depending on
//     USE_CPU_BATCH_MODE.
//   - otherwise: OpenVINO batched Compute().
// Caller must hold _mutex (Fix #5). Objects with empty masks are SKIPPED,
// so the returned vector can be shorter than `outputBbox` — see the
// alignment note this implies for index-based consumers.
// Returns an empty vector on empty input or exception.
std::vector<std::vector<float>> ANSFaceRecognizer::ForwardUnlocked(const cv::Mat& input, const std::vector<ANSCENTER::Object>& outputBbox)
{
// Fix #5: Caller must hold _mutex — no lock acquisition here
std::vector<std::vector<float>> detectedEmbeddings;
if (input.empty() || outputBbox.empty()) {
return detectedEmbeddings;
}
try {
detectedEmbeddings.reserve(outputBbox.size());
if (engineType == EngineType::NVIDIA_GPU) {
// Collect face ROIs — use GPU-resident masks when available (NV12 path),
// fall back to CPU masks (standard path). This avoids re-uploading faces
// that are already on GPU from the NV12 affine warp kernel.
std::vector<cv::Mat> cpuFaceROIs;
std::vector<cv::cuda::GpuMat> gpuFaceROIs;
cpuFaceROIs.reserve(outputBbox.size());
gpuFaceROIs.reserve(outputBbox.size());
for (const auto& obj : outputBbox) {
if (!obj.gpuMask.empty()) {
gpuFaceROIs.push_back(obj.gpuMask);
cpuFaceROIs.push_back(cv::Mat()); // placeholder to keep indices aligned
} else if (!obj.mask.empty()) {
gpuFaceROIs.push_back(cv::cuda::GpuMat()); // placeholder
cpuFaceROIs.push_back(obj.mask);
}
}
if (!cpuFaceROIs.empty()) {
detectedEmbeddings = RunArcFaceBatch(cpuFaceROIs, gpuFaceROIs);
}
}
else {
#ifdef USE_ONNX_ENGINE
#ifdef USE_CPU_BATCH_MODE
detectedEmbeddings.clear();
if (!outputBbox.empty()) {
// Pre-count valid faces to avoid reallocation
size_t valid_count = 0;
for (const auto& obj : outputBbox) {
if (!obj.mask.empty()) valid_count++;
}
if (valid_count == 0) return detectedEmbeddings;
std::vector<cv::Mat> face_rois;
face_rois.reserve(valid_count);
// Prepare batch (only valid faces)
for (const auto& obj : outputBbox) {
if (obj.mask.empty()) continue;
// Resize to the network input size only when necessary.
if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
face_rois.emplace_back();
cv::resize(obj.mask, face_rois.back(), cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
}
else {
face_rois.push_back(obj.mask);
}
}
// Batch inference
std::vector<types::FaceContent> face_contents;
faceRecognizer->detectBatch(face_rois, face_contents);
// Extract embeddings
detectedEmbeddings.reserve(face_contents.size());
for (auto& content : face_contents) {
if (!content.embedding.empty()) {
detectedEmbeddings.push_back(std::move(content.embedding));
}
}
}
#else
// Using single detection in stead of batch (less efficient)
detectedEmbeddings.clear();
detectedEmbeddings.reserve(outputBbox.size());
if (!outputBbox.empty()) {
// Pre-allocate frame buffer with target size
cv::Mat frame(CPU_FACE_HEIGHT, CPU_FACE_WIDTH, CV_8UC3);
types::FaceContent face_content;
for (const auto& obj : outputBbox) {
if (obj.mask.empty()) continue;
// Resize into pre-allocated buffer
if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
}
else {
obj.mask.copyTo(frame); // Copy into buffer
}
// Detect
faceRecognizer->detect(frame, face_content);
// Move embedding
if (!face_content.embedding.empty()) {
detectedEmbeddings.push_back(std::move(face_content.embedding));
face_content.embedding.clear();
}
}
}
#endif
#else
// OpenVINO path - Optimized version
std::vector<cv::Mat> face_rois;
face_rois.reserve(outputBbox.size());
cv::Mat resized_buffer;
for (const auto& obj : outputBbox) {
if (obj.mask.empty()) continue;
if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
// clone() is required: resized_buffer is reused on the next iteration
// and cv::Mat copies share pixel storage.
cv::resize(obj.mask, resized_buffer, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
face_rois.push_back(resized_buffer.clone());
}
else {
face_rois.push_back(obj.mask);
}
}
if (!face_rois.empty()) {
std::vector<cv::Mat> embeddings;
embeddings.reserve(face_rois.size());
faceRecognizer->Compute(face_rois, &embeddings);
detectedEmbeddings.reserve(embeddings.size());
// Process embeddings efficiently
for (auto& embMat : embeddings) {
if (embMat.empty()) continue;
// Direct emplace_back construction
if (embMat.isContinuous()) {
const float* dataPtr = embMat.ptr<float>(0);
detectedEmbeddings.emplace_back(dataPtr, dataPtr + embMat.total());
}
else {
detectedEmbeddings.emplace_back(embMat.begin<float>(), embMat.end<float>());
}
}
}
#endif
}
return detectedEmbeddings;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::ForwardUnlocked", e.what(), __FILE__, __LINE__);
return std::vector<std::vector<float>>();
}
}
ANSFaceRecognizer::~ANSFaceRecognizer() noexcept {
    // A destructor must never let an exception escape (that would call
    // std::terminate), so any failure from Destroy() is logged and swallowed.
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        _logger.LogError("ANSFaceRecognizer::~ANSFaceRecognizer",
            e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Non-std exceptions are also absorbed here.
        _logger.LogError("ANSFaceRecognizer::~ANSFaceRecognizer",
            "Unknown exception during destruction", __FILE__, __LINE__);
    }
}
void ANSFaceRecognizer::L2NormalizeInPlace(std::vector<float>& vec) {
    // Scales `vec` so its Euclidean (L2) norm becomes 1. A zero vector is
    // left untouched, which avoids a division by zero. Works entirely in
    // place — no temporary allocations (Fix #7).
    float sumSq = 0.0f;
#pragma omp simd reduction(+:sumSq)
    for (size_t i = 0; i < vec.size(); ++i) {
        sumSq += vec[i] * vec[i];
    }
    const float norm = std::sqrt(sumSq);
    if (norm > 0.0f) {
        // Multiply by the reciprocal once rather than dividing per element.
        const float scale = 1.0f / norm;
#pragma omp simd
        for (size_t i = 0; i < vec.size(); ++i) {
            vec[i] *= scale;
        }
    }
}
// Runs single-image ArcFace inference on the TensorRT engine and returns the
// L2-normalized embedding (empty on any failure). NVIDIA-only: refuses to
// run on other engine types. Preprocessing (resize to GPU_FACE_WIDTH x
// GPU_FACE_HEIGHT + BGR->RGB) happens on the CPU before a single upload to
// minimize PCIe traffic. Serializes on _mutex.
std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
std::vector<float> embedding;
// Defense-in-depth: this function uses m_gpuStream / cv::cuda::GpuMat
// upload path, which is only valid on NVIDIA hardware. Callers in
// Feature() and ExtractEmbeddings() already gate on engineType, but
// the method is public — refuse to run on AMD/Intel/CPU so we never
// touch m_gpuStream (lazy-initialized, nullptr on non-NVIDIA) or
// m_gpuRgb.upload() which would activate the CUDA runtime.
if (engineType != EngineType::NVIDIA_GPU) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"RunArcFace is NVIDIA-only; called on engineType="
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
return embedding;
}
// Early validation before locking
if (inputImage.empty()) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"Input image is empty", __FILE__, __LINE__);
return embedding;
}
if (inputImage.cols < 10 || inputImage.rows < 10) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"Input image is too small", __FILE__, __LINE__);
return embedding;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"Model is not initialized", __FILE__, __LINE__);
return embedding;
}
if (!m_gpuStream || !m_trtEngine) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"GPU stream or TRT engine not available (engineType="
+ std::to_string(static_cast<int>(engineType)) + ")", __FILE__, __LINE__);
return embedding;
}
try {
// CPU preprocessing: resize + BGR→RGB before GPU upload
// Reduces PCIe transfer and eliminates GPU cvtColor/resize overhead
cv::Mat srcImg = inputImage;
// Grayscale inputs are expanded to 3 channels first. cvtColor allocates
// a new output buffer, so the caller's image is never modified.
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
cv::Mat cpuResized;
if (srcImg.cols != GPU_FACE_WIDTH || srcImg.rows != GPU_FACE_HEIGHT) {
cv::resize(srcImg, cpuResized, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT), 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
// Asynchronous upload on our stream, then block until it lands so the
// GpuMat is valid before inference reads it.
m_gpuRgb.upload(cpuRGB, *m_gpuStream);
m_gpuStream->waitForCompletion();
// Prepare inference inputs
std::vector<cv::cuda::GpuMat> inputVec;
inputVec.emplace_back(m_gpuRgb);
std::vector<std::vector<cv::cuda::GpuMat>> inputs;
inputs.emplace_back(std::move(inputVec));
// Run inference
std::vector<std::vector<std::vector<float>>> featureVectors;
bool succ = m_trtEngine->runInference(inputs, featureVectors);
// Synchronize stream
m_gpuStream->waitForCompletion();
if (!succ) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"Failed to run inference.", __FILE__, __LINE__);
return embedding;
}
if (featureVectors.empty() || featureVectors[0].empty()) {
_logger.LogError("ANSFaceRecognizer::RunArcFace",
"Feature vectors are empty.", __FILE__, __LINE__);
return embedding;
}
// featureVectors[batch][output][element]: take batch 0, output 0.
embedding = std::move(featureVectors[0][0]);
cv::normalize(embedding, embedding); // l2 normalize
return embedding;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::RunArcFace", e.what(), __FILE__, __LINE__);
return embedding;
}
}
// Batched ArcFace inference on the TensorRT engine. For each index i, uses
// gpuFaceROIs[i] when non-empty (already-on-GPU face from the NV12 warp
// path, preprocessed with cv::cuda), otherwise faceROIs[i] (CPU resize +
// BGR->RGB, then upload). Faces are processed in chunks of the engine's
// actual max batch size; each returned embedding is L2-normalized.
// NVIDIA-only (refuses other engine types). Returns embeddings in input
// order; on a failed chunk the remaining chunks are still attempted.
//
// NOTE(review): entries where BOTH ROIs are empty are skipped, so the output
// can be shorter than the input and index alignment with the caller's boxes
// is lost in that case — confirm callers never pass fully-empty entries.
std::vector<std::vector<float>> ANSFaceRecognizer::RunArcFaceBatch(
const std::vector<cv::Mat>& faceROIs,
const std::vector<cv::cuda::GpuMat>& gpuFaceROIs)
{
std::vector<std::vector<float>> embeddings;
// Defense-in-depth: TensorRT + cv::cuda::GpuMat batch path is NVIDIA-only.
// Callers in ExtractEmbeddings() already gate on engineType, but this is a
// public method — refuse to run on AMD/Intel/CPU so we never touch the
// TRT engine or cv::cuda primitives on non-NVIDIA hardware.
if (engineType != EngineType::NVIDIA_GPU) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"RunArcFaceBatch is NVIDIA-only; called on engineType="
+ std::to_string(static_cast<int>(engineType)), __FILE__, __LINE__);
return embeddings;
}
try {
// Early validation checks
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"Model is not initialized", __FILE__, __LINE__);
return embeddings;
}
if (!m_trtEngine) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"TensorRT engine not initialized", __FILE__, __LINE__);
return embeddings;
}
if (!m_gpuStream) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"GPU stream not initialized", __FILE__, __LINE__);
return embeddings;
}
if (faceROIs.empty()) {
return embeddings;
}
// Sanity-check the engine's input tensor description before batching.
const auto& inputDims = m_trtEngine->getInputDims();
if (inputDims.empty() || inputDims[0].nbDims < 3) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"Invalid engine input dims", __FILE__, __LINE__);
return embeddings;
}
// Use actual engine batch limit (profile max), not the configured maxBatchSize
// which may be larger than what the engine was built with.
const size_t maxBatch = static_cast<size_t>(
m_trtEngine->getOptions().maxBatchSize > 0
? m_trtEngine->getOptions().maxBatchSize
: m_options.maxBatchSize);
embeddings.reserve(faceROIs.size());
const cv::Size targetSize(GPU_FACE_WIDTH, GPU_FACE_HEIGHT);
// Process in chunks of maxBatchSize to avoid exceeding engine limits
for (size_t chunkStart = 0; chunkStart < faceROIs.size(); chunkStart += maxBatch) {
const size_t chunkEnd = std::min(chunkStart + maxBatch, faceROIs.size());
FR_START_TIMER(gpu_preproc);
std::vector<cv::cuda::GpuMat> batchGpu;
batchGpu.reserve(chunkEnd - chunkStart);
for (size_t i = chunkStart; i < chunkEnd; i++) {
// Use GPU-resident face if available (NV12 affine warp path),
// otherwise do CPU resize + BGR→RGB before upload
if (i < gpuFaceROIs.size() && !gpuFaceROIs[i].empty()) {
cv::cuda::GpuMat d_img = gpuFaceROIs[i]; // already on GPU
if (d_img.cols != GPU_FACE_WIDTH || d_img.rows != GPU_FACE_HEIGHT) {
cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, *m_gpuStream);
d_img = d_resized;
}
cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, *m_gpuStream);
batchGpu.emplace_back(std::move(d_rgb));
} else {
const auto& roi = faceROIs[i];
if (roi.empty()) continue;
// CPU preprocessing: resize + BGR→RGB before upload
cv::Mat srcImg = roi;
// Expand grayscale to 3 channels; cvtColor allocates a fresh
// buffer, so the caller's ROI is untouched.
if (srcImg.channels() == 1) {
cv::cvtColor(srcImg, srcImg, cv::COLOR_GRAY2BGR);
}
cv::Mat cpuResized;
if (srcImg.cols != GPU_FACE_WIDTH || srcImg.rows != GPU_FACE_HEIGHT) {
cv::resize(srcImg, cpuResized, targetSize, 0, 0, cv::INTER_LINEAR);
} else {
cpuResized = srcImg;
}
cv::Mat cpuRGB;
cv::cvtColor(cpuResized, cpuRGB, cv::COLOR_BGR2RGB);
cv::cuda::GpuMat d_rgb;
d_rgb.upload(cpuRGB, *m_gpuStream);
batchGpu.emplace_back(std::move(d_rgb));
}
}
FR_END_TIMER(gpu_preproc, "RunArcFaceBatch GPU preprocess (" + std::to_string(batchGpu.size()) + " faces)");
if (batchGpu.empty()) continue;
const size_t actualCount = batchGpu.size();
std::vector<std::vector<cv::cuda::GpuMat>> inputs;
inputs.emplace_back(std::move(batchGpu));
FR_START_TIMER(trt_infer);
std::vector<std::vector<std::vector<float>>> featureVectors;
bool succ = m_trtEngine->runInference(inputs, featureVectors);
// Block until async preprocessing/inference on our stream has finished
// before reading featureVectors.
m_gpuStream->waitForCompletion();
FR_END_TIMER(trt_infer, "RunArcFaceBatch TRT inference (batch=" + std::to_string(actualCount) + ")");
if (!succ) {
_logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
"runInference failed for chunk starting at " + std::to_string(chunkStart),
__FILE__, __LINE__);
continue; // Try remaining chunks instead of aborting
}
if (featureVectors.empty()) continue;
// featureVectors[j][0] is the embedding for the j-th face of this chunk.
for (size_t j = 0; j < featureVectors.size(); j++) {
if (!featureVectors[j].empty() && !featureVectors[j][0].empty()) {
auto emb = std::move(featureVectors[j][0]);
cv::normalize(emb, emb);
embeddings.emplace_back(std::move(emb));
}
}
}
return embeddings;
}
catch (const cv::Exception& e) {
_logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
"OpenCV error: " + std::string(e.what()), __FILE__, __LINE__);
return embeddings;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
e.what(), __FILE__, __LINE__);
return embeddings;
}
}
void ANSFaceRecognizer::Init() {
    // Builds a fresh FAISS inner-product index — GPU-backed on NVIDIA, CPU
    // otherwise — wrapped in an IndexIDMap so each embedding carries one of
    // our own face IDs. Clears the faceId -> userId mapping and sets
    // _isInitialized to reflect success/failure.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _faceIdToUserId.clear();
        _nextFaceId = 0;
        // Drop any previous index before creating the replacement.
        if (faiss_index) {
            faiss_index->reset();
            faiss_index.reset();
        }
        if (engineType == EngineType::NVIDIA_GPU) {
            // GPU resources are shared across index rebuilds; create lazily.
            if (!m_gpuResources) {
                m_gpuResources = std::make_shared<faiss::gpu::StandardGpuResources>();
            }
            faiss_index = std::make_shared<faiss::IndexIDMap>(
                new faiss::gpu::GpuIndexFlatIP(m_gpuResources.get(), FACE_EMBEDDING_SIZE));
        }
        else {
            faiss_index = std::make_shared<faiss::IndexIDMap>(
                new faiss::IndexFlatIP(FACE_EMBEDDING_SIZE));
        }
        // own_fields = true: the IndexIDMap takes ownership of (and will
        // delete) the raw inner index allocated above.
        faiss_index->own_fields = true;
        _isInitialized = true;
    }
    catch (const std::exception& e) {
        _isInitialized = false;
        _logger.LogFatal("ANSFaceRecognizer::Init",
            std::string(e.what()), __FILE__, __LINE__);
    }
    catch (...) {
        _isInitialized = false;
        _logger.LogFatal("ANSFaceRecognizer::Init",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
}
bool ANSFaceRecognizer::Destroy() {
    // Tears down all recognition resources in dependency order:
    // TRT engine -> FAISS index -> FAISS GPU resources -> recognizer -> maps.
    // Returns false (after logging) if a std::exception escapes teardown.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // Block new operations before anything is released.
        _isInitialized = false;
        // Return the pooled TensorRT engine, or drop a privately-held one.
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_trtEngine.reset();
            m_usingSharedPool = false;
        }
        else if (m_trtEngine) {
            m_trtEngine.reset();
        }
        // The FAISS index may live on the GPU, so it must be destroyed
        // before the GPU resources that back it.
        if (faiss_index) {
            faiss_index->reset();  // clear index data
            faiss_index.reset();   // destroy the object (frees GPU memory)
        }
        if (m_gpuResources) {
            m_gpuResources.reset();
        }
        if (faceRecognizer) {
            faceRecognizer.reset();
        }
        // Mapping cleanup last — cannot fail.
        _faceIdToUserId.clear();
        _nextFaceId = 0;
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
        _isInitialized = false;  // stay uninitialized even on failure
        return false;
    }
}
// Registers one face embedding under `className` (the user id). The raw
// array MUST point at FACE_EMBEDDING_SIZE floats. A normalized copy is
// stored so FAISS L2^2/2 distances map onto 1 - cosine (see Fix #1 and
// SearchForFacesUnlocked). IDs are assigned sequentially from _nextFaceId
// (backward-compat initial-load path).
// Robustness fix: a null `embedding` pointer was previously dereferenced;
// it is now rejected with a log entry.
void ANSFaceRecognizer::AddEmbedding(const std::string& className,float embedding[]) {
    if (embedding == nullptr) {
        _logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
            "Embedding pointer is null.", __FILE__, __LINE__);
        return;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!faiss_index) {
            _logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
                "Search_index is not initialized.", __FILE__, __LINE__);
            return;
        }
        // Fix #1: L2-normalize before storing so L2²/2 = 1-cos(θ)
        std::vector<float> normEmb(embedding, embedding + FACE_EMBEDDING_SIZE);
        L2NormalizeInPlace(normEmb);
        // Use sequential ID for backward-compat path (initial load)
        faiss::idx_t id = _nextFaceId++;
        _faceIdToUserId[id] = className;
        faiss_index->add_with_ids(1, normEmb.data(), &id);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
    }
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className,const std::vector<float>& embedding) {
// Early validation before locking (non-shared state)
if (embedding.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
// Fix #1: L2-normalize before storing so L2²/2 = 1-cos(θ)
std::vector<float> normEmb = embedding;
L2NormalizeInPlace(normEmb);
faiss::idx_t id = _nextFaceId++;
_faceIdToUserId[id] = className;
faiss_index->add_with_ids(1, normEmb.data(), &id);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
/// @brief Atomically replaces the FAISS index and the id→user mapping
/// (e.g. after a background rebuild), then re-derives the next free face id.
/// @param newIndex           replacement index (shared ownership transferred in).
/// @param newFaceIdToUserId  replacement mapping, moved in.
void ANSFaceRecognizer::SwapIndex(
    std::shared_ptr<faiss::IndexIDMap> newIndex,
    std::unordered_map<faiss::idx_t, std::string>&& newFaceIdToUserId)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        faiss_index = std::move(newIndex);
        _faceIdToUserId = std::move(newFaceIdToUserId);
        // Fix: derive the next free id from the maximum existing key, not
        // from map.size(). Ids in the incoming map need not be contiguous
        // (gaps appear after removals or partial rebuilds); using size()
        // could reissue an id that is still present, silently overwriting
        // a mapping on the next AddEmbedding.
        faiss::idx_t maxId = -1;
        for (const auto& entry : _faceIdToUserId) {
            if (entry.first > maxId) maxId = entry.first;
        }
        _nextFaceId = maxId + 1;
        _logger.LogDebug("ANSFaceRecognizer::SwapIndex",
            "Index swapped successfully. Total embeddings: " +
            std::to_string(faiss_index ? faiss_index->ntotal : 0),
            __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::SwapIndex",
            e.what(), __FILE__, __LINE__);
    }
}
/// @brief Batched k-NN search mapping each query embedding to the best
/// matching user id and a distance-style score (0 = identical, 2 = no match).
///
/// Precondition: the caller holds _mutex (hence "Unlocked" — no lock is
/// taken here). Queries are assumed already L2-normalized (done by
/// RunArcFaceBatch via cv::normalize); stored vectors are normalized at
/// insertion, so for unit vectors 1 - dot(a,b) = 1 - cos(θ) = L2²(a,b)/2.
///
/// @param detectedEmbeddings one embedding per detected face; all must share
///                           the same dimensionality.
/// @return (userIds, scores) — both sized to detectedEmbeddings.size();
///         unmatched queries carry userId "0" and score 2.0f.
std::tuple<std::vector<std::string>, std::vector<float>>
    ANSFaceRecognizer::SearchForFacesUnlocked(const std::vector<std::vector<float>>& detectedEmbeddings)
{
    std::vector<std::string> detectedUsers;
    std::vector<float> simValues;
    try {
        const size_t nq = detectedEmbeddings.size();
        // Empty query batch or empty/uninitialized index: everything unknown.
        if (nq == 0 || _faceIdToUserId.empty() || !faiss_index || faiss_index->ntotal == 0) {
            detectedUsers.assign(nq, "0");
            simValues.assign(nq, 2.0f);
            return std::make_tuple(detectedUsers, simValues);
        }
        const int k = 10;
        const int d = static_cast<int>(detectedEmbeddings[0].size());
        FR_START_TIMER(search_detail);
        // Build a contiguous (nq × d) query matrix — embeddings are already
        // L2-normalized upstream, so a plain copy suffices.
        FR_START_TIMER(build_matrix);
        std::vector<float> queryMatrix(nq * d);
        for (size_t i = 0; i < nq; i++) {
            std::copy(detectedEmbeddings[i].begin(), detectedEmbeddings[i].end(),
                queryMatrix.begin() + i * d);
        }
        FR_END_TIMER(build_matrix, "Build query matrix (nq=" + std::to_string(nq) + ")");
        // Single batched FAISS search — uses BLAS/MKL for the matrix multiply
        // when nq > 1, dramatically faster than nq individual searches.
        FR_START_TIMER(batch_search);
        std::vector<float> allDistances(nq * k);
        std::vector<faiss::idx_t> allIndices(nq * k);
        faiss_index->search(static_cast<faiss::idx_t>(nq), queryMatrix.data(), k,
            allDistances.data(), allIndices.data());
        FR_END_TIMER(batch_search, "FAISS batch search (nq=" + std::to_string(nq) + ")");
        // For each query, pick the neighbor (with a known user mapping)
        // that yields the smallest 1 - dot score.
        detectedUsers.reserve(nq);
        simValues.reserve(nq);
        for (size_t q = 0; q < nq; q++) {
            float bestSim = 2.0f;           // worst possible: opposite unit vectors
            std::string bestUser = "0";     // "0" = unknown
            const faiss::idx_t* qIndices = allIndices.data() + q * k;
            const float* qDistances = allDistances.data() + q * k;
            for (int i = 0; i < k; ++i) {
                faiss::idx_t id = qIndices[i];
                if (id < 0) continue;                       // FAISS pads with -1 when < k hits
                auto it = _faceIdToUserId.find(id);
                if (it == _faceIdToUserId.end()) continue;  // stale id, skip
                // IP index returns dot(a,b); for unit vectors 1 - dot equals
                // 1 - cos(θ), the same quantity as L2²/2.
                float similarity = 1.0f - qDistances[i];
                if (similarity < bestSim) {
                    bestSim = similarity;
                    bestUser = it->second;
                }
            }
            detectedUsers.push_back(std::move(bestUser));
            simValues.push_back(bestSim);
        }
        FR_END_TIMER(search_detail, "SearchForFacesUnlocked total");
        return std::make_tuple(detectedUsers, simValues);
    }
    catch (const std::exception& e) {
        // Fix: honor the per-embedding contract on failure. The previous
        // code returned size-1 vectors ("0000") regardless of query count,
        // which broke callers that zip results with their bbox list. Size
        // the results to the query count with the standard unknown marker.
        detectedUsers.assign(detectedEmbeddings.size(), "0");
        simValues.assign(detectedEmbeddings.size(), 2.0f);
        _logger.LogFatal("ANSFaceRecognizer::SearchForFacesUnlocked",
            e.what(), __FILE__, __LINE__);
        return std::make_tuple(detectedUsers, simValues);
    }
}
//std::tuple<std::vector<std::string>, std::vector<float>>
// ANSFaceRecognizer::SearchForFaces(
// const std::vector<std::vector<float>>& detectedEmbeddings)
//{
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// if (!classNames.empty() && faiss_index && faiss_index->ntotal > 0) {
// const int k = 10;
// for (const auto& embedding : detectedEmbeddings) {
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// faiss_index->search(1, embedding.data(), k,
// distances.data(), indices.data());
// float bestSim = 2.0f;
// std::string bestUser = "0";
// for (int i = 0; i < k; ++i) {
// faiss::idx_t id = indices[i];
// if (id < 0 || id >= static_cast<faiss::idx_t>(classNames.size())) {
// continue;
// }
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(id, matchEmbedding.data());
// float cosine = CosineSimilarity(embedding, matchEmbedding, true);
// float similarity = 1.0f - cosine; // [0,2], 0 best
// if (similarity < bestSim) {
// bestSim = similarity;
// bestUser = classNames.at(id);
// }
// }
// detectedUsers.push_back(bestUser);
// simValues.push_back(bestSim);
// }
// }
// else {
// detectedUsers.assign(detectedEmbeddings.size(), "0");
// simValues.assign(detectedEmbeddings.size(), 2.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 2.0f);
// _logger.LogFatal("ANSFaceRecognizer::SearchForFaces",
// e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
}
// Old method
//std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(std::vector<std::vector<float>> detectedEmbeddings) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// // Check if there are class names available
// if (!classNames.empty()) {
// const int k = 3; // Number of nearest neighbors to retrieve
// // Process each detected embedding
// for (const auto& embedding : detectedEmbeddings) {
// // Prepare vectors to hold search results
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// // Perform the search for the k nearest neighbors
// faiss_index->search(1, embedding.data(), k, distances.data(), indices.data());
// // Find the index with the minimum distance (i.e., closest neighbor)
// auto min_it = std::min_element(distances.begin(), distances.end());
// int best_index = std::distance(distances.begin(), min_it);
// // Map the index to the corresponding class name and calculate similarity
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(indices[best_index], matchEmbedding.data());
// float similarity = 1 - CosineSimilarity(embedding, matchEmbedding, true);
// detectedUsers.push_back(classNames.at(indices[best_index]));
// simValues.push_back(std::abs(similarity));
// }
// }
// else {
// // If no class names are available, mark all users as "unknown"
// detectedUsers.assign(detectedEmbeddings.size(), "0000");
// simValues.assign(detectedEmbeddings.size(), 1.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// // Log the error and return default values for the failed search
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 1.0f);
// this->_logger.LogFatal("ANSFaceRecognizer::SearchForFaces", e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
//std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(const std::vector<std::vector<float>>& detectedEmbeddings)
//{
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// if (!classNames.empty() && faiss_index && faiss_index->ntotal > 0) {
// const int k = 3;
// for (const auto& embedding : detectedEmbeddings) {
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// faiss_index->search(1, embedding.data(), k, distances.data(), indices.data());
// // Find best (smallest distance) match
// auto min_it = std::min_element(distances.begin(), distances.end());
// int best_index = std::distance(distances.begin(), min_it);
// faiss::idx_t best_id = indices[best_index];
// if (best_id < 0 || best_id >= static_cast<faiss::idx_t>(classNames.size())) {
// detectedUsers.push_back("0000");
// simValues.push_back(1.0f);
// continue;
// }
// // Reconstruct matched embedding
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(best_id, matchEmbedding.data());
// float similarity = CosineSimilarity(embedding, matchEmbedding, true);
// detectedUsers.push_back(classNames.at(best_id));
// simValues.push_back(similarity);
// }
// }
// else {
// detectedUsers.assign(detectedEmbeddings.size(), "0000");
// simValues.assign(detectedEmbeddings.size(), 1.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 1.0f);
// this->_logger.LogFatal("ANSFaceRecognizer::SearchForFaces", e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
//space = new hnswlib::InnerProductSpace(FACE_EMBEDDING_SIZE); // it should be in init
//void ANSFaceRecognizer::Init()
//{
// try {
// classNames.clear();
// int maxElements = 100000; // Maximum number of elements, should be known beforehand
// int M = 16; // Tightly connected with internal dimensionality of the data
// int efConstruction = 200; // Controls index search speed/build speed tradeoff
// this->alg_hnsw = std::make_unique<hnswlib::HierarchicalNSW<float>>(space, maxElements, M, efConstruction);
// }
// catch (std::exception& e) {
// this->_logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
// }
//}
/*
* void ANSFaceRecognizer::Init() {
try {
// Clear class names and reset resources
classNames.clear();
// Define parameters
constexpr int maxElements = 100000; // Maximum number of elements
constexpr int M = 16; // Internal dimensionality of the data
constexpr int efConstruction = 200; // Controls index search/build tradeoff
// Release and reinitialize the space
if (space) {
delete space;
space = nullptr;
}
space = new hnswlib::InnerProductSpace(FACE_EMBEDDING_SIZE);
// Reinitialize the HNSW algorithm
alg_hnsw = std::make_unique<hnswlib::HierarchicalNSW<float>>(space, maxElements, M, efConstruction);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
}
catch (...) {
_logger.LogFatal("ANSFaceRecognizer::Init", "Unknown exception occurred", __FILE__, __LINE__);
}
}
bool ANSFaceRecognizer::Destroy() {
try {
if (space) {
delete space;
space = nullptr;
}
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
return false;
}
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className, float embedding[]) {
try {
classNames.push_back(className);
int index = classNames.size() - 1;
int outputDim = 512;
std::vector<float> vec(embedding, embedding + outputDim);
std::vector<float> normalizedVector = NormalizeVector(vec);
alg_hnsw->addPoint(normalizedVector.data(), (size_t)index);
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className, const std::vector<float>& embedding) {
try {
classNames.push_back(className);
int index = classNames.size() - 1;
std::vector<float> normalizedVector = NormalizeVector(embedding);
alg_hnsw->addPoint(normalizedVector.data(), index);
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(std::vector<std::vector<float>> detectedEmbeddings) {
std::vector<std::string> detectedUsers;
std::vector<float> distanceValues;
try {
if (classNames.size() > 0) {
detectedUsers.clear();
for (int i = 0; i < detectedEmbeddings.size(); i++) {
NormalizeVector(detectedEmbeddings[i]);
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(detectedEmbeddings[i].data(), 1);
hnswlib::labeltype label = result.top().second;
if (label >= 0) {
detectedUsers.push_back(classNames.at(label));
distanceValues.push_back(result.top().first);
}
}
return std::make_tuple(detectedUsers, distanceValues);
}
else {
detectedUsers.clear();
for (int i = 0; i < detectedEmbeddings.size(); i++) {
detectedUsers.push_back("0000");
distanceValues.push_back(1);
}
return std::make_tuple(detectedUsers, distanceValues);
}
}
catch (std::exception& e) {
detectedUsers.push_back("0000");
distanceValues.push_back(1);
this->_logger.LogFatal("ANSArcFace100::SearchForFaces", e.what(), __FILE__, __LINE__);
return std::make_tuple(detectedUsers, distanceValues);
}
}
*/
// Private methods, can be replacable
/* std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<std::vector<float>> detectedEmbeddings;
if (input.empty()) {
this->_logger.LogError("ANSFaceRecognizer::Forward", "Input image is empty", __FILE__, __LINE__);
return detectedEmbeddings;
}
try {
if (engineType == EngineType::NVIDIA_GPU) {
detectedEmbeddings.clear();
if (outputBbox.size() > 0) {
for (int i = 0; i < outputBbox.size(); i++) {
cv::Mat faceROI = outputBbox[i].mask.clone();
if (faceROI.empty()) continue;
if ((faceROI.cols != GPU_FACE_WIDTH) && (faceROI.rows != GPU_FACE_HEIGHT))
{
cv::resize(faceROI, faceROI, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
}
std::vector<float> embedding = RunArcFace(faceROI);
detectedEmbeddings.push_back(embedding);
faceROI.release();
}
}
}
else {
std::vector<cv::Mat> embeddings;
std::vector<cv::Mat> face_rois;
detectedEmbeddings.clear();
if (outputBbox.size() > 0) {
for (int i = 0; i < outputBbox.size(); i++) {
std::vector<float> embeddingRs;
cv::Mat frame = outputBbox[i].mask.clone();
if (frame.empty()) continue;
if ((frame.cols != CPU_FACE_WIDTH) && (frame.rows != CPU_FACE_HEIGHT))
{
cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
}
#ifdef USE_ONNX_ENGINE
types::FaceContent face_content;
faceRecognizer->detect(frame, face_content);
embeddingRs = face_content.embedding;
#else
face_rois.clear();
embeddings.clear();
face_rois.push_back(frame);
faceRecognizer->Compute(face_rois, &embeddings);
embeddingRs.assign(embeddings[0].begin<float>(), embeddings[0].end<float>());
#endif
detectedEmbeddings.push_back(embeddingRs);
frame.release();
}
}
}
return detectedEmbeddings;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
return detectedEmbeddings;
}
} */
// Before refactor (working version)
/*std::vector<float> ANSFaceRecognizer::Feature(const cv::Mat& image,
ANSCENTER::Object bBox) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<float> embeddingResult;
if (image.empty()) {
_logger.LogError("ANSFaceRecognizer::Feature",
"Input image is empty", __FILE__, __LINE__);
return embeddingResult;
}
try {
if (engineType == EngineType::NVIDIA_GPU) {
embeddingResult = RunArcFace(bBox.mask);
return embeddingResult;
}
else {
if (image.cols < 10 || image.rows < 10) return embeddingResult;
cv::Mat frame = bBox.mask.clone();
#ifdef USE_ONNX_ENGINE
types::FaceContent face_content;
faceRecognizer->detect(frame, face_content);
embeddingResult = face_content.embedding;
#else
cv::Mat embedding;
faceRecognizer->Compute(frame, &embedding);
embeddingResult.assign(embedding.begin<float>(), embedding.end<float>());
#endif
return embeddingResult;
}
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Feature", e.what(), __FILE__, __LINE__);
embeddingResult.clear();
return embeddingResult;
}
}*/
/*std::vector<FaceResultObject> ANSFaceRecognizer::Match(
const cv::Mat& input,
std::vector<ANSCENTER::Object> bBox,
std::map<std::string, std::string> userDict) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<FaceResultObject> resultObjects;
if (input.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"Input image is empty", __FILE__, __LINE__);
return resultObjects;
}
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::Match",
"Model is not initialized", __FILE__, __LINE__);
return resultObjects;
}
try {
if (input.cols < 10 || input.rows < 10) return resultObjects;
std::vector<std::vector<float>> detectedEmbeddings = Forward(input, bBox);
std::vector<std::string> names;
std::vector<float> sims;
std::tie(names, sims) = SearchForFaces(detectedEmbeddings);
if (names.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"No face is match", __FILE__, __LINE__);
return resultObjects;
}
for (size_t i = 0; i < names.size() && i < bBox.size(); ++i) {
FaceResultObject resultObject;
bool isUnknown = (sims[i] > m_knownPersonThresh);
if (isUnknown) {
resultObject.isUnknown = true;
resultObject.userId = "0";
resultObject.userName = "Unknown";
resultObject.confidence = 1.0f;
}
else {
resultObject.isUnknown = false;
resultObject.userId = names[i];
resultObject.userName = userDict[names[i]];
resultObject.confidence =
std::clamp((2.0f - sims[i]) / 2.0f, 0.0f, 1.0f);
}
resultObject.similarity = sims[i];
float x = bBox[i].box.x;
float y = bBox[i].box.y;
float w = bBox[i].box.width;
float h = bBox[i].box.height;
x = std::max(0.0f, x);
y = std::max(0.0f, y);
w = std::min(w, input.cols - x);
h = std::min(h, input.rows - y);
resultObject.box.x = x;
resultObject.box.y = y;
resultObject.box.width = w;
resultObject.box.height = h;
resultObject.mask = bBox[i].mask;
resultObject.cameraId = bBox[i].cameraId;
resultObject.trackId = bBox[i].trackId;
resultObject.polygon = bBox[i].polygon;
resultObject.kps = bBox[i].kps;
resultObject.extraInformation = bBox[i].extraInfo;
resultObjects.push_back(resultObject);
}
return resultObjects;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Match", e.what(), __FILE__, __LINE__);
return resultObjects;
}
}*/
//cv::Mat ANSFaceRecognizer::GetCropFace(const cv::Mat& input,
// ANSCENTER::Object bBox) {
// try {
// std::vector<ANSCENTER::Object> outputBbox;
// outputBbox.push_back(bBox);
// std::vector<CroppedFace> crFaces;
// crFaces.clear();
// if (engineType == EngineType::NVIDIA_GPU) {
// ANSFRHelper::GetCroppedFaces(input, outputBbox, GPU_FACE_WIDTH, GPU_FACE_HEIGHT, crFaces);
// }
// else {
// ANSFRHelper::GetCroppedFaces(input, outputBbox, CPU_FACE_WIDTH, CPU_FACE_HEIGHT, crFaces);
// }
// if (crFaces.empty()) return cv::Mat();
// return crFaces[0].faceMat;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::GetCropFace", e.what(), __FILE__, __LINE__);
// return cv::Mat();
// }
//}
/*void ANSFaceRecognizer::AddEmbedding(const std::string& className,
float embedding[]) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
std::vector<float> vec(embedding, embedding + FACE_EMBEDDING_SIZE);
if (vec.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
classNames.push_back(className);
faiss_index->add(1, vec.data());
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}*/
/*void ANSFaceRecognizer::AddEmbedding(const std::string& className,
const std::vector<float>& embedding) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
if (embedding.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
classNames.push_back(className);
faiss_index->add(1, embedding.data());
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}*/
//void ANSFaceRecognizer::Init() {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// try {
// classNames.clear();
// if (faiss_index) {
// faiss_index->reset();
// }
// else {
// faiss_index = std::make_shared<faiss::IndexFlatL2>(FACE_EMBEDDING_SIZE);
// }
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
// }
// catch (...) {
// _logger.LogFatal("ANSFaceRecognizer::Init", "Unknown exception occurred", __FILE__, __LINE__);
// }
//}
//std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<float> embedding;
// if (inputImage.empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Input image is empty", __FILE__, __LINE__);
// return embedding;
// }
// if (!_isInitialized) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Model is not initialized", __FILE__, __LINE__);
// return embedding;
// }
// if (inputImage.cols < 10 || inputImage.rows < 10) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Input image is too small", __FILE__, __LINE__);
// return embedding;
// }
// try {
// cv::Mat frame;
// if (inputImage.channels() == 1) {
// cv::cvtColor(inputImage, frame, cv::COLOR_GRAY2BGR);
// }
// else {
// frame = inputImage;
// }
// if (frame.cols != GPU_FACE_WIDTH || frame.rows != GPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
// }
// cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
// cv::cuda::Stream stream;
// cv::cuda::GpuMat img;
// img.upload(frame, stream);
// stream.waitForCompletion();
// std::vector<cv::cuda::GpuMat> inputVec{ std::move(img) };
// std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(inputVec) };
// std::vector<std::vector<std::vector<float>>> featureVectors;
// bool succ = m_trtEngine->runInference(inputs, featureVectors);
// if (!succ) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Failed to run inference.", __FILE__, __LINE__);
// return embedding;
// }
// if (featureVectors.empty() || featureVectors[0].empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Feature vectors are empty.", __FILE__, __LINE__);
// return embedding;
// }
// embedding = std::move(featureVectors[0][0]);
// cv::normalize(embedding, embedding); // l2 normalize
// return embedding;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::RunArcFace", e.what(), __FILE__, __LINE__);
// return embedding;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::RunArcFaceBatch(const std::vector<cv::Mat>& faceROIs)
// {
// std::vector<std::vector<float>> embeddings;
// try {
// if (!_isInitialized) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Model is not initialized", __FILE__, __LINE__);
// return embeddings;
// }
// if (!m_trtEngine) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "TensorRT engine not initialized", __FILE__, __LINE__);
// return embeddings;
// }
// if (faceROIs.empty()) {
// return embeddings;
// }
// if (faceROIs.size() > static_cast<size_t>(m_options.maxBatchSize)) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Batch size " + std::to_string(faceROIs.size()) +
// " exceeds maxBatchSize " + std::to_string(m_options.maxBatchSize),
// __FILE__, __LINE__);
// return embeddings;
// }
//
// const auto& inputDims = m_trtEngine->getInputDims();
// if (inputDims.empty() || inputDims[0].nbDims < 3) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Invalid engine input dims", __FILE__, __LINE__);
// return embeddings;
// }
//
// // Prepare GPU batch
// cv::cuda::Stream stream;
// std::vector<cv::cuda::GpuMat> batchGpu;
// batchGpu.reserve(faceROIs.size());
//
// for (size_t i = 0; i < faceROIs.size(); ++i) {
// const cv::Mat& roi = faceROIs[i];
// if (roi.empty()) {
// continue;
// }
// cv::Mat frame;
// if (roi.channels() == 1) {
// cv::cvtColor(roi, frame, cv::COLOR_GRAY2BGR);
// }
// else {
// frame = roi;
// }
//
// if (frame.cols != GPU_FACE_WIDTH || frame.rows != GPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
// }
//
// cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
//
// cv::cuda::GpuMat d_img;
// d_img.upload(frame, stream);
// batchGpu.emplace_back(std::move(d_img));
// }
//
// if (batchGpu.empty()) {
// return embeddings;
// }
//
// // Prepare inputs for inference
// std::vector<std::vector<cv::cuda::GpuMat>> inputs;
// inputs.emplace_back(std::move(batchGpu));
//
// // Run inference
// std::vector<std::vector<std::vector<float>>> featureVectors;
// bool succ = m_trtEngine->runInference(inputs, featureVectors);
// stream.waitForCompletion();
//
// if (!succ) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "runInference failed", __FILE__, __LINE__);
// return embeddings;
// }
//
// if (featureVectors.empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Empty featureVectors returned from inference", __FILE__, __LINE__);
// return embeddings;
// }
//
//
// size_t batchSize = featureVectors.size();
// embeddings.reserve(batchSize);
//
// for (size_t i = 0; i < batchSize; ++i) {
// if (featureVectors[i].empty()) {
// continue;
// }
// // Get the first (and typically only) output for this face
// // featureVectors[i][0] is the embedding vector for face i
// const auto& embedding = featureVectors[i][0];
//
// if (embedding.empty()) {
// continue;
// }
//
// embeddings.push_back(embedding);
// }
//
// // Verify we got the expected number of embeddings
// //if (embeddings.size() != inputs[0].size()) {
// // _logger.LogWarn("ANSFaceRecognizer::RunArcFaceBatch",
// // "Expected " + std::to_string(inputs[0].size()) +
// // " embeddings but got " + std::to_string(embeddings.size()),
// // __FILE__, __LINE__);
// //}
// return embeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
// e.what(), __FILE__, __LINE__);
// return embeddings;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox)
// {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::vector<float>> detectedEmbeddings;
//
// if (input.empty()) {
// _logger.LogError("ANSFaceRecognizer::Forward",
// "Input image is empty", __FILE__, __LINE__);
// return detectedEmbeddings;
// }
//
// try {
// if (outputBbox.empty()) {
// return detectedEmbeddings;
// }
//
// if (engineType == EngineType::NVIDIA_GPU) {
// std::vector<cv::Mat> faceROIs;
// faceROIs.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// const cv::Mat& faceROI = obj.mask; // aligned mask
// if (faceROI.empty()) continue;
// faceROIs.push_back(faceROI);
// }
//
// if (!faceROIs.empty()) {
// detectedEmbeddings = RunArcFaceBatch(faceROIs);
// }
// }
// else {
// detectedEmbeddings.clear();
//#ifdef USE_ONNX_ENGINE
// for (const auto& obj : outputBbox) {
// cv::Mat frame = obj.mask.clone();
// if (frame.empty()) continue;
//
// if (frame.cols != CPU_FACE_WIDTH || frame.rows != CPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
//
// types::FaceContent face_content;
// faceRecognizer->detect(frame, face_content);
// if (!face_content.embedding.empty()) {
// detectedEmbeddings.push_back(face_content.embedding);
// }
// }
//#else
// std::vector<cv::Mat> face_rois;
// face_rois.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// cv::Mat frame = obj.mask;
// if (frame.empty()) continue;
//
// if (frame.cols != CPU_FACE_WIDTH || frame.rows != CPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// face_rois.push_back(frame);
// }
//
// if (!face_rois.empty()) {
// std::vector<cv::Mat> embeddings;
// faceRecognizer->Compute(face_rois, &embeddings);
//
// detectedEmbeddings.reserve(embeddings.size());
// for (const auto& embMat : embeddings) {
// if (embMat.empty()) continue;
// std::vector<float> emb(embMat.begin<float>(), embMat.end<float>());
// detectedEmbeddings.push_back(std::move(emb));
// }
// }
//#endif
// }
// return detectedEmbeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
// return detectedEmbeddings;
// }
// }
//std::vector<float> ANSFaceRecognizer::NormalizeVector(const std::vector<float>& vec) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// float norm = 0.0f;
// for (float v : vec) norm += v * v;
// norm = std::sqrt(norm);
// std::vector<float> normalizedVec(vec.size());
// if (norm > 0) {
// for (size_t i = 0; i < vec.size(); ++i) {
// normalizedVec[i] = vec[i] / norm;
// }
// }
// else {
// normalizedVec = vec;
// }
// return normalizedVec;
//}
/*bool ANSFaceRecognizer::LoadEngine(const std::string xmlModelPath,
bool engineOptimisation) {
try {
if (!FileExist(xmlModelPath)) {
_logger.LogError("ANSFaceRecognizer::LoadEngine",
"Cannot find the raw XML/ONNX model file.", __FILE__, __LINE__);
return false;
}
if (engineType == EngineType::NVIDIA_GPU) {
if (!m_trtEngine) {
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
m_options.calibrationBatchSize = 8;
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
m_options.engineFileDir = _modelFolder;
m_options.precision = ANSCENTER::Precision::FP16;
m_trtEngine = std::make_unique<Engine<float>>(m_options);
}
if (FileExist(xmlModelPath)) {
bool succ = m_trtEngine->buildLoadNetwork(xmlModelPath, SUB_VALS, DIV_VALS, NORMALIZE);
if (!succ) {
_logger.LogError("ANSFaceRecognizer::LoadEngine. Unable to build/load TensorRT engine.",
xmlModelPath, __FILE__, __LINE__);
return false;
}
}
else {
_logger.LogError("ANSFaceRecognizer::LoadEngine. Model file does not exist",
xmlModelPath, __FILE__, __LINE__);
return false;
}
return true;
}
return true;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::LoadEngine", e.what(), __FILE__, __LINE__);
return false;
}
}*/
//bool ANSFaceRecognizer::Destroy() {
// try {
// classNames.clear();
// if (faiss_index) {
// faiss_index->reset();
// faiss_index.reset();
// }
// faceRecognizer.reset();
// m_trtEngine.reset();
// return true;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
// return false;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox)
// {
// std::vector<std::vector<float>> detectedEmbeddings;
//
// // Early validation before locking
// if (input.empty()) {
// _logger.LogError("ANSFaceRecognizer::Forward",
// "Input image is empty", __FILE__, __LINE__);
// return detectedEmbeddings;
// }
//
// if (outputBbox.empty()) {
// return detectedEmbeddings;
// }
//
// std::lock_guard<std::recursive_mutex> lock(_mutex);
//
// try {
// // Pre-reserve output space
// detectedEmbeddings.reserve(outputBbox.size());
//
// if (engineType == EngineType::NVIDIA_GPU) {
// // GPU path
// std::vector<cv::Mat> faceROIs;
// faceROIs.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// if (!obj.mask.empty()) {
// faceROIs.push_back(obj.mask);
// }
// }
//
// if (!faceROIs.empty()) {
// detectedEmbeddings = RunArcFaceBatch(faceROIs);
// }
// }
// else {
// // CPU path
//#ifdef USE_ONNX_ENGINE
// for (const auto& obj : outputBbox) {
// if (obj.mask.empty()) continue;
//
// cv::Mat frame;
// if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
// cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// else {
// frame = obj.mask.clone();
// }
//
// types::FaceContent face_content;
// faceRecognizer->detect(frame, face_content);
//
// if (!face_content.embedding.empty()) {
// detectedEmbeddings.push_back(std::move(face_content.embedding));
// }
// }
//#else
// std::vector<cv::Mat> face_rois;
// face_rois.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// if (obj.mask.empty()) continue;
//
// cv::Mat frame;
// if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
// cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// else {
// frame = obj.mask;
// }
//
// face_rois.push_back(frame);
// }
//
// if (!face_rois.empty()) {
// std::vector<cv::Mat> embeddings;
// embeddings.reserve(face_rois.size());
//
// faceRecognizer->Compute(face_rois, &embeddings);
//
// detectedEmbeddings.reserve(embeddings.size());
// for (auto& embMat : embeddings) {
// if (embMat.empty()) continue;
//
// // Optimized conversion
// if (embMat.isContinuous()) {
// float* dataPtr = embMat.ptr<float>(0);
// detectedEmbeddings.emplace_back(dataPtr, dataPtr + embMat.total());
// }
// else {
// std::vector<float> emb(embMat.begin<float>(), embMat.end<float>());
// detectedEmbeddings.push_back(std::move(emb));
// }
// }
// }
//#endif
// }
//
// return detectedEmbeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
// return detectedEmbeddings;
// }
// }