Files
ANSCORE/modules/ANSFR/ANSFaceRecognizer.cpp

2110 lines
68 KiB
C++
Raw Normal View History

2026-03-28 16:54:11 +11:00
#include "ANSFaceRecognizer.h"
#include "engine.h"
//#define DEBUG_TIME
// Lightweight profiling macros: when DEBUG_TIME is defined, FR_START_TIMER
// captures a high-resolution timestamp and FR_END_TIMER prints the elapsed
// time in milliseconds to stdout; otherwise both expand to nothing so release
// builds pay zero cost.
#ifdef DEBUG_TIME
#include <chrono>
#include <iostream>
#define FR_START_TIMER(name) auto timer_##name = std::chrono::high_resolution_clock::now()
// do { } while(0) makes the multi-statement macro behave as a single statement.
#define FR_END_TIMER(name, label) \
do { \
auto timer_##name##_end = std::chrono::high_resolution_clock::now(); \
auto duration_us = std::chrono::duration_cast<std::chrono::microseconds>(timer_##name##_end - timer_##name).count(); \
std::cout << "  [FR] " << label << ": " << (duration_us / 1000.0) << " ms" << std::endl; \
} while(0)
#else
// No-op stubs when timing is disabled.
#define FR_START_TIMER(name)
#define FR_END_TIMER(name, label)
#endif
namespace ANSCENTER {
std::string ANSFaceRecognizer::GetOpenVINODevice() {
    // Ask OpenVINO which devices are present and pick the most preferred one:
    // GPU is tried before CPU, and CPU is the unconditional fallback.
    ov::Core core;
    const std::vector<std::string> available = core.get_available_devices();
    for (const char* preferred : { "GPU", "CPU" }) {
        for (const auto& dev : available) {
            if (dev == preferred) {
                return dev;
            }
        }
    }
    return "CPU";
}
bool ANSFaceRecognizer::Initialize(std::string licenseKey,
    ModelConfig modelConfig,
    const std::string& modelZipFilePath,
    const std::string& modelZipPassword,
    std::string& labelMap)
{
    // One-time setup of the face recognizer. Validates the license and model
    // package through the base class, then constructs the inference backend
    // that matches the detected hardware:
    //   - NVIDIA GPU            -> TensorRT engine from ansfacerecognizer.onnx
    //   - CPU + USE_ONNX_ENGINE -> ONNX Runtime (GlintArcFace)
    //   - CPU (default)         -> OpenVINO (ansfacenet.xml)
    // Finishes by creating a fresh FAISS index via Init().
    // Returns true only when a usable backend was created.
    bool result = ANSFRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    if (!result) return false;
#ifdef CPU_MODE
    engineType = EngineType::CPU;
#else
    // Runtime hardware probe decides the GPU vs CPU code path.
    engineType = ANSLicenseHelper::CheckHardwareInformation();
#endif
    try {
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::FACERECOGNIZE;
        _modelConfig.detectionType = DetectionType::FACERECOGNIZER;
        // Distance threshold separating known from unknown faces; 0.0f in the
        // config means "not configured", so fall back to the default 0.35.
        m_knownPersonThresh = _modelConfig.unknownPersonThreshold;
        if (m_knownPersonThresh == 0.0f) m_knownPersonThresh = 0.35f;
        if (engineType == EngineType::NVIDIA_GPU) {
            // 1. Load ONNX model
            std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (std::filesystem::exists(onnxfile)) {
                _modelFilePath = onnxfile;
                _logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ansfacerecognizer weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacerecognizer.onnx does not exist",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            if (!m_trtEngine) {
                // Enable batch support: the optimization profile is pinned to a
                // fixed face crop size (GPU_FACE_WIDTH x GPU_FACE_HEIGHT) with
                // batch sizes between 1 and maxBatchSize.
                m_options.optBatchSize = 8;
                m_options.maxBatchSize = 32;
                m_options.maxInputHeight = GPU_FACE_HEIGHT;
                m_options.minInputHeight = GPU_FACE_HEIGHT;
                m_options.optInputHeight = GPU_FACE_HEIGHT;
                m_options.maxInputWidth = GPU_FACE_WIDTH;
                m_options.minInputWidth = GPU_FACE_WIDTH;
                m_options.optInputWidth = GPU_FACE_WIDTH;
                m_options.calibrationBatchSize = 8;
                m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
                m_options.engineFileDir = _modelFolder;
                m_options.precision = ANSCENTER::Precision::FP16;
                // Engines are shared process-wide via a pool keyed on
                // (model path, precision, max batch size) to avoid building
                // duplicate TensorRT engines for the same model.
                m_poolKey = { _modelFilePath,
                              static_cast<int>(m_options.precision),
                              m_options.maxBatchSize };
                m_trtEngine = EnginePoolManager<float>::instance().acquire(
                    m_poolKey, m_options, _modelFilePath,
                    SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
                m_usingSharedPool = (m_trtEngine != nullptr);
                if (!m_trtEngine) {
                    _logger.LogError("ANSFaceRecognizer::Initialize. Unable to build/load TensorRT engine.",
                        _modelFilePath, __FILE__, __LINE__);
                    return false;
                }
            }
        }
        else {
#ifdef USE_ONNX_ENGINE
            // CPU path using ONNX Runtime.
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (std::filesystem::exists(faceidModel)) {
                _modelFilePath = faceidModel;
                _logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ANSFaceRecognizer weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacerecognizer.onnx does not exist",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            faceRecognizer = std::make_unique<GlintArcFace>(faceidModel);
            if (!faceRecognizer) {
                _logger.LogFatal("ANSFaceRecognizer::Initialize",
                    "Failed to initialize ONNX face recognizer", __FILE__, __LINE__);
                return false;
            }
#else
            // CPU path using OpenVINO (different model file: ansfacenet.xml).
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacenet.xml");
            if (std::filesystem::exists(faceidModel)) {
                _modelFilePath = faceidModel;
                _logger.LogDebug("ANSFaceRecognizer::Initialize. Loading ANSFaceRecognizer weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::Initialize. Model ansfacenet.xml does not exist",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            // Pick GPU if OpenVINO exposes one, else CPU.
            std::string deviceName = GetOpenVINODevice();
            ov::Core core;
            CnnConfig reid_config(_modelFilePath, "Face Re-Identification");
            reid_config.m_deviceName = deviceName;
            reid_config.m_max_batch_size = 1; // can be increased if you want batching here too
            reid_config.m_core = core;
            faceRecognizer = std::make_unique<VectorCNN>(reid_config);
            if (!faceRecognizer) {
                _logger.LogFatal("ANSFaceRecognizer::Initialize",
                    "Failed to initialize OpenVINO face recognizer", __FILE__, __LINE__);
                return false;
            }
#endif
        }
        // Build the (empty) FAISS search index; also sets _isInitialized.
        Init();
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSFaceRecognizer::LoadModel(const std::string& modelZipFilePath,
    const std::string& modelZipPassword) {
    // Extracts/validates the model package via the base class, then resolves
    // the weight file used by the active backend. Mirrors Initialize():
    //   - NVIDIA GPU            -> ansfacerecognizer.onnx (TensorRT)
    //   - CPU + USE_ONNX_ENGINE -> ansfacerecognizer.onnx (ONNX Runtime)
    //   - CPU OpenVINO          -> ansfacenet.xml
    // @return true when the expected weight file exists and _modelFilePath was set.
    try {
        bool result = ANSFRBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        if (engineType == EngineType::NVIDIA_GPU) {
            std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (std::filesystem::exists(onnxfile)) {
                _modelFilePath = onnxfile;
                _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacerecognizer weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacerecognizer.onnx not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
        }
        else {
#ifdef USE_ONNX_ENGINE
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (std::filesystem::exists(faceidModel)) {
                _modelFilePath = faceidModel;
                _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacerecognizer weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacerecognizer.onnx not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
#else
            // Bug fix: this branch previously checked for ansfacerecognizer.onnx
            // while its log messages (and Initialize()) refer to ansfacenet.xml.
            // Resolve the OpenVINO model file name here so existence checks,
            // logs, and Initialize() all agree.
            std::string faceidModel = CreateFilePath(_modelFolder, "ansfacenet.xml");
            if (std::filesystem::exists(faceidModel)) {
                _modelFilePath = faceidModel;
                _logger.LogDebug("ANSFaceRecognizer::LoadModel. Loading ansfacenet weight",
                    _modelFilePath, __FILE__, __LINE__);
            }
            else {
                _logger.LogError("ANSFaceRecognizer::LoadModel. ansfacenet.xml not found",
                    _modelFilePath, __FILE__, __LINE__);
                return false;
            }
#endif
        }
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ANSFaceRecognizer::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
if (!FileExist(_modelFilePath)) {
optimizedModelFolder = "";
return false;
}
if (engineType == EngineType::NVIDIA_GPU) {
optimizedModelFolder = GetParentFolder(_modelFilePath);
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
m_options.engineFileDir = optimizedModelFolder;
m_options.precision = fp16 ? Precision::FP16 : Precision::FP32;
Engine<float> engine(m_options);
auto succ = engine.buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
if (!succ) {
const std::string errMsg =
"Error: Unable to build the TensorRT engine. Try increasing TensorRT log severity to kVERBOSE.";
_logger.LogError("ANSFaceRecognizer::OptimizeModel", errMsg, __FILE__, __LINE__);
return false;
}
return true;
}
return true;
}
std::vector<float> ANSFaceRecognizer::Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) {
    // Computes the embedding for a single face. Note: the embedding is taken
    // from the pre-cropped face in bBox.mask; `image` is only used for the
    // size sanity checks below — TODO(review): confirm this is intentional.
    // Returns an empty vector on invalid input or failure.
    std::vector<float> embeddingResult;
    // Early validation before locking
    if (image.empty()) {
        _logger.LogError("ANSFaceRecognizer::Feature",
            "Input image is empty", __FILE__, __LINE__);
        return embeddingResult;
    }
    // Reject degenerate inputs (anything under 10x10 pixels).
    if (image.cols < 10 || image.rows < 10) {
        return embeddingResult;
    }
    // Serialize access to the shared inference backend.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (engineType == EngineType::NVIDIA_GPU) {
            // GPU path: single-image TensorRT inference (handles its own locking).
            return RunArcFace(bBox.mask);
        }
        else {
            // CPU path
#ifdef USE_ONNX_ENGINE
            types::FaceContent face_content;
            faceRecognizer->detect(bBox.mask, face_content);
            // Explicitly move and let face_content destructor clean up
            embeddingResult = std::move(face_content.embedding);
            return embeddingResult;
#else
            // OpenVINO path: Compute() returns the embedding as a cv::Mat of floats.
            cv::Mat embedding;
            faceRecognizer->Compute(bBox.mask, &embedding);
            // Check if embedding is valid
            if (embedding.empty() || embedding.total() == 0) {
                return embeddingResult;
            }
            // Reserve space first to avoid reallocations
            embeddingResult.reserve(embedding.total());
            // Optimized conversion: bulk-assign when the Mat is one contiguous
            // buffer, element-wise iterate otherwise.
            if (embedding.isContinuous()) {
                const float* dataPtr = embedding.ptr<float>(0);
                embeddingResult.assign(dataPtr, dataPtr + embedding.total());
            }
            else {
                embeddingResult.assign(embedding.begin<float>(), embedding.end<float>());
            }
            // Explicitly release OpenCV Mat
            embedding.release();
            return embeddingResult;
#endif
        }
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::Feature", e.what(), __FILE__, __LINE__);
        return std::vector<float>();
    }
}
std::vector<FaceResultObject> ANSFaceRecognizer::Match(const cv::Mat& input,const std::vector<ANSCENTER::Object>& bBox,const std::map<std::string, std::string>& userDict) {
    // End-to-end identification for one frame: embeds every detected face,
    // searches the FAISS gallery, and labels each box as a known user or
    // "Unknown". sims[i] is a distance in [0,2] where 0 is a perfect match;
    // values above m_knownPersonThresh are treated as unknown.
    // NOTE(review): results are paired positionally (names[i] <-> bBox[i]);
    // this assumes ForwardUnlocked produced exactly one embedding per box —
    // if any face is skipped upstream the pairing shifts. Verify against
    // ForwardUnlocked's skip conditions.
    // @param input    full frame (used only for size checks and box clamping)
    // @param bBox     detected faces; mask holds the aligned face crop
    // @param userDict userId -> display-name lookup
    std::vector<FaceResultObject> resultObjects;
    // Early validation before locking
    if (input.empty()) {
        _logger.LogError("ANSFaceRecognizer::Match",
            "Input image is empty", __FILE__, __LINE__);
        return resultObjects;
    }
    if (input.cols < 10 || input.rows < 10) {
        return resultObjects;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!_isInitialized) {
        _logger.LogError("ANSFaceRecognizer::Match",
            "Model is not initialized", __FILE__, __LINE__);
        return resultObjects;
    }
    try {
        // Get embeddings (Fix #5: call unlocked — we already hold _mutex)
        FR_START_TIMER(forward);
        std::vector<std::vector<float>> detectedEmbeddings = ForwardUnlocked(input, bBox);
        FR_END_TIMER(forward, "ForwardUnlocked (GPU embeddings)");
        // Search for matches (Fix #5: call unlocked — we already hold _mutex)
        FR_START_TIMER(search);
        std::vector<std::string> names;
        std::vector<float> sims;
        std::tie(names, sims) = SearchForFacesUnlocked(detectedEmbeddings);
        FR_END_TIMER(search, "SearchForFacesUnlocked (FAISS)");
        if (names.empty()) {
            _logger.LogError("ANSFaceRecognizer::Match",
                "No face is match", __FILE__, __LINE__);
            return resultObjects;
        }
        // Pre-reserve result space; min() guards against a names/bBox length mismatch.
        const size_t resultCount = std::min(names.size(), bBox.size());
        resultObjects.reserve(resultCount);
        // Build result objects
        for (size_t i = 0; i < resultCount; ++i) {
            FaceResultObject resultObject;
            // Determine if face is known or unknown (larger distance = less similar)
            const bool isUnknown = (sims[i] > m_knownPersonThresh);
            if (isUnknown) {
                resultObject.isUnknown = true;
                resultObject.userId = "0";
                resultObject.userName = "Unknown";
                resultObject.confidence = 1.0f;
            }
            else {
                resultObject.isUnknown = false;
                resultObject.userId = names[i];
                // Safe map lookup; fall back to the raw id when no display name exists
                auto it = userDict.find(names[i]);
                resultObject.userName = (it != userDict.end()) ? it->second : names[i];
                // Map distance [0,2] to confidence [0,1] (0 distance -> 1.0 confidence)
                resultObject.confidence = std::clamp((2.0f - sims[i]) / 2.0f, 0.0f, 1.0f);
            }
            resultObject.similarity = sims[i];
            // Clamp bounding box to image boundaries
            const float x = MAX(0.0f, bBox[i].box.x);
            const float y = MAX(0.0f, bBox[i].box.y);
            const float w = MIN(bBox[i].box.width, static_cast<float>(input.cols) - x);
            const float h = MIN(bBox[i].box.height, static_cast<float>(input.rows) - y);
            resultObject.box.x = x;
            resultObject.box.y = y;
            resultObject.box.width = w;
            resultObject.box.height = h;
            // Copy additional data
            resultObject.mask = bBox[i].mask;
            resultObject.cameraId = bBox[i].cameraId;
            resultObject.trackId = bBox[i].trackId;
            resultObject.polygon = bBox[i].polygon;
            resultObject.kps = bBox[i].kps;
            resultObject.extraInformation = bBox[i].extraInfo;
            resultObjects.push_back(std::move(resultObject));
        }
        return resultObjects;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::Match", e.what(), __FILE__, __LINE__);
        return resultObjects;
    }
}
cv::Mat ANSFaceRecognizer::GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) {
    // Crops a single face from the frame at the crop size required by the
    // active backend (GPU vs CPU models use different face dimensions).
    // Returns an empty Mat when cropping fails or yields nothing.
    try {
        const bool useGpuSize = (engineType == EngineType::NVIDIA_GPU);
        const int cropWidth = useGpuSize ? GPU_FACE_WIDTH : CPU_FACE_WIDTH;
        const int cropHeight = useGpuSize ? GPU_FACE_HEIGHT : CPU_FACE_HEIGHT;
        // The helper takes a vector of boxes, so wrap the single box.
        std::vector<ANSCENTER::Object> boxes{ bBox };
        std::vector<CroppedFace> croppedFaces;
        ANSFRHelper::GetCroppedFaces(input, boxes, cropWidth, cropHeight, croppedFaces);
        return croppedFaces.empty() ? cv::Mat() : croppedFaces[0].faceMat;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::GetCropFace", e.what(), __FILE__, __LINE__);
        return cv::Mat();
    }
}
bool ANSFaceRecognizer::LoadEngine(const std::string& xmlModelPath,bool engineOptimisation) {
try {
// Early validation
if (!FileExist(xmlModelPath)) {
_logger.LogError("ANSFaceRecognizer::LoadEngine",
"Cannot find the raw XML/ONNX model file.", __FILE__, __LINE__);
return false;
}
if (engineType == EngineType::NVIDIA_GPU) {
// Initialize TensorRT engine via shared pool
if (!m_trtEngine) {
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
m_options.calibrationBatchSize = 8;
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
m_options.engineFileDir = _modelFolder;
m_options.precision = ANSCENTER::Precision::FP16;
m_poolKey = { xmlModelPath,
static_cast<int>(m_options.precision),
m_options.maxBatchSize };
m_trtEngine = EnginePoolManager<float>::instance().acquire(
m_poolKey, m_options, xmlModelPath,
SUB_VALS, DIV_VALS, NORMALIZE, m_maxSlotsPerGpu);
m_usingSharedPool = (m_trtEngine != nullptr);
if (!m_trtEngine) {
_logger.LogError("ANSFaceRecognizer::LoadEngine. Unable to build/load TensorRT engine.",
xmlModelPath, __FILE__, __LINE__);
return false;
}
}
return true;
}
return true;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::LoadEngine", e.what(), __FILE__, __LINE__);
return false;
}
}
std::vector<std::vector<float>> ANSFaceRecognizer::ForwardUnlocked(const cv::Mat& input, const std::vector<ANSCENTER::Object>& outputBbox)
{
    // Computes embeddings for every detected face crop (obj.mask / obj.gpuMask).
    // Fix #5: Caller must hold _mutex — no lock acquisition here.
    // NOTE(review): faces whose masks are empty are silently skipped, so the
    // returned vector can be SHORTER than outputBbox and is index-aligned only
    // when no face was skipped — callers pairing results positionally (Match)
    // depend on that. Verify skip conditions cannot fire in production.
    std::vector<std::vector<float>> detectedEmbeddings;
    if (input.empty() || outputBbox.empty()) {
        return detectedEmbeddings;
    }
    try {
        detectedEmbeddings.reserve(outputBbox.size());
        if (engineType == EngineType::NVIDIA_GPU) {
            // Collect face ROIs — use GPU-resident masks when available (NV12 path),
            // fall back to CPU masks (standard path). This avoids re-uploading faces
            // that are already on GPU from the NV12 affine warp kernel.
            std::vector<cv::Mat> cpuFaceROIs;
            std::vector<cv::cuda::GpuMat> gpuFaceROIs;
            cpuFaceROIs.reserve(outputBbox.size());
            gpuFaceROIs.reserve(outputBbox.size());
            for (const auto& obj : outputBbox) {
                if (!obj.gpuMask.empty()) {
                    gpuFaceROIs.push_back(obj.gpuMask);
                    cpuFaceROIs.push_back(cv::Mat()); // placeholder to keep indices aligned
                } else if (!obj.mask.empty()) {
                    gpuFaceROIs.push_back(cv::cuda::GpuMat()); // placeholder
                    cpuFaceROIs.push_back(obj.mask);
                }
            }
            if (!cpuFaceROIs.empty()) {
                detectedEmbeddings = RunArcFaceBatch(cpuFaceROIs, gpuFaceROIs);
            }
        }
        else {
#ifdef USE_ONNX_ENGINE
#ifdef USE_CPU_BATCH_MODE
            // CPU batch path: resize all faces to the model's input size and
            // run a single batched ONNX inference call.
            detectedEmbeddings.clear();
            if (!outputBbox.empty()) {
                // Pre-count valid faces to avoid reallocation
                size_t valid_count = 0;
                for (const auto& obj : outputBbox) {
                    if (!obj.mask.empty()) valid_count++;
                }
                if (valid_count == 0) return detectedEmbeddings;
                std::vector<cv::Mat> face_rois;
                face_rois.reserve(valid_count);
                // Prepare batch (only valid faces)
                for (const auto& obj : outputBbox) {
                    if (obj.mask.empty()) continue;
                    if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
                        face_rois.emplace_back();
                        cv::resize(obj.mask, face_rois.back(), cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
                    }
                    else {
                        face_rois.push_back(obj.mask);
                    }
                }
                // Batch inference
                std::vector<types::FaceContent> face_contents;
                faceRecognizer->detectBatch(face_rois, face_contents);
                // Extract embeddings
                detectedEmbeddings.reserve(face_contents.size());
                for (auto& content : face_contents) {
                    if (!content.embedding.empty()) {
                        detectedEmbeddings.push_back(std::move(content.embedding));
                    }
                }
            }
#else
            // Using single detection instead of batch (less efficient)
            detectedEmbeddings.clear();
            detectedEmbeddings.reserve(outputBbox.size());
            if (!outputBbox.empty()) {
                // Pre-allocate frame buffer with target size, reused for every face
                cv::Mat frame(CPU_FACE_HEIGHT, CPU_FACE_WIDTH, CV_8UC3);
                types::FaceContent face_content;
                for (const auto& obj : outputBbox) {
                    if (obj.mask.empty()) continue;
                    // Resize into pre-allocated buffer
                    if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
                        cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
                    }
                    else {
                        obj.mask.copyTo(frame); // Copy into buffer
                    }
                    // Detect
                    faceRecognizer->detect(frame, face_content);
                    // Move embedding; clear() resets the moved-from member for reuse
                    if (!face_content.embedding.empty()) {
                        detectedEmbeddings.push_back(std::move(face_content.embedding));
                        face_content.embedding.clear();
                    }
                }
            }
#endif
#else
            // OpenVINO path - Optimized version: batch all resized faces into one Compute() call
            std::vector<cv::Mat> face_rois;
            face_rois.reserve(outputBbox.size());
            cv::Mat resized_buffer;
            for (const auto& obj : outputBbox) {
                if (obj.mask.empty()) continue;
                if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
                    cv::resize(obj.mask, resized_buffer, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
                    // clone() is required: resized_buffer is reused on the next iteration
                    face_rois.push_back(resized_buffer.clone());
                }
                else {
                    face_rois.push_back(obj.mask);
                }
            }
            if (!face_rois.empty()) {
                std::vector<cv::Mat> embeddings;
                embeddings.reserve(face_rois.size());
                faceRecognizer->Compute(face_rois, &embeddings);
                detectedEmbeddings.reserve(embeddings.size());
                // Process embeddings efficiently
                for (auto& embMat : embeddings) {
                    if (embMat.empty()) continue;
                    // Direct emplace_back construction (bulk copy when contiguous)
                    if (embMat.isContinuous()) {
                        const float* dataPtr = embMat.ptr<float>(0);
                        detectedEmbeddings.emplace_back(dataPtr, dataPtr + embMat.total());
                    }
                    else {
                        detectedEmbeddings.emplace_back(embMat.begin<float>(), embMat.end<float>());
                    }
                }
            }
#endif
        }
        return detectedEmbeddings;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::ForwardUnlocked", e.what(), __FILE__, __LINE__);
        return std::vector<std::vector<float>>();
    }
}
ANSFaceRecognizer::~ANSFaceRecognizer() noexcept {
    // Releases all engine/FAISS resources via Destroy(). Every exception path
    // is swallowed because the destructor is noexcept — an escaping exception
    // would call std::terminate.
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        // Log but don't throw - exceptions in destructors are dangerous
        _logger.LogError("ANSFaceRecognizer::~ANSFaceRecognizer",
            e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Catch all exceptions to prevent std::terminate
        _logger.LogError("ANSFaceRecognizer::~ANSFaceRecognizer",
            "Unknown exception during destruction", __FILE__, __LINE__);
    }
}
void ANSFaceRecognizer::L2NormalizeInPlace(std::vector<float>& vec) {
    // Scales vec so its Euclidean (L2) norm becomes 1. Operates entirely in
    // place — no temporary buffers — and leaves zero vectors untouched.
    float sumOfSquares = 0.0f;
#pragma omp simd reduction(+:sumOfSquares)
    for (size_t idx = 0; idx < vec.size(); ++idx) {
        sumOfSquares += vec[idx] * vec[idx];
    }
    const float magnitude = std::sqrt(sumOfSquares);
    if (magnitude > 0.0f) {
        const float scale = 1.0f / magnitude;
#pragma omp simd
        for (size_t idx = 0; idx < vec.size(); ++idx) {
            vec[idx] *= scale;
        }
    }
}
std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
    // Single-face TensorRT inference. Uploads the crop to the GPU, converts
    // grayscale->BGR if needed, resizes to the engine's fixed input size,
    // converts BGR->RGB, runs inference, and returns the L2-normalized
    // embedding (empty vector on any failure).
    std::vector<float> embedding;
    // Early validation before locking
    if (inputImage.empty()) {
        _logger.LogError("ANSFaceRecognizer::RunArcFace",
            "Input image is empty", __FILE__, __LINE__);
        return embedding;
    }
    if (inputImage.cols < 10 || inputImage.rows < 10) {
        _logger.LogError("ANSFaceRecognizer::RunArcFace",
            "Input image is too small", __FILE__, __LINE__);
        return embedding;
    }
    // Recursive mutex: safe even when called from Feature(), which already locks.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!_isInitialized) {
        _logger.LogError("ANSFaceRecognizer::RunArcFace",
            "Model is not initialized", __FILE__, __LINE__);
        return embedding;
    }
    try {
        // Fix #8: Use pooled GPU buffers (m_gpuImg/m_gpuRgb/m_gpuResized) to
        // avoid per-frame allocation; all ops are queued on m_gpuStream.
        m_gpuImg.upload(inputImage, m_gpuStream);
        // Handle grayscale conversion on GPU
        if (inputImage.channels() == 1) {
            cv::cuda::cvtColor(m_gpuImg, m_gpuRgb, cv::COLOR_GRAY2BGR, 0, m_gpuStream);
            // swap keeps "current image" in m_gpuImg without copying device memory
            std::swap(m_gpuImg, m_gpuRgb);
        }
        // Resize on GPU if needed
        if (inputImage.cols != GPU_FACE_WIDTH || inputImage.rows != GPU_FACE_HEIGHT) {
            cv::cuda::resize(m_gpuImg, m_gpuResized, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT),
                0, 0, cv::INTER_LINEAR, m_gpuStream);
        }
        else {
            // Shallow header copy — shares device memory, no kernel launched
            m_gpuResized = m_gpuImg;
        }
        // BGR to RGB conversion on GPU (model expects RGB channel order)
        cv::cuda::cvtColor(m_gpuResized, m_gpuRgb, cv::COLOR_BGR2RGB, 0, m_gpuStream);
        // Prepare inference inputs (batch of 1, single input binding)
        std::vector<cv::cuda::GpuMat> inputVec;
        inputVec.emplace_back(m_gpuRgb);
        std::vector<std::vector<cv::cuda::GpuMat>> inputs;
        inputs.emplace_back(std::move(inputVec));
        // Run inference
        std::vector<std::vector<std::vector<float>>> featureVectors;
        bool succ = m_trtEngine->runInference(inputs, featureVectors);
        // Synchronize stream so the preprocessing work is guaranteed complete.
        // NOTE(review): assumes runInference synchronizes with (or shares)
        // m_gpuStream internally — confirm against the Engine implementation.
        m_gpuStream.waitForCompletion();
        if (!succ) {
            _logger.LogError("ANSFaceRecognizer::RunArcFace",
                "Failed to run inference.", __FILE__, __LINE__);
            return embedding;
        }
        if (featureVectors.empty() || featureVectors[0].empty()) {
            _logger.LogError("ANSFaceRecognizer::RunArcFace",
                "Feature vectors are empty.", __FILE__, __LINE__);
            return embedding;
        }
        // featureVectors[batch][output][element]; single image, single output
        embedding = std::move(featureVectors[0][0]);
        cv::normalize(embedding, embedding); // l2 normalize
        return embedding;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::RunArcFace", e.what(), __FILE__, __LINE__);
        return embedding;
    }
}
std::vector<std::vector<float>> ANSFaceRecognizer::RunArcFaceBatch(
    const std::vector<cv::Mat>& faceROIs,
    const std::vector<cv::cuda::GpuMat>& gpuFaceROIs)
{
    // Batched TensorRT inference over many face crops. faceROIs and gpuFaceROIs
    // are parallel arrays (ForwardUnlocked fills the unused side with empty
    // placeholders); a GPU-resident crop is preferred over re-uploading the
    // CPU copy. Faces are processed in chunks no larger than the engine's
    // maximum batch size. Returns one L2-normalized embedding per successfully
    // processed face.
    // NOTE(review): a chunk entry with both ROIs empty is skipped (`continue`),
    // and a failed chunk is dropped entirely — in those cases the output has
    // fewer entries than the input and positional alignment is lost. Confirm
    // callers tolerate this.
    std::vector<std::vector<float>> embeddings;
    try {
        // Early validation checks
        if (!_isInitialized) {
            _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
                "Model is not initialized", __FILE__, __LINE__);
            return embeddings;
        }
        if (!m_trtEngine) {
            _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
                "TensorRT engine not initialized", __FILE__, __LINE__);
            return embeddings;
        }
        if (faceROIs.empty()) {
            return embeddings;
        }
        const auto& inputDims = m_trtEngine->getInputDims();
        if (inputDims.empty() || inputDims[0].nbDims < 3) {
            _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
                "Invalid engine input dims", __FILE__, __LINE__);
            return embeddings;
        }
        // Use actual engine batch limit (profile max), not the configured maxBatchSize
        // which may be larger than what the engine was built with.
        const size_t maxBatch = static_cast<size_t>(
            m_trtEngine->getOptions().maxBatchSize > 0
            ? m_trtEngine->getOptions().maxBatchSize
            : m_options.maxBatchSize);
        embeddings.reserve(faceROIs.size());
        const cv::Size targetSize(GPU_FACE_WIDTH, GPU_FACE_HEIGHT);
        // Process in chunks of maxBatchSize to avoid exceeding engine limits
        for (size_t chunkStart = 0; chunkStart < faceROIs.size(); chunkStart += maxBatch) {
            const size_t chunkEnd = std::min(chunkStart + maxBatch, faceROIs.size());
            FR_START_TIMER(gpu_preproc);
            std::vector<cv::cuda::GpuMat> batchGpu;
            batchGpu.reserve(chunkEnd - chunkStart);
            for (size_t i = chunkStart; i < chunkEnd; i++) {
                cv::cuda::GpuMat d_img;
                // Use GPU-resident face if available (NV12 affine warp path),
                // otherwise upload from CPU (standard path)
                if (i < gpuFaceROIs.size() && !gpuFaceROIs[i].empty()) {
                    d_img = gpuFaceROIs[i]; // already on GPU — skip upload
                } else {
                    const auto& roi = faceROIs[i];
                    if (roi.empty()) continue;
                    d_img.upload(roi, m_gpuStream);
                    // Grayscale crops are expanded to 3 channels before inference
                    if (roi.channels() == 1) {
                        cv::cuda::GpuMat d_bgr;
                        cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, m_gpuStream);
                        d_img = d_bgr;
                    }
                }
                // Resize to the engine's fixed input size when needed
                if (d_img.cols != GPU_FACE_WIDTH || d_img.rows != GPU_FACE_HEIGHT) {
                    cv::cuda::GpuMat d_resized;
                    cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, m_gpuStream);
                    d_img = d_resized;
                }
                // Model expects RGB channel order
                cv::cuda::GpuMat d_rgb;
                cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, m_gpuStream);
                batchGpu.emplace_back(std::move(d_rgb));
            }
            FR_END_TIMER(gpu_preproc, "RunArcFaceBatch GPU preprocess (" + std::to_string(batchGpu.size()) + " faces)");
            if (batchGpu.empty()) continue;
            const size_t actualCount = batchGpu.size();
            // inputs[input_binding][batch_item] — single input binding here
            std::vector<std::vector<cv::cuda::GpuMat>> inputs;
            inputs.emplace_back(std::move(batchGpu));
            FR_START_TIMER(trt_infer);
            std::vector<std::vector<std::vector<float>>> featureVectors;
            bool succ = m_trtEngine->runInference(inputs, featureVectors);
            m_gpuStream.waitForCompletion();
            FR_END_TIMER(trt_infer, "RunArcFaceBatch TRT inference (batch=" + std::to_string(actualCount) + ")");
            if (!succ) {
                _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
                    "runInference failed for chunk starting at " + std::to_string(chunkStart),
                    __FILE__, __LINE__);
                continue; // Try remaining chunks instead of aborting
            }
            if (featureVectors.empty()) continue;
            // featureVectors[batch_item][output][element]; take output 0, L2-normalize
            for (size_t j = 0; j < featureVectors.size(); j++) {
                if (!featureVectors[j].empty() && !featureVectors[j][0].empty()) {
                    auto emb = std::move(featureVectors[j][0]);
                    cv::normalize(emb, emb);
                    embeddings.emplace_back(std::move(emb));
                }
            }
        }
        return embeddings;
    }
    catch (const cv::Exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
            "OpenCV error: " + std::string(e.what()), __FILE__, __LINE__);
        return embeddings;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
            e.what(), __FILE__, __LINE__);
        return embeddings;
    }
}
void ANSFaceRecognizer::Init() {
    // (Re)creates an empty FAISS gallery index matching the active engine type:
    // a GPU IndexFlatIP on NVIDIA hardware, a CPU IndexFlatIP otherwise. Also
    // resets the id->user mapping and the sequential id counter. Safe to call
    // repeatedly — existing index contents are discarded.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        _faceIdToUserId.clear();
        _nextFaceId = 0;
        // Always create a fresh FAISS index
        if (faiss_index) {
            faiss_index->reset();
            faiss_index.reset();
        }
        if (engineType == EngineType::NVIDIA_GPU) {
            // GPU mode: Initialize GPU resources (shared across index rebuilds)
            if (!m_gpuResources) {
                m_gpuResources = std::make_shared<faiss::gpu::StandardGpuResources>();
            }
            // Raw new is intentional: IndexIDMap takes ownership of the
            // sub-index because own_fields is set to true below.
            auto gpuIndex = new faiss::gpu::GpuIndexFlatIP(
                m_gpuResources.get(), FACE_EMBEDDING_SIZE);
            faiss_index = std::make_shared<faiss::IndexIDMap>(gpuIndex);
        }
        else {
            // CPU mode: Use CPU IndexFlatIP (inner-product metric)
            auto cpuIndex = new faiss::IndexFlatIP(FACE_EMBEDDING_SIZE);
            faiss_index = std::make_shared<faiss::IndexIDMap>(cpuIndex);
        }
        // IndexIDMap deletes the wrapped sub-index when it is destroyed.
        faiss_index->own_fields = true;
        _isInitialized = true;
    }
    catch (const std::exception& e) {
        _isInitialized = false;
        _logger.LogFatal("ANSFaceRecognizer::Init",
            std::string(e.what()), __FILE__, __LINE__);
    }
    catch (...) {
        _isInitialized = false;
        _logger.LogFatal("ANSFaceRecognizer::Init",
            "Unknown exception occurred", __FILE__, __LINE__);
    }
}
bool ANSFaceRecognizer::Destroy() {
    // Tears down all inference and index resources. The release ORDER is
    // deliberate: TRT engine first, then the FAISS index (which may hold GPU
    // memory), then the FAISS GPU resources the index depended on, then the
    // CPU recognizer, then plain state. Returns false only if teardown threw.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // Mark as uninitialized first to prevent new operations
        _isInitialized = false;
        // Clear GPU/TensorRT resources first (may fail, so do early).
        // Pooled engines must be returned to the pool, not just dropped.
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_trtEngine.reset();
            m_usingSharedPool = false;
        }
        else if (m_trtEngine) {
            m_trtEngine.reset();
        }
        // Clear FAISS index BEFORE GPU resources (index lives on GPU)
        if (faiss_index) {
            faiss_index->reset(); // Clear index data
            faiss_index.reset(); // Delete the object (frees GPU memory)
        }
        // Release FAISS GPU resources after index is destroyed
        if (m_gpuResources) {
            m_gpuResources.reset();
        }
        // Clear face recognizer (ONNX/OpenVINO backend)
        if (faceRecognizer) {
            faceRecognizer.reset();
        }
        // Clear face ID mapping last (safest operation)
        _faceIdToUserId.clear();
        _nextFaceId = 0;
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
        // Even on failure, mark as uninitialized
        _isInitialized = false;
        return false;
    }
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className,float embedding[]) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
// Fix #1: L2-normalize before storing so L2²/2 = 1-cos(θ)
std::vector<float> normEmb(embedding, embedding + FACE_EMBEDDING_SIZE);
L2NormalizeInPlace(normEmb);
// Use sequential ID for backward-compat path (initial load)
faiss::idx_t id = _nextFaceId++;
_faceIdToUserId[id] = className;
faiss_index->add_with_ids(1, normEmb.data(), &id);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className,const std::vector<float>& embedding) {
// Early validation before locking (non-shared state)
if (embedding.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
// Fix #1: L2-normalize before storing so L2²/2 = 1-cos(θ)
std::vector<float> normEmb = embedding;
L2NormalizeInPlace(normEmb);
faiss::idx_t id = _nextFaceId++;
_faceIdToUserId[id] = className;
faiss_index->add_with_ids(1, normEmb.data(), &id);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
void ANSFaceRecognizer::SwapIndex(
    std::shared_ptr<faiss::IndexIDMap> newIndex,
    std::unordered_map<faiss::idx_t, std::string>&& newFaceIdToUserId)
{
    // Atomically replaces the FAISS gallery index and its id->user mapping
    // (used for background index rebuilds).
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        faiss_index = std::move(newIndex);
        _faceIdToUserId = std::move(newFaceIdToUserId);
        // Bug fix: the next sequential id must be one past the LARGEST id in
        // the new map, not the map's size. With non-contiguous ids, size()
        // could be <= an existing id, and a later AddEmbedding would then
        // reuse a live id and corrupt the id->user mapping.
        faiss::idx_t maxId = -1;
        for (const auto& entry : _faceIdToUserId) {
            if (entry.first > maxId) maxId = entry.first;
        }
        _nextFaceId = maxId + 1; // -1 + 1 == 0 for an empty map
        _logger.LogDebug("ANSFaceRecognizer::SwapIndex",
            "Index swapped successfully. Total embeddings: " +
            std::to_string(faiss_index ? faiss_index->ntotal : 0),
            __FILE__, __LINE__);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ANSFaceRecognizer::SwapIndex",
            e.what(), __FILE__, __LINE__);
    }
}
std::tuple<std::vector<std::string>, std::vector<float>>
ANSFaceRecognizer::SearchForFacesUnlocked(const std::vector<std::vector<float>>& detectedEmbeddings)
{
    // Batched FAISS gallery lookup. Caller must hold _mutex (no locking here).
    // All stored and query embeddings are expected to be L2-normalized, so for
    // the inner-product index 1 - dot(a,b) = 1 - cos(θ): a distance in [0,2]
    // where 0 is a perfect match.
    // @return one (userId, distance) per query; ("0", 2.0f) when no match.
    std::vector<std::string> detectedUsers;
    std::vector<float> simValues;
    // Hoisted out of try so the catch handler can size its sentinel output.
    const size_t nq = detectedEmbeddings.size();
    try {
        // No queries, empty gallery, or missing index: one sentinel per query.
        if (nq == 0 || _faceIdToUserId.empty() || !faiss_index || faiss_index->ntotal == 0) {
            detectedUsers.assign(nq, "0");
            simValues.assign(nq, 2.0f);
            return std::make_tuple(detectedUsers, simValues);
        }
        const int k = 10;
        const int d = static_cast<int>(detectedEmbeddings[0].size());
        // Robustness fix: every row must have dimensionality d — a longer row
        // would overrun its slot in the contiguous matrix built below.
        for (size_t i = 0; i < nq; i++) {
            if (detectedEmbeddings[i].size() != static_cast<size_t>(d)) {
                _logger.LogError("ANSFaceRecognizer::SearchForFacesUnlocked",
                    "Inconsistent embedding dimensions", __FILE__, __LINE__);
                detectedUsers.assign(nq, "0");
                simValues.assign(nq, 2.0f);
                return std::make_tuple(detectedUsers, simValues);
            }
        }
        FR_START_TIMER(search_detail);
        // Build contiguous query matrix (nq × d) — embeddings are already
        // L2-normalized upstream (cv::normalize), so a plain copy suffices.
        FR_START_TIMER(build_matrix);
        std::vector<float> queryMatrix(nq * d);
        for (size_t i = 0; i < nq; i++) {
            std::copy(detectedEmbeddings[i].begin(), detectedEmbeddings[i].end(),
                queryMatrix.begin() + i * d);
        }
        FR_END_TIMER(build_matrix, "Build query matrix (nq=" + std::to_string(nq) + ")");
        // Single batched FAISS search — uses BLAS/MKL for the matrix multiply
        // when nq > 1, dramatically faster than nq individual searches.
        FR_START_TIMER(batch_search);
        std::vector<float> allDistances(nq * k);
        std::vector<faiss::idx_t> allIndices(nq * k);
        faiss_index->search(static_cast<faiss::idx_t>(nq), queryMatrix.data(), k,
            allDistances.data(), allIndices.data());
        FR_END_TIMER(batch_search, "FAISS batch search (nq=" + std::to_string(nq) + ")");
        // Process results: find best matching user for each query
        detectedUsers.reserve(nq);
        simValues.reserve(nq);
        for (size_t q = 0; q < nq; q++) {
            float bestSim = 2.0f;
            std::string bestUser = "0";
            const faiss::idx_t* qIndices = allIndices.data() + q * k;
            const float* qDistances = allDistances.data() + q * k;
            for (int i = 0; i < k; ++i) {
                faiss::idx_t id = qIndices[i];
                if (id < 0) continue; // FAISS pads missing neighbours with -1
                auto it = _faceIdToUserId.find(id);
                if (it == _faceIdToUserId.end()) continue;
                // IP returns dot(a,b); for unit vectors: 1 - dot = 1 - cos(θ)
                float similarity = 1.0f - qDistances[i];
                if (similarity < bestSim) {
                    bestSim = similarity;
                    bestUser = it->second;
                }
            }
            detectedUsers.push_back(std::move(bestUser));
            simValues.push_back(bestSim);
        }
        FR_END_TIMER(search_detail, "SearchForFacesUnlocked total");
        return std::make_tuple(detectedUsers, simValues);
    }
    catch (const std::exception& e) {
        // Bug fix: on failure return one sentinel PER QUERY. Previously this
        // returned a single "0000" entry regardless of nq, which both
        // misaligned results with callers' bounding boxes and used a sentinel
        // inconsistent with the "0" used on the normal path.
        detectedUsers.assign(nq, "0");
        simValues.assign(nq, 2.0f);
        _logger.LogFatal("ANSFaceRecognizer::SearchForFacesUnlocked",
            e.what(), __FILE__, __LINE__);
        return std::make_tuple(detectedUsers, simValues);
    }
}
//std::tuple<std::vector<std::string>, std::vector<float>>
// ANSFaceRecognizer::SearchForFaces(
// const std::vector<std::vector<float>>& detectedEmbeddings)
//{
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// if (!classNames.empty() && faiss_index && faiss_index->ntotal > 0) {
// const int k = 10;
// for (const auto& embedding : detectedEmbeddings) {
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// faiss_index->search(1, embedding.data(), k,
// distances.data(), indices.data());
// float bestSim = 2.0f;
// std::string bestUser = "0";
// for (int i = 0; i < k; ++i) {
// faiss::idx_t id = indices[i];
// if (id < 0 || id >= static_cast<faiss::idx_t>(classNames.size())) {
// continue;
// }
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(id, matchEmbedding.data());
// float cosine = CosineSimilarity(embedding, matchEmbedding, true);
// float similarity = 1.0f - cosine; // [0,2], 0 best
// if (similarity < bestSim) {
// bestSim = similarity;
// bestUser = classNames.at(id);
// }
// }
// detectedUsers.push_back(bestUser);
// simValues.push_back(bestSim);
// }
// }
// else {
// detectedUsers.assign(detectedEmbeddings.size(), "0");
// simValues.assign(detectedEmbeddings.size(), 2.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 2.0f);
// _logger.LogFatal("ANSFaceRecognizer::SearchForFaces",
// e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
}
// Old method
//std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(std::vector<std::vector<float>> detectedEmbeddings) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// // Check if there are class names available
// if (!classNames.empty()) {
// const int k = 3; // Number of nearest neighbors to retrieve
// // Process each detected embedding
// for (const auto& embedding : detectedEmbeddings) {
// // Prepare vectors to hold search results
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// // Perform the search for the k nearest neighbors
// faiss_index->search(1, embedding.data(), k, distances.data(), indices.data());
// // Find the index with the minimum distance (i.e., closest neighbor)
// auto min_it = std::min_element(distances.begin(), distances.end());
// int best_index = std::distance(distances.begin(), min_it);
// // Map the index to the corresponding class name and calculate similarity
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(indices[best_index], matchEmbedding.data());
// float similarity = 1 - CosineSimilarity(embedding, matchEmbedding, true);
// detectedUsers.push_back(classNames.at(indices[best_index]));
// simValues.push_back(std::abs(similarity));
// }
// }
// else {
// // If no class names are available, mark all users as "unknown"
// detectedUsers.assign(detectedEmbeddings.size(), "0000");
// simValues.assign(detectedEmbeddings.size(), 1.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// // Log the error and return default values for the failed search
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 1.0f);
// this->_logger.LogFatal("ANSFaceRecognizer::SearchForFaces", e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
//std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(const std::vector<std::vector<float>>& detectedEmbeddings)
//{
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::string> detectedUsers;
// std::vector<float> simValues;
// try {
// if (!classNames.empty() && faiss_index && faiss_index->ntotal > 0) {
// const int k = 3;
// for (const auto& embedding : detectedEmbeddings) {
// std::vector<faiss::idx_t> indices(k);
// std::vector<float> distances(k);
// faiss_index->search(1, embedding.data(), k, distances.data(), indices.data());
// // Find best (smallest distance) match
// auto min_it = std::min_element(distances.begin(), distances.end());
// int best_index = std::distance(distances.begin(), min_it);
// faiss::idx_t best_id = indices[best_index];
// if (best_id < 0 || best_id >= static_cast<faiss::idx_t>(classNames.size())) {
// detectedUsers.push_back("0000");
// simValues.push_back(1.0f);
// continue;
// }
// // Reconstruct matched embedding
// std::vector<float> matchEmbedding(faiss_index->d);
// faiss_index->reconstruct(best_id, matchEmbedding.data());
// float similarity = CosineSimilarity(embedding, matchEmbedding, true);
// detectedUsers.push_back(classNames.at(best_id));
// simValues.push_back(similarity);
// }
// }
// else {
// detectedUsers.assign(detectedEmbeddings.size(), "0000");
// simValues.assign(detectedEmbeddings.size(), 1.0f);
// }
// return std::make_tuple(detectedUsers, simValues);
// }
// catch (const std::exception& e) {
// detectedUsers.assign(1, "0000");
// simValues.assign(1, 1.0f);
// this->_logger.LogFatal("ANSFaceRecognizer::SearchForFaces", e.what(), __FILE__, __LINE__);
// return std::make_tuple(detectedUsers, simValues);
// }
//}
//space = new hnswlib::InnerProductSpace(FACE_EMBEDDING_SIZE); // note: this allocation belongs in Init()
//void ANSFaceRecognizer::Init()
//{
// try {
// classNames.clear();
// int maxElements = 100000; // Maximum number of elements, should be known beforehand
// int M = 16; // Tightly connected with internal dimensionality of the data
// int efConstruction = 200; // Controls index search speed/build speed tradeoff
// this->alg_hnsw = std::make_unique<hnswlib::HierarchicalNSW<float>>(space, maxElements, M, efConstruction);
// }
// catch (std::exception& e) {
// this->_logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
// }
//}
/*
* void ANSFaceRecognizer::Init() {
try {
// Clear class names and reset resources
classNames.clear();
// Define parameters
constexpr int maxElements = 100000; // Maximum number of elements
constexpr int M = 16; // Internal dimensionality of the data
constexpr int efConstruction = 200; // Controls index search/build tradeoff
// Release and reinitialize the space
if (space) {
delete space;
space = nullptr;
}
space = new hnswlib::InnerProductSpace(FACE_EMBEDDING_SIZE);
// Reinitialize the HNSW algorithm
alg_hnsw = std::make_unique<hnswlib::HierarchicalNSW<float>>(space, maxElements, M, efConstruction);
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
}
catch (...) {
_logger.LogFatal("ANSFaceRecognizer::Init", "Unknown exception occurred", __FILE__, __LINE__);
}
}
bool ANSFaceRecognizer::Destroy() {
try {
if (space) {
delete space;
space = nullptr;
}
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
return false;
}
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className, float embedding[]) {
try {
classNames.push_back(className);
int index = classNames.size() - 1;
int outputDim = 512;
std::vector<float> vec(embedding, embedding + outputDim);
std::vector<float> normalizedVector = NormalizeVector(vec);
alg_hnsw->addPoint(normalizedVector.data(), (size_t)index);
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
void ANSFaceRecognizer::AddEmbedding(const std::string& className, const std::vector<float>& embedding) {
try {
classNames.push_back(className);
int index = classNames.size() - 1;
std::vector<float> normalizedVector = NormalizeVector(embedding);
alg_hnsw->addPoint(normalizedVector.data(), index);
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}
std::tuple<std::vector<std::string>, std::vector<float>> ANSFaceRecognizer::SearchForFaces(std::vector<std::vector<float>> detectedEmbeddings) {
std::vector<std::string> detectedUsers;
std::vector<float> distanceValues;
try {
if (classNames.size() > 0) {
detectedUsers.clear();
for (int i = 0; i < detectedEmbeddings.size(); i++) {
NormalizeVector(detectedEmbeddings[i]);
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(detectedEmbeddings[i].data(), 1);
hnswlib::labeltype label = result.top().second;
if (label >= 0) {
detectedUsers.push_back(classNames.at(label));
distanceValues.push_back(result.top().first);
}
}
return std::make_tuple(detectedUsers, distanceValues);
}
else {
detectedUsers.clear();
for (int i = 0; i < detectedEmbeddings.size(); i++) {
detectedUsers.push_back("0000");
distanceValues.push_back(1);
}
return std::make_tuple(detectedUsers, distanceValues);
}
}
catch (std::exception& e) {
detectedUsers.push_back("0000");
distanceValues.push_back(1);
this->_logger.LogFatal("ANSArcFace100::SearchForFaces", e.what(), __FILE__, __LINE__);
return std::make_tuple(detectedUsers, distanceValues);
}
}
*/
// Private methods; the implementations below are replaceable
/* std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<std::vector<float>> detectedEmbeddings;
if (input.empty()) {
this->_logger.LogError("ANSFaceRecognizer::Forward", "Input image is empty", __FILE__, __LINE__);
return detectedEmbeddings;
}
try {
if (engineType == EngineType::NVIDIA_GPU) {
detectedEmbeddings.clear();
if (outputBbox.size() > 0) {
for (int i = 0; i < outputBbox.size(); i++) {
cv::Mat faceROI = outputBbox[i].mask.clone();
if (faceROI.empty()) continue;
if ((faceROI.cols != GPU_FACE_WIDTH) && (faceROI.rows != GPU_FACE_HEIGHT))
{
cv::resize(faceROI, faceROI, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
}
std::vector<float> embedding = RunArcFace(faceROI);
detectedEmbeddings.push_back(embedding);
faceROI.release();
}
}
}
else {
std::vector<cv::Mat> embeddings;
std::vector<cv::Mat> face_rois;
detectedEmbeddings.clear();
if (outputBbox.size() > 0) {
for (int i = 0; i < outputBbox.size(); i++) {
std::vector<float> embeddingRs;
cv::Mat frame = outputBbox[i].mask.clone();
if (frame.empty()) continue;
if ((frame.cols != CPU_FACE_WIDTH) && (frame.rows != CPU_FACE_HEIGHT))
{
cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
}
#ifdef USE_ONNX_ENGINE
types::FaceContent face_content;
faceRecognizer->detect(frame, face_content);
embeddingRs = face_content.embedding;
#else
face_rois.clear();
embeddings.clear();
face_rois.push_back(frame);
faceRecognizer->Compute(face_rois, &embeddings);
embeddingRs.assign(embeddings[0].begin<float>(), embeddings[0].end<float>());
#endif
detectedEmbeddings.push_back(embeddingRs);
frame.release();
}
}
}
return detectedEmbeddings;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
return detectedEmbeddings;
}
} */
// Before refactor (working version)
/*std::vector<float> ANSFaceRecognizer::Feature(const cv::Mat& image,
ANSCENTER::Object bBox) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<float> embeddingResult;
if (image.empty()) {
_logger.LogError("ANSFaceRecognizer::Feature",
"Input image is empty", __FILE__, __LINE__);
return embeddingResult;
}
try {
if (engineType == EngineType::NVIDIA_GPU) {
embeddingResult = RunArcFace(bBox.mask);
return embeddingResult;
}
else {
if (image.cols < 10 || image.rows < 10) return embeddingResult;
cv::Mat frame = bBox.mask.clone();
#ifdef USE_ONNX_ENGINE
types::FaceContent face_content;
faceRecognizer->detect(frame, face_content);
embeddingResult = face_content.embedding;
#else
cv::Mat embedding;
faceRecognizer->Compute(frame, &embedding);
embeddingResult.assign(embedding.begin<float>(), embedding.end<float>());
#endif
return embeddingResult;
}
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Feature", e.what(), __FILE__, __LINE__);
embeddingResult.clear();
return embeddingResult;
}
}*/
/*std::vector<FaceResultObject> ANSFaceRecognizer::Match(
const cv::Mat& input,
std::vector<ANSCENTER::Object> bBox,
std::map<std::string, std::string> userDict) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<FaceResultObject> resultObjects;
if (input.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"Input image is empty", __FILE__, __LINE__);
return resultObjects;
}
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::Match",
"Model is not initialized", __FILE__, __LINE__);
return resultObjects;
}
try {
if (input.cols < 10 || input.rows < 10) return resultObjects;
std::vector<std::vector<float>> detectedEmbeddings = Forward(input, bBox);
std::vector<std::string> names;
std::vector<float> sims;
std::tie(names, sims) = SearchForFaces(detectedEmbeddings);
if (names.empty()) {
_logger.LogError("ANSFaceRecognizer::Match",
"No face is match", __FILE__, __LINE__);
return resultObjects;
}
for (size_t i = 0; i < names.size() && i < bBox.size(); ++i) {
FaceResultObject resultObject;
bool isUnknown = (sims[i] > m_knownPersonThresh);
if (isUnknown) {
resultObject.isUnknown = true;
resultObject.userId = "0";
resultObject.userName = "Unknown";
resultObject.confidence = 1.0f;
}
else {
resultObject.isUnknown = false;
resultObject.userId = names[i];
resultObject.userName = userDict[names[i]];
resultObject.confidence =
std::clamp((2.0f - sims[i]) / 2.0f, 0.0f, 1.0f);
}
resultObject.similarity = sims[i];
float x = bBox[i].box.x;
float y = bBox[i].box.y;
float w = bBox[i].box.width;
float h = bBox[i].box.height;
x = std::max(0.0f, x);
y = std::max(0.0f, y);
w = std::min(w, input.cols - x);
h = std::min(h, input.rows - y);
resultObject.box.x = x;
resultObject.box.y = y;
resultObject.box.width = w;
resultObject.box.height = h;
resultObject.mask = bBox[i].mask;
resultObject.cameraId = bBox[i].cameraId;
resultObject.trackId = bBox[i].trackId;
resultObject.polygon = bBox[i].polygon;
resultObject.kps = bBox[i].kps;
resultObject.extraInformation = bBox[i].extraInfo;
resultObjects.push_back(resultObject);
}
return resultObjects;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::Match", e.what(), __FILE__, __LINE__);
return resultObjects;
}
}*/
//cv::Mat ANSFaceRecognizer::GetCropFace(const cv::Mat& input,
// ANSCENTER::Object bBox) {
// try {
// std::vector<ANSCENTER::Object> outputBbox;
// outputBbox.push_back(bBox);
// std::vector<CroppedFace> crFaces;
// crFaces.clear();
// if (engineType == EngineType::NVIDIA_GPU) {
// ANSFRHelper::GetCroppedFaces(input, outputBbox, GPU_FACE_WIDTH, GPU_FACE_HEIGHT, crFaces);
// }
// else {
// ANSFRHelper::GetCroppedFaces(input, outputBbox, CPU_FACE_WIDTH, CPU_FACE_HEIGHT, crFaces);
// }
// if (crFaces.empty()) return cv::Mat();
// return crFaces[0].faceMat;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::GetCropFace", e.what(), __FILE__, __LINE__);
// return cv::Mat();
// }
//}
/*void ANSFaceRecognizer::AddEmbedding(const std::string& className,
float embedding[]) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
std::vector<float> vec(embedding, embedding + FACE_EMBEDDING_SIZE);
if (vec.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
classNames.push_back(className);
faiss_index->add(1, vec.data());
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}*/
/*void ANSFaceRecognizer::AddEmbedding(const std::string& className,
const std::vector<float>& embedding) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!faiss_index) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Search_index is not initialized.", __FILE__, __LINE__);
return;
}
if (embedding.size() != FACE_EMBEDDING_SIZE) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding",
"Embedding size != 512.", __FILE__, __LINE__);
return;
}
classNames.push_back(className);
faiss_index->add(1, embedding.data());
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::AddEmbedding", e.what(), __FILE__, __LINE__);
}
}*/
//void ANSFaceRecognizer::Init() {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// try {
// classNames.clear();
// if (faiss_index) {
// faiss_index->reset();
// }
// else {
// faiss_index = std::make_shared<faiss::IndexFlatL2>(FACE_EMBEDDING_SIZE);
// }
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Init", e.what(), __FILE__, __LINE__);
// }
// catch (...) {
// _logger.LogFatal("ANSFaceRecognizer::Init", "Unknown exception occurred", __FILE__, __LINE__);
// }
//}
//std::vector<float> ANSFaceRecognizer::RunArcFace(const cv::Mat& inputImage) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<float> embedding;
// if (inputImage.empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Input image is empty", __FILE__, __LINE__);
// return embedding;
// }
// if (!_isInitialized) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Model is not initialized", __FILE__, __LINE__);
// return embedding;
// }
// if (inputImage.cols < 10 || inputImage.rows < 10) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Input image is too small", __FILE__, __LINE__);
// return embedding;
// }
// try {
// cv::Mat frame;
// if (inputImage.channels() == 1) {
// cv::cvtColor(inputImage, frame, cv::COLOR_GRAY2BGR);
// }
// else {
// frame = inputImage;
// }
// if (frame.cols != GPU_FACE_WIDTH || frame.rows != GPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
// }
// cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
// cv::cuda::Stream stream;
// cv::cuda::GpuMat img;
// img.upload(frame, stream);
// stream.waitForCompletion();
// std::vector<cv::cuda::GpuMat> inputVec{ std::move(img) };
// std::vector<std::vector<cv::cuda::GpuMat>> inputs{ std::move(inputVec) };
// std::vector<std::vector<std::vector<float>>> featureVectors;
// bool succ = m_trtEngine->runInference(inputs, featureVectors);
// if (!succ) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Failed to run inference.", __FILE__, __LINE__);
// return embedding;
// }
// if (featureVectors.empty() || featureVectors[0].empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFace",
// "Feature vectors are empty.", __FILE__, __LINE__);
// return embedding;
// }
// embedding = std::move(featureVectors[0][0]);
// cv::normalize(embedding, embedding); // l2 normalize
// return embedding;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::RunArcFace", e.what(), __FILE__, __LINE__);
// return embedding;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::RunArcFaceBatch(const std::vector<cv::Mat>& faceROIs)
// {
// std::vector<std::vector<float>> embeddings;
// try {
// if (!_isInitialized) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Model is not initialized", __FILE__, __LINE__);
// return embeddings;
// }
// if (!m_trtEngine) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "TensorRT engine not initialized", __FILE__, __LINE__);
// return embeddings;
// }
// if (faceROIs.empty()) {
// return embeddings;
// }
// if (faceROIs.size() > static_cast<size_t>(m_options.maxBatchSize)) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Batch size " + std::to_string(faceROIs.size()) +
// " exceeds maxBatchSize " + std::to_string(m_options.maxBatchSize),
// __FILE__, __LINE__);
// return embeddings;
// }
//
// const auto& inputDims = m_trtEngine->getInputDims();
// if (inputDims.empty() || inputDims[0].nbDims < 3) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Invalid engine input dims", __FILE__, __LINE__);
// return embeddings;
// }
//
// // Prepare GPU batch
// cv::cuda::Stream stream;
// std::vector<cv::cuda::GpuMat> batchGpu;
// batchGpu.reserve(faceROIs.size());
//
// for (size_t i = 0; i < faceROIs.size(); ++i) {
// const cv::Mat& roi = faceROIs[i];
// if (roi.empty()) {
// continue;
// }
// cv::Mat frame;
// if (roi.channels() == 1) {
// cv::cvtColor(roi, frame, cv::COLOR_GRAY2BGR);
// }
// else {
// frame = roi;
// }
//
// if (frame.cols != GPU_FACE_WIDTH || frame.rows != GPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(GPU_FACE_WIDTH, GPU_FACE_HEIGHT));
// }
//
// cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
//
// cv::cuda::GpuMat d_img;
// d_img.upload(frame, stream);
// batchGpu.emplace_back(std::move(d_img));
// }
//
// if (batchGpu.empty()) {
// return embeddings;
// }
//
// // Prepare inputs for inference
// std::vector<std::vector<cv::cuda::GpuMat>> inputs;
// inputs.emplace_back(std::move(batchGpu));
//
// // Run inference
// std::vector<std::vector<std::vector<float>>> featureVectors;
// bool succ = m_trtEngine->runInference(inputs, featureVectors);
// stream.waitForCompletion();
//
// if (!succ) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "runInference failed", __FILE__, __LINE__);
// return embeddings;
// }
//
// if (featureVectors.empty()) {
// _logger.LogError("ANSFaceRecognizer::RunArcFaceBatch",
// "Empty featureVectors returned from inference", __FILE__, __LINE__);
// return embeddings;
// }
//
//
// size_t batchSize = featureVectors.size();
// embeddings.reserve(batchSize);
//
// for (size_t i = 0; i < batchSize; ++i) {
// if (featureVectors[i].empty()) {
// continue;
// }
// // Get the first (and typically only) output for this face
// // featureVectors[i][0] is the embedding vector for face i
// const auto& embedding = featureVectors[i][0];
//
// if (embedding.empty()) {
// continue;
// }
//
// embeddings.push_back(embedding);
// }
//
// // Verify we got the expected number of embeddings
// //if (embeddings.size() != inputs[0].size()) {
// // _logger.LogWarn("ANSFaceRecognizer::RunArcFaceBatch",
// // "Expected " + std::to_string(inputs[0].size()) +
// // " embeddings but got " + std::to_string(embeddings.size()),
// // __FILE__, __LINE__);
// //}
// return embeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::RunArcFaceBatch",
// e.what(), __FILE__, __LINE__);
// return embeddings;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox)
// {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// std::vector<std::vector<float>> detectedEmbeddings;
//
// if (input.empty()) {
// _logger.LogError("ANSFaceRecognizer::Forward",
// "Input image is empty", __FILE__, __LINE__);
// return detectedEmbeddings;
// }
//
// try {
// if (outputBbox.empty()) {
// return detectedEmbeddings;
// }
//
// if (engineType == EngineType::NVIDIA_GPU) {
// std::vector<cv::Mat> faceROIs;
// faceROIs.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// const cv::Mat& faceROI = obj.mask; // aligned mask
// if (faceROI.empty()) continue;
// faceROIs.push_back(faceROI);
// }
//
// if (!faceROIs.empty()) {
// detectedEmbeddings = RunArcFaceBatch(faceROIs);
// }
// }
// else {
// detectedEmbeddings.clear();
//#ifdef USE_ONNX_ENGINE
// for (const auto& obj : outputBbox) {
// cv::Mat frame = obj.mask.clone();
// if (frame.empty()) continue;
//
// if (frame.cols != CPU_FACE_WIDTH || frame.rows != CPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
//
// types::FaceContent face_content;
// faceRecognizer->detect(frame, face_content);
// if (!face_content.embedding.empty()) {
// detectedEmbeddings.push_back(face_content.embedding);
// }
// }
//#else
// std::vector<cv::Mat> face_rois;
// face_rois.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// cv::Mat frame = obj.mask;
// if (frame.empty()) continue;
//
// if (frame.cols != CPU_FACE_WIDTH || frame.rows != CPU_FACE_HEIGHT) {
// cv::resize(frame, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// face_rois.push_back(frame);
// }
//
// if (!face_rois.empty()) {
// std::vector<cv::Mat> embeddings;
// faceRecognizer->Compute(face_rois, &embeddings);
//
// detectedEmbeddings.reserve(embeddings.size());
// for (const auto& embMat : embeddings) {
// if (embMat.empty()) continue;
// std::vector<float> emb(embMat.begin<float>(), embMat.end<float>());
// detectedEmbeddings.push_back(std::move(emb));
// }
// }
//#endif
// }
// return detectedEmbeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
// return detectedEmbeddings;
// }
// }
//std::vector<float> ANSFaceRecognizer::NormalizeVector(const std::vector<float>& vec) {
// std::lock_guard<std::recursive_mutex> lock(_mutex);
// float norm = 0.0f;
// for (float v : vec) norm += v * v;
// norm = std::sqrt(norm);
// std::vector<float> normalizedVec(vec.size());
// if (norm > 0) {
// for (size_t i = 0; i < vec.size(); ++i) {
// normalizedVec[i] = vec[i] / norm;
// }
// }
// else {
// normalizedVec = vec;
// }
// return normalizedVec;
//}
/*bool ANSFaceRecognizer::LoadEngine(const std::string xmlModelPath,
bool engineOptimisation) {
try {
if (!FileExist(xmlModelPath)) {
_logger.LogError("ANSFaceRecognizer::LoadEngine",
"Cannot find the raw XML/ONNX model file.", __FILE__, __LINE__);
return false;
}
if (engineType == EngineType::NVIDIA_GPU) {
if (!m_trtEngine) {
m_options.optBatchSize = 8;
m_options.maxBatchSize = 32;
m_options.calibrationBatchSize = 8;
m_options.deviceIndex = _modelConfig.gpuDeviceIndex;
m_options.engineFileDir = _modelFolder;
m_options.precision = ANSCENTER::Precision::FP16;
m_trtEngine = std::make_unique<Engine<float>>(m_options);
}
if (FileExist(xmlModelPath)) {
bool succ = m_trtEngine->buildLoadNetwork(xmlModelPath, SUB_VALS, DIV_VALS, NORMALIZE);
if (!succ) {
_logger.LogError("ANSFaceRecognizer::LoadEngine. Unable to build/load TensorRT engine.",
xmlModelPath, __FILE__, __LINE__);
return false;
}
}
else {
_logger.LogError("ANSFaceRecognizer::LoadEngine. Model file does not exist",
xmlModelPath, __FILE__, __LINE__);
return false;
}
return true;
}
return true;
}
catch (const std::exception& e) {
_logger.LogFatal("ANSFaceRecognizer::LoadEngine", e.what(), __FILE__, __LINE__);
return false;
}
}*/
//bool ANSFaceRecognizer::Destroy() {
// try {
// classNames.clear();
// if (faiss_index) {
// faiss_index->reset();
// faiss_index.reset();
// }
// faceRecognizer.reset();
// m_trtEngine.reset();
// return true;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Destroy", e.what(), __FILE__, __LINE__);
// return false;
// }
//}
// std::vector<std::vector<float>> ANSFaceRecognizer::Forward(const cv::Mat& input, std::vector<ANSCENTER::Object> outputBbox)
// {
// std::vector<std::vector<float>> detectedEmbeddings;
//
// // Early validation before locking
// if (input.empty()) {
// _logger.LogError("ANSFaceRecognizer::Forward",
// "Input image is empty", __FILE__, __LINE__);
// return detectedEmbeddings;
// }
//
// if (outputBbox.empty()) {
// return detectedEmbeddings;
// }
//
// std::lock_guard<std::recursive_mutex> lock(_mutex);
//
// try {
// // Pre-reserve output space
// detectedEmbeddings.reserve(outputBbox.size());
//
// if (engineType == EngineType::NVIDIA_GPU) {
// // GPU path
// std::vector<cv::Mat> faceROIs;
// faceROIs.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// if (!obj.mask.empty()) {
// faceROIs.push_back(obj.mask);
// }
// }
//
// if (!faceROIs.empty()) {
// detectedEmbeddings = RunArcFaceBatch(faceROIs);
// }
// }
// else {
// // CPU path
//#ifdef USE_ONNX_ENGINE
// for (const auto& obj : outputBbox) {
// if (obj.mask.empty()) continue;
//
// cv::Mat frame;
// if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
// cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// else {
// frame = obj.mask.clone();
// }
//
// types::FaceContent face_content;
// faceRecognizer->detect(frame, face_content);
//
// if (!face_content.embedding.empty()) {
// detectedEmbeddings.push_back(std::move(face_content.embedding));
// }
// }
//#else
// std::vector<cv::Mat> face_rois;
// face_rois.reserve(outputBbox.size());
//
// for (const auto& obj : outputBbox) {
// if (obj.mask.empty()) continue;
//
// cv::Mat frame;
// if (obj.mask.cols != CPU_FACE_WIDTH || obj.mask.rows != CPU_FACE_HEIGHT) {
// cv::resize(obj.mask, frame, cv::Size(CPU_FACE_WIDTH, CPU_FACE_HEIGHT));
// }
// else {
// frame = obj.mask;
// }
//
// face_rois.push_back(frame);
// }
//
// if (!face_rois.empty()) {
// std::vector<cv::Mat> embeddings;
// embeddings.reserve(face_rois.size());
//
// faceRecognizer->Compute(face_rois, &embeddings);
//
// detectedEmbeddings.reserve(embeddings.size());
// for (auto& embMat : embeddings) {
// if (embMat.empty()) continue;
//
// // Optimized conversion
// if (embMat.isContinuous()) {
// float* dataPtr = embMat.ptr<float>(0);
// detectedEmbeddings.emplace_back(dataPtr, dataPtr + embMat.total());
// }
// else {
// std::vector<float> emb(embMat.begin<float>(), embMat.end<float>());
// detectedEmbeddings.push_back(std::move(emb));
// }
// }
// }
//#endif
// }
//
// return detectedEmbeddings;
// }
// catch (const std::exception& e) {
// _logger.LogFatal("ANSFaceRecognizer::Forward", e.what(), __FILE__, __LINE__);
// return detectedEmbeddings;
// }
// }