#include "ARCFaceRT.h"
#include "NvOnnxParser.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <exception>
#include <filesystem>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

namespace ANSCENTER {

    // Initialises the recogniser: runs the base-class initialisation, picks the ONNX
    // model file from _modelFolder, builds/loads the TensorRT engine and resets the
    // FAISS index.
    //
    // Model lookup order: "ansfacerecognizer50.onnx", then "ansfacerecognizer.onnx".
    // An unknownPersonThreshold of exactly 0 in the config is replaced by 0.35.
    //
    // Returns true only when the engine is loaded AND a FAISS index exists.
    bool ArcFace::Initialize(std::string licenseKey,
                             ModelConfig modelConfig,
                             const std::string& modelZipFilePath,
                             const std::string& modelZipPassword,
                             std::string& labelMap) {
        if (!ANSFRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap)) {
            return false;
        }

        try {
            _modelConfig = modelConfig;
            _modelConfig.modelType = ModelType::FACERECOGNIZE;
            _modelConfig.detectionType = DetectionType::FACERECOGNIZER;

            m_knownPersonThresh = _modelConfig.unknownPersonThreshold;
            if (m_knownPersonThresh == 0.0f) {
                m_knownPersonThresh = 0.35f;
            }

            const std::string onnxfile50 = CreateFilePath(_modelFolder, "ansfacerecognizer50.onnx");
            if (std::filesystem::exists(onnxfile50)) {
                _modelFilePath = onnxfile50;
                _logger.LogDebug("ArcFace::Initialize. Loading arcface50 weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                const std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
                if (!std::filesystem::exists(onnxfile)) {
                    // Report the path that was actually probed; _modelFilePath has not
                    // been assigned on this path and would be empty or stale.
                    _logger.LogError("ArcFace::Initialize. Model arcface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
                    return false;
                }
                _modelFilePath = onnxfile;
                _logger.LogDebug("ArcFace::Initialize. Loading arcface weight", _modelFilePath, __FILE__, __LINE__);
            }

            // Configure the engine with batch support.
            m_options.precision = ANSCENTER::Precision::FP32;
            m_options.optBatchSize = 8;          // expected typical batch
            m_options.maxBatchSize = 32;         // upper bound on faces per call
            m_options.calibrationBatchSize = 8;
            m_options.deviceIndex = 0;
            m_trtEngine.UpdateOptions(m_options);

            if (!FileExist(_modelFilePath)) {
                _logger.LogError("ArcFace::Initialize. Model file does not exist", _modelFilePath, __FILE__, __LINE__);
                return false;
            }
            if (!m_trtEngine.buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE)) {
                _logger.LogError("ArcFace::Initialize. Unable to build or load TensorRT engine.", _modelFilePath, __FILE__, __LINE__);
                return false;
            }

            // Init() swallows its own exceptions, so verify its outcome instead of
            // unconditionally declaring success.
            Init();
            if (!faiss_index) {
                _logger.LogError("ArcFace::Initialize", "FAISS index could not be created.", __FILE__, __LINE__);
                _isInitialized = false;
                return false;
            }

            _isInitialized = true;
            return true;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // Runs the base-class LoadModel and resolves _modelFilePath using the same lookup
    // order as Initialize. Does not build or load the TensorRT engine.
    bool ArcFace::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
        try {
            if (!ANSFRBase::LoadModel(modelZipFilePath, modelZipPassword)) {
                return false;
            }

            const std::string onnxfile50 = CreateFilePath(_modelFolder, "ansfacerecognizer50.onnx");
            if (std::filesystem::exists(onnxfile50)) {
                _modelFilePath = onnxfile50;
                _logger.LogDebug("ArcFace::LoadModel. Loading arcface50 weight", _modelFilePath, __FILE__, __LINE__);
                return true;
            }

            const std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (!std::filesystem::exists(onnxfile)) {
                // Log the probed path rather than the not-yet-updated _modelFilePath.
                _logger.LogError("ArcFace::LoadModel. Model arcface.onnx file does not exist", onnxfile, __FILE__, __LINE__);
                return false;
            }
            _modelFilePath = onnxfile;
            _logger.LogDebug("ArcFace::LoadModel. Loading arcface weight", _modelFilePath, __FILE__, __LINE__);
            return true;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::LoadModel", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // Builds a TensorRT engine for _modelFilePath next to the model file and reports
    // that folder through optimizedModelFolder (set to "" when the model is missing).
    //
    // The fp16 flag is currently not consulted: the engine is always built with
    // Precision::FP32, the same precision Initialize and LoadEngine configure.
    bool ArcFace::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
        if (!FileExist(_modelFilePath)) {
            optimizedModelFolder = "";
            return false;
        }

        optimizedModelFolder = GetParentFolder(_modelFilePath);
        m_options.optBatchSize = 8;
        m_options.maxBatchSize = 32;
        m_options.engineFileDir = optimizedModelFolder;
        m_options.precision = Precision::FP32;

        // NOTE(review): if Engine is a class template, its argument list must be
        // spelled out here exactly as in the header declaration of m_trtEngine.
        Engine engine(m_options);
        const auto built = engine.buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
        if (!built) {
            const std::string errMsg = "Error: Unable to build the TensorRT engine. Try increasing TensorRT log severity to kVERBOSE.";
            _logger.LogError("ArcFace::OptimizeModel", errMsg, __FILE__, __LINE__);
            return false;
        }
        return true;
    }

    // Returns the embedding of the face crop stored in bBox.mask, or an empty vector
    // when `image` is empty / smaller than 10x10 or when inference fails.
    //
    // _mutex is taken here and again inside RunArcFace, so this only works when
    // _mutex is a recursive mutex.
    std::vector<float> ArcFace::Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) {
        if (image.empty() || image.cols < 10 || image.rows < 10) {
            return {};
        }

        std::lock_guard lock(_mutex);
        try {
            return RunArcFace(bBox.mask);
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::Feature", e.what(), __FILE__, __LINE__);
            return {};
        }
    }

    // Embeds every face in bBox that carries a non-empty mask, looks each embedding
    // up in the FAISS index and returns one FaceResultObject per embedded face.
    //
    // A face whose distance (as returned by SearchForFaces) exceeds
    // m_knownPersonThresh is reported as "Unknown" with userId "0"; otherwise the
    // user name is taken from userDict, falling back to the id itself.
    //
    // NOTE(review): userDict is assumed to map user id -> display name, both
    // std::string — confirm against the header.
    std::vector<FaceResultObject> ArcFace::Match(const cv::Mat& input,
                                                 const std::vector<ANSCENTER::Object>& bBox,
                                                 const std::map<std::string, std::string>& userDict) {
        std::vector<FaceResultObject> resultObjects;

        if (input.empty() || input.cols < 10 || input.rows < 10) {
            return resultObjects;
        }

        std::lock_guard lock(_mutex);
        if (!_isInitialized) {
            _logger.LogError("ArcFace::Match", "Model is not initialized", __FILE__, __LINE__);
            return resultObjects;
        }

        try {
            const std::vector<std::vector<float>> detectedEmbeddings = Forward(input, bBox);
            auto [names, sims] = SearchForFaces(detectedEmbeddings);

            if (names.empty()) {
                _logger.LogError("ArcFace::Match", "No face is match", __FILE__, __LINE__);
                return resultObjects;
            }

            // Forward() only embeds boxes with a non-empty mask, so embedding j belongs
            // to the j-th such box, not to bBox[j]. Record that mapping so a skipped
            // box cannot shift identities onto the wrong face.
            std::vector<std::size_t> sourceIndex;
            sourceIndex.reserve(bBox.size());
            for (std::size_t i = 0; i < bBox.size(); ++i) {
                if (!bBox[i].mask.empty()) {
                    sourceIndex.push_back(i);
                }
            }

            const std::size_t resultCount = std::min({ names.size(), sims.size(), sourceIndex.size() });
            resultObjects.reserve(resultCount);

            for (std::size_t i = 0; i < resultCount; ++i) {
                const ANSCENTER::Object& face = bBox[sourceIndex[i]];
                FaceResultObject resultObject;

                if (sims[i] > m_knownPersonThresh) {
                    resultObject.isUnknown = true;
                    resultObject.userId = "0";
                    resultObject.userName = "Unknown";
                    resultObject.confidence = 1.0f;
                }
                else {
                    resultObject.isUnknown = false;
                    resultObject.userId = names[i];
                    const auto it = userDict.find(names[i]);
                    resultObject.userName = (it != userDict.end()) ? it->second : names[i];
                    resultObject.confidence = 1.0f - sims[i];
                }
                resultObject.similarity = sims[i];

                // Carry the detection data over unchanged.
                resultObject.box = face.box;
                resultObject.mask = face.mask;
                resultObject.cameraId = face.cameraId;
                resultObject.trackId = face.trackId;
                resultObject.polygon = face.polygon;
                resultObject.kps = face.kps;
                resultObject.extraInformation = face.extraInfo;

                resultObjects.push_back(std::move(resultObject));
            }
            return resultObjects;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::Match", e.what(), __FILE__, __LINE__);
            return resultObjects;
        }
    }

    // Crops the face described by bBox out of `input` via
    // ANSFRHelper::GetCroppedFaces (requested size 112x112) and returns the first
    // crop, or an empty cv::Mat when nothing was produced or an exception occurred.
    cv::Mat ArcFace::GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) {
        try {
            std::vector<ANSCENTER::Object> outputBbox(1, bBox);

            // NOTE(review): element type assumed to be CroppedFace (only its faceMat
            // member is used here) — confirm against ANSFRHelper::GetCroppedFaces.
            std::vector<CroppedFace> crFaces;
            ANSFRHelper::GetCroppedFaces(input, outputBbox, 112, 112, crFaces);

            return crFaces.empty() ? cv::Mat() : crFaces.front().faceMat;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::GetCropFace", e.what(), __FILE__, __LINE__);
            return cv::Mat();
        }
    }

    // Configures m_trtEngine (FP32, opt batch 8, max batch 32, device 0) and
    // builds/loads the network from onnxModelPath.
    // engineOptimisation is not used by this implementation.
    bool ArcFace::LoadEngine(const std::string onnxModelPath, bool engineOptimisation) {
        try {
            if (!FileExist(onnxModelPath)) {
                _logger.LogError("ArcFace::LoadEngine", "Cannot find the raw ONNX model file.", __FILE__, __LINE__);
                return false;
            }

            m_options.precision = ANSCENTER::Precision::FP32;
            m_options.optBatchSize = 8;
            m_options.maxBatchSize = 32;
            m_options.calibrationBatchSize = 8;
            m_options.deviceIndex = 0;
            m_trtEngine.UpdateOptions(m_options);

            if (!m_trtEngine.buildLoadNetwork(onnxModelPath, SUB_VALS, DIV_VALS, NORMALIZE)) {
                _logger.LogError("ArcFace::LoadEngine. Unable to build or load TensorRT engine.", onnxModelPath, __FILE__, __LINE__);
                return false;
            }
            return true;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::LoadEngine", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // Computes the embedding of a single face image.
    //
    // Preprocessing on the GPU: upload, GRAY->BGR when single-channel, resize to
    // FACE_WIDTH x FACE_HEIGHT when needed, BGR->RGB. Returns an empty vector on
    // empty input, uninitialised model, failed inference or exception.
    std::vector<float> ArcFace::RunArcFace(const cv::Mat& inputImage) {
        std::vector<float> embedding;

        if (inputImage.empty()) {
            _logger.LogError("ArcFace::RunArcFace", "Input image is empty", __FILE__, __LINE__);
            return embedding;
        }

        std::lock_guard lock(_mutex);
        try {
            if (!_isInitialized) {
                _logger.LogError("ArcFace::RunArcFace", "Model is not initialized", __FILE__, __LINE__);
                return embedding;
            }

            cv::cuda::Stream stream;
            cv::cuda::GpuMat d_img;
            d_img.upload(inputImage, stream);

            if (inputImage.channels() == 1) {
                cv::cuda::GpuMat d_bgr;
                cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
                d_img = d_bgr;
            }

            if (inputImage.cols != FACE_WIDTH || inputImage.rows != FACE_HEIGHT) {
                cv::cuda::GpuMat d_resized;
                cv::cuda::resize(d_img, d_resized, cv::Size(FACE_WIDTH, FACE_HEIGHT), 0, 0, cv::INTER_LINEAR, stream);
                d_img = d_resized;
            }

            cv::cuda::GpuMat d_rgb;
            cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);

            // The operations above are only queued on `stream`. Wait for them before
            // handing the buffers to the engine, so the result does not depend on
            // which CUDA stream the engine happens to use internally.
            stream.waitForCompletion();

            std::vector<cv::cuda::GpuMat> inputVec;
            inputVec.emplace_back(std::move(d_rgb));
            std::vector<std::vector<cv::cuda::GpuMat>> inputs;
            inputs.emplace_back(std::move(inputVec));

            std::vector<std::vector<std::vector<float>>> featureVectors;
            if (!m_trtEngine.runInference(inputs, featureVectors)) {
                _logger.LogError("ArcFace::RunArcFace", "Failed to run inference.", __FILE__, __LINE__);
                return embedding;
            }

            if (!featureVectors.empty() && !featureVectors[0].empty()) {
                embedding = std::move(featureVectors[0][0]);
            }
            return embedding;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::RunArcFace", e.what(), __FILE__, __LINE__);
            return embedding;
        }
    }

    // Computes embeddings for a batch of face images in a single inference call.
    //
    // Empty ROIs are skipped with a warning, so the output can be shorter than the
    // input; callers that need a 1:1 mapping must filter empty ROIs beforehand (as
    // Forward does). Returns an empty vector when the model is not initialised, the
    // batch exceeds m_options.maxBatchSize, the engine reports invalid input
    // dimensions, or inference fails. This function does not lock _mutex itself.
    std::vector<std::vector<float>> ArcFace::RunArcFaceBatch(const std::vector<cv::Mat>& faceROIs) {
        std::vector<std::vector<float>> embeddings;
        try {
            if (!_isInitialized) {
                _logger.LogError("ArcFace::RunArcFaceBatch", "Model is not initialized", __FILE__, __LINE__);
                return embeddings;
            }
            if (faceROIs.empty()) {
                return embeddings;
            }
            if (faceROIs.size() > static_cast<std::size_t>(m_options.maxBatchSize)) {
                _logger.LogError("ArcFace::RunArcFaceBatch", "Batch size exceeds maxBatchSize", __FILE__, __LINE__);
                return embeddings;
            }

            const auto& inputDims = m_trtEngine.getInputDims();
            if (inputDims.empty() || inputDims[0].nbDims < 3) {
                _logger.LogError("ArcFace::RunArcFaceBatch", "Invalid engine input dims", __FILE__, __LINE__);
                return embeddings;
            }

            cv::cuda::Stream stream;
            std::vector<cv::cuda::GpuMat> batchGpu;
            batchGpu.reserve(faceROIs.size());
            const cv::Size targetSize(FACE_WIDTH, FACE_HEIGHT);

            for (std::size_t i = 0; i < faceROIs.size(); ++i) {
                const cv::Mat& roi = faceROIs[i];
                if (roi.empty()) {
                    _logger.LogWarn("ArcFace::RunArcFaceBatch", "Empty ROI at index " + std::to_string(i) + ", skipping", __FILE__, __LINE__);
                    continue;
                }

                cv::cuda::GpuMat d_img;
                d_img.upload(roi, stream);

                if (roi.channels() == 1) {
                    cv::cuda::GpuMat d_bgr;
                    cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
                    d_img = d_bgr;
                }

                if (roi.cols != FACE_WIDTH || roi.rows != FACE_HEIGHT) {
                    cv::cuda::GpuMat d_resized;
                    cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, stream);
                    d_img = d_resized;
                }

                cv::cuda::GpuMat d_rgb;
                cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
                batchGpu.emplace_back(std::move(d_rgb));
            }

            if (batchGpu.empty()) {
                return embeddings;
            }

            // Make sure every queued preprocessing step has finished before the
            // engine reads the batch.
            stream.waitForCompletion();

            std::vector<std::vector<cv::cuda::GpuMat>> inputs;
            inputs.reserve(1);
            inputs.emplace_back(std::move(batchGpu));

            std::vector<std::vector<std::vector<float>>> featureVectors;
            if (!m_trtEngine.runInference(inputs, featureVectors)) {
                _logger.LogError("ArcFace::RunArcFaceBatch", "runInference failed", __FILE__, __LINE__);
                return embeddings;
            }
            if (featureVectors.empty() || featureVectors[0].empty()) {
                _logger.LogError("ArcFace::RunArcFaceBatch", "Empty featureVectors", __FILE__, __LINE__);
                return embeddings;
            }

            embeddings = std::move(featureVectors[0]);
            return embeddings;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::RunArcFaceBatch", e.what(), __FILE__, __LINE__);
            return embeddings;
        }
    }

    // Collects the non-empty face crops (Object::mask) from outputBbox, in order, and
    // returns their embeddings from RunArcFaceBatch. Objects with an empty mask are
    // skipped, so the result may contain fewer entries than outputBbox.
    std::vector<std::vector<float>> ArcFace::Forward(const cv::Mat& input, const std::vector<ANSCENTER::Object>& outputBbox) {
        std::vector<std::vector<float>> detectedEmbeddings;

        if (input.empty()) {
            _logger.LogError("ArcFace::Forward", "Input image is empty", __FILE__, __LINE__);
            return detectedEmbeddings;
        }
        if (outputBbox.empty()) {
            return detectedEmbeddings;
        }

        std::lock_guard lock(_mutex);
        try {
            std::vector<cv::Mat> faceROIs;
            faceROIs.reserve(outputBbox.size());
            for (const auto& obj : outputBbox) {
                if (!obj.mask.empty()) {
                    faceROIs.push_back(obj.mask);
                }
            }
            if (faceROIs.empty()) {
                return detectedEmbeddings;
            }

            detectedEmbeddings = RunArcFaceBatch(faceROIs);
            return detectedEmbeddings;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::Forward", e.what(), __FILE__, __LINE__);
            return detectedEmbeddings;
        }
    }

    // Looks every embedding up in the FAISS index.
    //
    // Returns (ids, distances) with exactly one entry per input embedding. The
    // distance is |1 - CosineSimilarity(query, best stored vector)|. The pair
    // ("0", 1.0) is used when the index is empty, the embedding does not have
    // FACE_EMBEDDING_SIZE elements, the returned id is out of range for classNames,
    // or an exception is thrown.
    std::tuple<std::vector<std::string>, std::vector<float>>
    ArcFace::SearchForFaces(const std::vector<std::vector<float>>& detectedEmbeddings) {
        std::vector<std::string> detectedUsers;
        std::vector<float> simValues;

        if (detectedEmbeddings.empty()) {
            return std::make_tuple(detectedUsers, simValues);
        }

        std::lock_guard lock(_mutex);
        try {
            detectedUsers.reserve(detectedEmbeddings.size());
            simValues.reserve(detectedEmbeddings.size());

            const bool haveIndex = !classNames.empty() && faiss_index && faiss_index->ntotal > 0;
            if (!haveIndex) {
                detectedUsers.assign(detectedEmbeddings.size(), "0");
                simValues.assign(detectedEmbeddings.size(), 1.0f);
                return std::make_tuple(std::move(detectedUsers), std::move(simValues));
            }

            // At most 3 neighbours, fewer when the index is smaller.
            const int k = std::min(3, static_cast<int>(faiss_index->ntotal));

            // Scratch buffers shared by all queries.
            std::vector<faiss::idx_t> indices(k);
            std::vector<float> distances(k);
            std::vector<float> matchEmbedding(faiss_index->d);

            for (const auto& embedding : detectedEmbeddings) {
                if (embedding.size() != FACE_EMBEDDING_SIZE) {
                    detectedUsers.push_back("0");
                    simValues.push_back(1.0f);
                    continue;
                }

                faiss_index->search(1, embedding.data(), k, distances.data(), indices.data());

                // Pick the neighbour with the smallest reported distance.
                const auto nearest = std::min_element(distances.begin(), distances.end());
                const int best_index = static_cast<int>(std::distance(distances.begin(), nearest));

                const faiss::idx_t id = indices[best_index];
                if (id < 0 || id >= static_cast<faiss::idx_t>(classNames.size())) {
                    detectedUsers.push_back("0");
                    simValues.push_back(1.0f);
                    continue;
                }

                // Re-score the winner with cosine similarity on the stored vector.
                faiss_index->reconstruct(id, matchEmbedding.data());
                const float cosine = CosineSimilarity(embedding, matchEmbedding, false);
                const float similarity = 1.0f - cosine;

                detectedUsers.push_back(classNames[id]);
                simValues.push_back(std::abs(similarity));
            }

            return std::make_tuple(std::move(detectedUsers), std::move(simValues));
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::SearchForFaces", e.what(), __FILE__, __LINE__);
            // Keep the one-result-per-embedding contract even on failure.
            detectedUsers.assign(detectedEmbeddings.size(), "0");
            simValues.assign(detectedEmbeddings.size(), 1.0f);
            return std::make_tuple(detectedUsers, simValues);
        }
    }

    // Clears the label list and empties the FAISS index, creating the index on first
    // use. On exception, logs and sets _isInitialized to false.
    void ArcFace::Init() {
        std::lock_guard lock(_mutex);
        try {
            classNames.clear();
            if (faiss_index) {
                faiss_index->reset();
            }
            else {
                // NOTE(review): index type assumed to be faiss::IndexFlatL2 (the search
                // code selects the minimum distance) — confirm against the declaration
                // of faiss_index in the header.
                faiss_index = std::make_unique<faiss::IndexFlatL2>(FACE_EMBEDDING_SIZE);
            }
        }
        catch (const std::exception& e) {
            _isInitialized = false;
            _logger.LogFatal("ArcFace::Init", e.what(), __FILE__, __LINE__);
        }
    }

    // Releases resources through Destroy(); never lets an exception escape.
    ArcFace::~ArcFace() {
        try {
            Destroy();
        }
        catch (const std::exception& e) {
            _logger.LogError("ArcFace::~ArcFace", e.what(), __FILE__, __LINE__);
        }
        catch (...) {
            _logger.LogError("ArcFace::~ArcFace", "Unknown exception during destruction", __FILE__, __LINE__);
        }
    }

    // Drops all labels and the FAISS index, marks the object uninitialised and clears
    // the engine's GPU buffers. Returns false if a std::exception was thrown.
    bool ArcFace::Destroy() {
        std::lock_guard lock(_mutex);
        try {
            classNames.clear();
            if (faiss_index) {
                faiss_index->reset();
                faiss_index.reset();
            }
            _isInitialized = false;
            m_trtEngine.clearGpuBuffers();
            return true;
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::Destroy", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // Registers one embedding under className.
    // `embedding` must point to at least as many floats as the index dimension; the
    // length cannot be checked here.
    void ArcFace::AddEmbedding(const std::string& className, float embedding[]) {
        if (!embedding) {
            _logger.LogError("ArcFace::AddEmbedding", "Null embedding pointer.", __FILE__, __LINE__);
            return;
        }

        std::lock_guard lock(_mutex);
        try {
            if (!faiss_index) {
                _logger.LogError("ArcFace::AddEmbedding", "FAISS index is not initialized.", __FILE__, __LINE__);
                return;
            }
            // Add the vector first: if add() throws, classNames must not already hold
            // a label without a vector, or every later id would map to the wrong name.
            faiss_index->add(1, embedding);
            classNames.push_back(className);
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::AddEmbedding", e.what(), __FILE__, __LINE__);
        }
    }

    // Registers one embedding under className; rejects vectors whose size differs
    // from FACE_EMBEDDING_SIZE.
    void ArcFace::AddEmbedding(const std::string& className, const std::vector<float>& embedding) {
        if (embedding.size() != FACE_EMBEDDING_SIZE) {
            _logger.LogError("ArcFace::AddEmbedding", "Embedding size does not match expected output dimension of 512.", __FILE__, __LINE__);
            return;
        }

        std::lock_guard lock(_mutex);
        try {
            if (!faiss_index) {
                _logger.LogError("ArcFace::AddEmbedding", "FAISS index is not initialized.", __FILE__, __LINE__);
                return;
            }
            // Vector first, label second — keeps classNames aligned with index ids
            // even if add() fails.
            faiss_index->add(1, embedding.data());
            classNames.push_back(className);
        }
        catch (const std::exception& e) {
            _logger.LogFatal("ArcFace::AddEmbedding", e.what(), __FILE__, __LINE__);
        }
    }
}