// Declaration of ANSCENTER::ANSFaceRecognizer, the face-embedding / face-matching
// component built on top of ANSFRBase.
//
// NOTE(review): in the text of this header as seen here, template argument lists
// (after std::vector, std::map, std::shared_ptr, std::unique_ptr, std::tuple,
// std::array, std::unordered_map) and the names of several #include directives
// are absent. They look lost in extraction and must be restored from the
// original file; they are deliberately NOT guessed below.
#ifndef ANSFACERECOGNISER_H
#define ANSFACERECOGNISER_H
#pragma once
#include "ANSFRCommon.h"
#include "hnswlib/hnswlib.h"
#include "cnn.hpp"
#include "face_reid.hpp"
#include "openvino/openvino.hpp"
// NOTE(review): the next five directives have no header name in this view.
#include
#include
#include
#include
#include
#include "engine.h"
#include "engine/EnginePoolManager.h"
#include "ONNXEngine.h"

// Build-time switches. USE_ONNX_ENGINE is defined unconditionally here, so the
// `#ifdef USE_ONNX_ENGINE` branch inside the class is the active one.
#define USE_ONNX_ENGINE
//#define CPU_MODE
//#define USE_CPU_BATCH_MODE

namespace ANSCENTER {
    // Face recogniser: declares model loading, per-face embedding extraction,
    // matching against an index, and index maintenance (add / swap).
    // Derives publicly from ANSFRBase and overrides Initialize, LoadModel and
    // SetMaxSlotsPerGpu.
    // NOTE(review): comments and member names refer to FAISS (faiss::idx_t,
    // faiss_index) while the only index library included directly is hnswlib;
    // presumably the FAISS declarations arrive via ANSFRCommon.h — confirm.
    class ANSFaceRecognizer : public ANSFRBase {
    public:
        // Override of the base-class initialisation entry point.
        // labelMap is a non-const reference, so it can be written by the callee.
        virtual bool Initialize(std::string licenseKey,
                                ModelConfig modelConfig,
                                const std::string& modelZipFilePath,
                                const std::string& modelZipPassword,
                                std::string& labelMap) override;

        // Override of the base-class model loader; takes the model zip path and
        // its password.
        virtual bool LoadModel(const std::string& modelZipFilePath,
                               const std::string& modelZipPassword) override;

        // fp16 selects a half-precision option; optimizedModelFolder is a
        // non-const reference (presumably an output path — confirm in the .cpp).
        bool OptimizeModel(bool fp16, std::string& optimizedModelFolder);

        // Single face feature
        std::vector Feature(const cv::Mat& image,const ANSCENTER::Object& bBox);

        // Full pipeline: embeddings -> FAISS search -> results
        std::vector Match(const cv::Mat& input,
                          const std::vector& bBox,
                          const std::map& userDict);

        // Returns a cv::Mat derived from `input` and `bBox`
        // (presumably the cropped face region — confirm in the .cpp).
        cv::Mat GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox);

        void Init();

        // Two overloads for registering an embedding under `className`:
        // a raw float array (no length parameter is passed) and a std::vector.
        // NOTE(review): the raw-array overload presumably expects
        // FACE_EMBEDDING_SIZE floats — verify against the implementation.
        void AddEmbedding(const std::string& className, float embedding[]);
        void AddEmbedding(const std::string& className, const std::vector& embedding);

        // Double-buffer support: atomically swap FAISS index + mapping
        void SwapIndex(std::shared_ptr newIndex,
                       std::unordered_map&& newFaceIdToUserId);

        // Returns FACE_EMBEDDING_SIZE (initialised to 512 below).
        int GetEmbeddingSize() const { return FACE_EMBEDDING_SIZE; }

        // Returns a copy of the m_gpuResources shared pointer.
        std::shared_ptr GetGpuResources() const { return m_gpuResources; }

        // Stores the argument in _modelConfig.unknownPersonThreshold, then copies
        // that field into m_knownPersonThresh. Always returns true.
        // No lock is taken in this inline body.
        // NOTE(review): "Paramater" is misspelled but is part of the public
        // interface, so it is left unchanged. The parameter is named
        // knownPersonThreshold while the config field is unknownPersonThreshold —
        // confirm which meaning is intended.
        bool UpdateParamater(double knownPersonThreshold) {
            _modelConfig.unknownPersonThreshold = knownPersonThreshold;
            m_knownPersonThresh = _modelConfig.unknownPersonThreshold;
            return true;
        }

        ~ANSFaceRecognizer();
        bool Destroy();

        // L2-normalize a vector in-place (public — used by ANSFR::Reload)
        static void L2NormalizeInPlace(std::vector& vec);

    private:
        // engineOptimisation defaults to true when omitted by the caller.
        bool LoadEngine(const std::string& xmlModelPath, bool engineOptimisation = true);

        // Batched forward: one embedding per Object.mask (caller must hold _mutex)
        std::vector> ForwardUnlocked(const cv::Mat& input,const std::vector& outputBbox);

        // FAISS search (caller must hold _mutex)
        std::tuple, std::vector> SearchForFacesUnlocked(const std::vector>& detectedEmbeddings);

        std::string GetOpenVINODevice();

        // Single-face GPU inference
        std::vector RunArcFace(const cv::Mat& input);

        // Batched GPU inference
        // gpuFaceROIs defaults to an empty container when omitted.
        std::vector> RunArcFaceBatch(
            const std::vector& faceROIs,
            const std::vector& gpuFaceROIs = {});

    protected:
        // Per-instance constants (non-static const members with default
        // initialisers).
        const int GPU_FACE_WIDTH = 112;
        const int GPU_FACE_HEIGHT = 112;
        const int FACE_EMBEDDING_SIZE = 512;

        // Mapping replaced wholesale by SwapIndex's newFaceIdToUserId argument
        // (per the parameter name); key/value types are not visible here.
        std::unordered_map _faceIdToUserId;
        faiss::idx_t _nextFaceId = 0;  // Sequential ID counter for backward-compat AddEmbedding

        ModelConfig _modelConfig;
        std::string _modelFilePath;
        std::string _landmarkModelFilePath;
        ANSCENTER::Options m_options;

        // Three-element constants, all 0.5f.
        // NOTE(review): presumably per-channel subtract/divide values for input
        // normalisation, gated by NORMALIZE — confirm in the .cpp.
        const std::array SUB_VALS{ 0.5f, 0.5f, 0.5f };
        const std::array DIV_VALS{ 0.5f, 0.5f, 0.5f };
        const bool NORMALIZE = true;

        // Recursive mutex; the *Unlocked helpers above document that their
        // caller must already hold it.
        std::recursive_mutex _mutex;

        // Defaults to 0.35f; overwritten by UpdateParamater.
        float m_knownPersonThresh = 0.35f;

        // No default initialiser is given for this member.
        EngineType engineType;

        // CPU-path recogniser and its input size: 112x112 when USE_ONNX_ENGINE
        // is defined, 160x160 otherwise.
#ifdef USE_ONNX_ENGINE
        std::unique_ptr faceRecognizer = nullptr;
        const int CPU_FACE_WIDTH = 112;
        const int CPU_FACE_HEIGHT = 112;
#else
        std::unique_ptr faceRecognizer = nullptr;  // OpenVINO
        const int CPU_FACE_WIDTH = 160;
        const int CPU_FACE_HEIGHT = 160;
#endif

        // Pooled GPU buffers to avoid per-frame allocation (Fix #8)
        cv::cuda::Stream m_gpuStream;
        cv::cuda::GpuMat m_gpuImg;
        cv::cuda::GpuMat m_gpuResized;
        cv::cuda::GpuMat m_gpuRgb;

        std::shared_ptr> m_trtEngine = nullptr;  // NVIDIA TensorRT
        EnginePoolManager::PoolKey m_poolKey;
        bool m_usingSharedPool = false;
        int m_maxSlotsPerGpu{ -1 };  // -1 = elastic mode (on-demand slots, auto-cleanup)

        // Override that records the requested per-GPU slot limit.
        // Note: declared in the protected section of this class.
        void SetMaxSlotsPerGpu(int n) override { m_maxSlotsPerGpu = n; }

        // Search index and GPU resources, both held by shared_ptr;
        // m_gpuResources is what GetGpuResources() returns.
        std::shared_ptr faiss_index;
        std::shared_ptr m_gpuResources;
    };
}
#endif