// Source: ANSCORE/ANSFR/ARCFaceRT.cpp (C++, 692 lines, 25 KiB)
#include "ARCFaceRT.h"

#include <algorithm>
#include <string>

#include "NvOnnxParser.h"
namespace ANSCENTER {
bool ArcFace::Initialize(std::string licenseKey,
                         ModelConfig modelConfig,
                         const std::string& modelZipFilePath,
                         const std::string& modelZipPassword,
                         std::string& labelMap) {
    // Initialise the base class (license check, model-package extraction, label
    // map), locate the ArcFace ONNX weights, then build/load the TensorRT engine.
    // Returns false (with a logged reason) on any failure.
    bool result = ANSFRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
    if (!result) return false;
    try {
        _modelConfig = modelConfig;
        _modelConfig.modelType = ModelType::FACERECOGNIZE;
        _modelConfig.detectionType = DetectionType::FACERECOGNIZER;
        // Distance threshold above which a gallery match is treated as "unknown".
        m_knownPersonThresh = _modelConfig.unknownPersonThreshold;
        if (m_knownPersonThresh == 0.0f) m_knownPersonThresh = 0.35f; // default when unconfigured
        // Prefer the ResNet-50 weights when present, otherwise the default model.
        std::string onnxfile50 = CreateFilePath(_modelFolder, "ansfacerecognizer50.onnx");
        if (std::filesystem::exists(onnxfile50)) {
            _modelFilePath = onnxfile50;
            _logger.LogDebug("ArcFace::Initialize. Loading arcface50 weight", _modelFilePath, __FILE__, __LINE__);
        }
        else {
            std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
            if (std::filesystem::exists(onnxfile)) {
                _modelFilePath = onnxfile;
                _logger.LogDebug("ArcFace::Initialize. Loading arcface weight", _modelFilePath, __FILE__, __LINE__);
            }
            else {
                // Fixed: the old message referenced "arcface.onnx", a file this
                // loader never looks for, and logged a stale _modelFilePath.
                _logger.LogError("ArcFace::Initialize. Neither ansfacerecognizer50.onnx nor ansfacerecognizer.onnx exists",
                                 _modelFolder, __FILE__, __LINE__);
                return false;
            }
        }
        // Configure engine with batch support.
        m_options.precision = ANSCENTER::Precision::FP32;
        m_options.optBatchSize = 8;   // expected typical batch
        m_options.maxBatchSize = 32;  // maximum number of faces per frame
        m_options.calibrationBatchSize = 8;
        m_options.deviceIndex = 0;
        m_trtEngine.UpdateOptions(m_options);
        // _modelFilePath was just verified via std::filesystem::exists; this
        // guard only covers the file vanishing between the two checks.
        if (!FileExist(_modelFilePath)) {
            _logger.LogError("ArcFace::Initialize. Model file does not exist",
                             _modelFilePath, __FILE__, __LINE__);
            return false;
        }
        if (!m_trtEngine.buildLoadNetwork(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE)) {
            _logger.LogError("ArcFace::Initialize. Unable to build or load TensorRT engine.",
                             _modelFilePath, __FILE__, __LINE__);
            return false;
        }
        Init(); // reset the FAISS gallery and enrolled-name list
        _isInitialized = true;
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ArcFace::LoadModel(const std::string& modelZipFilePath, const std::string& modelZipPassword) {
    // Extract/validate the model package via the base class, then resolve which
    // ArcFace ONNX weight file to use (ResNet-50 variant preferred).
    // Returns false when the base load fails or no weight file is found.
    try {
        bool result = ANSFRBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;
        std::string onnxfile50 = CreateFilePath(_modelFolder, "ansfacerecognizer50.onnx");
        if (std::filesystem::exists(onnxfile50)) {
            _modelFilePath = onnxfile50;
            _logger.LogDebug("ArcFace::LoadModel. Loading arcface50 weight", _modelFilePath, __FILE__, __LINE__);
            return true;
        }
        std::string onnxfile = CreateFilePath(_modelFolder, "ansfacerecognizer.onnx");
        if (std::filesystem::exists(onnxfile)) {
            _modelFilePath = onnxfile;
            _logger.LogDebug("ArcFace::LoadModel. Loading arcface weight", _modelFilePath, __FILE__, __LINE__);
            return true;
        }
        // Fixed: the old message referenced "arcface.onnx", which is never
        // searched for, and logged a stale _modelFilePath.
        _logger.LogError("ArcFace::LoadModel. Neither ansfacerecognizer50.onnx nor ansfacerecognizer.onnx exists",
                         _modelFolder, __FILE__, __LINE__);
        return false;
    }
    catch (const std::exception& e) {
        // Fixed: the log tag previously said "ArcFace50::LoadModel" (wrong class name).
        _logger.LogFatal("ArcFace::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}
bool ArcFace::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
    // Build (or rebuild) a serialized TensorRT engine next to the ONNX model.
    //   fp16                 : request a half-precision engine build.
    //   optimizedModelFolder : out-param; set to the engine output directory,
    //                          or "" when the model file is missing.
    // Fixed: the `fp16` flag was previously ignored and the engine was always
    // built in FP32; it now selects the build precision.
    if (!FileExist(_modelFilePath)) {
        optimizedModelFolder = "";
        return false;
    }
    optimizedModelFolder = GetParentFolder(_modelFilePath);
    m_options.optBatchSize = 8;
    m_options.maxBatchSize = 32;
    m_options.engineFileDir = optimizedModelFolder;
    m_options.precision = fp16 ? Precision::FP16 : Precision::FP32;
    Engine<float> engine(m_options);
    auto succ = engine.buildWithRetry(_modelFilePath, SUB_VALS, DIV_VALS, NORMALIZE);
    if (!succ) {
        const std::string errMsg =
            "Error: Unable to build the TensorRT engine. Try increasing TensorRT log severity to kVERBOSE.";
        _logger.LogError("ArcFace::OptimizeModel", errMsg, __FILE__, __LINE__);
        return false;
    }
    return true;
}
std::vector<float> ArcFace::Feature(const cv::Mat& image, const ANSCENTER::Object& bBox) {
    // Compute the embedding for a single detected face. The crop actually fed
    // to the network is bBox.mask (the aligned face chip), not `image` itself;
    // `image` is only sanity-checked. Returns an empty vector on failure.
    std::vector<float> embedding;
    // Early validation before locking
    if (image.empty()) {
        return embedding;
    }
    if (image.cols < 10 || image.rows < 10) {
        return embedding;
    }
    // New: reject detections that carry no face crop instead of handing an
    // empty Mat to the inference path.
    if (bBox.mask.empty()) {
        return embedding;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        return RunArcFace(bBox.mask);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::Feature", e.what(), __FILE__, __LINE__);
        return std::vector<float>();
    }
}
// Match each detected face in `input` against the enrolled FAISS gallery.
//   input    : full frame; only basic sanity checks are applied here.
//   bBox     : detections; each Object's `mask` carries the aligned face crop
//              that is actually embedded (see Forward/RunArcFaceBatch).
//   userDict : maps a gallery userId to a human-readable display name.
// Returns one FaceResultObject per matched detection, up to
// min(#matches, #boxes). Empty on validation failure, no matches, or error.
// NOTE(review): sims[] is distance-like (|1 - cosine|, see SearchForFaces):
// LARGER means LESS similar, hence `> threshold` flags an unknown person.
std::vector<FaceResultObject> ArcFace::Match(
const cv::Mat& input,
const std::vector<ANSCENTER::Object>& bBox,
const std::map<std::string, std::string>& userDict) {
std::vector<FaceResultObject> resultObjects;
// Early validation before locking
if (input.empty()) {
return resultObjects;
}
if (input.cols < 10 || input.rows < 10) {
return resultObjects;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!_isInitialized) {
_logger.LogError("ArcFace::Match", "Model is not initialized", __FILE__, __LINE__);
return resultObjects;
}
try {
// Get embeddings, one per detection crop
std::vector<std::vector<float>> detectedEmbeddings = Forward(input, bBox);
// Search the FAISS gallery; names/sims are parallel to detectedEmbeddings
std::vector<std::string> names;
std::vector<float> sims;
std::tie(names, sims) = SearchForFaces(detectedEmbeddings);
if (names.empty()) {
_logger.LogError("ArcFace::Match", "No face is match", __FILE__, __LINE__);
return resultObjects;
}
// Pre-reserve result space
// NOTE(review): Forward() drops detections with an empty mask, so names[i]
// may not correspond to bBox[i] in that case — confirm upstream always
// populates `mask` for every detection passed here.
const size_t resultCount = std::min(names.size(), bBox.size());
resultObjects.reserve(resultCount);
// Build result objects
for (size_t i = 0; i < resultCount; ++i) {
FaceResultObject resultObject;
// Determine if face is known or unknown (distance above threshold = unknown)
const bool isUnknown = (sims[i] > m_knownPersonThresh);
if (isUnknown) {
resultObject.isUnknown = true;
resultObject.userId = "0"; // sentinel id for unrecognized faces
resultObject.userName = "Unknown";
resultObject.confidence = 1.0f;
}
else {
resultObject.isUnknown = false;
resultObject.userId = names[i];
// Safe map lookup with fallback to the raw gallery id
auto it = userDict.find(names[i]);
resultObject.userName = (it != userDict.end()) ? it->second : names[i];
// Convert distance back to a similarity-style confidence (higher = better)
resultObject.confidence = 1.0f - sims[i];
}
resultObject.similarity = sims[i];
// Copy bounding box and additional data from the source detection
resultObject.box = bBox[i].box;
resultObject.mask = bBox[i].mask;
resultObject.cameraId = bBox[i].cameraId;
resultObject.trackId = bBox[i].trackId;
resultObject.polygon = bBox[i].polygon;
resultObject.kps = bBox[i].kps;
resultObject.extraInformation = bBox[i].extraInfo;
resultObjects.push_back(std::move(resultObject));
}
return resultObjects;
}
catch (const std::exception& e) {
_logger.LogFatal("ArcFace::Match", e.what(), __FILE__, __LINE__);
return resultObjects;
}
}
cv::Mat ArcFace::GetCropFace(const cv::Mat& input, const ANSCENTER::Object& bBox) {
    // Crop and align a single 112x112 face chip from the frame via the shared
    // helper. Returns an empty Mat when cropping produces nothing or throws.
    try {
        std::vector<ANSCENTER::Object> boxes{ bBox };
        std::vector<CroppedFace> faces;
        ANSFRHelper::GetCroppedFaces(input, boxes, 112, 112, faces);
        return faces.empty() ? cv::Mat() : faces[0].faceMat;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::GetCropFace", e.what(), __FILE__, __LINE__);
        return cv::Mat();
    }
}
bool ArcFace::LoadEngine(const std::string onnxModelPath, bool engineOptimisation) {
    // Build or load a TensorRT engine for the given ONNX model path.
    // NOTE(review): `engineOptimisation` is not consulted here (nor was it in
    // the original) — confirm whether it should influence the build options.
    // Fixed: removed a duplicated FileExist() check whose else branch was
    // unreachable (the path was already validated at the top).
    try {
        if (!FileExist(onnxModelPath)) {
            _logger.LogError("ArcFace::LoadEngine", "Cannot find the raw ONNX model file.", __FILE__, __LINE__);
            return false;
        }
        // Same batch configuration as Initialize().
        m_options.precision = ANSCENTER::Precision::FP32;
        m_options.optBatchSize = 8;
        m_options.maxBatchSize = 32;
        m_options.calibrationBatchSize = 8;
        m_options.deviceIndex = 0;
        m_trtEngine.UpdateOptions(m_options);
        if (!m_trtEngine.buildLoadNetwork(onnxModelPath, SUB_VALS, DIV_VALS, NORMALIZE)) {
            _logger.LogError("ArcFace::LoadEngine. Unable to build or load TensorRT engine.",
                             onnxModelPath, __FILE__, __LINE__);
            return false;
        }
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::LoadEngine", e.what(), __FILE__, __LINE__);
        return false;
    }
}
// Run a single face crop through the TensorRT ArcFace engine and return its
// embedding (empty vector on any failure). Preprocessing is done on the GPU:
// grayscale is expanded to BGR, the crop is resized to FACE_WIDTH x FACE_HEIGHT
// when needed, and channels are swapped to RGB before inference.
std::vector<float> ArcFace::RunArcFace(const cv::Mat& inputImage) {
std::vector<float> embedding;
// Early validation before locking
if (inputImage.empty()) {
_logger.LogError("ArcFace::RunArcFace", "Input image is empty", __FILE__, __LINE__);
return embedding;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!_isInitialized) {
_logger.LogError("ArcFace::RunArcFace", "Model is not initialized", __FILE__, __LINE__);
return embedding;
}
// GPU preprocessing pipeline (all ops enqueued asynchronously on `stream`)
cv::cuda::Stream stream;
cv::cuda::GpuMat d_img;
// Upload to GPU
d_img.upload(inputImage, stream);
// Handle grayscale conversion on GPU
if (inputImage.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
}
// Resize on GPU if needed
if (inputImage.cols != FACE_WIDTH || inputImage.rows != FACE_HEIGHT) {
cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, cv::Size(FACE_WIDTH, FACE_HEIGHT),
0, 0, cv::INTER_LINEAR, stream);
d_img = d_resized;
}
// BGR to RGB conversion on GPU
cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
// Prepare inference inputs: one batch containing one image
std::vector<cv::cuda::GpuMat> inputVec;
inputVec.emplace_back(std::move(d_rgb));
std::vector<std::vector<cv::cuda::GpuMat>> inputs;
inputs.emplace_back(std::move(inputVec));
// Run inference
// NOTE(review): preprocessing is enqueued on the local `stream` but
// waitForCompletion() runs AFTER runInference(); confirm the engine shares
// or synchronizes with this stream, otherwise inference could read
// partially-preprocessed buffers.
std::vector<std::vector<std::vector<float>>> featureVectors;
bool succ = m_trtEngine.runInference(inputs, featureVectors);
stream.waitForCompletion();
if (!succ) {
_logger.LogError("ArcFace::RunArcFace", "Failed to run inference.", __FILE__, __LINE__);
return embedding;
}
// Take the first embedding of the first batch (only one image was submitted).
if (!featureVectors.empty() && !featureVectors[0].empty()) {
embedding = std::move(featureVectors[0][0]);
}
return embedding;
}
catch (const std::exception& e) {
_logger.LogFatal("ArcFace::RunArcFace", e.what(), __FILE__, __LINE__);
return embedding;
}
}
// Embed a batch of face crops in a single TensorRT inference call.
// Returns one embedding per *processed* ROI, in input order; empty on error.
// NOTE(review): empty ROIs are skipped, so when any ROI is empty the output
// has fewer entries than `faceROIs` and no longer index-aligns with the
// caller's list — callers should pass only non-empty crops.
std::vector<std::vector<float>> ArcFace::RunArcFaceBatch(const std::vector<cv::Mat>& faceROIs) {
std::vector<std::vector<float>> embeddings;
try {
if (!_isInitialized) {
_logger.LogError("ArcFace::RunArcFaceBatch", "Model is not initialized", __FILE__, __LINE__);
return embeddings;
}
if (faceROIs.empty()) {
return embeddings;
}
// Reject batches larger than the engine was built for.
if (faceROIs.size() > static_cast<size_t>(m_options.maxBatchSize)) {
_logger.LogError("ArcFace::RunArcFaceBatch",
"Batch size exceeds maxBatchSize", __FILE__, __LINE__);
return embeddings;
}
// Sanity-check engine input dims; the dimensions themselves are not used below.
const auto& inputDims = m_trtEngine.getInputDims();
if (inputDims.empty() || inputDims[0].nbDims < 3) {
_logger.LogError("ArcFace::RunArcFaceBatch",
"Invalid engine input dims", __FILE__, __LINE__);
return embeddings;
}
// Pre-reserve embeddings space
embeddings.reserve(faceROIs.size());
// GPU preprocessing pipeline (all ops enqueued asynchronously on `stream`)
cv::cuda::Stream stream;
std::vector<cv::cuda::GpuMat> batchGpu;
batchGpu.reserve(faceROIs.size());
const cv::Size targetSize(FACE_WIDTH, FACE_HEIGHT);
for (size_t i = 0; i < faceROIs.size(); ++i) {
const cv::Mat& roi = faceROIs[i];
if (roi.empty()) {
_logger.LogWarn("ArcFace::RunArcFaceBatch",
"Empty ROI at index " + std::to_string(i) + ", skipping",
__FILE__, __LINE__);
continue;
}
// Upload to GPU
cv::cuda::GpuMat d_img;
d_img.upload(roi, stream);
// Handle grayscale conversion on GPU
if (roi.channels() == 1) {
cv::cuda::GpuMat d_bgr;
cv::cuda::cvtColor(d_img, d_bgr, cv::COLOR_GRAY2BGR, 0, stream);
d_img = d_bgr;
}
// Resize on GPU if needed
if (roi.cols != FACE_WIDTH || roi.rows != FACE_HEIGHT) {
cv::cuda::GpuMat d_resized;
cv::cuda::resize(d_img, d_resized, targetSize, 0, 0, cv::INTER_LINEAR, stream);
d_img = d_resized;
}
// BGR to RGB conversion on GPU
cv::cuda::GpuMat d_rgb;
cv::cuda::cvtColor(d_img, d_rgb, cv::COLOR_BGR2RGB, 0, stream);
batchGpu.emplace_back(std::move(d_rgb));
}
if (batchGpu.empty()) {
return embeddings;
}
// Prepare inference inputs: one input tensor holding the whole batch
std::vector<std::vector<cv::cuda::GpuMat>> inputs;
inputs.reserve(1);
inputs.emplace_back(std::move(batchGpu));
// Run inference
// NOTE(review): as in RunArcFace, waitForCompletion() happens after
// runInference(); confirm the engine synchronizes with this stream.
std::vector<std::vector<std::vector<float>>> featureVectors;
bool succ = m_trtEngine.runInference(inputs, featureVectors);
stream.waitForCompletion();
if (!succ) {
_logger.LogError("ArcFace::RunArcFaceBatch", "runInference failed", __FILE__, __LINE__);
return embeddings;
}
if (featureVectors.empty() || featureVectors[0].empty()) {
_logger.LogError("ArcFace::RunArcFaceBatch", "Empty featureVectors", __FILE__, __LINE__);
return embeddings;
}
// featureVectors[0] holds one embedding per batch item.
embeddings = std::move(featureVectors[0]);
return embeddings;
}
catch (const std::exception& e) {
_logger.LogFatal("ArcFace::RunArcFaceBatch", e.what(), __FILE__, __LINE__);
return embeddings;
}
}
std::vector<std::vector<float>> ArcFace::Forward(const cv::Mat& input,const std::vector<ANSCENTER::Object>& outputBbox)
{
    // Compute one embedding per detection. The returned vector is index-aligned
    // with `outputBbox`: detections whose `mask` (aligned face crop) is empty
    // get an empty placeholder embedding, which SearchForFaces maps to
    // "unknown". Fixed: previously such detections were silently dropped,
    // shifting every later embedding and mis-pairing names with boxes in Match().
    std::vector<std::vector<float>> detectedEmbeddings;
    // Early validation before locking
    if (input.empty()) {
        _logger.LogError("ArcFace::Forward",
            "Input image is empty", __FILE__, __LINE__);
        return detectedEmbeddings;
    }
    if (outputBbox.empty()) {
        return detectedEmbeddings;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        // Collect valid face ROIs, remembering which detection each came from.
        std::vector<cv::Mat> faceROIs;
        std::vector<size_t> srcIndex;
        faceROIs.reserve(outputBbox.size());
        srcIndex.reserve(outputBbox.size());
        for (size_t i = 0; i < outputBbox.size(); ++i) {
            if (!outputBbox[i].mask.empty()) {
                faceROIs.push_back(outputBbox[i].mask);
                srcIndex.push_back(i);
            }
        }
        if (faceROIs.empty()) {
            return detectedEmbeddings;
        }
        // Run batch inference on the valid crops only.
        std::vector<std::vector<float>> batch = RunArcFaceBatch(faceROIs);
        if (batch.empty()) {
            // Inference failed: keep the historical "no results" contract.
            return detectedEmbeddings;
        }
        // Scatter results back to their original detection slots; skipped slots
        // keep an empty embedding.
        detectedEmbeddings.assign(outputBbox.size(), std::vector<float>());
        const size_t n = std::min(batch.size(), srcIndex.size());
        for (size_t j = 0; j < n; ++j) {
            detectedEmbeddings[srcIndex[j]] = std::move(batch[j]);
        }
        return detectedEmbeddings;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::Forward", e.what(), __FILE__, __LINE__);
        return detectedEmbeddings;
    }
}
// Look up each embedding in the FAISS gallery.
// Returns parallel vectors, one entry per input embedding:
//   names : matched gallery id, or "0" when no usable match exists
//   sims  : distance-like score, |1 - cosine similarity| against the matched
//           gallery vector — 0 means identical, larger means less similar
// When the gallery is empty/uninitialized, every entry is ("0", 1.0).
std::tuple<std::vector<std::string>, std::vector<float>>
ArcFace::SearchForFaces(const std::vector<std::vector<float>>& detectedEmbeddings) {
std::vector<std::string> detectedUsers;
std::vector<float> simValues;
// Early exit before locking
if (detectedEmbeddings.empty()) {
return std::make_tuple(detectedUsers, simValues);
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Pre-reserve output space
detectedUsers.reserve(detectedEmbeddings.size());
simValues.reserve(detectedEmbeddings.size());
if (!classNames.empty() && faiss_index && faiss_index->ntotal > 0) {
// Search the top-3 neighbors, capped by the gallery size
const int k = std::min(3, static_cast<int>(faiss_index->ntotal));
// Pre-allocate search buffers (reused across iterations)
std::vector<faiss::idx_t> indices(k);
std::vector<float> distances(k);
std::vector<float> matchEmbedding(faiss_index->d);
for (const auto& embedding : detectedEmbeddings) {
// Malformed/placeholder embeddings (e.g. empty) map to "unknown"
if (embedding.size() != FACE_EMBEDDING_SIZE) {
detectedUsers.push_back("0");
simValues.push_back(1.0f);
continue;
}
// Search FAISS index (L2 distances, ascending per FAISS convention)
faiss_index->search(1, embedding.data(), k,
distances.data(), indices.data());
// Find best match (minimum distance for L2)
// NOTE(review): FAISS should already return results sorted ascending,
// so this scan is defensive; distances[0] should be the minimum.
auto min_it = std::min_element(distances.begin(), distances.end());
int best_index = static_cast<int>(std::distance(distances.begin(), min_it));
// Validate index (FAISS returns -1 for unfilled result slots)
faiss::idx_t id = indices[best_index];
if (id < 0 || id >= static_cast<faiss::idx_t>(classNames.size())) {
detectedUsers.push_back("0");
simValues.push_back(1.0f);
continue;
}
// Reconstruct the stored gallery vector and score by cosine similarity
faiss_index->reconstruct(id, matchEmbedding.data());
float cosine = CosineSimilarity(embedding, matchEmbedding, false);
float similarity = 1.0f - cosine;
// classNames[id] is parallel to FAISS row ids (see AddEmbedding)
detectedUsers.push_back(classNames[id]);
simValues.push_back(std::abs(similarity));
}
}
else {
// No gallery: everything is unknown with maximum distance
detectedUsers.assign(detectedEmbeddings.size(), "0");
simValues.assign(detectedEmbeddings.size(), 1.0f);
}
return std::make_tuple(std::move(detectedUsers), std::move(simValues));
}
catch (const std::exception& e) {
_logger.LogFatal("ArcFace::SearchForFaces", e.what(), __FILE__, __LINE__);
// Return appropriate sized vectors on error
detectedUsers.assign(detectedEmbeddings.size(), "0");
simValues.assign(detectedEmbeddings.size(), 1.0f);
return std::make_tuple(detectedUsers, simValues);
}
}
void ArcFace::Init() {
    // Reset the in-memory gallery: clear the enrolled-name list and (re)create
    // an empty flat-L2 FAISS index of FACE_EMBEDDING_SIZE dimensions.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        classNames.clear();
        if (!faiss_index) {
            faiss_index = std::make_unique<faiss::IndexFlatL2>(FACE_EMBEDDING_SIZE);
        }
        else {
            faiss_index->reset();
        }
    }
    catch (const std::exception& e) {
        _isInitialized = false;
        _logger.LogFatal("ArcFace::Init", e.what(), __FILE__, __LINE__);
    }
}
ArcFace::~ArcFace() {
    // Destructors must never throw; swallow (but log) anything Destroy() raises
    // so we cannot reach std::terminate during stack unwinding.
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        _logger.LogError("ArcFace::~ArcFace", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        _logger.LogError("ArcFace::~ArcFace", "Unknown exception during destruction", __FILE__, __LINE__);
    }
}
bool ArcFace::Destroy() {
    // Tear down the gallery and release GPU buffers. After this call the
    // object must be re-Initialized before use.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        classNames.clear();
        if (faiss_index != nullptr) {
            faiss_index->reset();  // drop the stored vectors
            faiss_index.reset();   // release the index object itself
        }
        _isInitialized = false;
        m_trtEngine.clearGpuBuffers();
        return true;
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}
void ArcFace::AddEmbedding(const std::string& className, float embedding[]) {
    // Enroll one embedding under `className`.
    // NOTE(review): a raw array carries no length — this overload trusts the
    // caller to supply enough floats for the index dimension; prefer the
    // std::vector overload, which validates the size.
    if (embedding == nullptr) {
        _logger.LogError("ArcFace::AddEmbedding",
            "Null embedding pointer.", __FILE__, __LINE__);
        return;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!faiss_index) {
            _logger.LogError("ArcFace::AddEmbedding",
                "FAISS index is not initialized.", __FILE__, __LINE__);
            return;
        }
        // classNames[i] must stay parallel to FAISS row i (see SearchForFaces).
        classNames.push_back(className);
        faiss_index->add(1, embedding);
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::AddEmbedding", e.what(), __FILE__, __LINE__);
    }
}
void ArcFace::AddEmbedding(const std::string& className, const std::vector<float>& embedding) {
    // Enroll one embedding under `className` after validating its dimension.
    if (embedding.size() != FACE_EMBEDDING_SIZE) {
        // Fixed: the old message hard-coded "512"; report the actual expected
        // dimension so the log stays truthful if FACE_EMBEDDING_SIZE changes.
        _logger.LogError("ArcFace::AddEmbedding",
            "Embedding size does not match expected output dimension of " +
            std::to_string(FACE_EMBEDDING_SIZE) + ".", __FILE__, __LINE__);
        return;
    }
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!faiss_index) {
            _logger.LogError("ArcFace::AddEmbedding",
                "FAISS index is not initialized.", __FILE__, __LINE__);
            return;
        }
        // classNames[i] must stay parallel to FAISS row i (see SearchForFaces).
        classNames.push_back(className);
        faiss_index->add(1, embedding.data());
    }
    catch (const std::exception& e) {
        _logger.LogFatal("ArcFace::AddEmbedding", e.what(), __FILE__, __LINE__);
    }
}
}