Fix mutex lock issues (OCR and FR)

This commit is contained in:
2026-04-13 20:38:40 +10:00
parent 844d7396b2
commit 8e60126c4c
16 changed files with 227 additions and 18 deletions

View File

@@ -125,7 +125,16 @@
"Bash(python /tmp/apply_guards.py)",
"Bash(python /tmp/apply_all_guards.py)",
"Bash(python /tmp/cleanup_redundant_checks.py)",
"Bash(python /tmp/final_cleanup.py)"
"Bash(python /tmp/final_cleanup.py)",
"Bash(grep -n \"ANSFRBase\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.cpp)",
"Bash(grep -n \"class.*public ANSFRBase\\\\|class.*: public ANSFRBase\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h)",
"Bash(grep -n \"class.*public.*FaceDetector\\\\|class.*public ANSFDBase\\\\|struct.*public\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h)",
"Bash(grep -l \"ANSFRBase\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h)",
"Bash(grep -rn \"public ANSFRBase\" /c/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/*.h)",
"Bash(python /tmp/apply_fd_guards.py)",
"Bash(python /tmp/apply_fd_precheck.py)",
"Bash(ls /c/Projects/CLionProjects/ANSCORE/modules/ANSFR/*.cpp /c/Projects/CLionProjects/ANSCORE/modules/ANSFR/*.h)",
"Bash(grep -rn \"lock_guard.*_mutex\\\\|lock_guard.*mutex\" /c/Projects/CLionProjects/ANSCORE/modules/ANSFR/*.cpp)"
]
}
}

View File

@@ -44,6 +44,11 @@ namespace ANSCENTER {
const std::string& modelZipPassword,
std::string& labelMap)
{
// Scope guard for the model-loading flag: sets the referenced atomic to true
// when constructed and back to false when destroyed, so every exit path of
// the enclosing scope clears the flag.
struct LoadGuard {
    std::atomic<bool>& f;   // flag toggled by this guard (not owned)
    explicit LoadGuard(std::atomic<bool>& fl) : f(fl) { f.store(true); }
    ~LoadGuard() { f.store(false); }
    // Non-copyable: the destructor of a copy would clear the flag while the
    // original guard is still alive.
    LoadGuard(const LoadGuard&) = delete;
    LoadGuard& operator=(const LoadGuard&) = delete;
} mlg(_modelLoading);
ANS_DBG("FaceRecognizer", "Initialize: modelZip=%s", modelZipFilePath.c_str());
bool result = ANSFRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, labelMap);
if (!result) {
@@ -276,7 +281,9 @@ namespace ANSCENTER {
return embeddingResult;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (_modelLoading.load()) return embeddingResult;
auto lock = TryLockWithTimeout("ANSFaceRecognizer::Feature");
if (!lock.owns_lock()) return embeddingResult;
try {
if (engineType == EngineType::NVIDIA_GPU) {
@@ -338,7 +345,9 @@ namespace ANSCENTER {
return resultObjects;
}
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (_modelLoading.load()) return resultObjects;
auto lock = TryLockWithTimeout("ANSFaceRecognizer::Match");
if (!lock.owns_lock()) return resultObjects;
if (!_isInitialized) {
_logger.LogError("ANSFaceRecognizer::Match",

View File

@@ -11,6 +11,9 @@
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <unordered_map>
#include <atomic>
#include <chrono>
#include <thread>
#include "engine.h"
#include "engine/EnginePoolManager.h"
#include "ONNXEngine.h"
@@ -99,6 +102,28 @@ namespace ANSCENTER {
const bool NORMALIZE = true;
std::recursive_mutex _mutex;
std::atomic<bool> _modelLoading{ false };
// Tries to acquire _mutex without blocking indefinitely.
// Polls try_lock() and sleeps 1 ms between attempts until either the mutex
// is acquired or timeoutMs milliseconds have elapsed.
//   caller    - tag passed to the logger when the attempt times out
//   timeoutMs - maximum time to keep polling, in milliseconds
// Returns a unique_lock on _mutex; owns_lock() is true on success and false
// after a timeout (a warning is logged in that case).
std::unique_lock<std::recursive_mutex> TryLockWithTimeout(
    const char* caller, unsigned int timeoutMs = 5000)
{
    using Clock = std::chrono::steady_clock;
    const auto giveUpAt = Clock::now() + std::chrono::milliseconds(timeoutMs);
    std::unique_lock<std::recursive_mutex> guard(_mutex, std::defer_lock);

    for (;;) {
        if (guard.try_lock()) {
            break;  // acquired
        }
        if (Clock::now() < giveUpAt) {
            // Still within budget: back off briefly, then retry.
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
            continue;
        }

        // Budget exhausted: report and hand back the unowned lock.
        std::string message = "Mutex acquisition timed out after ";
        message += std::to_string(timeoutMs);
        message += " ms";
        if (_modelLoading.load()) {
            message += " (model loading in progress)";
        }
        _logger.LogWarn(caller, message, __FILE__, __LINE__);
        break;
    }
    return guard;
}
float m_knownPersonThresh = 0.35f;
EngineType engineType;

View File

@@ -212,7 +212,9 @@ namespace ANSCENTER {
return RunInference(input, "OCRCPUCAM");
}
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
std::lock_guard<std::mutex> lock(_mutex);
if (_modelLoading.load()) return {};
auto lock = TryLockWithTimeout("ANSCPUOCR::RunInference");
if (!lock.owns_lock()) return {};
// Early validation
if (!_licenseValid) {
@@ -311,7 +313,9 @@ namespace ANSCENTER {
}
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
std::lock_guard<std::mutex> lock(_mutex);
if (_modelLoading.load()) return {};
auto lock = TryLockWithTimeout("ANSCPUOCR::RunInference(Bbox)");
if (!lock.owns_lock()) return {};
// Early validation
if (!_licenseValid) {
@@ -434,7 +438,9 @@ namespace ANSCENTER {
const std::vector<cv::Rect>& Bbox,
const std::string& cameraId)
{
std::lock_guard<std::mutex> lock(_mutex);
if (_modelLoading.load()) return {};
auto lock = TryLockWithTimeout("ANSCPUOCR::RunInference(Bbox,CamId)");
if (!lock.owns_lock()) return {};
// Early validation
if (!_licenseValid) {

View File

@@ -6,6 +6,9 @@
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <vector>
#include <atomic>
#include <chrono>
#include <thread>
#include "LabVIEWHeader/extcode.h"
#include "ANSLicense.h"
#include "ANSOCRBase.h"
@@ -25,8 +28,30 @@ namespace ANSCENTER {
private:
std::unique_ptr<PaddleOCR::PPOCR> ppocr = std::make_unique<PaddleOCR::PPOCR>();
std::mutex _mutex;
std::atomic<bool> _modelLoading{ false };
cv::Mat _frameBuffer; // Reusable buffer for color conversion
// Timed acquisition of the (non-recursive) _mutex.
// std::mutex has no timed lock, so this polls try_lock() with a 1 ms sleep
// between attempts for up to timeoutMs milliseconds.
//   caller    - tag passed to the logger when the attempt times out
//   timeoutMs - maximum time to keep polling, in milliseconds
// Returns a unique_lock on _mutex; check owns_lock() before touching guarded
// state. On timeout a warning is logged and the lock is returned unowned.
std::unique_lock<std::mutex> TryLockWithTimeout(
    const char* caller, unsigned int timeoutMs = 5000)
{
    using Clock = std::chrono::steady_clock;
    const auto giveUpAt = Clock::now() + std::chrono::milliseconds(timeoutMs);
    std::unique_lock<std::mutex> guard(_mutex, std::defer_lock);

    bool acquired = guard.try_lock();
    while (!acquired) {
        if (Clock::now() >= giveUpAt) {
            std::string message = "Mutex acquisition timed out after ";
            message += std::to_string(timeoutMs);
            message += " ms";
            if (_modelLoading.load()) {
                message += " (model loading in progress)";
            }
            _logger.LogWarn(caller, message, __FILE__, __LINE__);
            break;  // give up; guard stays unowned
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        acquired = guard.try_lock();
    }
    return guard;
}
};
}
#endif

View File

@@ -13,6 +13,7 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
const std::string& recModelPath,
const std::string& dictPath) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
ModelLoadingGuard mlg(_modelLoading);
try {
// Initialize detector (also triggers EPLoader init in BasicOrtHandler)
@@ -59,13 +60,16 @@ bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
}
std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (_modelLoading.load()) return {};
std::vector<OCRPredictResult> results;
if (!_initialized || img.empty()) {
return results;
{
auto lk = TryLockWithTimeout("PaddleOCRV5Engine::ocr");
if (!lk.owns_lock()) return results;
if (!_initialized || img.empty()) return results;
}
// _mutex released — heavy pipeline runs lock-free
// Step 1: Text Detection
auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);
@@ -127,8 +131,12 @@ std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
}
// Runs the recognizer on a single, already-cropped image.
// Returns the empty result { "", 0.0f } when the engine is not initialized,
// recognizer_ is null, the image is empty, a model load is in progress, or
// the timed lock attempt fails; otherwise returns recognizer_->Recognize().
TextLine PaddleOCRV5Engine::recognizeOnly(const cv::Mat& croppedImage) {
// NOTE(review): this blocking lock_guard and the timed TryLockWithTimeout
// below both target _mutex. As listed, the lock_guard waits without a
// timeout and keeps _mutex held for the whole call. It may be the
// pre-change line of this diff hunk — confirm against the real file.
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!_initialized || !recognizer_ || croppedImage.empty()) return { "", 0.0f };
// Bail out early while a model load is flagged.
if (_modelLoading.load()) return { "", 0.0f };
{
// Scoped timed lock: only the state validation below runs under it.
auto lk = TryLockWithTimeout("PaddleOCRV5Engine::recognizeOnly");
if (!lk.owns_lock()) return { "", 0.0f };
if (!_initialized || !recognizer_ || croppedImage.empty()) return { "", 0.0f };
}
// lk has gone out of scope here, so Recognize() runs after the scoped lock
// is released.
return recognizer_->Recognize(croppedImage);
}

View File

@@ -7,6 +7,9 @@
#include <memory>
#include <mutex>
#include <atomic>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
@@ -48,6 +51,37 @@ private:
std::unique_ptr<ONNXOCRRecognizer> recognizer_;
std::recursive_mutex _mutex;
std::atomic<bool> _modelLoading{ false };
// Scope guard for a "model loading" flag: the referenced atomic is set to
// true for the lifetime of the guard and reset to false when it is destroyed.
struct ModelLoadingGuard {
    // Copying is disabled so only one destructor ever clears the flag.
    ModelLoadingGuard(const ModelLoadingGuard&) = delete;
    ModelLoadingGuard& operator=(const ModelLoadingGuard&) = delete;

    explicit ModelLoadingGuard(std::atomic<bool>& f)
        : flag(f)
    {
        flag = true;
    }

    ~ModelLoadingGuard()
    {
        flag = false;
    }

    std::atomic<bool>& flag;  // flag being toggled (not owned)
};
// Timed acquisition of _mutex. Polls try_lock() with a 1 ms sleep between
// attempts for at most timeoutMs milliseconds.
//   caller    - tag printed in the timeout diagnostic
//   timeoutMs - maximum time to keep polling, in milliseconds
// The returned unique_lock owns the mutex on success. On timeout a message
// is written to std::cerr and the lock is returned without ownership.
std::unique_lock<std::recursive_mutex> TryLockWithTimeout(
    const char* caller, unsigned int timeoutMs = 5000)
{
    using Clock = std::chrono::steady_clock;
    const auto giveUpAt = Clock::now() + std::chrono::milliseconds(timeoutMs);
    std::unique_lock<std::recursive_mutex> guard(_mutex, std::defer_lock);

    bool acquired = guard.try_lock();
    while (!acquired) {
        if (Clock::now() >= giveUpAt) {
            std::cerr << "[" << caller << "] Mutex acquisition timed out after "
                      << timeoutMs << " ms"
                      << (_modelLoading.load() ? " (model loading in progress)" : "")
                      << std::endl;
            break;  // timed out: guard stays unlocked
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        acquired = guard.try_lock();
    }
    return guard;
}
// Detection parameters
int _maxSideLen = kDetMaxSideLen;

View File

@@ -12,6 +12,7 @@ bool PaddleOCRV5RTEngine::Initialize(const std::string& detModelPath,
int gpuId,
const std::string& engineCacheDir) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
ModelLoadingGuard mlg(_modelLoading);
gpuId_ = gpuId;
if (!engineCacheDir.empty()) {
@@ -57,10 +58,15 @@ bool PaddleOCRV5RTEngine::Initialize(const std::string& detModelPath,
}
std::vector<OCRPredictResult> PaddleOCRV5RTEngine::ocr(const cv::Mat& image) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
std::vector<OCRPredictResult> results;
if (_modelLoading.load()) return {};
if (!detector_ || !recognizer_ || image.empty()) return results;
std::vector<OCRPredictResult> results;
{
auto lk = TryLockWithTimeout("PaddleOCRV5RTEngine::ocr");
if (!lk.owns_lock()) return results;
if (!detector_ || !recognizer_ || image.empty()) return results;
}
// _mutex released — heavy pipeline runs lock-free
try {
// 1. Detection: find text boxes
@@ -148,8 +154,12 @@ std::vector<OCRPredictResult> PaddleOCRV5RTEngine::ocr(const cv::Mat& image) {
}
// Runs the recognizer on a single, already-cropped image.
// Returns the empty result { "", 0.0f } when recognizer_ is null, the image
// is empty, a model load is in progress, or the timed lock attempt fails;
// otherwise returns recognizer_->Recognize().
TextLine PaddleOCRV5RTEngine::recognizeOnly(const cv::Mat& croppedImage) {
// NOTE(review): this blocking lock_guard and the timed TryLockWithTimeout
// below both target _mutex. As listed, the lock_guard waits without a
// timeout and keeps _mutex held for the whole call. It may be the
// pre-change line of this diff hunk — confirm against the real file.
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (!recognizer_ || croppedImage.empty()) return { "", 0.0f };
// Bail out early while a model load is flagged.
if (_modelLoading.load()) return { "", 0.0f };
{
// Scoped timed lock: only the state validation below runs under it.
auto lk = TryLockWithTimeout("PaddleOCRV5RTEngine::recognizeOnly");
if (!lk.owns_lock()) return { "", 0.0f };
if (!recognizer_ || croppedImage.empty()) return { "", 0.0f };
}
// lk has gone out of scope here, so Recognize() runs after the scoped lock
// is released.
return recognizer_->Recognize(croppedImage);
}

View File

@@ -6,6 +6,9 @@
#include "RTOCRRecognizer.h"
#include <memory>
#include <mutex>
#include <atomic>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
#include "ANSLicense.h"
@@ -64,6 +67,34 @@ private:
std::string engineCacheDir_;
std::recursive_mutex _mutex;
std::atomic<bool> _modelLoading{ false };
// Scope guard: raises the referenced loading flag on construction and
// lowers it again on destruction, whichever way the scope is left.
struct ModelLoadingGuard {
    std::atomic<bool>& flag;  // flag being toggled (not owned)

    explicit ModelLoadingGuard(std::atomic<bool>& f)
        : flag(f)
    {
        flag = true;
    }

    ~ModelLoadingGuard()
    {
        flag = false;
    }

    // Not copyable: a second destructor would reset the flag too early.
    ModelLoadingGuard& operator=(const ModelLoadingGuard&) = delete;
    ModelLoadingGuard(const ModelLoadingGuard&) = delete;
};
// Tries to acquire _mutex for at most timeoutMs milliseconds by polling
// try_lock() and sleeping 1 ms between attempts.
//   caller    - tag printed in the timeout diagnostic
//   timeoutMs - maximum time to keep polling, in milliseconds
// Returns a unique_lock on _mutex; owns_lock() is false if the deadline
// passed first, in which case a diagnostic is written to std::cerr.
std::unique_lock<std::recursive_mutex> TryLockWithTimeout(
    const char* caller, unsigned int timeoutMs = 5000)
{
    using Clock = std::chrono::steady_clock;
    const auto giveUpAt = Clock::now() + std::chrono::milliseconds(timeoutMs);
    std::unique_lock<std::recursive_mutex> guard(_mutex, std::defer_lock);

    for (;;) {
        if (guard.try_lock()) {
            break;  // acquired
        }
        if (Clock::now() < giveUpAt) {
            // Still within budget: back off briefly, then retry.
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
            continue;
        }
        std::cerr << "[" << caller << "] Mutex acquisition timed out after "
                  << timeoutMs << " ms"
                  << (_modelLoading.load() ? " (model loading in progress)" : "")
                  << std::endl;
        break;  // timed out: guard stays unlocked
    }
    return guard;
}
};
} // namespace rtocr

View File

@@ -1046,6 +1046,46 @@ namespace ANSCENTER
MoveDetectsHandler _handler;
const size_t QUEUE_SIZE = 10;
std::recursive_mutex _mutex;
std::atomic<bool> _modelLoading{ false };
// Pre-inference gate for ANSFDBase subclasses.
// Returns true only when no model load is flagged, _mutex could be acquired
// within the default timeout, and both _licenseValid and _isInitialized are
// set. The lock is local to this function, so it is released on return.
//   caller - tag forwarded to TryLockWithTimeout for its timeout warning
bool PreInferenceCheck(const char* caller) {
    if (_modelLoading.load()) {
        return false;
    }
    const auto gate = TryLockWithTimeout(caller);
    // Short-circuit keeps the state flags from being read without the lock.
    return gate.owns_lock() && _licenseValid && _isInitialized;
}
// Scope guard for the loading flag: true while the guard is alive, false
// once it has been destroyed.
struct ModelLoadingGuard {
    // Copying is disabled so only one destructor ever clears the flag.
    ModelLoadingGuard(const ModelLoadingGuard&) = delete;
    ModelLoadingGuard& operator=(const ModelLoadingGuard&) = delete;

    explicit ModelLoadingGuard(std::atomic<bool>& f)
        : flag(f)
    {
        flag = true;
    }

    ~ModelLoadingGuard()
    {
        flag = false;
    }

    std::atomic<bool>& flag;  // flag being toggled (not owned)
};
// Timed acquisition of _mutex. Polls try_lock() with a 1 ms sleep between
// attempts for at most timeoutMs milliseconds.
//   caller    - tag passed to the logger when the attempt times out
//   timeoutMs - maximum time to keep polling, in milliseconds
// Returns a unique_lock on _mutex; owns_lock() tells whether the mutex was
// acquired. On timeout a warning is logged and the lock is returned unowned.
std::unique_lock<std::recursive_mutex> TryLockWithTimeout(
    const char* caller, unsigned int timeoutMs = 5000)
{
    using Clock = std::chrono::steady_clock;
    const auto giveUpAt = Clock::now() + std::chrono::milliseconds(timeoutMs);
    std::unique_lock<std::recursive_mutex> guard(_mutex, std::defer_lock);

    bool acquired = guard.try_lock();
    while (!acquired) {
        if (Clock::now() >= giveUpAt) {
            std::string message = "Mutex acquisition timed out after ";
            message += std::to_string(timeoutMs);
            message += " ms";
            if (_modelLoading.load()) {
                message += " (model loading in progress)";
            }
            _logger.LogWarn(caller, message, __FILE__, __LINE__);
            break;  // give up; guard stays unowned
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        acquired = guard.try_lock();
    }
    return guard;
}
ANSCENTER::EngineType engineType;
bool _facelivenessEngineValid{ false };

View File

@@ -99,6 +99,7 @@ namespace ANSCENTER {
std::vector<Object> output;
output.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSOVFBFD::RunInference", "Invalid license", __FILE__, __LINE__);
return output;
}
@@ -180,6 +181,7 @@ namespace ANSCENTER {
std::vector<Object> output;
output.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSOVFBFD::RunInference", "Invalid license", __FILE__, __LINE__);
return output;
}

View File

@@ -102,6 +102,7 @@ namespace ANSCENTER {
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, const std::string& camera_id, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
// ── DML device-lost recovery (outside mutex) ──────────────
if (_dmlDeviceLost && _face_detector) {
if (_modelLoading.load()) return {};
// The DML session is broken — recreate on CPU
try {
auto cpuDetector = std::make_unique<ANSCENTER::SCRFD>(
@@ -136,6 +137,7 @@ namespace ANSCENTER {
std::vector<Object> ANSOVFD::RunInference(const cv::Mat& input, bool useDynamicImage, bool validateFace, bool facelivenessCheck) {
// ── DML device-lost recovery (outside mutex) ──────────────
if (_dmlDeviceLost && _face_detector) {
if (_modelLoading.load()) return {};
try {
auto cpuDetector = std::make_unique<ANSCENTER::SCRFD>(
_scrfdModelPath, ANSCENTER::EngineType::CPU);

View File

@@ -73,6 +73,7 @@ namespace ANSCENTER {
std::vector<Object> output;
output.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSCNNFD::RunInference", "Invalid license", __FILE__, __LINE__);
return output;
}

View File

@@ -114,6 +114,7 @@ namespace ANSCENTER {
std::vector<Object> output;
output.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSRETINAFD::RunInference", "Invalid license", __FILE__, __LINE__);
return output;
}
@@ -184,6 +185,7 @@ namespace ANSCENTER {
std::vector<Object> output;
output.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSRETINAFD::RunInference", "Invalid license", __FILE__, __LINE__);
return output;
}

View File

@@ -223,12 +223,14 @@ namespace ANSCENTER {
bool useDynamicImage,
bool validateFace)
{
if (_modelLoading.load()) return {};
// Phase 1: Validation + image preprocessing (brief lock)
cv::Mat im;
bool croppedFace;
float scoreThreshold;
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
auto lock = TryLockWithTimeout("ANSSCRFDFD::Inference");
if (!lock.owns_lock()) return {};
if (!_licenseValid) {
_logger.LogError("ANSSCRFDFD::Inference", "Invalid license", __FILE__, __LINE__);
@@ -411,7 +413,9 @@ namespace ANSCENTER {
std::vector<Object> ANSSCRFDFD::InferenceDynamic(const cv::Mat& input, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (_modelLoading.load()) return {};
auto lock = TryLockWithTimeout("ANSSCRFDFD::InferenceDynamic");
if (!lock.owns_lock()) return {};
std::vector<Object> output;
try {

View File

@@ -218,6 +218,7 @@ namespace ANSCENTER {
std::vector<Object> outputs;
outputs.clear();
if (!_licenseValid) {
if (_modelLoading.load()) return {};
this->_logger.LogError("ANSOVSCRFDFD::RunInference", "Invalid license", __FILE__, __LINE__);
return outputs;
}