Fix mutex lock issues
This commit is contained in:
@@ -1734,6 +1734,7 @@ namespace ANSCENTER {
|
||||
std::string& labelMap)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
try {
|
||||
_modelLoadValid = false;
|
||||
bool result = ANSODBase::Initialize(licenseKey, modelConfig,
|
||||
@@ -1810,6 +1811,7 @@ namespace ANSCENTER {
|
||||
const std::string& modelZipPassword)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
try {
|
||||
bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
|
||||
if (!result) return false;
|
||||
@@ -1873,6 +1875,7 @@ namespace ANSCENTER {
|
||||
std::string& labelMap)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ModelLoadingGuard mlg(_modelLoading);
|
||||
try {
|
||||
bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig,
|
||||
modelName, className,
|
||||
@@ -1948,8 +1951,10 @@ namespace ANSCENTER {
|
||||
std::vector<Object> ANSONNXYOLO::RunInference(const cv::Mat& inputImgBGR,
|
||||
const std::string& camera_id)
|
||||
{
|
||||
if (_modelLoading.load()) return {};
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
auto lock = TryLockWithTimeout("ANSONNXYOLO::RunInference");
|
||||
if (!lock.owns_lock()) return {};
|
||||
if (!_modelLoadValid) {
|
||||
_logger.LogError("ANSONNXYOLO::RunInference", "Model not loaded", __FILE__, __LINE__);
|
||||
return {};
|
||||
@@ -1976,29 +1981,55 @@ namespace ANSCENTER {
|
||||
const std::string& camera_id)
|
||||
{
|
||||
try {
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: cam=%s acquiring mutex...", camera_id.c_str());
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: mutex acquired, cam=%s", camera_id.c_str());
|
||||
if (!m_ortEngine) {
|
||||
_logger.LogError("ANSONNXYOLO::DetectObjects", "ORT engine is null", __FILE__, __LINE__);
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: ORT engine is null!");
|
||||
// Fail-fast if a model load/init is in progress on another thread
|
||||
if (_modelLoading.load()) {
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: skipped — model loading in progress, cam=%s", camera_id.c_str());
|
||||
return {};
|
||||
}
|
||||
|
||||
// ── Snapshot config under a brief lock ──────────────────
|
||||
// Only hold _mutex long enough to validate state and copy
|
||||
// the parameters needed for inference. The actual ORT
|
||||
// detect() call runs outside the lock so that concurrent
|
||||
// Initialize/LoadModel calls are not blocked for the full
|
||||
// duration of inference.
|
||||
float probThresh, nmsThresh;
|
||||
int numKps;
|
||||
std::vector<std::string> classes;
|
||||
bool trackerEnabled, stabilizationEnabled;
|
||||
{
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: cam=%s acquiring mutex...", camera_id.c_str());
|
||||
auto lk = TryLockWithTimeout("ANSONNXYOLO::DetectObjects");
|
||||
if (!lk.owns_lock()) return {}; // timed out
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: mutex acquired, cam=%s", camera_id.c_str());
|
||||
|
||||
if (!m_ortEngine) {
|
||||
_logger.LogError("ANSONNXYOLO::DetectObjects", "ORT engine is null", __FILE__, __LINE__);
|
||||
ANS_DBG("ONNXYOLO", "DetectObjects: ORT engine is null!");
|
||||
return {};
|
||||
}
|
||||
|
||||
// Snapshot parameters while locked
|
||||
probThresh = PROBABILITY_THRESHOLD;
|
||||
nmsThresh = NMS_THRESHOLD;
|
||||
numKps = NUM_KPS;
|
||||
classes = _classes;
|
||||
trackerEnabled = _trackerEnabled;
|
||||
stabilizationEnabled = _stabilizationEnabled;
|
||||
}
|
||||
// ── _mutex released — heavy work below runs without holding the lock ───
|
||||
|
||||
// --- NV12 fast path: try to get full-res BGR from GPU NV12 frame ---
|
||||
cv::Mat inferenceImage = inputImage;
|
||||
float bgrScaleX = 1.0f, bgrScaleY = 1.0f;
|
||||
{
|
||||
auto* gpuData = tl_currentGpuFrame();
|
||||
if (gpuData && gpuData->width > 0 && gpuData->height > 0) {
|
||||
// Full-res NV12 available — convert to BGR on CPU for ORT
|
||||
// (ORT preprocessing is CPU-based, so we need a cv::Mat)
|
||||
if (gpuData->cpuYPlane && gpuData->cpuUvPlane &&
|
||||
gpuData->cpuYLinesize >= gpuData->width &&
|
||||
gpuData->cpuUvLinesize >= gpuData->width) {
|
||||
const int fw = gpuData->width;
|
||||
const int fh = gpuData->height;
|
||||
// NV12 requires even dimensions
|
||||
if ((fw % 2) == 0 && (fh % 2) == 0) {
|
||||
try {
|
||||
cv::Mat yPlane(fh, fw, CV_8UC1,
|
||||
@@ -2017,10 +2048,12 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
}
|
||||
auto results = m_ortEngine->detect(inferenceImage, _classes,
|
||||
PROBABILITY_THRESHOLD,
|
||||
NMS_THRESHOLD,
|
||||
NUM_KPS);
|
||||
|
||||
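// Run ORT inference — no mutex held, this is the expensive call.
// NOTE(review): m_ortEngine is dereferenced here without _mutex, while the
// device-lost handler calls m_ortEngine.reset() under _mutex — confirm the
// engine cannot be destroyed while detect() is still running.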
|
||||
auto results = m_ortEngine->detect(inferenceImage, classes,
|
||||
probThresh,
|
||||
nmsThresh,
|
||||
numKps);
|
||||
|
||||
// --- Rescale coordinates from full-res back to display-res ---
|
||||
if (bgrScaleX != 1.0f || bgrScaleY != 1.0f) {
|
||||
@@ -2044,21 +2077,16 @@ namespace ANSCENTER {
|
||||
for (auto& obj : results)
|
||||
obj.cameraId = camera_id;
|
||||
|
||||
// Skip tracking for classification models
|
||||
if (_trackerEnabled && !m_ortEngine->lastWasClassification) {
|
||||
// Tracking/stabilization (ApplyTracking has its own lock)
|
||||
if (trackerEnabled && !m_ortEngine->lastWasClassification) {
|
||||
results = ApplyTracking(results, camera_id);
|
||||
if (_stabilizationEnabled) results = StabilizeDetections(results, camera_id);
|
||||
if (stabilizationEnabled) results = StabilizeDetections(results, camera_id);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
const std::string msg = e.what();
|
||||
|
||||
// ── DML device-removal detection ──────────────────────────
|
||||
// HRESULT 887A0005 = DXGI_ERROR_DEVICE_REMOVED ("The GPU
|
||||
// device instance has been suspended"). Once the D3D12
|
||||
// device is gone the ORT session is permanently broken.
|
||||
// Log once, attempt CPU fallback, suppress further flood.
|
||||
if (msg.find("887A0005") != std::string::npos) {
|
||||
if (!_dmlDeviceLost) {
|
||||
_dmlDeviceLost = true;
|
||||
@@ -2067,6 +2095,7 @@ namespace ANSCENTER {
|
||||
__FILE__, __LINE__);
|
||||
ANS_DBG("ONNXYOLO", "DML device lost — recreating session on CPU");
|
||||
try {
|
||||
std::lock_guard<std::recursive_mutex> lk(_mutex);
|
||||
m_ortEngine.reset();
|
||||
if (InitOrtEngine(ANSCENTER::EngineType::CPU)) {
|
||||
_logger.LogInfo("ANSONNXYOLO::DetectObjects",
|
||||
@@ -2084,7 +2113,6 @@ namespace ANSCENTER {
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
// Suppress flood — already logged above
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -2101,8 +2129,10 @@ namespace ANSCENTER {
|
||||
std::vector<std::vector<Object>> ANSONNXYOLO::RunInferencesBatch(
|
||||
const std::vector<cv::Mat>& inputs, const std::string& camera_id)
|
||||
{
|
||||
if (_modelLoading.load()) return {};
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
auto lock = TryLockWithTimeout("ANSONNXYOLO::RunInferencesBatch");
|
||||
if (!lock.owns_lock()) return {};
|
||||
if (!_modelLoadValid) {
|
||||
_logger.LogFatal("ANSONNXYOLO::RunInferencesBatch",
|
||||
"Cannot load ONNX model", __FILE__, __LINE__);
|
||||
@@ -2154,16 +2184,33 @@ namespace ANSCENTER {
|
||||
const std::vector<cv::Mat>& inputImages, const std::string& camera_id)
|
||||
{
|
||||
try {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (!m_ortEngine) {
|
||||
_logger.LogError("ANSONNXYOLO::DetectObjectsBatch",
|
||||
"ORT engine is null", __FILE__, __LINE__);
|
||||
return {};
|
||||
if (_modelLoading.load()) return {};
|
||||
|
||||
// Snapshot config under brief lock
|
||||
float probThresh, nmsThresh;
|
||||
int numKps;
|
||||
std::vector<std::string> classes;
|
||||
bool trackerEnabled, stabilizationEnabled;
|
||||
{
|
||||
auto lk = TryLockWithTimeout("ANSONNXYOLO::DetectObjectsBatch");
|
||||
if (!lk.owns_lock()) return {};
|
||||
|
||||
if (!m_ortEngine) {
|
||||
_logger.LogError("ANSONNXYOLO::DetectObjectsBatch",
|
||||
"ORT engine is null", __FILE__, __LINE__);
|
||||
return {};
|
||||
}
|
||||
probThresh = PROBABILITY_THRESHOLD;
|
||||
nmsThresh = NMS_THRESHOLD;
|
||||
numKps = NUM_KPS;
|
||||
classes = _classes;
|
||||
trackerEnabled = _trackerEnabled;
|
||||
stabilizationEnabled = _stabilizationEnabled;
|
||||
}
|
||||
|
||||
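// Heavy work outside lock.
// NOTE(review): m_ortEngine is dereferenced here without _mutex, while the
// device-lost handler calls m_ortEngine.reset() under _mutex — confirm the
// engine cannot be destroyed while detectBatch() is still running.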
|
||||
auto batchResults = m_ortEngine->detectBatch(
|
||||
inputImages, _classes,
|
||||
PROBABILITY_THRESHOLD, NMS_THRESHOLD, NUM_KPS);
|
||||
inputImages, classes, probThresh, nmsThresh, numKps);
|
||||
|
||||
const bool isClassification = m_ortEngine->lastBatchWasClassification;
|
||||
|
||||
@@ -2171,10 +2218,9 @@ namespace ANSCENTER {
|
||||
for (auto& obj : results)
|
||||
obj.cameraId = camera_id;
|
||||
|
||||
// Skip tracking for classification models
|
||||
if (_trackerEnabled && !isClassification) {
|
||||
if (trackerEnabled && !isClassification) {
|
||||
results = ApplyTracking(results, camera_id);
|
||||
if (_stabilizationEnabled) results = StabilizeDetections(results, camera_id);
|
||||
if (stabilizationEnabled) results = StabilizeDetections(results, camera_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2189,6 +2235,7 @@ namespace ANSCENTER {
|
||||
"DirectML GPU device lost (887A0005) — attempting CPU fallback",
|
||||
__FILE__, __LINE__);
|
||||
try {
|
||||
std::lock_guard<std::recursive_mutex> lk(_mutex);
|
||||
m_ortEngine.reset();
|
||||
if (!InitOrtEngine(ANSCENTER::EngineType::CPU))
|
||||
_logger.LogFatal("ANSONNXYOLO::DetectObjectsBatch",
|
||||
|
||||
Reference in New Issue
Block a user