From bab94bf9bc6ec65aa6e27f94ff213afcd9894240 Mon Sep 17 00:00:00 2001 From: Tuan Nghia Nguyen Date: Sat, 25 Apr 2026 20:29:58 +1000 Subject: [PATCH] Add log and change logger policy --- core/ANSLicensingSystem/ANSLicense.cpp | 16 +++++++-- modules/ANSODEngine/ANSONNXCL.cpp | 50 ++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/core/ANSLicensingSystem/ANSLicense.cpp b/core/ANSLicensingSystem/ANSLicense.cpp index 1176828..4824638 100644 --- a/core/ANSLicensingSystem/ANSLicense.cpp +++ b/core/ANSLicensingSystem/ANSLicense.cpp @@ -163,7 +163,9 @@ namespace ANSCENTER std::call_once(initFlag, [this]() { try { - spdlog::init_thread_pool(8192, 1); + // Two worker threads so the slow Event Log sink (RPC to event log + // service) cannot starve the console sink. Queue size unchanged. + spdlog::init_thread_pool(8192, 2); ConfigureLogger(); } catch (const spdlog::spdlog_ex& ex) { @@ -188,9 +190,14 @@ namespace ANSCENTER stdout_sink->set_level(spdlog::level::trace); consoleSinks.push_back(stdout_sink); + // overrun_oldest: producer never blocks. If the queue fills (8192 + // pending msgs), the OLDEST queued msg is dropped to make room. + // Why: block + 1-thread pool + slow WindowsEventSink can stall + // inference threads on logging and, if shutdown races a blocked + // producer, surface as system_error("resource deadlock would occur"). auto consoleLogger = std::make_shared( "anslogger_console", consoleSinks.begin(), consoleSinks.end(), - spdlog::thread_pool(), spdlog::async_overflow_policy::block); + spdlog::thread_pool(), spdlog::async_overflow_policy::overrun_oldest); consoleLogger->set_level(spdlog::level::trace); consoleLogger->flush_on(spdlog::level::info); spdlog::register_logger(consoleLogger); @@ -201,9 +208,12 @@ namespace ANSCENTER win_event_sink->set_level(spdlog::level::info); std::vector eventSinks{ win_event_sink }; + // Same overrun_oldest policy as console — see note above. Especially + // important for this logger because WindowsEventSink::sink_it_ does + // a synchronous ReportEventW RPC which can be slow under load. auto eventLogger = std::make_shared( "anslogger_event", eventSinks.begin(), eventSinks.end(), - spdlog::thread_pool(), spdlog::async_overflow_policy::block); + spdlog::thread_pool(), spdlog::async_overflow_policy::overrun_oldest); eventLogger->set_level(spdlog::level::info); eventLogger->flush_on(spdlog::level::info); spdlog::register_logger(eventLogger); diff --git a/modules/ANSODEngine/ANSONNXCL.cpp b/modules/ANSODEngine/ANSONNXCL.cpp index 15b7981..2776493 100644 --- a/modules/ANSODEngine/ANSONNXCL.cpp +++ b/modules/ANSODEngine/ANSONNXCL.cpp @@ -1,6 +1,7 @@ #include"ANSONNXCL.h" #include "EPLoader.h" #include "OpenVINODeviceConfig.h" +#include // typeid(e).name() in ANS_DBG diagnostics namespace ANSCENTER { @@ -575,23 +576,32 @@ namespace ANSCENTER } } std::vector ANSONNXCL::postprocess(const std::vector& outputTensors, const std::string& camera_id) { + ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p", + outputTensors.size(), camera_id.c_str(), (void*)this); std::lock_guard lock(_mutex); + ANS_DBG("ANSONNXCL_pp", "lock acquired this=%p", (void*)this); try { std::vector output; if (outputTensors.empty()) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN empty tensors"); this->_logger.LogError("ANSONNXCL::postprocess", "No output tensors", __FILE__, __LINE__); return {}; } + ANS_DBG("ANSONNXCL_pp", "GetTensorData"); const float* rawOutput = outputTensors[0].GetTensorData(); if (!rawOutput) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null"); this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__); return {}; } + ANS_DBG("ANSONNXCL_pp", "GetTensorTypeAndShapeInfo + GetShape"); const std::vector outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); size_t numScores = vectorProduct(outputShape); + ANS_DBG("ANSONNXCL_pp", "shape.size=%zu numScores=%zu", + outputShape.size(), numScores); std::ostringstream oss_shape; oss_shape << "Output tensor shape: ["; @@ -602,6 +612,8 @@ namespace ANSCENTER DEBUG_PRINT(oss_shape.str()); // CRITICAL: Check for NaN/Inf in raw output + ANS_DBG("ANSONNXCL_pp", "NaN/Inf scan begin (limit=%zu)", + std::min(numScores, size_t(100))); bool hasNaN = false; for (size_t i = 0; i < std::min(numScores, size_t(100)); ++i) { if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) { @@ -614,6 +626,7 @@ namespace ANSCENTER } if (hasNaN) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN NaN/Inf"); this->_logger.LogError("ANSONNXCL::postprocess", "Model produced NaN/Inf values - input may be corrupted or model is broken", __FILE__, __LINE__); @@ -621,7 +634,10 @@ namespace ANSCENTER } int currentNumClasses = numClasses_ > 0 ? numClasses_ : static_cast(_classes.size()); + ANS_DBG("ANSONNXCL_pp", "currentNumClasses=%d (numClasses_=%d _classes.size=%zu)", + currentNumClasses, numClasses_, _classes.size()); if (currentNumClasses <= 0) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN no classes"); this->_logger.LogError("ANSONNXCL::postprocess", "No valid number of classes", __FILE__, __LINE__); return {}; } @@ -635,6 +651,7 @@ namespace ANSCENTER DEBUG_PRINT(oss_scores.str()); // Initialize scores with zeros + ANS_DBG("ANSONNXCL_pp", "alloc scores vec n=%d", currentNumClasses); std::vector scores(currentNumClasses, 0.0f); int validScores = 0; @@ -658,9 +675,12 @@ namespace ANSCENTER } } else { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN unsupported shape size=%zu", + outputShape.size()); this->_logger.LogError("ANSONNXCL::postprocess", "Unsupported output shape", __FILE__, __LINE__); return {}; } + ANS_DBG("ANSONNXCL_pp", "scores filled validScores=%d", validScores); // Find maximum score int bestClassId = -1; @@ -674,9 +694,11 @@ namespace ANSCENTER } if (bestClassId == -1) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN bestClassId=-1"); this->_logger.LogError("ANSONNXCL::postprocess", "Could not determine best class ID", __FILE__, __LINE__); return {}; } + ANS_DBG("ANSONNXCL_pp", "argmax bestClassId=%d maxScore=%g", bestClassId, maxScore); // Check if output is already a probability distribution (sums to ~1.0). // Some ONNX models include a Softmax layer in the graph; applying @@ -689,6 +711,8 @@ namespace ANSCENTER } const bool alreadyNormalized = (allNonNeg && rawSum > 0.9f && rawSum < 1.1f); + ANS_DBG("ANSONNXCL_pp", "alloc probabilities vec n=%d normalized=%d", + currentNumClasses, (int)alreadyNormalized); std::vector probabilities(currentNumClasses, 0.0f); float confidence = 0.0f; if (alreadyNormalized) { @@ -704,9 +728,11 @@ namespace ANSCENTER } confidence = sumExp > 0 ? probabilities[bestClassId] / sumExp : 0.0f; } + ANS_DBG("ANSONNXCL_pp", "softmax done confidence=%g", confidence); // CRITICAL: Validate final confidence if (std::isnan(confidence) || std::isinf(confidence)) { + ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN confidence NaN/Inf"); this->_logger.LogError("ANSONNXCL::postprocess", "Final confidence is NaN/Inf: " + std::to_string(confidence), __FILE__, __LINE__); @@ -720,6 +746,8 @@ namespace ANSCENTER else if (bestClassId >= 0) { className = "ClassID_" + std::to_string(bestClassId); } + ANS_DBG("ANSONNXCL_pp", "className resolved id=%d name=%s", + bestClassId, className.c_str()); Object detection; if (m_imgWidth > 20 && m_imgHeight > 20) { @@ -728,6 +756,8 @@ namespace ANSCENTER else { detection.box = cv::Rect(0, 0, m_imgWidth, m_imgHeight); } + ANS_DBG("ANSONNXCL_pp", "RectToNormalizedPolygon w=%g h=%g", + m_imgWidth, m_imgHeight); detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(detection.box, m_imgWidth, m_imgHeight); detection.classId = bestClassId; detection.className = className; @@ -735,13 +765,21 @@ namespace ANSCENTER detection.cameraId = camera_id; output.push_back(detection); + ANS_DBG("ANSONNXCL_pp", "SUCCESS detections=%zu", output.size()); return output; } catch (const std::exception& e) { + ANS_DBG("ANSONNXCL_pp", "FATAL exception type=%s msg=%s", + typeid(e).name(), e.what()); this->_logger.LogFatal("ANSONNXCL::postprocess", e.what(), __FILE__, __LINE__); return {}; } - } + catch (...) { + ANS_DBG("ANSONNXCL_pp", "FATAL unknown exception (non-std)"); + this->_logger.LogFatal("ANSONNXCL::postprocess", "unknown non-std exception", __FILE__, __LINE__); + return {}; + } + } std::vector ANSONNXCL::classify(const cv::Mat& image, const std::string& camera_id) { std::lock_guard lock(_mutex); @@ -848,9 +886,17 @@ namespace ANSCENTER // Postprocess try { - return postprocess(outputTensors, camera_id); + ANS_DBG("ANSONNXCL_cls", "calling postprocess outputTensors=%zu", + outputTensors.size()); + auto pp = postprocess(outputTensors, camera_id); + ANS_DBG("ANSONNXCL_cls", "postprocess returned detections=%zu", pp.size()); + return pp; } catch (const std::exception& e) { + // If we land here, the throw escaped postprocess's own catch — + // most likely from its lock_guard line (outside its try block). + ANS_DBG("ANSONNXCL_cls", "postprocess THREW (escaped its catch) type=%s msg=%s", + typeid(e).name(), e.what()); this->_logger.LogError("ANSONNXCL::classify", "Exception during postprocessing: " + std::string(e.what()), __FILE__, __LINE__); return {};