Add log and change logger policy

This commit is contained in:
2026-04-25 20:29:58 +10:00
parent ef2a122fec
commit bab94bf9bc
2 changed files with 61 additions and 5 deletions

View File

@@ -163,7 +163,9 @@ namespace ANSCENTER
std::call_once(initFlag, [this]() { std::call_once(initFlag, [this]() {
try { try {
spdlog::init_thread_pool(8192, 1); // Two worker threads so the slow Event Log sink (RPC to event log
// service) cannot starve the console sink. Queue size unchanged.
spdlog::init_thread_pool(8192, 2);
ConfigureLogger(); ConfigureLogger();
} }
catch (const spdlog::spdlog_ex& ex) { catch (const spdlog::spdlog_ex& ex) {
@@ -188,9 +190,14 @@ namespace ANSCENTER
stdout_sink->set_level(spdlog::level::trace); stdout_sink->set_level(spdlog::level::trace);
consoleSinks.push_back(stdout_sink); consoleSinks.push_back(stdout_sink);
// overrun_oldest: producer never blocks. If the queue fills (8192
// pending msgs), the OLDEST queued msg is dropped to make room.
// Why: block + 1-thread pool + slow WindowsEventSink can stall
// inference threads on logging and, if shutdown races a blocked
// producer, surface as system_error("resource deadlock would occur").
auto consoleLogger = std::make_shared<spdlog::async_logger>( auto consoleLogger = std::make_shared<spdlog::async_logger>(
"anslogger_console", consoleSinks.begin(), consoleSinks.end(), "anslogger_console", consoleSinks.begin(), consoleSinks.end(),
spdlog::thread_pool(), spdlog::async_overflow_policy::block); spdlog::thread_pool(), spdlog::async_overflow_policy::overrun_oldest);
consoleLogger->set_level(spdlog::level::trace); consoleLogger->set_level(spdlog::level::trace);
consoleLogger->flush_on(spdlog::level::info); consoleLogger->flush_on(spdlog::level::info);
spdlog::register_logger(consoleLogger); spdlog::register_logger(consoleLogger);
@@ -201,9 +208,12 @@ namespace ANSCENTER
win_event_sink->set_level(spdlog::level::info); win_event_sink->set_level(spdlog::level::info);
std::vector<spdlog::sink_ptr> eventSinks{ win_event_sink }; std::vector<spdlog::sink_ptr> eventSinks{ win_event_sink };
// Same overrun_oldest policy as console — see note above. Especially
// important for this logger because WindowsEventSink::sink_it_ does
// a synchronous ReportEventW RPC which can be slow under load.
auto eventLogger = std::make_shared<spdlog::async_logger>( auto eventLogger = std::make_shared<spdlog::async_logger>(
"anslogger_event", eventSinks.begin(), eventSinks.end(), "anslogger_event", eventSinks.begin(), eventSinks.end(),
spdlog::thread_pool(), spdlog::async_overflow_policy::block); spdlog::thread_pool(), spdlog::async_overflow_policy::overrun_oldest);
eventLogger->set_level(spdlog::level::info); eventLogger->set_level(spdlog::level::info);
eventLogger->flush_on(spdlog::level::info); eventLogger->flush_on(spdlog::level::info);
spdlog::register_logger(eventLogger); spdlog::register_logger(eventLogger);

View File

@@ -1,6 +1,7 @@
#include"ANSONNXCL.h" #include"ANSONNXCL.h"
#include "EPLoader.h" #include "EPLoader.h"
#include "OpenVINODeviceConfig.h" #include "OpenVINODeviceConfig.h"
#include <typeinfo> // typeid(e).name() in ANS_DBG diagnostics
namespace ANSCENTER namespace ANSCENTER
{ {
@@ -575,23 +576,32 @@ namespace ANSCENTER
} }
} }
std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id) { std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p",
outputTensors.size(), camera_id.c_str(), (void*)this);
std::lock_guard<std::recursive_mutex> lock(_mutex); std::lock_guard<std::recursive_mutex> lock(_mutex);
ANS_DBG("ANSONNXCL_pp", "lock acquired this=%p", (void*)this);
try { try {
std::vector<Object> output; std::vector<Object> output;
if (outputTensors.empty()) { if (outputTensors.empty()) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN empty tensors");
this->_logger.LogError("ANSONNXCL::postprocess", "No output tensors", __FILE__, __LINE__); this->_logger.LogError("ANSONNXCL::postprocess", "No output tensors", __FILE__, __LINE__);
return {}; return {};
} }
ANS_DBG("ANSONNXCL_pp", "GetTensorData<float>");
const float* rawOutput = outputTensors[0].GetTensorData<float>(); const float* rawOutput = outputTensors[0].GetTensorData<float>();
if (!rawOutput) { if (!rawOutput) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null");
this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__); this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
return {}; return {};
} }
ANS_DBG("ANSONNXCL_pp", "GetTensorTypeAndShapeInfo + GetShape");
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
size_t numScores = vectorProduct(outputShape); size_t numScores = vectorProduct(outputShape);
ANS_DBG("ANSONNXCL_pp", "shape.size=%zu numScores=%zu",
outputShape.size(), numScores);
std::ostringstream oss_shape; std::ostringstream oss_shape;
oss_shape << "Output tensor shape: ["; oss_shape << "Output tensor shape: [";
@@ -602,6 +612,8 @@ namespace ANSCENTER
DEBUG_PRINT(oss_shape.str()); DEBUG_PRINT(oss_shape.str());
// CRITICAL: Check for NaN/Inf in raw output // CRITICAL: Check for NaN/Inf in raw output
ANS_DBG("ANSONNXCL_pp", "NaN/Inf scan begin (limit=%zu)",
std::min(numScores, size_t(100)));
bool hasNaN = false; bool hasNaN = false;
for (size_t i = 0; i < std::min(numScores, size_t(100)); ++i) { for (size_t i = 0; i < std::min(numScores, size_t(100)); ++i) {
if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) { if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) {
@@ -614,6 +626,7 @@ namespace ANSCENTER
} }
if (hasNaN) { if (hasNaN) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN NaN/Inf");
this->_logger.LogError("ANSONNXCL::postprocess", this->_logger.LogError("ANSONNXCL::postprocess",
"Model produced NaN/Inf values - input may be corrupted or model is broken", "Model produced NaN/Inf values - input may be corrupted or model is broken",
__FILE__, __LINE__); __FILE__, __LINE__);
@@ -621,7 +634,10 @@ namespace ANSCENTER
} }
int currentNumClasses = numClasses_ > 0 ? numClasses_ : static_cast<int>(_classes.size()); int currentNumClasses = numClasses_ > 0 ? numClasses_ : static_cast<int>(_classes.size());
ANS_DBG("ANSONNXCL_pp", "currentNumClasses=%d (numClasses_=%d _classes.size=%zu)",
currentNumClasses, numClasses_, _classes.size());
if (currentNumClasses <= 0) { if (currentNumClasses <= 0) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN no classes");
this->_logger.LogError("ANSONNXCL::postprocess", "No valid number of classes", __FILE__, __LINE__); this->_logger.LogError("ANSONNXCL::postprocess", "No valid number of classes", __FILE__, __LINE__);
return {}; return {};
} }
@@ -635,6 +651,7 @@ namespace ANSCENTER
DEBUG_PRINT(oss_scores.str()); DEBUG_PRINT(oss_scores.str());
// Initialize scores with zeros // Initialize scores with zeros
ANS_DBG("ANSONNXCL_pp", "alloc scores vec n=%d", currentNumClasses);
std::vector<float> scores(currentNumClasses, 0.0f); std::vector<float> scores(currentNumClasses, 0.0f);
int validScores = 0; int validScores = 0;
@@ -658,9 +675,12 @@ namespace ANSCENTER
} }
} }
else { else {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN unsupported shape size=%zu",
outputShape.size());
this->_logger.LogError("ANSONNXCL::postprocess", "Unsupported output shape", __FILE__, __LINE__); this->_logger.LogError("ANSONNXCL::postprocess", "Unsupported output shape", __FILE__, __LINE__);
return {}; return {};
} }
ANS_DBG("ANSONNXCL_pp", "scores filled validScores=%d", validScores);
// Find maximum score // Find maximum score
int bestClassId = -1; int bestClassId = -1;
@@ -674,9 +694,11 @@ namespace ANSCENTER
} }
if (bestClassId == -1) { if (bestClassId == -1) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN bestClassId=-1");
this->_logger.LogError("ANSONNXCL::postprocess", "Could not determine best class ID", __FILE__, __LINE__); this->_logger.LogError("ANSONNXCL::postprocess", "Could not determine best class ID", __FILE__, __LINE__);
return {}; return {};
} }
ANS_DBG("ANSONNXCL_pp", "argmax bestClassId=%d maxScore=%g", bestClassId, maxScore);
// Check if output is already a probability distribution (sums to ~1.0). // Check if output is already a probability distribution (sums to ~1.0).
// Some ONNX models include a Softmax layer in the graph; applying // Some ONNX models include a Softmax layer in the graph; applying
@@ -689,6 +711,8 @@ namespace ANSCENTER
} }
const bool alreadyNormalized = (allNonNeg && rawSum > 0.9f && rawSum < 1.1f); const bool alreadyNormalized = (allNonNeg && rawSum > 0.9f && rawSum < 1.1f);
ANS_DBG("ANSONNXCL_pp", "alloc probabilities vec n=%d normalized=%d",
currentNumClasses, (int)alreadyNormalized);
std::vector<float> probabilities(currentNumClasses, 0.0f); std::vector<float> probabilities(currentNumClasses, 0.0f);
float confidence = 0.0f; float confidence = 0.0f;
if (alreadyNormalized) { if (alreadyNormalized) {
@@ -704,9 +728,11 @@ namespace ANSCENTER
} }
confidence = sumExp > 0 ? probabilities[bestClassId] / sumExp : 0.0f; confidence = sumExp > 0 ? probabilities[bestClassId] / sumExp : 0.0f;
} }
ANS_DBG("ANSONNXCL_pp", "softmax done confidence=%g", confidence);
// CRITICAL: Validate final confidence // CRITICAL: Validate final confidence
if (std::isnan(confidence) || std::isinf(confidence)) { if (std::isnan(confidence) || std::isinf(confidence)) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN confidence NaN/Inf");
this->_logger.LogError("ANSONNXCL::postprocess", this->_logger.LogError("ANSONNXCL::postprocess",
"Final confidence is NaN/Inf: " + std::to_string(confidence), "Final confidence is NaN/Inf: " + std::to_string(confidence),
__FILE__, __LINE__); __FILE__, __LINE__);
@@ -720,6 +746,8 @@ namespace ANSCENTER
else if (bestClassId >= 0) { else if (bestClassId >= 0) {
className = "ClassID_" + std::to_string(bestClassId); className = "ClassID_" + std::to_string(bestClassId);
} }
ANS_DBG("ANSONNXCL_pp", "className resolved id=%d name=%s",
bestClassId, className.c_str());
Object detection; Object detection;
if (m_imgWidth > 20 && m_imgHeight > 20) { if (m_imgWidth > 20 && m_imgHeight > 20) {
@@ -728,6 +756,8 @@ namespace ANSCENTER
else { else {
detection.box = cv::Rect(0, 0, m_imgWidth, m_imgHeight); detection.box = cv::Rect(0, 0, m_imgWidth, m_imgHeight);
} }
ANS_DBG("ANSONNXCL_pp", "RectToNormalizedPolygon w=%g h=%g",
m_imgWidth, m_imgHeight);
detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(detection.box, m_imgWidth, m_imgHeight); detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(detection.box, m_imgWidth, m_imgHeight);
detection.classId = bestClassId; detection.classId = bestClassId;
detection.className = className; detection.className = className;
@@ -735,12 +765,20 @@ namespace ANSCENTER
detection.cameraId = camera_id; detection.cameraId = camera_id;
output.push_back(detection); output.push_back(detection);
ANS_DBG("ANSONNXCL_pp", "SUCCESS detections=%zu", output.size());
return output; return output;
} }
catch (const std::exception& e) { catch (const std::exception& e) {
ANS_DBG("ANSONNXCL_pp", "FATAL exception type=%s msg=%s",
typeid(e).name(), e.what());
this->_logger.LogFatal("ANSONNXCL::postprocess", e.what(), __FILE__, __LINE__); this->_logger.LogFatal("ANSONNXCL::postprocess", e.what(), __FILE__, __LINE__);
return {}; return {};
} }
catch (...) {
ANS_DBG("ANSONNXCL_pp", "FATAL unknown exception (non-std)");
this->_logger.LogFatal("ANSONNXCL::postprocess", "unknown non-std exception", __FILE__, __LINE__);
return {};
}
} }
std::vector<Object> ANSONNXCL::classify(const cv::Mat& image, const std::string& camera_id) { std::vector<Object> ANSONNXCL::classify(const cv::Mat& image, const std::string& camera_id) {
std::lock_guard<std::recursive_mutex> lock(_mutex); std::lock_guard<std::recursive_mutex> lock(_mutex);
@@ -848,9 +886,17 @@ namespace ANSCENTER
// Postprocess // Postprocess
try { try {
return postprocess(outputTensors, camera_id); ANS_DBG("ANSONNXCL_cls", "calling postprocess outputTensors=%zu",
outputTensors.size());
auto pp = postprocess(outputTensors, camera_id);
ANS_DBG("ANSONNXCL_cls", "postprocess returned detections=%zu", pp.size());
return pp;
} }
catch (const std::exception& e) { catch (const std::exception& e) {
// If we land here, the throw escaped postprocess's own catch —
// most likely from its lock_guard line (outside its try block).
ANS_DBG("ANSONNXCL_cls", "postprocess THREW (escaped its catch) type=%s msg=%s",
typeid(e).name(), e.what());
this->_logger.LogError("ANSONNXCL::classify", this->_logger.LogError("ANSONNXCL::classify",
"Exception during postprocessing: " + std::string(e.what()), __FILE__, __LINE__); "Exception during postprocessing: " + std::string(e.what()), __FILE__, __LINE__);
return {}; return {};