#include "ANSONNXCL.h"
#include "EPLoader.h"

namespace ANSCENTER {

/// Resize @p image into @p outImage at @p targetShape.
/// strategy == "letterbox": aspect-preserving resize + constant-color padding
/// (centered). Any other strategy: plain stretch resize.
/// Returns false only when the input image is empty.
bool ANSONNXCL::preprocessImageToTensor(const cv::Mat& image, cv::Mat& outImage,
                                        const cv::Size& targetShape,
                                        const cv::Scalar& color, bool scaleUp,
                                        const std::string& strategy)
{
    if (image.empty()) {
        this->_logger.LogFatal("ANSONNXCL::preprocessImageToTensor",
                               "Input image to preprocessImageToTensor is empty.",
                               __FILE__, __LINE__);
        return false;
    }

    if (strategy == "letterbox") {
        // Scale so the image fits inside the target while keeping aspect ratio.
        float r = std::min(static_cast<float>(targetShape.height) / image.rows,
                           static_cast<float>(targetShape.width) / image.cols);
        if (!scaleUp) {
            r = std::min(r, 1.0f);  // never upscale when scaleUp == false
        }

        int newUnpadW = static_cast<int>(std::round(image.cols * r));
        int newUnpadH = static_cast<int>(std::round(image.rows * r));

        cv::Mat resizedTemp;
        cv::resize(image, resizedTemp, cv::Size(newUnpadW, newUnpadH), 0, 0,
                   cv::INTER_LINEAR);

        // Distribute the padding on both sides (extra pixel goes to bottom/right).
        int dw = targetShape.width - newUnpadW;
        int dh = targetShape.height - newUnpadH;
        int top = dh / 2;
        int bottom = dh - top;
        int left = dw / 2;
        int right = dw - left;

        cv::copyMakeBorder(resizedTemp, outImage, top, bottom, left, right,
                           cv::BORDER_CONSTANT, color);
        return true;
    }

    // Default to "resize" (plain stretch; clone when already the right size).
    if (image.size() == targetShape) {
        outImage = image.clone();
    } else {
        cv::resize(image, outImage, targetShape, 0, 0, cv::INTER_LINEAR);
    }
    return true;
}

/// Product of all dimensions of a tensor shape; 0 for an empty shape.
/// NOTE(review): a dynamic dimension (-1) would make the product negative and
/// wrap on conversion to size_t — callers only pass fully-resolved shapes.
size_t ANSONNXCL::vectorProduct(const std::vector<int64_t>& vector)
{
    if (vector.empty()) return 0;
    return static_cast<size_t>(
        std::accumulate(vector.begin(), vector.end(), 1LL,
                        std::multiplies<long long>()));
}

/// Create the ORT environment/session, attach the best available execution
/// provider (CUDA / DirectML / OpenVINO) when useGPU is set, read input/output
/// node metadata and warm the model up.  Returns false on any failure.
bool ANSONNXCL::Init(const std::string& modelPath,
                     const cv::Size& targetInputShape, bool useGPU)
{
    // decltype(_mutex): classify() takes this lock and then calls
    // preprocess()/postprocess(), which lock again — this is only safe if
    // _mutex is a recursive mutex. TODO(review): confirm the member type.
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        inputImageShape_ = targetInputShape;

        const auto& ep = ANSCENTER::EPLoader::Current();
        // Bind the ORT C API loaded by EPLoader if not already initialised.
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        std::cout << "[ANSONNXCL] EP ready: "
                  << ANSCENTER::EPLoader::EngineTypeName(ep.type) << std::endl;

        env_ = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "ONNX_CLASSIFICATION_ENV");
        sessionOptions_ = Ort::SessionOptions();
        sessionOptions_.SetIntraOpNumThreads(
            std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
        sessionOptions_.SetGraphOptimizationLevel(
            GraphOptimizationLevel::ORT_ENABLE_ALL);

        // ── Log available providers ─────────────────────────────────────────
        std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
        std::cout << "Available Execution Providers:" << std::endl;
        for (const auto& p : availableProviders)
            std::cout << "  - " << p << std::endl;

        // ── Attach EP based on runtime-detected hardware ────────────────────
        if (useGPU) {
            bool attached = false;
            switch (ep.type) {
            case ANSCENTER::EngineType::NVIDIA_GPU: {
                auto it = std::find(availableProviders.begin(),
                                    availableProviders.end(),
                                    "CUDAExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXCL::Init",
                        "CUDAExecutionProvider not in DLL — "
                        "check ep/cuda/ has the CUDA ORT build.",
                        __FILE__, __LINE__);
                    break;
                }
                try {
                    OrtCUDAProviderOptionsV2* cuda_options = nullptr;
                    Ort::GetApi().CreateCUDAProviderOptions(&cuda_options);
                    const char* keys[] = { "device_id" };
                    const char* values[] = { "0" };
                    Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, keys,
                                                            values, 1);
                    sessionOptions_.AppendExecutionProvider_CUDA_V2(*cuda_options);
                    Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
                    std::cout << "[ANSONNXCL] CUDA EP attached." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXCL::Init", e.what(),
                                           __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::AMD_GPU: {
                auto it = std::find(availableProviders.begin(),
                                    availableProviders.end(),
                                    "DmlExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXCL::Init",
                        "DmlExecutionProvider not in DLL — "
                        "check ep/directml/ has the DirectML ORT build.",
                        __FILE__, __LINE__);
                    break;
                }
                try {
                    std::unordered_map<std::string, std::string> opts =
                        { { "device_id", "0" } };
                    sessionOptions_.AppendExecutionProvider("DML", opts);
                    std::cout << "[ANSONNXCL] DirectML EP attached." << std::endl;
                    attached = true;
                }
                catch (const Ort::Exception& e) {
                    this->_logger.LogError("ANSONNXCL::Init", e.what(),
                                           __FILE__, __LINE__);
                }
                break;
            }
            case ANSCENTER::EngineType::OPENVINO_GPU: {
                auto it = std::find(availableProviders.begin(),
                                    availableProviders.end(),
                                    "OpenVINOExecutionProvider");
                if (it == availableProviders.end()) {
                    this->_logger.LogError("ANSONNXCL::Init",
                        "OpenVINOExecutionProvider not in DLL — "
                        "check ep/openvino/ has the OpenVINO ORT build.",
                        __FILE__, __LINE__);
                    break;
                }
                // Note: FP32 + single thread/stream preserved from original for
                // classification determinism.
                const std::string precision = "FP32";
                const std::string numberOfThreads = "1";
                const std::string numberOfStreams = "1";
                // Device configurations tried in order until one attaches.
                std::vector<std::unordered_map<std::string, std::string>> try_configs = {
                    { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
                    { {"device_type","GPU.0"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
                    { {"device_type","GPU.1"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
                    { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
                      {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
                      {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }
                };
                for (const auto& config : try_configs) {
                    try {
                        sessionOptions_.AppendExecutionProvider_OpenVINO_V2(config);
                        std::cout << "[ANSONNXCL] OpenVINO EP attached ("
                                  << config.at("device_type") << ")." << std::endl;
                        attached = true;
                        break;
                    }
                    catch (const Ort::Exception& e) {
                        this->_logger.LogError("ANSONNXCL::Init", e.what(),
                                               __FILE__, __LINE__);
                    }
                }
                if (!attached)
                    std::cerr << "[ANSONNXCL] OpenVINO EP: all device configs failed."
                              << std::endl;
                break;
            }
            default:
                break;
            }
            if (!attached) {
                this->_logger.LogFatal("ANSONNXCL::Init",
                                       "GPU EP not attached. Running on CPU.",
                                       __FILE__, __LINE__);
            }
        } else {
            std::cout << "[ANSONNXCL] Inference device: CPU (useGPU=false)"
                      << std::endl;
        }

        // ── Load model ──────────────────────────────────────────────────────
#ifdef _WIN32
        // NOTE(review): byte-wise widening is only correct for ASCII paths;
        // a UTF-8 path with non-ASCII characters would be mangled — consider
        // MultiByteToWideChar. Behavior preserved here.
        std::wstring w_modelPath = std::wstring(modelPath.begin(), modelPath.end());
        session_ = Ort::Session(env_, w_modelPath.c_str(), sessionOptions_);
#else
        session_ = Ort::Session(env_, modelPath.c_str(), sessionOptions_);
#endif

        Ort::AllocatorWithDefaultOptions allocator;
        numInputNodes_ = session_.GetInputCount();
        numOutputNodes_ = session_.GetOutputCount();
        if (numInputNodes_ == 0) throw std::runtime_error("Model has no input nodes.");
        if (numOutputNodes_ == 0) throw std::runtime_error("Model has no output nodes.");

        // ── Input node name & shape ─────────────────────────────────────────
        auto input_node_name = session_.GetInputNameAllocated(0, allocator);
        inputNodeNameAllocatedStrings_.push_back(std::move(input_node_name));
        inputNames_.push_back(inputNodeNameAllocatedStrings_.back().get());

        Ort::TypeInfo inputTypeInfo = session_.GetInputTypeInfo(0);
        std::vector<int64_t> modelInputTensorShapeVec =
            inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();

        if (modelInputTensorShapeVec.size() == 4) {
            // NCHW: dims 2/3 are H/W; -1 marks a dynamic dimension.
            isDynamicInputShape_ = (modelInputTensorShapeVec[2] == -1 ||
                                    modelInputTensorShapeVec[3] == -1);
            DEBUG_PRINT("Model input tensor shape from metadata: "
                        << modelInputTensorShapeVec[0] << "x" << modelInputTensorShapeVec[1]
                        << "x" << modelInputTensorShapeVec[2] << "x"
                        << modelInputTensorShapeVec[3]);
            if (!isDynamicInputShape_) {
                int modelH = static_cast<int>(modelInputTensorShapeVec[2]);
                int modelW = static_cast<int>(modelInputTensorShapeVec[3]);
                if (modelH != inputImageShape_.height || modelW != inputImageShape_.width) {
                    std::cout << "Warning: Target preprocessing shape ("
                              << inputImageShape_.height << "x" << inputImageShape_.width
                              << ") differs from model's fixed input shape ("
                              << modelH << "x" << modelW << "). "
                              << "Consider aligning these for optimal performance/accuracy."
                              << std::endl;
                }
            } else {
                DEBUG_PRINT("Model has dynamic input H/W. Preprocessing to specified target: "
                            << inputImageShape_.height << "x" << inputImageShape_.width);
            }
        } else {
            std::cerr << "Warning: Model input tensor does not have 4 dimensions (NCHW). Shape: [";
            for (size_t i = 0; i < modelInputTensorShapeVec.size(); ++i)
                std::cerr << modelInputTensorShapeVec[i]
                          << (i == modelInputTensorShapeVec.size() - 1 ? "" : ", ");
            std::cerr << "]. Assuming dynamic shape, proceeding with target HxW: "
                      << inputImageShape_.height << "x" << inputImageShape_.width
                      << std::endl;
            isDynamicInputShape_ = true;
        }

        // ── Output node name & shape ────────────────────────────────────────
        auto output_node_name = session_.GetOutputNameAllocated(0, allocator);
        outputNodeNameAllocatedStrings_.push_back(std::move(output_node_name));
        outputNames_.push_back(outputNodeNameAllocatedStrings_.back().get());

        Ort::TypeInfo outputTypeInfo = session_.GetOutputTypeInfo(0);
        std::vector<int64_t> outputTensorShapeVec =
            outputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();

        // Derive class count from the output shape: [N,C], [C], or best guess.
        if (!outputTensorShapeVec.empty()) {
            if (outputTensorShapeVec.size() == 2 && outputTensorShapeVec[0] > 0) {
                numClasses_ = static_cast<int>(outputTensorShapeVec[1]);
            } else if (outputTensorShapeVec.size() == 1 && outputTensorShapeVec[0] > 0) {
                numClasses_ = static_cast<int>(outputTensorShapeVec[0]);
            } else {
                for (long long dim : outputTensorShapeVec) {
                    if (dim > 1 && numClasses_ == 0)
                        numClasses_ = static_cast<int>(dim);
                }
                if (numClasses_ == 0 && !outputTensorShapeVec.empty())
                    numClasses_ = static_cast<int>(outputTensorShapeVec.back());
            }
        }

        if (numClasses_ > 0) {
            std::ostringstream oss;
            oss << "[";
            for (size_t i = 0; i < outputTensorShapeVec.size(); ++i)
                oss << outputTensorShapeVec[i]
                    << (i < outputTensorShapeVec.size() - 1 ? ", " : "");
            oss << "]";
            DEBUG_PRINT("Model predicts " << numClasses_
                        << " classes, output shape: " << oss.str());
        } else {
            std::cerr << "Warning: Could not determine number of classes from output shape."
                      << std::endl;
        }

        if (numClasses_ > 0 && !_classes.empty() &&
            _classes.size() != static_cast<size_t>(numClasses_)) {
            std::cerr << "Warning: Model class count (" << numClasses_
                      << ") does not match label count (" << _classes.size() << ")."
                      << std::endl;
        }
        if (_classes.empty() && numClasses_ > 0) {
            std::cout << "Warning: No class names loaded. Predictions will use numeric IDs."
                      << std::endl;
        }

        std::cout << "[ANSONNXCL] Initialized successfully. Model: " << modelPath
                  << std::endl;

        // ── Warmup ──────────────────────────────────────────────────────────
        DEBUG_PRINT("Starting model warmup...");
        warmupModel();
        DEBUG_PRINT("Model warmup completed.");
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::Init", e.what(), __FILE__, __LINE__);
        return false;
    }
}

/// Run a few dummy inferences to stabilise the GPU/OpenVINO context.
/// Failures are logged as warnings and never abort initialisation.
void ANSONNXCL::warmupModel()
{
    try {
        // Create a dummy input with the configured preprocessing shape.
        cv::Mat dummyImage = cv::Mat::zeros(inputImageShape_.height,
                                            inputImageShape_.width, CV_8UC3);
        DEBUG_PRINT("Warmup: Created dummy image " << dummyImage.cols << "x"
                    << dummyImage.rows);

        // Run 2-3 inferences to stabilize GPU/OpenVINO context
        for (int i = 0; i < 3; ++i) {
            float* blob = nullptr;
            std::vector<int64_t> inputShape;
            if (preprocess(dummyImage, blob, inputShape)) {
                auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator,
                                                             OrtMemTypeDefault);
                // Tensor wraps `blob` without copying; blob must outlive Run().
                Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
                    memoryInfo, blob, vectorProduct(inputShape),
                    inputShape.data(), inputShape.size());

                // Fix: pass the number of names actually held in the vectors
                // (only node 0 was registered); passing numOutputNodes_ would
                // read past the end for multi-output models.
                auto outputTensors = session_.Run(
                    Ort::RunOptions{ nullptr },
                    inputNames_.data(), &inputTensor, 1,
                    outputNames_.data(), outputNames_.size());

                delete[] blob;
                DEBUG_PRINT("Warmup inference " << (i + 1) << "/3 completed");
            }
        }
        DEBUG_PRINT("Model warmup successful - all internal states initialized");
    }
    catch (const std::exception& e) {
        this->_logger.LogWarn("ANSONNXCL::warmupModel",
                              std::string("Warmup failed but continuing: ") + e.what(),
                              __FILE__, __LINE__);
    }
}

/// Convert @p image into a heap-allocated NCHW float blob in [0,1] (RGB order)
/// sized to inputImageShape_, writing the shape into @p inputTensorShape.
/// On success the caller owns @p blob and must delete[] it; on failure blob is
/// left null.  Extensive NaN/Inf validation is intentional (GPU EP debugging).
bool ANSONNXCL::preprocess(const cv::Mat& image, float*& blob,
                           std::vector<int64_t>& inputTensorShape)
{
    // Recursive lock assumption — see Init(). TODO(review): confirm.
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        // CRITICAL: Validate input image
        if (image.empty()) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                                   "Input image to preprocess is empty",
                                   __FILE__, __LINE__);
            return false;
        }
        // CRITICAL: Check for valid image data
        if (image.data == nullptr) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                                   "Input image data pointer is null",
                                   __FILE__, __LINE__);
            return false;
        }
        // Non-continuous input is tolerated (we copy during resize/convert).
        if (!image.isContinuous()) {
            this->_logger.LogWarn("ANSONNXCL::preprocess",
                                  "Input image is not continuous in memory",
                                  __FILE__, __LINE__);
        }
        // CRITICAL: Check for valid dimensions
        if (image.cols <= 0 || image.rows <= 0) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Invalid image dimensions: " + std::to_string(image.cols) + "x" +
                std::to_string(image.rows), __FILE__, __LINE__);
            return false;
        }
        // CRITICAL: Verify valid pixel values in source image
        double minVal, maxVal;
        cv::minMaxLoc(image, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) ||
            std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Input image contains NaN or Inf values. Min: " +
                std::to_string(minVal) + ", Max: " + std::to_string(maxVal),
                __FILE__, __LINE__);
            return false;
        }

        m_imgWidth = static_cast<int>(image.cols);
        m_imgHeight = static_cast<int>(image.rows);

        // 0. Grayscale → BGR if needed
        cv::Mat bgrImage;
        if (image.channels() == 1) {
            cv::cvtColor(image, bgrImage, cv::COLOR_GRAY2BGR);
        } else {
            bgrImage = image;
        }

        // 1. Resize to target input shape (plain stretch; no letterbox here).
        cv::Mat processedImage;
        preprocessImageToTensor(bgrImage, processedImage, inputImageShape_,
                                cv::Scalar(0, 0, 0), true, "resize");
        if (processedImage.empty() || processedImage.data == nullptr) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                                   "Processed image is empty after resize",
                                   __FILE__, __LINE__);
            return false;
        }

        // 2. Convert BGR to RGB
        cv::Mat rgbImageMat;
        cv::cvtColor(processedImage, rgbImageMat, cv::COLOR_BGR2RGB);
        if (rgbImageMat.empty() || rgbImageMat.data == nullptr) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                                   "RGB image is empty after color conversion",
                                   __FILE__, __LINE__);
            return false;
        }

        // 3. Convert to float32
        cv::Mat floatRgbImage;
        rgbImageMat.convertTo(floatRgbImage, CV_32F);
        if (floatRgbImage.empty() || floatRgbImage.data == nullptr) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                                   "Float image is empty after conversion",
                                   __FILE__, __LINE__);
            return false;
        }
        cv::minMaxLoc(floatRgbImage, &minVal, &maxVal);
        if (std::isnan(minVal) || std::isnan(maxVal) ||
            std::isinf(minVal) || std::isinf(maxVal)) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Float image contains NaN or Inf after conversion. Min: " +
                std::to_string(minVal) + ", Max: " + std::to_string(maxVal),
                __FILE__, __LINE__);
            return false;
        }

        // Set tensor shape (NCHW, batch 1).
        inputTensorShape = { 1, 3,
                             static_cast<int64_t>(floatRgbImage.rows),
                             static_cast<int64_t>(floatRgbImage.cols) };
        if (static_cast<int>(inputTensorShape[2]) != inputImageShape_.height ||
            static_cast<int>(inputTensorShape[3]) != inputImageShape_.width) {
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Preprocessed image dimensions do not match target inputImageShape_",
                __FILE__, __LINE__);
            return false;
        }

        size_t tensorSize = vectorProduct(inputTensorShape);

        // CRITICAL: Clean up existing blob first (caller may reuse the pointer).
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        // Allocate and zero-initialize the output blob (caller takes ownership).
        blob = new float[tensorSize];
        std::memset(blob, 0, tensorSize * sizeof(float));

        int h = static_cast<int>(inputTensorShape[2]);
        int w = static_cast<int>(inputTensorShape[3]);
        int num_channels = static_cast<int>(inputTensorShape[1]);
        if (num_channels != 3) {
            delete[] blob;
            blob = nullptr;
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Expected 3 channels but got: " + std::to_string(num_channels),
                __FILE__, __LINE__);
            return false;
        }
        if (floatRgbImage.channels() != 3) {
            delete[] blob;
            blob = nullptr;
            this->_logger.LogError("ANSONNXCL::preprocess",
                "Float image has wrong channel count: " +
                std::to_string(floatRgbImage.channels()), __FILE__, __LINE__);
            return false;
        }

        // Convert HWC to CHW with per-pixel validation and [0,1] scaling.
        bool hasNaN = false;
        for (int c_idx = 0; c_idx < num_channels; ++c_idx) {
            for (int i = 0; i < h; ++i) {
                for (int j = 0; j < w; ++j) {
                    float pixel_value = floatRgbImage.at<cv::Vec3f>(i, j)[c_idx];
                    if (std::isnan(pixel_value) || std::isinf(pixel_value)) {
                        hasNaN = true;
                        this->_logger.LogError("ANSONNXCL::preprocess",
                            "NaN/Inf detected at position (" + std::to_string(i) +
                            "," + std::to_string(j) + "), channel " +
                            std::to_string(c_idx) + ", value: " +
                            std::to_string(pixel_value), __FILE__, __LINE__);
                        break;
                    }
                    // Scale to [0.0, 1.0]
                    float scaled_pixel = pixel_value / 255.0f;
                    if (std::isnan(scaled_pixel) || std::isinf(scaled_pixel)) {
                        hasNaN = true;
                        this->_logger.LogError("ANSONNXCL::preprocess",
                            "NaN/Inf after scaling at position (" + std::to_string(i) +
                            "," + std::to_string(j) + "), channel " +
                            std::to_string(c_idx) + ", original: " +
                            std::to_string(pixel_value) + ", scaled: " +
                            std::to_string(scaled_pixel), __FILE__, __LINE__);
                        break;
                    }
                    // Store in blob (CHW format)
                    blob[c_idx * (h * w) + i * w + j] = scaled_pixel;
                }
                if (hasNaN) break;
            }
            if (hasNaN) break;
        }
        if (hasNaN) {
            delete[] blob;
            blob = nullptr;
            return false;
        }

        // CRITICAL: Final validation pass over the whole blob.
        float blobSum = 0.0f;
        float blobMin = std::numeric_limits<float>::max();
        float blobMax = std::numeric_limits<float>::lowest();
        for (size_t i = 0; i < tensorSize; ++i) {
            if (std::isnan(blob[i]) || std::isinf(blob[i])) {
                this->_logger.LogError("ANSONNXCL::preprocess",
                    "NaN/Inf found in blob at index " + std::to_string(i) +
                    ", value: " + std::to_string(blob[i]), __FILE__, __LINE__);
                delete[] blob;
                blob = nullptr;
                return false;
            }
            blobSum += blob[i];
            blobMin = std::min(blobMin, blob[i]);
            blobMax = std::max(blobMax, blob[i]);
        }
        (void)blobSum; (void)blobMin; (void)blobMax;  // kept for debug builds

        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::preprocess", e.what(), __FILE__, __LINE__);
        if (blob != nullptr) {
            delete[] blob;
            blob = nullptr;
        }
        return false;
    }
}

/// Convert the first output tensor into a single top-1 classification Object.
/// Handles [1,C], [C] and [N,C] (first row) outputs; applies softmax only when
/// the scores are not already a probability distribution.
std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors,
                                           const std::string& camera_id)
{
    // Recursive lock assumption — see Init(). TODO(review): confirm.
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        std::vector<Object> output;
        if (outputTensors.empty()) {
            this->_logger.LogError("ANSONNXCL::postprocess", "No output tensors",
                                   __FILE__, __LINE__);
            return {};
        }
        const float* rawOutput = outputTensors[0].GetTensorData<float>();
        if (!rawOutput) {
            this->_logger.LogError("ANSONNXCL::postprocess",
                                   "rawOutput pointer is null", __FILE__, __LINE__);
            return {};
        }
        const std::vector<int64_t> outputShape =
            outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
        size_t numScores = vectorProduct(outputShape);

        std::ostringstream oss_shape;
        oss_shape << "Output tensor shape: [";
        for (size_t i = 0; i < outputShape.size(); ++i) {
            oss_shape << outputShape[i] << (i == outputShape.size() - 1 ? "" : ", ");
        }
        oss_shape << "]";
        DEBUG_PRINT(oss_shape.str());

        // CRITICAL: Check for NaN/Inf in (the first 100 values of) raw output.
        bool hasNaN = false;
        for (size_t i = 0; i < std::min(numScores, size_t(100)); ++i) {
            if (std::isnan(rawOutput[i]) || std::isinf(rawOutput[i])) {
                hasNaN = true;
                this->_logger.LogError("ANSONNXCL::postprocess",
                    "NaN/Inf detected in model output at index " + std::to_string(i) +
                    ", value: " + std::to_string(rawOutput[i]), __FILE__, __LINE__);
            }
        }
        if (hasNaN) {
            this->_logger.LogError("ANSONNXCL::postprocess",
                "Model produced NaN/Inf values - input may be corrupted or model is broken",
                __FILE__, __LINE__);
            return {};
        }

        int currentNumClasses = numClasses_ > 0 ? numClasses_
                                                : static_cast<int>(_classes.size());
        if (currentNumClasses <= 0) {
            this->_logger.LogError("ANSONNXCL::postprocess",
                                   "No valid number of classes", __FILE__, __LINE__);
            return {};
        }

        // Debug first few raw scores
        std::ostringstream oss_scores;
        oss_scores << "First few raw scores: ";
        for (size_t i = 0; i < std::min(size_t(5), numScores); ++i) {
            oss_scores << rawOutput[i] << " ";
        }
        DEBUG_PRINT(oss_scores.str());

        // Copy scores out of the tensor, tolerating shape variants.
        std::vector<float> scores(currentNumClasses, 0.0f);
        int validScores = 0;
        if (outputShape.size() == 2 && outputShape[0] == 1) {
            validScores = std::min(currentNumClasses, static_cast<int>(outputShape[1]));
            for (int i = 0; i < validScores; ++i) scores[i] = rawOutput[i];
        } else if (outputShape.size() == 1) {
            validScores = std::min(currentNumClasses, static_cast<int>(outputShape[0]));
            for (int i = 0; i < validScores; ++i) scores[i] = rawOutput[i];
        } else if (outputShape.size() == 2 && outputShape[0] > 1) {
            // Batched output: use the first row only.
            validScores = std::min(currentNumClasses, static_cast<int>(outputShape[1]));
            for (int i = 0; i < validScores; ++i) scores[i] = rawOutput[i];
        } else {
            this->_logger.LogError("ANSONNXCL::postprocess", "Unsupported output shape",
                                   __FILE__, __LINE__);
            return {};
        }

        // Find maximum score (top-1 class).
        int bestClassId = -1;
        float maxScore = -std::numeric_limits<float>::infinity();
        for (int i = 0; i < validScores; ++i) {
            if (scores[i] > maxScore) {
                maxScore = scores[i];
                bestClassId = i;
            }
        }
        if (bestClassId == -1) {
            this->_logger.LogError("ANSONNXCL::postprocess",
                                   "Could not determine best class ID",
                                   __FILE__, __LINE__);
            return {};
        }

        // Check if output is already a probability distribution (sums to ~1.0).
        // Some ONNX models include a Softmax layer in the graph; applying
        // softmax again would flatten the distribution and cause wrong results.
        float rawSum = 0.f;
        bool allNonNeg = true;
        for (int i = 0; i < validScores; ++i) {
            rawSum += scores[i];
            if (scores[i] < 0.f) allNonNeg = false;
        }
        const bool alreadyNormalized = (allNonNeg && rawSum > 0.9f && rawSum < 1.1f);

        std::vector<float> probabilities(currentNumClasses, 0.0f);
        float confidence = 0.0f;
        if (alreadyNormalized) {
            // Output is already softmax — use as-is
            for (int i = 0; i < validScores; ++i) probabilities[i] = scores[i];
            confidence = probabilities[bestClassId];
        } else {
            // Raw logits — apply softmax (max-subtracted for numeric stability)
            float sumExp = 0.0f;
            for (int i = 0; i < validScores; ++i) {
                probabilities[i] = std::exp(scores[i] - maxScore);
                sumExp += probabilities[i];
            }
            confidence = sumExp > 0 ? probabilities[bestClassId] / sumExp : 0.0f;
        }

        // CRITICAL: Validate final confidence
        if (std::isnan(confidence) || std::isinf(confidence)) {
            this->_logger.LogError("ANSONNXCL::postprocess",
                "Final confidence is NaN/Inf: " + std::to_string(confidence),
                __FILE__, __LINE__);
            return {};
        }

        std::string className = "Unknown";
        if (bestClassId >= 0 && static_cast<size_t>(bestClassId) < _classes.size()) {
            className = _classes[bestClassId];
        } else if (bestClassId >= 0) {
            className = "ClassID_" + std::to_string(bestClassId);
        }

        // Classification has no localisation: report a near-full-frame box.
        Object detection;
        if (m_imgWidth > 20 && m_imgHeight > 20) {
            detection.box = cv::Rect(10, 10, m_imgWidth - 20, m_imgHeight - 20);
        } else {
            detection.box = cv::Rect(0, 0, m_imgWidth, m_imgHeight);
        }
        detection.polygon = ANSUtilityHelper::RectToNormalizedPolygon(
            detection.box, m_imgWidth, m_imgHeight);
        detection.classId = bestClassId;
        detection.className = className;
        detection.confidence = confidence;
        detection.cameraId = camera_id;
        output.push_back(detection);
        return output;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::postprocess", e.what(), __FILE__, __LINE__);
        return {};
    }
}

/// Full pipeline: preprocess → ORT Run → postprocess.
/// Owns the preprocessing blob; it must stay alive until Run() returns because
/// CreateTensor wraps the pointer without copying.
std::vector<Object> ANSONNXCL::classify(const cv::Mat& image,
                                        const std::string& camera_id)
{
    // Recursive lock assumption — see Init(). TODO(review): confirm.
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    float* blobPtr = nullptr;  // declared outside try for cleanup on throw
    try {
        if (image.empty()) {
            this->_logger.LogError("ANSONNXCL::classify",
                                   "Input image for classification is empty",
                                   __FILE__, __LINE__);
            return {};
        }

        std::vector<int64_t> currentInputTensorShape;

        // Preprocess
        try {
            if (!preprocess(image, blobPtr, currentInputTensorShape)) {
                this->_logger.LogError("ANSONNXCL::classify",
                                       "Preprocessing returned false",
                                       __FILE__, __LINE__);
                if (blobPtr) { delete[] blobPtr; blobPtr = nullptr; }
                return {};
            }
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXCL::classify",
                "Exception during preprocessing: " + std::string(e.what()),
                __FILE__, __LINE__);
            if (blobPtr) { delete[] blobPtr; blobPtr = nullptr; }
            return {};
        }

        if (!blobPtr) {
            this->_logger.LogError("ANSONNXCL::classify",
                "Preprocessing failed to produce a valid data blob",
                __FILE__, __LINE__);
            return {};
        }

        size_t inputTensorSize = vectorProduct(currentInputTensorShape);
        if (inputTensorSize == 0) {
            this->_logger.LogError("ANSONNXCL::classify",
                "Input tensor size is zero after preprocessing",
                __FILE__, __LINE__);
            delete[] blobPtr;
            return {};
        }

        // CRITICAL: Spot-check blob data before creating the tensor.
        bool hasInvalidData = false;
        for (size_t i = 0; i < std::min(inputTensorSize, size_t(100)); ++i) {
            if (std::isnan(blobPtr[i]) || std::isinf(blobPtr[i])) {
                this->_logger.LogError("ANSONNXCL::classify",
                    "Blob contains NaN/Inf at index " + std::to_string(i) +
                    ", value: " + std::to_string(blobPtr[i]), __FILE__, __LINE__);
                hasInvalidData = true;
                break;
            }
        }
        if (hasInvalidData) {
            delete[] blobPtr;
            return {};
        }

        // Create input tensor (this does NOT copy data, just wraps the pointer!)
        Ort::MemoryInfo memoryInfo =
            Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            memoryInfo, blobPtr, inputTensorSize,
            currentInputTensorShape.data(), currentInputTensorShape.size());

        // Run inference - blob must still be valid here!
        std::vector<Ort::Value> outputTensors;
        try {
            // Fix: counts must match the name vectors (one name each was
            // registered in Init); numInputNodes_/numOutputNodes_ could exceed
            // them for multi-IO models and read out of bounds.
            outputTensors = session_.Run(
                Ort::RunOptions{ nullptr },
                inputNames_.data(), &inputTensor, inputNames_.size(),
                outputNames_.data(), outputNames_.size());
        }
        catch (const Ort::Exception& e) {
            this->_logger.LogError("ANSONNXCL::classify",
                "ONNX Runtime Exception during Run(): " + std::string(e.what()),
                __FILE__, __LINE__);
            delete[] blobPtr;  // Clean up on error
            return {};
        }

        // CRITICAL: NOW it's safe to delete the blob (after inference completes)
        delete[] blobPtr;
        blobPtr = nullptr;

        if (outputTensors.empty()) {
            this->_logger.LogError("ANSONNXCL::classify",
                "ONNX Runtime Run() produced no output tensors",
                __FILE__, __LINE__);
            return {};
        }

        // Postprocess
        try {
            return postprocess(outputTensors, camera_id);
        }
        catch (const std::exception& e) {
            this->_logger.LogError("ANSONNXCL::classify",
                "Exception during postprocessing: " + std::string(e.what()),
                __FILE__, __LINE__);
            return {};
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::classify", e.what(), __FILE__, __LINE__);
        if (blobPtr) { delete[] blobPtr; blobPtr = nullptr; }
        return {};
    }
}

/// Delegates model optimisation to the base class.
bool ANSONNXCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder)
{
    if (!ANSODBase::OptimizeModel(fp16, optimizedModelFolder)) {
        return false;
    }
    return true;
}

/// Validate the license, unpack the model zip, load class labels and create
/// the ORT session.  On success @p labelMap receives the comma-separated
/// class names.
bool ANSONNXCL::Initialize(std::string licenseKey, ModelConfig modelConfig,
                           const std::string& modelZipFilePath,
                           const std::string& modelZipPassword,
                           std::string& labelMap)
{
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        _modelLoadValid = false;
        bool result = ANSODBase::Initialize(licenseKey, modelConfig,
                                            modelZipFilePath, modelZipPassword,
                                            labelMap);
        if (!result) return false;

        // Force classification defaults; clamp out-of-range thresholds.
        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        _modelConfig.modelType = ModelType::ONNXCL;
        _modelConfig.inpHeight = 224;
        _modelConfig.inpWidth = 224;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = (modelConfig.precisionType == PrecisionType::FP16);

        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile,
                                                              modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        } else { // This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from string",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            } else {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from file",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }

        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);

        // 2. Initialize ONNX Runtime session
        cv::Size targetInputShape = cv::Size(_modelConfig.inpWidth,
                                             _modelConfig.inpHeight);
        result = Init(_modelFilePath, targetInputShape, true);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::Initialize", e.what(), __FILE__, __LINE__);
        return false;
    }
}

/// Load a model zip without a license check path; mirrors Initialize() for the
/// legacy entry point.
bool ANSONNXCL::LoadModel(const std::string& modelZipFilePath,
                          const std::string& modelZipPassword)
{
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        bool result = ANSODBase::LoadModel(modelZipFilePath, modelZipPassword);
        if (!result) return false;

        _modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        // NOTE(review): modelType left as TENSORRT as in the original; Initialize()
        // uses ONNXCL — confirm which is intended for this path.
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 224;
        _modelConfig.inpWidth = 224;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;

        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile,
                                                              modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        } else { // This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, "train_last.onnx");
            _classFilePath = CreateFilePath(_modelFolder, "classes.names");
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from string",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            } else {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from file",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }

        // Initialize ONNX Runtime session
        cv::Size targetInputShape = cv::Size(_modelConfig.inpWidth,
                                             _modelConfig.inpHeight);
        result = Init(_modelFilePath, targetInputShape, true);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::LoadModel", e.what(), __FILE__, __LINE__);
        return false;
    }
}

/// Load a model directly from an extracted folder, optionally overriding the
/// model and class-file names.
bool ANSONNXCL::LoadModelFromFolder(std::string licenseKey, ModelConfig modelConfig,
                                    std::string modelName, std::string className,
                                    const std::string& modelFolder,
                                    std::string& labelMap)
{
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    try {
        bool result = ANSODBase::LoadModelFromFolder(licenseKey, modelConfig,
                                                     modelName, className,
                                                     modelFolder, labelMap);
        if (!result) return false;

        std::string _modelName = modelName;
        if (_modelName.empty()) {
            _modelName = "train_last";
        }
        std::string modelFullName = _modelName + ".onnx";

        _modelConfig = modelConfig;
        _modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;
        _modelConfig.modelType = ModelType::TENSORRT;
        _modelConfig.inpHeight = 224;
        _modelConfig.inpWidth = 224;
        if (_modelConfig.modelMNSThreshold < 0.2) _modelConfig.modelMNSThreshold = 0.5;
        if (_modelConfig.modelConfThreshold < 0.2) _modelConfig.modelConfThreshold = 0.5;
        if (_modelConfig.numKPS <= 0 || _modelConfig.numKPS > 133) // 133 = COCO wholebody max
            _modelConfig.numKPS = 17;
        if (_modelConfig.kpsThreshold == 0) _modelConfig.kpsThreshold = 0.5; // If not define
        _fp16 = true;

        // 0. Check if the configuration file exist
        if (FileExist(_modelConfigFile)) {
            ModelType modelType;
            std::vector<int> inputShape;
            _classes = ANSUtilityHelper::GetConfigFileContent(_modelConfigFile,
                                                              modelType, inputShape);
            if (inputShape.size() == 2) {
                if (inputShape[0] > 0) _modelConfig.inpHeight = inputShape[0];
                if (inputShape[1] > 0) _modelConfig.inpWidth = inputShape[1];
            }
        } else { // This is old version of model zip file
            _modelFilePath = CreateFilePath(_modelFolder, modelFullName);
            _classFilePath = CreateFilePath(_modelFolder, className);
            std::ifstream isValidFileName(_classFilePath);
            if (!isValidFileName) {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from string",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromString();
            } else {
                this->_logger.LogDebug("ANSONNXCL::Initialize. Load classes from file",
                                       _classFilePath, __FILE__, __LINE__);
                LoadClassesFromFile();
            }
        }

        // 1. Load labelMap and engine
        labelMap.clear();
        if (!_classes.empty()) labelMap = VectorToCommaSeparatedString(_classes);

        // 2. Initialize ONNX Runtime session
        cv::Size targetInputShape = cv::Size(_modelConfig.inpWidth,
                                             _modelConfig.inpHeight);
        result = Init(_modelFilePath, targetInputShape, true);
        _modelLoadValid = true;
        _isInitialized = true;
        return result;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::LoadModelFromFolder", e.what(),
                               __FILE__, __LINE__);
        return false;
    }
}

/// No explicit resources to release; ORT members clean up via RAII.
bool ANSONNXCL::Destroy()
{
    return true;
}

ANSONNXCL::~ANSONNXCL()
{
    Destroy();
}

/// Public inference entry point: validates model/license/init state and image
/// size, then runs classification.  Returns an empty vector on any failure.
std::vector<Object> ANSONNXCL::RunInference(const cv::Mat& input,
                                            const std::string& camera_id)
{
    std::lock_guard<decltype(_mutex)> lock(_mutex);
    if (!_modelLoadValid) {
        this->_logger.LogFatal("ANSONNXCL::RunInference",
            "Cannot load the TensorRT model. Please check if it is exist",
            __FILE__, __LINE__);
        return {};
    }
    if (!_licenseValid) {
        this->_logger.LogFatal("ANSONNXCL::RunInference",
            "Runtime license is not valid or expired. Please contact ANSCENTER",
            __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogFatal("ANSONNXCL::RunInference",
                               "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    try {
        std::vector<Object> result;
        if (input.empty()) return result;
        // Tiny frames cannot be classified meaningfully.
        if ((input.cols < 5) || (input.rows < 5)) return result;
        return classify(input, camera_id);
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXCL::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}

/// Overload without a camera id: uses the default "CustomCam" identifier.
std::vector<Object> ANSONNXCL::RunInference(const cv::Mat& inputImgBGR)
{
    return RunInference(inputImgBGR, "CustomCam");
}

} // end of namespace ANSCENTER