Enable log information. Disable NPU in U9

This commit is contained in:
2026-04-21 15:48:27 +10:00
parent 00f6e2f852
commit 97d814936d
18 changed files with 301 additions and 54 deletions

View File

@@ -13,7 +13,18 @@
"Bash(powershell.exe -NoProfile -Command \"[System.Environment]::GetEnvironmentVariable\\('PATH','Machine'\\) -split ';' | Select-String -Pattern 'ANSCENTER|Shared'\")", "Bash(powershell.exe -NoProfile -Command \"[System.Environment]::GetEnvironmentVariable\\('PATH','Machine'\\) -split ';' | Select-String -Pattern 'ANSCENTER|Shared'\")",
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" 2>&1 | findstr /i \"junction symlink\"')", "Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" 2>&1 | findstr /i \"junction symlink\"')",
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\"')", "Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\"')",
"PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)" "PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)",
"Bash(awk '{print \"start: \"$2\"s\"}')",
"Bash(awk '{print \"end: \"$2\"s\"}')",
"Bash(awk '{ *)",
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"count=%d\\\\nmedian=%.1fms\\\\np90=%.1fms\\\\np95=%.1fms\\\\np99=%.1fms\\\\nmax=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[int\\(n*0.9\\)], v[int\\(n*0.95\\)], v[int\\(n*0.99\\)], v[n]}')",
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"slow_inf_count=%d \\(over %d total inferences = %.1f%%\\)\\\\nmedian=%.1fms max=%.1fms\\\\n\", n, 10456, 100.0*n/10456, v[int\\(n*0.5\\)], v[n]}')",
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); if\\(n>0\\){printf \"slow_getImage_count=%d median=%.1fms max=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[n]}}')",
"Bash(awk -F= '{print $2}')",
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short engines/ONNXEngine/ONNXEngine.cpp)",
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" diff engines/ONNXEngine/ONNXEngine.cpp)",
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short)",
"Bash(grep -E \"\\\\.\\(cpp|h\\)$\")"
] ]
} }
} }

View File

@@ -4,6 +4,7 @@
#include "lock.h" #include "lock.h"
#include "media_codec.h" #include "media_codec.h"
#include "media_parse.h" #include "media_parse.h"
#include <atomic>
#include <memory> #include <memory>
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW) #include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
@@ -14,6 +15,16 @@ extern "C" {
#include "libavutil/mem.h" #include "libavutil/mem.h"
} }
// ---------------------------------------------------------------------------
// Leak diagnostics — exported counters for media allocation balance.
// Incremented in allocation sites, decremented in free paths. If (alloc -
// free) climbs monotonically over time, the allocator is leaking.
// Read by the MEDIA_Leak heartbeat in video_player.cpp (every 60 s).
// ---------------------------------------------------------------------------
std::atomic<int64_t> g_contiguousAllocs{0};
std::atomic<int64_t> g_contiguousFrees{0};
std::atomic<int64_t> g_contiguousBytesInFlight{0}; // sum(total) of unfreed buffers
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Contiguous YUV420P allocator — trims per-call malloc overhead and enables // Contiguous YUV420P allocator — trims per-call malloc overhead and enables
// the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the // the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the
@@ -23,7 +34,20 @@ extern "C" {
// single-block layout still improves cache behaviour for the bulk memcpy.) // single-block layout still improves cache behaviour for the bulk memcpy.)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
namespace { namespace {
void anscore_contiguous_free(void* /*opaque*/, uint8_t* data) { // Opaque payload stored in AVBufferRef so the free callback can account
// for the exact byte count being returned (no global lookup needed).
struct ContiguousOpaque {
size_t bytes;
};
void anscore_contiguous_free(void* opaque, uint8_t* data) {
if (opaque) {
auto* o = static_cast<ContiguousOpaque*>(opaque);
g_contiguousBytesInFlight.fetch_sub(static_cast<int64_t>(o->bytes),
std::memory_order_relaxed);
delete o;
}
g_contiguousFrees.fetch_add(1, std::memory_order_relaxed);
av_free(data); av_free(data);
} }
} }
@@ -77,13 +101,24 @@ int CVideoDecoder::contiguousGetBuffer2(AVCodecContext* s, AVFrame* frame, int f
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
AVBufferRef* ref = av_buffer_create(buf, (int)total, auto* opaque = new (std::nothrow) ContiguousOpaque{total};
anscore_contiguous_free, nullptr, 0); if (!opaque) {
if (!ref) {
av_free(buf); av_free(buf);
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
AVBufferRef* ref = av_buffer_create(buf, (int)total,
anscore_contiguous_free, opaque, 0);
if (!ref) {
delete opaque;
av_free(buf);
return AVERROR(ENOMEM);
}
g_contiguousAllocs.fetch_add(1, std::memory_order_relaxed);
g_contiguousBytesInFlight.fetch_add(static_cast<int64_t>(total),
std::memory_order_relaxed);
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
frame->buf[i] = nullptr; frame->buf[i] = nullptr;
frame->data[i] = nullptr; frame->data[i] = nullptr;

View File

@@ -37,6 +37,22 @@ extern "C"
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW) #include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
// ---------------------------------------------------------------------------
// Leak diagnostics — definitions for counters declared extern in header.
// Also references counters defined in video_decoder.cpp so the heartbeat
// below can report media allocator balance in a single line.
// ---------------------------------------------------------------------------
std::atomic<int64_t> g_queueClones{0};
std::atomic<int64_t> g_queueFrees{0};
std::atomic<int64_t> g_nv12Clones{0};
std::atomic<int64_t> g_nv12Frees{0};
std::atomic<int64_t> g_cudaHWClones{0};
std::atomic<int64_t> g_cudaHWFrees{0};
extern std::atomic<int64_t> g_contiguousAllocs;
extern std::atomic<int64_t> g_contiguousFrees;
extern std::atomic<int64_t> g_contiguousBytesInFlight;
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input. // libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain // Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory // in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
@@ -1629,10 +1645,12 @@ void CVideoPlayer::close()
closeAudio(); closeAudio();
if (m_currentNV12Frame) { if (m_currentNV12Frame) {
av_frame_free(&m_currentNV12Frame); av_frame_free(&m_currentNV12Frame);
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
m_currentNV12Frame = nullptr; m_currentNV12Frame = nullptr;
} }
if (m_currentCudaHWFrame) { if (m_currentCudaHWFrame) {
av_frame_free(&m_currentCudaHWFrame); av_frame_free(&m_currentCudaHWFrame);
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
m_currentCudaHWFrame = nullptr; m_currentCudaHWFrame = nullptr;
} }
if (m_pSnapFrame) if (m_pSnapFrame)
@@ -2329,8 +2347,12 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
// and we can safely clone the CUDA frame without deadlock risk. // and we can safely clone the CUDA frame without deadlock risk.
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held. // cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) { if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame); if (m_currentCudaHWFrame) {
av_frame_free(&m_currentCudaHWFrame);
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
}
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked(); m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
if (m_currentCudaHWFrame) g_cudaHWClones.fetch_add(1, std::memory_order_relaxed);
} }
// Track how many clean frames have arrived since keyframe // Track how many clean frames have arrived since keyframe
@@ -2455,8 +2477,12 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
(frameToProcess->format == AV_PIX_FMT_NV12 || (frameToProcess->format == AV_PIX_FMT_NV12 ||
frameToProcess->format == AV_PIX_FMT_YUV420P || frameToProcess->format == AV_PIX_FMT_YUV420P ||
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) { frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame); if (m_currentNV12Frame) {
av_frame_free(&m_currentNV12Frame);
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
}
m_currentNV12Frame = av_frame_clone(frameToProcess); m_currentNV12Frame = av_frame_clone(frameToProcess);
if (m_currentNV12Frame) g_nv12Clones.fetch_add(1, std::memory_order_relaxed);
} }
width = m_currentImage.cols; width = m_currentImage.cols;
@@ -2466,6 +2492,49 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
} }
av_frame_free(&frameToProcess); av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
// Leak diagnostics — one heartbeat every 60 s across the whole process.
// Each counter pair (allocs, frees) should stay balanced. A monotonic
// rise in (allocs - frees) identifies the leaking pool. Bytes field
// covers the ~12 MB/frame contiguous YUV420P buffers specifically —
// watch for steady climb while the counters look balanced (refcount
// leak in a held clone would show that shape).
{
using clk = std::chrono::steady_clock;
static std::atomic<long long> s_nextLeakLogTick{0};
const long long tick = clk::now().time_since_epoch().count();
long long expected = s_nextLeakLogTick.load(std::memory_order_relaxed);
if (tick >= expected) {
const long long deadline = tick +
std::chrono::duration_cast<clk::duration>(
std::chrono::seconds(60)).count();
// Claim the next window — first writer wins so only one thread logs.
if (s_nextLeakLogTick.compare_exchange_strong(
expected, deadline, std::memory_order_relaxed)) {
const int64_t qA = g_queueClones.load(std::memory_order_relaxed);
const int64_t qF = g_queueFrees.load(std::memory_order_relaxed);
const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed);
const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed);
const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed);
const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed);
const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed);
const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed);
const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed);
ANS_DBG("MEDIA_Leak",
"queue(C=%lld F=%lld net=%lld depth=%zu) "
"nv12(C=%lld F=%lld net=%lld) "
"cudaHW(C=%lld F=%lld net=%lld) "
"contig(A=%lld F=%lld net=%lld bytesMB=%.1f)",
(long long)qA, (long long)qF, (long long)(qA - qF),
g_frameQueue.size(),
(long long)nvA, (long long)nvF, (long long)(nvA - nvF),
(long long)cuA, (long long)cuF, (long long)(cuA - cuF),
(long long)cgA, (long long)cgF, (long long)(cgA - cgF),
(double)cgB / (1024.0 * 1024.0));
}
}
}
// Emit timing breakdown. Throttled so DebugView / stderr stay readable. // Emit timing breakdown. Throttled so DebugView / stderr stay readable.
{ {
@@ -2540,11 +2609,13 @@ std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
catch (const std::exception& e) { catch (const std::exception& e) {
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl; std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
av_frame_free(&frameToProcess); av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
return m_lastJpegImage; return m_lastJpegImage;
} }
const auto t3 = clk::now(); const auto t3 = clk::now();
av_frame_free(&frameToProcess); av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
if (m_pts < INT64_MAX) { if (m_pts < INT64_MAX) {
m_pts++; m_pts++;

View File

@@ -15,8 +15,18 @@
#include <opencv2/highgui.hpp> #include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp> #include <opencv2/opencv.hpp>
#include <turbojpeg.h> #include <turbojpeg.h>
#include <atomic>
#include <chrono> #include <chrono>
// Leak diagnostics — net counters surfaced in MEDIA_Leak heartbeat.
// Defined in video_player.cpp; also incremented from FrameQueue here.
extern std::atomic<int64_t> g_queueClones; // av_frame_clone from FrameQueue
extern std::atomic<int64_t> g_queueFrees; // av_frame_free from FrameQueue
extern std::atomic<int64_t> g_nv12Clones; // m_currentNV12Frame = av_frame_clone
extern std::atomic<int64_t> g_nv12Frees; // av_frame_free(&m_currentNV12Frame)
extern std::atomic<int64_t> g_cudaHWClones; // m_currentCudaHWFrame = clone
extern std::atomic<int64_t> g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame)
typedef struct typedef struct
{ {
uint32 SyncTimestamp; uint32 SyncTimestamp;
@@ -46,6 +56,7 @@ public:
std::cerr << "Failed to clone AVFrame!" << std::endl; std::cerr << "Failed to clone AVFrame!" << std::endl;
return; return;
} }
g_queueClones.fetch_add(1, std::memory_order_relaxed);
frameQueue.push(frameCopy); frameQueue.push(frameCopy);
m_frameSeq++; // New frame arrived m_frameSeq++; // New frame arrived
@@ -55,6 +66,7 @@ public:
AVFrame* oldFrame = frameQueue.front(); AVFrame* oldFrame = frameQueue.front();
frameQueue.pop(); frameQueue.pop();
av_frame_free(&oldFrame); av_frame_free(&oldFrame);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
} }
} }
@@ -73,7 +85,15 @@ public:
} }
// Clone the latest frame before returning it // Clone the latest frame before returning it
return av_frame_clone(frameQueue.back()); AVFrame* clone = av_frame_clone(frameQueue.back());
if (clone) g_queueClones.fetch_add(1, std::memory_order_relaxed);
return clone;
}
// Current depth — snapshot used by the leak heartbeat.
size_t size() {
std::lock_guard<std::mutex> lock(queueMutex);
return frameQueue.size();
} }
// Retrieve and remove the oldest frame from the queue // Retrieve and remove the oldest frame from the queue
@@ -102,6 +122,7 @@ public:
AVFrame* frame = frameQueue.front(); AVFrame* frame = frameQueue.front();
frameQueue.pop(); frameQueue.pop();
av_frame_free(&frame); av_frame_free(&frame);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
} }
m_frameSeq = 0; m_frameSeq = 0;
} }

View File

@@ -1,8 +1,10 @@
#include "ONNXEngine.h" #include "ONNXEngine.h"
#include "EPLoader.h" #include "EPLoader.h"
#include "OpenVINODeviceConfig.h"
#include "Utility.h" #include "Utility.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <limits> #include <limits>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
@@ -318,8 +320,9 @@ namespace ANSCENTER {
std::vector<std::unordered_map<std::string, std::string>> try_configs; std::vector<std::unordered_map<std::string, std::string>> try_configs;
// Only try NPU if it hasn't been probed yet or was previously available // NPU is disabled by default — see OpenVINODeviceConfig.h. Opt in via
if (!s_npuProbed || s_npuAvailable) { // OPENVINO_ENABLE_NPU=1. Even when enabled, skip if a prior probe failed.
if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
try_configs.push_back(makeConfig("AUTO:NPU,GPU")); try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
} }
try_configs.push_back(makeConfig("GPU.0")); try_configs.push_back(makeConfig("GPU.0"));

View File

@@ -1,5 +1,6 @@
#include "ONNXSAM3.h" #include "ONNXSAM3.h"
#include "ONNXEngine.h" // OrtCompatiableGetInputName/OutputName helpers #include "ONNXEngine.h" // OrtCompatiableGetInputName/OutputName helpers
#include "OpenVINODeviceConfig.h"
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@@ -73,11 +74,13 @@ namespace ANSCENTER
bool ONNXSAM3::TryAppendOpenVINO(Ort::SessionOptions& session_options) bool ONNXSAM3::TryAppendOpenVINO(Ort::SessionOptions& session_options)
{ {
std::vector<std::unordered_map<std::string, std::string>> configs = { // NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
{{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}}, std::vector<std::unordered_map<std::string, std::string>> configs;
{{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}}, if (IsOpenVINONpuEnabled()) {
{{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}} configs.push_back({{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
}; }
configs.push_back({{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
configs.push_back({{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
for (const auto& config : configs) { for (const auto& config : configs) {
try { try {
session_options.AppendExecutionProvider_OpenVINO_V2(config); session_options.AppendExecutionProvider_OpenVINO_V2(config);

View File

@@ -0,0 +1,38 @@
#pragma once
// Shared runtime switch for enabling the Intel NPU in OpenVINO code paths.
//
// NPU is DISABLED BY DEFAULT because the NPU plugin on some Intel platforms
// (observed: Core Ultra 9 285K / Arrow Lake) crashes inside
// ov::Core::compile_model or Ort::Session construction when compiling
// multiple ONNX models in quick succession. That failure mode cannot be
// caught by the surrounding try/catch (it fires on a plugin worker thread)
// and takes down the host process.
//
// To opt into NPU (e.g. on a machine with a known-good NPU driver), set the
// environment variable OPENVINO_ENABLE_NPU to 1 / true / yes / on before
// launching the host process.
//
// Every OpenVINO device-selection site in this codebase consults this helper
// rather than probing NPU unconditionally.
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <string>
namespace ANSCENTER {
// Reports whether OpenVINO device selection is allowed to use the Intel NPU.
//
// The OPENVINO_ENABLE_NPU environment variable is read once, on the first
// call, and the answer is cached in a function-local static for the rest of
// the process; later changes to the environment are not observed.
//
// Returns true only when the value, ignoring ASCII case and any surrounding
// whitespace, is "1", "true", "yes" or "on". An unset, empty, blank or
// unrecognised value yields false (NPU stays disabled).
inline bool IsOpenVINONpuEnabled() {
    static const bool enabled = [] {
        const char* raw = std::getenv("OPENVINO_ENABLE_NPU");
        if (!raw || !*raw) return false;

        std::string value(raw);

        // Strip surrounding whitespace before matching. Shell lines such as
        // `set OPENVINO_ENABLE_NPU=1 && app.exe` store a trailing space in the
        // value, and env files with CRLF endings can leave a trailing '\r';
        // without trimming, those would silently leave the NPU disabled.
        const char* const kBlank = " \t\r\n\f\v";
        const std::size_t first = value.find_first_not_of(kBlank);
        if (first == std::string::npos) return false;  // whitespace only
        const std::size_t last = value.find_last_not_of(kBlank);
        value = value.substr(first, last - first + 1);

        // Go through unsigned char so std::tolower never sees a negative value.
        std::transform(value.begin(), value.end(), value.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        return value == "1" || value == "true" || value == "yes" || value == "on";
    }();
    return enabled;
}
}

View File

@@ -1,4 +1,5 @@
#include "ANSFR.h" #include "ANSFR.h"
#include "OpenVINODeviceConfig.h"
#include <opencv2/imgcodecs.hpp> #include <opencv2/imgcodecs.hpp>
#include "ANSOVFaceDetector.h" #include "ANSOVFaceDetector.h"
#include "SCRFDFaceDetector.h" #include "SCRFDFaceDetector.h"
@@ -2695,8 +2696,12 @@ namespace ANSCENTER {
for (const auto& d : available_devices) { for (const auto& d : available_devices) {
ANS_DBG("ANSFR", " OpenVINO device: %s", d.c_str()); ANS_DBG("ANSFR", " OpenVINO device: %s", d.c_str());
} }
// Prioritize devices: NPU > GPU > CPU // Prioritize devices: NPU > GPU > CPU. NPU gated behind runtime switch
std::vector<std::string> priority_devices = { "NPU","GPU","CPU" }; // (OPENVINO_ENABLE_NPU=1) — see OpenVINODeviceConfig.h.
std::vector<std::string> priority_devices;
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
priority_devices.push_back("GPU");
priority_devices.push_back("CPU");
for (const auto& device : priority_devices) { for (const auto& device : priority_devices) {
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) { if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
ANS_DBG("ANSFR", "GetOpenVINODevice: selected %s", device.c_str()); ANS_DBG("ANSFR", "GetOpenVINODevice: selected %s", device.c_str());

View File

@@ -1,4 +1,5 @@
#include "ANSLPR_CPU.h" #include "ANSLPR_CPU.h"
#include "OpenVINODeviceConfig.h"
#include "ANSYOLOV10OVOD.h" #include "ANSYOLOV10OVOD.h"
#include "ANSOPENVINOOD.h" #include "ANSOPENVINOOD.h"
#include "ANSTENSORRTOD.h" #include "ANSTENSORRTOD.h"
@@ -119,8 +120,10 @@ namespace ANSCENTER {
std::vector<std::string> available_devices = _core.get_available_devices(); std::vector<std::string> available_devices = _core.get_available_devices();
bool device_found = false; bool device_found = false;
std::string deviceName = "CPU"; std::string deviceName = "CPU";
// Search for NPU // Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU"); auto it = IsOpenVINONpuEnabled()
? std::find(available_devices.begin(), available_devices.end(), "NPU")
: available_devices.end();
if (it != available_devices.end()) { if (it != available_devices.end()) {
_core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); _core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
_core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); _core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));

View File

@@ -1,4 +1,5 @@
#include "BYTETracker.h" #include "BYTETracker.h"
#include "ANSLicense.h" // ANS_DBG for tracker-state-size diagnostic
#include <algorithm> #include <algorithm>
#include <cstddef> #include <cstddef>
#include <limits> #include <limits>
@@ -322,6 +323,24 @@ std::vector<ByteTrack::BYTETracker::STrackPtr> ByteTrack::BYTETracker::update(co
tracked_stracks_ = tracked_stracks_out; tracked_stracks_ = tracked_stracks_out;
lost_stracks_ = lost_stracks_out; lost_stracks_ = lost_stracks_out;
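// Diagnostic: report tracker state size at most once every 60 s per thread
// (the throttle below is a thread_local static, so every tracker instance
// updated on the same thread shares one 60 s window).
// removed_stracks_ is append-only in this implementation — watch it grow.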
{
static thread_local std::chrono::steady_clock::time_point s_nextLog{};
auto now = std::chrono::steady_clock::now();
if (now >= s_nextLog) {
s_nextLog = now + std::chrono::seconds(60);
ANS_DBG("ANSMOT",
"BYTETracker state this=%p frame=%zu nextId=%zu tracked=%zu lost=%zu removed=%zu",
(void*)this,
frame_id_,
track_id_count_,
tracked_stracks_.size(),
lost_stracks_.size(),
removed_stracks_.size());
}
}
std::vector<STrackPtr> output_stracks; std::vector<STrackPtr> output_stracks;
for (const auto &track : tracked_stracks_) for (const auto &track : tracked_stracks_)
{ {

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include "ANSODEngine.h" #include "ANSODEngine.h"
#include "OpenVINODeviceConfig.h"
#include "ANSYOLOOD.h" #include "ANSYOLOOD.h"
#include "ANSTENSORRTOD.h" #include "ANSTENSORRTOD.h"
#include "ANSTENSORRTCL.h" #include "ANSTENSORRTCL.h"
@@ -333,8 +334,10 @@ namespace ANSCENTER
std::vector<std::string> available_devices = core.get_available_devices(); std::vector<std::string> available_devices = core.get_available_devices();
bool device_found = false; bool device_found = false;
std::string deviceName = "CPU"; std::string deviceName = "CPU";
// Search for NPU // Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU"); auto it = IsOpenVINONpuEnabled()
? std::find(available_devices.begin(), available_devices.end(), "NPU")
: available_devices.end();
if (it != available_devices.end()) { if (it != available_devices.end()) {
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
@@ -1414,7 +1417,7 @@ namespace ANSCENTER
}; };
std::vector<std::unordered_map<std::string, std::string>> try_configs; std::vector<std::unordered_map<std::string, std::string>> try_configs;
if (!s_npuProbed || s_npuAvailable) { if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
try_configs.push_back(makeConfig("AUTO:NPU,GPU")); try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
} }
try_configs.push_back(makeConfig("GPU.0")); try_configs.push_back(makeConfig("GPU.0"));

View File

@@ -4,6 +4,7 @@
#include <json.hpp> #include <json.hpp>
#include "ANSODEngine.h" #include "ANSODEngine.h"
#include "ANSLicense.h" // ANS_DBG macro #include "ANSLicense.h" // ANS_DBG macro
#include "OpenVINODeviceConfig.h"
#include "ANSYOLOOD.h" #include "ANSYOLOOD.h"
#include "ANSTENSORRTOD.h" #include "ANSTENSORRTOD.h"
#include "ANSTENSORRTCL.h" #include "ANSTENSORRTCL.h"
@@ -354,8 +355,10 @@ namespace ANSCENTER
std::vector<std::string> available_devices = core.get_available_devices(); std::vector<std::string> available_devices = core.get_available_devices();
bool device_found = false; bool device_found = false;
std::string deviceName = "CPU"; std::string deviceName = "CPU";
// Search for NPU // Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU"); auto it = IsOpenVINONpuEnabled()
? std::find(available_devices.begin(), available_devices.end(), "NPU")
: available_devices.end();
if (it != available_devices.end()) { if (it != available_devices.end()) {
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));

View File

@@ -1,5 +1,6 @@
#include"ANSONNXCL.h" #include"ANSONNXCL.h"
#include "EPLoader.h" #include "EPLoader.h"
#include "OpenVINODeviceConfig.h"
namespace ANSCENTER namespace ANSCENTER
{ {
@@ -143,20 +144,26 @@ namespace ANSCENTER
const std::string numberOfThreads = "1"; const std::string numberOfThreads = "1";
const std::string numberOfStreams = "1"; const std::string numberOfStreams = "1";
std::vector<std::unordered_map<std::string, std::string>> try_configs = { std::vector<std::unordered_map<std::string, std::string>> try_configs;
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
if (IsOpenVINONpuEnabled()) {
try_configs.push_back(
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision}, { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
}
try_configs.push_back(
{ {"device_type","GPU.0"}, {"precision",precision}, { {"device_type","GPU.0"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
try_configs.push_back(
{ {"device_type","GPU.1"}, {"precision",precision}, { {"device_type","GPU.1"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
try_configs.push_back(
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision}, { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} } {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
};
for (const auto& config : try_configs) { for (const auto& config : try_configs) {
try { try {

View File

@@ -1,5 +1,6 @@
#include "ANSOPENVINOCL.h" #include "ANSOPENVINOCL.h"
#include "Utility.h" #include "Utility.h"
#include "OpenVINODeviceConfig.h"
namespace ANSCENTER namespace ANSCENTER
{ {
bool OPENVINOCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder) { bool OPENVINOCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
@@ -369,8 +370,10 @@ namespace ANSCENTER
std::vector<std::string> available_devices = core.get_available_devices(); std::vector<std::string> available_devices = core.get_available_devices();
bool device_found = false; bool device_found = false;
// Search for NPU // Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU"); auto it = IsOpenVINONpuEnabled()
? std::find(available_devices.begin(), available_devices.end(), "NPU")
: available_devices.end();
if (it != available_devices.end()) { if (it != available_devices.end()) {
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));

View File

@@ -1,5 +1,6 @@
#include "ANSOPENVINOOD.h" #include "ANSOPENVINOOD.h"
#include "Utility.h" #include "Utility.h"
#include "OpenVINODeviceConfig.h"
namespace ANSCENTER namespace ANSCENTER
{ {
bool OPENVINOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) { bool OPENVINOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
@@ -437,8 +438,11 @@ namespace ANSCENTER
ov::Core core; ov::Core core;
// Step 2: Get Available Devices and Log // Step 2: Get Available Devices and Log
std::vector<std::string> available_devices = core.get_available_devices(); std::vector<std::string> available_devices = core.get_available_devices();
// Define device priority: NPU > GPU > CPU // Define device priority: NPU > GPU > CPU. NPU gated by
std::vector<std::string> priority_devices = { "NPU", "GPU" }; // OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
std::vector<std::string> priority_devices;
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
priority_devices.push_back("GPU");
bool device_found = false; bool device_found = false;
// Iterate over prioritized devices // Iterate over prioritized devices

View File

@@ -1,4 +1,5 @@
#include "ANSOVSEG.h" #include "ANSOVSEG.h"
#include "OpenVINODeviceConfig.h"
namespace ANSCENTER { namespace ANSCENTER {
bool ANSOVSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) { bool ANSOVSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
std::lock_guard<std::recursive_mutex> lock(_mutex); std::lock_guard<std::recursive_mutex> lock(_mutex);
@@ -493,8 +494,11 @@ namespace ANSCENTER {
ov::Core core; ov::Core core;
// Step 2: Get Available Devices and Log // Step 2: Get Available Devices and Log
std::vector<std::string> available_devices = core.get_available_devices(); std::vector<std::string> available_devices = core.get_available_devices();
// Define device priority: NPU > GPU > CPU // Define device priority: NPU > GPU > CPU. NPU gated by
std::vector<std::string> priority_devices = { "NPU", "GPU" }; // OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
std::vector<std::string> priority_devices;
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
priority_devices.push_back("GPU");
bool device_found = false; bool device_found = false;
// Iterate over prioritized devices // Iterate over prioritized devices

View File

@@ -1,5 +1,6 @@
#include "ANSYOLO12OD.h" #include "ANSYOLO12OD.h"
#include "EPLoader.h" #include "EPLoader.h"
#include "OpenVINODeviceConfig.h"
#ifdef USEONNXOV #ifdef USEONNXOV
#endif #endif
@@ -365,20 +366,26 @@ namespace ANSCENTER {
const std::string numberOfThreads = "8"; const std::string numberOfThreads = "8";
const std::string numberOfStreams = "8"; const std::string numberOfStreams = "8";
std::vector<std::unordered_map<std::string, std::string>> try_configs = { std::vector<std::unordered_map<std::string, std::string>> try_configs;
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
if (IsOpenVINONpuEnabled()) {
try_configs.push_back(
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision}, { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
}
try_configs.push_back(
{ {"device_type","GPU.0"}, {"precision",precision}, { {"device_type","GPU.0"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
try_configs.push_back(
{ {"device_type","GPU.1"}, {"precision",precision}, { {"device_type","GPU.1"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
try_configs.push_back(
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision}, { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} } {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
};
for (const auto& config : try_configs) { for (const auto& config : try_configs) {
try { try {

View File

@@ -1,6 +1,7 @@
#include "ANSYOLOOD.h" #include "ANSYOLOOD.h"
#include "Utility.h" #include "Utility.h"
#include "EPLoader.h" #include "EPLoader.h"
#include "OpenVINODeviceConfig.h"
#include "ANSGpuFrameRegistry.h" #include "ANSGpuFrameRegistry.h"
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame() #include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
#ifdef USEONNXOV #ifdef USEONNXOV
@@ -303,20 +304,26 @@ namespace ANSCENTER
const std::string numberOfThreads = "8"; const std::string numberOfThreads = "8";
const std::string numberOfStreams = "8"; const std::string numberOfStreams = "8";
std::vector<std::unordered_map<std::string, std::string>> try_configs = { std::vector<std::unordered_map<std::string, std::string>> try_configs;
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
if (IsOpenVINONpuEnabled()) {
try_configs.push_back(
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision}, { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
}
try_configs.push_back(
{ {"device_type","GPU.0"}, {"precision",precision}, { {"device_type","GPU.0"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
try_configs.push_back(
{ {"device_type","GPU.1"}, {"precision",precision}, { {"device_type","GPU.1"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }, {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
try_configs.push_back(
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision}, { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams}, {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} } {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
};
for (const auto& config : try_configs) { for (const auto& config : try_configs) {
try { try {