Enable log information. Disable NPU in U9
This commit is contained in:
@@ -13,7 +13,18 @@
|
||||
"Bash(powershell.exe -NoProfile -Command \"[System.Environment]::GetEnvironmentVariable\\('PATH','Machine'\\) -split ';' | Select-String -Pattern 'ANSCENTER|Shared'\")",
|
||||
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" 2>&1 | findstr /i \"junction symlink\"')",
|
||||
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\"')",
|
||||
"PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)"
|
||||
"PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)",
|
||||
"Bash(awk '{print \"start: \"$2\"s\"}')",
|
||||
"Bash(awk '{print \"end: \"$2\"s\"}')",
|
||||
"Bash(awk '{ *)",
|
||||
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"count=%d\\\\nmedian=%.1fms\\\\np90=%.1fms\\\\np95=%.1fms\\\\np99=%.1fms\\\\nmax=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[int\\(n*0.9\\)], v[int\\(n*0.95\\)], v[int\\(n*0.99\\)], v[n]}')",
|
||||
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"slow_inf_count=%d \\(over %d total inferences = %.1f%%\\)\\\\nmedian=%.1fms max=%.1fms\\\\n\", n, 10456, 100.0*n/10456, v[int\\(n*0.5\\)], v[n]}')",
|
||||
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); if\\(n>0\\){printf \"slow_getImage_count=%d median=%.1fms max=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[n]}}')",
|
||||
"Bash(awk -F= '{print $2}')",
|
||||
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short engines/ONNXEngine/ONNXEngine.cpp)",
|
||||
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" diff engines/ONNXEngine/ONNXEngine.cpp)",
|
||||
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short)",
|
||||
"Bash(grep -E \"\\\\.\\(cpp|h\\)$\")"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "lock.h"
|
||||
#include "media_codec.h"
|
||||
#include "media_parse.h"
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||
@@ -14,6 +15,16 @@ extern "C" {
|
||||
#include "libavutil/mem.h"
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leak diagnostics — exported counters for media allocation balance.
|
||||
// Incremented in allocation sites, decremented in free paths. If (alloc -
|
||||
// free) climbs monotonically over time, the allocator is leaking.
|
||||
// Read by the MEDIA_Leak heartbeat in video_player.cpp (every 60 s).
|
||||
// ---------------------------------------------------------------------------
|
||||
std::atomic<int64_t> g_contiguousAllocs{0};
|
||||
std::atomic<int64_t> g_contiguousFrees{0};
|
||||
std::atomic<int64_t> g_contiguousBytesInFlight{0}; // sum(total) of unfreed buffers
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Contiguous YUV420P allocator — trims per-call malloc overhead and enables
|
||||
// the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the
|
||||
@@ -23,7 +34,20 @@ extern "C" {
|
||||
// single-block layout still improves cache behaviour for the bulk memcpy.)
|
||||
// ---------------------------------------------------------------------------
|
||||
namespace {
|
||||
void anscore_contiguous_free(void* /*opaque*/, uint8_t* data) {
|
||||
// Opaque payload stored in AVBufferRef so the free callback can account
|
||||
// for the exact byte count being returned (no global lookup needed).
|
||||
struct ContiguousOpaque {
|
||||
size_t bytes;
|
||||
};
|
||||
|
||||
void anscore_contiguous_free(void* opaque, uint8_t* data) {
|
||||
if (opaque) {
|
||||
auto* o = static_cast<ContiguousOpaque*>(opaque);
|
||||
g_contiguousBytesInFlight.fetch_sub(static_cast<int64_t>(o->bytes),
|
||||
std::memory_order_relaxed);
|
||||
delete o;
|
||||
}
|
||||
g_contiguousFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
av_free(data);
|
||||
}
|
||||
}
|
||||
@@ -77,13 +101,24 @@ int CVideoDecoder::contiguousGetBuffer2(AVCodecContext* s, AVFrame* frame, int f
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
||||
anscore_contiguous_free, nullptr, 0);
|
||||
if (!ref) {
|
||||
auto* opaque = new (std::nothrow) ContiguousOpaque{total};
|
||||
if (!opaque) {
|
||||
av_free(buf);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
||||
anscore_contiguous_free, opaque, 0);
|
||||
if (!ref) {
|
||||
delete opaque;
|
||||
av_free(buf);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
g_contiguousAllocs.fetch_add(1, std::memory_order_relaxed);
|
||||
g_contiguousBytesInFlight.fetch_add(static_cast<int64_t>(total),
|
||||
std::memory_order_relaxed);
|
||||
|
||||
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
|
||||
frame->buf[i] = nullptr;
|
||||
frame->data[i] = nullptr;
|
||||
|
||||
@@ -37,6 +37,22 @@ extern "C"
|
||||
|
||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leak diagnostics — definitions for counters declared extern in header.
|
||||
// Also references counters defined in video_decoder.cpp so the heartbeat
|
||||
// below can report media allocator balance in a single line.
|
||||
// ---------------------------------------------------------------------------
|
||||
std::atomic<int64_t> g_queueClones{0};
|
||||
std::atomic<int64_t> g_queueFrees{0};
|
||||
std::atomic<int64_t> g_nv12Clones{0};
|
||||
std::atomic<int64_t> g_nv12Frees{0};
|
||||
std::atomic<int64_t> g_cudaHWClones{0};
|
||||
std::atomic<int64_t> g_cudaHWFrees{0};
|
||||
|
||||
extern std::atomic<int64_t> g_contiguousAllocs;
|
||||
extern std::atomic<int64_t> g_contiguousFrees;
|
||||
extern std::atomic<int64_t> g_contiguousBytesInFlight;
|
||||
|
||||
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
|
||||
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
|
||||
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
|
||||
@@ -1629,10 +1645,12 @@ void CVideoPlayer::close()
|
||||
closeAudio();
|
||||
if (m_currentNV12Frame) {
|
||||
av_frame_free(&m_currentNV12Frame);
|
||||
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||
m_currentNV12Frame = nullptr;
|
||||
}
|
||||
if (m_currentCudaHWFrame) {
|
||||
av_frame_free(&m_currentCudaHWFrame);
|
||||
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
m_currentCudaHWFrame = nullptr;
|
||||
}
|
||||
if (m_pSnapFrame)
|
||||
@@ -2329,8 +2347,12 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
|
||||
// and we can safely clone the CUDA frame without deadlock risk.
|
||||
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
|
||||
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
|
||||
if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame);
|
||||
if (m_currentCudaHWFrame) {
|
||||
av_frame_free(&m_currentCudaHWFrame);
|
||||
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
|
||||
if (m_currentCudaHWFrame) g_cudaHWClones.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Track how many clean frames have arrived since keyframe
|
||||
@@ -2455,8 +2477,12 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
||||
(frameToProcess->format == AV_PIX_FMT_NV12 ||
|
||||
frameToProcess->format == AV_PIX_FMT_YUV420P ||
|
||||
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
|
||||
if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
|
||||
if (m_currentNV12Frame) {
|
||||
av_frame_free(&m_currentNV12Frame);
|
||||
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_currentNV12Frame = av_frame_clone(frameToProcess);
|
||||
if (m_currentNV12Frame) g_nv12Clones.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
width = m_currentImage.cols;
|
||||
@@ -2466,6 +2492,49 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
||||
}
|
||||
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Leak diagnostics — one heartbeat every 60 s across the whole process.
|
||||
// Each counter pair (allocs, frees) should stay balanced. A monotonic
|
||||
// rise in (allocs - frees) identifies the leaking pool. Bytes field
|
||||
// covers the ~12 MB/frame contiguous YUV420P buffers specifically —
|
||||
// watch for steady climb while the counters look balanced (refcount
|
||||
// leak in a held clone would show that shape).
|
||||
{
|
||||
using clk = std::chrono::steady_clock;
|
||||
static std::atomic<long long> s_nextLeakLogTick{0};
|
||||
const long long tick = clk::now().time_since_epoch().count();
|
||||
long long expected = s_nextLeakLogTick.load(std::memory_order_relaxed);
|
||||
if (tick >= expected) {
|
||||
const long long deadline = tick +
|
||||
std::chrono::duration_cast<clk::duration>(
|
||||
std::chrono::seconds(60)).count();
|
||||
// Claim the next window — first writer wins so only one thread logs.
|
||||
if (s_nextLeakLogTick.compare_exchange_strong(
|
||||
expected, deadline, std::memory_order_relaxed)) {
|
||||
const int64_t qA = g_queueClones.load(std::memory_order_relaxed);
|
||||
const int64_t qF = g_queueFrees.load(std::memory_order_relaxed);
|
||||
const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed);
|
||||
const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed);
|
||||
const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed);
|
||||
const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed);
|
||||
const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed);
|
||||
const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed);
|
||||
const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed);
|
||||
ANS_DBG("MEDIA_Leak",
|
||||
"queue(C=%lld F=%lld net=%lld depth=%zu) "
|
||||
"nv12(C=%lld F=%lld net=%lld) "
|
||||
"cudaHW(C=%lld F=%lld net=%lld) "
|
||||
"contig(A=%lld F=%lld net=%lld bytesMB=%.1f)",
|
||||
(long long)qA, (long long)qF, (long long)(qA - qF),
|
||||
g_frameQueue.size(),
|
||||
(long long)nvA, (long long)nvF, (long long)(nvA - nvF),
|
||||
(long long)cuA, (long long)cuF, (long long)(cuA - cuF),
|
||||
(long long)cgA, (long long)cgF, (long long)(cgA - cgF),
|
||||
(double)cgB / (1024.0 * 1024.0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit timing breakdown. Throttled so DebugView / stderr stay readable.
|
||||
{
|
||||
@@ -2540,11 +2609,13 @@ std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
return m_lastJpegImage;
|
||||
}
|
||||
const auto t3 = clk::now();
|
||||
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (m_pts < INT64_MAX) {
|
||||
m_pts++;
|
||||
|
||||
@@ -15,8 +15,18 @@
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <turbojpeg.h>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
// Leak diagnostics — net counters surfaced in MEDIA_Leak heartbeat.
|
||||
// Defined in video_player.cpp; also incremented from FrameQueue here.
|
||||
extern std::atomic<int64_t> g_queueClones; // av_frame_clone from FrameQueue
|
||||
extern std::atomic<int64_t> g_queueFrees; // av_frame_free from FrameQueue
|
||||
extern std::atomic<int64_t> g_nv12Clones; // m_currentNV12Frame = av_frame_clone
|
||||
extern std::atomic<int64_t> g_nv12Frees; // av_frame_free(&m_currentNV12Frame)
|
||||
extern std::atomic<int64_t> g_cudaHWClones; // m_currentCudaHWFrame = clone
|
||||
extern std::atomic<int64_t> g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 SyncTimestamp;
|
||||
@@ -46,6 +56,7 @@ public:
|
||||
std::cerr << "Failed to clone AVFrame!" << std::endl;
|
||||
return;
|
||||
}
|
||||
g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
frameQueue.push(frameCopy);
|
||||
m_frameSeq++; // New frame arrived
|
||||
@@ -55,6 +66,7 @@ public:
|
||||
AVFrame* oldFrame = frameQueue.front();
|
||||
frameQueue.pop();
|
||||
av_frame_free(&oldFrame);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +85,15 @@ public:
|
||||
}
|
||||
|
||||
// Clone the latest frame before returning it
|
||||
return av_frame_clone(frameQueue.back());
|
||||
AVFrame* clone = av_frame_clone(frameQueue.back());
|
||||
if (clone) g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||
return clone;
|
||||
}
|
||||
|
||||
// Current depth — snapshot used by the leak heartbeat.
|
||||
size_t size() {
|
||||
std::lock_guard<std::mutex> lock(queueMutex);
|
||||
return frameQueue.size();
|
||||
}
|
||||
|
||||
// Retrieve and remove the oldest frame from the queue
|
||||
@@ -102,6 +122,7 @@ public:
|
||||
AVFrame* frame = frameQueue.front();
|
||||
frameQueue.pop();
|
||||
av_frame_free(&frame);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_frameSeq = 0;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
#include "ONNXEngine.h"
|
||||
#include "EPLoader.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include "Utility.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <limits>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
@@ -318,8 +320,9 @@ namespace ANSCENTER {
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
|
||||
// Only try NPU if it hasn't been probed yet or was previously available
|
||||
if (!s_npuProbed || s_npuAvailable) {
|
||||
// NPU is disabled by default — see OpenVINODeviceConfig.h. Opt in via
|
||||
// OPENVINO_ENABLE_NPU=1. Even when enabled, skip if a prior probe failed.
|
||||
if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
|
||||
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
||||
}
|
||||
try_configs.push_back(makeConfig("GPU.0"));
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "ONNXSAM3.h"
|
||||
#include "ONNXEngine.h" // OrtCompatiableGetInputName/OutputName helpers
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
@@ -73,11 +74,13 @@ namespace ANSCENTER
|
||||
|
||||
bool ONNXSAM3::TryAppendOpenVINO(Ort::SessionOptions& session_options)
|
||||
{
|
||||
std::vector<std::unordered_map<std::string, std::string>> configs = {
|
||||
{{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}},
|
||||
{{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}},
|
||||
{{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}}
|
||||
};
|
||||
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||
std::vector<std::unordered_map<std::string, std::string>> configs;
|
||||
if (IsOpenVINONpuEnabled()) {
|
||||
configs.push_back({{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||
}
|
||||
configs.push_back({{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||
configs.push_back({{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||
for (const auto& config : configs) {
|
||||
try {
|
||||
session_options.AppendExecutionProvider_OpenVINO_V2(config);
|
||||
|
||||
38
engines/ONNXEngine/OpenVINODeviceConfig.h
Normal file
38
engines/ONNXEngine/OpenVINODeviceConfig.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
// Shared runtime switch for enabling the Intel NPU in OpenVINO code paths.
|
||||
//
|
||||
// NPU is DISABLED BY DEFAULT because the NPU plugin on some Intel platforms
|
||||
// (observed: Core Ultra 9 285K / Arrow Lake) crashes inside
|
||||
// ov::Core::compile_model or Ort::Session construction when compiling
|
||||
// multiple ONNX models in quick succession. That failure mode cannot be
|
||||
// caught by the surrounding try/catch (it fires on a plugin worker thread)
|
||||
// and takes down the host process.
|
||||
//
|
||||
// To opt into NPU (e.g. on a machine with a known-good NPU driver), set the
|
||||
// environment variable OPENVINO_ENABLE_NPU to 1 / true / yes / on before
|
||||
// launching the host process.
|
||||
//
|
||||
// Every OpenVINO device-selection site in this codebase consults this helper
|
||||
// rather than probing NPU unconditionally.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
/// Reports whether OpenVINO device selection is allowed to use the Intel NPU.
///
/// The OPENVINO_ENABLE_NPU environment variable is read on the first call and
/// the answer is cached in a function-local static, so changes made to the
/// variable afterwards are not observed. The value is accepted when, after
/// stripping surrounding whitespace and lower-casing, it equals "1", "true",
/// "yes" or "on". An unset, empty, blank or unrecognised value leaves the NPU
/// disabled.
///
/// @return true when the variable holds one of the accepted opt-in values.
inline bool IsOpenVINONpuEnabled() {
    static const bool enabled = [] {
        const char* raw = std::getenv("OPENVINO_ENABLE_NPU");
        if (!raw || !*raw) return false;

        std::string value(raw);

        // Strip surrounding whitespace. Values set from a shell frequently
        // carry it (cmd.exe `set OPENVINO_ENABLE_NPU=1 && app.exe` stores
        // "1 ", scripts with CRLF endings leave a trailing '\r'), and an
        // exact comparison would silently keep the NPU disabled.
        const char* const kBlank = " \t\r\n\f\v";
        const std::string::size_type first = value.find_first_not_of(kBlank);
        if (first == std::string::npos) return false;  // whitespace only
        const std::string::size_type last = value.find_last_not_of(kBlank);
        value = value.substr(first, last - first + 1);

        // tolower takes the char as unsigned char to avoid undefined
        // behaviour on negative char values.
        std::transform(value.begin(), value.end(), value.begin(),
            [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        return value == "1" || value == "true" || value == "yes" || value == "on";
    }();
    return enabled;
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "ANSFR.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include "ANSOVFaceDetector.h"
|
||||
#include "SCRFDFaceDetector.h"
|
||||
@@ -2695,8 +2696,12 @@ namespace ANSCENTER {
|
||||
for (const auto& d : available_devices) {
|
||||
ANS_DBG("ANSFR", " OpenVINO device: %s", d.c_str());
|
||||
}
|
||||
// Prioritize devices: NPU > GPU > CPU
|
||||
std::vector<std::string> priority_devices = { "NPU","GPU","CPU" };
|
||||
// Prioritize devices: NPU > GPU > CPU. NPU gated behind runtime switch
|
||||
// (OPENVINO_ENABLE_NPU=1) — see OpenVINODeviceConfig.h.
|
||||
std::vector<std::string> priority_devices;
|
||||
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||
priority_devices.push_back("GPU");
|
||||
priority_devices.push_back("CPU");
|
||||
for (const auto& device : priority_devices) {
|
||||
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
|
||||
ANS_DBG("ANSFR", "GetOpenVINODevice: selected %s", device.c_str());
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "ANSLPR_CPU.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include "ANSYOLOV10OVOD.h"
|
||||
#include "ANSOPENVINOOD.h"
|
||||
#include "ANSTENSORRTOD.h"
|
||||
@@ -119,8 +120,10 @@ namespace ANSCENTER {
|
||||
std::vector<std::string> available_devices = _core.get_available_devices();
|
||||
bool device_found = false;
|
||||
std::string deviceName = "CPU";
|
||||
// Search for NPU
|
||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
||||
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||
auto it = IsOpenVINONpuEnabled()
|
||||
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||
: available_devices.end();
|
||||
if (it != available_devices.end()) {
|
||||
_core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
_core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "BYTETracker.h"
|
||||
#include "ANSLicense.h" // ANS_DBG for tracker-state-size diagnostic
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
@@ -322,6 +323,24 @@ std::vector<ByteTrack::BYTETracker::STrackPtr> ByteTrack::BYTETracker::update(co
|
||||
tracked_stracks_ = tracked_stracks_out;
|
||||
lost_stracks_ = lost_stracks_out;
|
||||
|
||||
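// Diagnostic: report tracker state size at most once every 60 s per thread.
// The throttle below is a thread_local static, so all BYTETracker instances
// updated on the same thread share one 60 s window (it is not per instance).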
|
||||
// removed_stracks_ is append-only in this implementation — watch it grow.
|
||||
{
|
||||
static thread_local std::chrono::steady_clock::time_point s_nextLog{};
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
if (now >= s_nextLog) {
|
||||
s_nextLog = now + std::chrono::seconds(60);
|
||||
ANS_DBG("ANSMOT",
|
||||
"BYTETracker state this=%p frame=%zu nextId=%zu tracked=%zu lost=%zu removed=%zu",
|
||||
(void*)this,
|
||||
frame_id_,
|
||||
track_id_count_,
|
||||
tracked_stracks_.size(),
|
||||
lost_stracks_.size(),
|
||||
removed_stracks_.size());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<STrackPtr> output_stracks;
|
||||
for (const auto &track : tracked_stracks_)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
#include "ANSODEngine.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include "ANSYOLOOD.h"
|
||||
#include "ANSTENSORRTOD.h"
|
||||
#include "ANSTENSORRTCL.h"
|
||||
@@ -333,8 +334,10 @@ namespace ANSCENTER
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
bool device_found = false;
|
||||
std::string deviceName = "CPU";
|
||||
// Search for NPU
|
||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
||||
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||
auto it = IsOpenVINONpuEnabled()
|
||||
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||
: available_devices.end();
|
||||
if (it != available_devices.end()) {
|
||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
@@ -1414,7 +1417,7 @@ namespace ANSCENTER
|
||||
};
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
if (!s_npuProbed || s_npuAvailable) {
|
||||
if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
|
||||
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
||||
}
|
||||
try_configs.push_back(makeConfig("GPU.0"));
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <json.hpp>
|
||||
#include "ANSODEngine.h"
|
||||
#include "ANSLicense.h" // ANS_DBG macro
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include "ANSYOLOOD.h"
|
||||
#include "ANSTENSORRTOD.h"
|
||||
#include "ANSTENSORRTCL.h"
|
||||
@@ -354,8 +355,10 @@ namespace ANSCENTER
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
bool device_found = false;
|
||||
std::string deviceName = "CPU";
|
||||
// Search for NPU
|
||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
||||
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||
auto it = IsOpenVINONpuEnabled()
|
||||
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||
: available_devices.end();
|
||||
if (it != available_devices.end()) {
|
||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include"ANSONNXCL.h"
|
||||
#include "EPLoader.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
namespace ANSCENTER
|
||||
{
|
||||
|
||||
@@ -143,20 +144,26 @@ namespace ANSCENTER
|
||||
const std::string numberOfThreads = "1";
|
||||
const std::string numberOfStreams = "1";
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||
if (IsOpenVINONpuEnabled()) {
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||
}
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }
|
||||
};
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||
|
||||
for (const auto& config : try_configs) {
|
||||
try {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "ANSOPENVINOCL.h"
|
||||
#include "Utility.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
namespace ANSCENTER
|
||||
{
|
||||
bool OPENVINOCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||
@@ -369,8 +370,10 @@ namespace ANSCENTER
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
bool device_found = false;
|
||||
|
||||
// Search for NPU
|
||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
||||
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||
auto it = IsOpenVINONpuEnabled()
|
||||
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||
: available_devices.end();
|
||||
if (it != available_devices.end()) {
|
||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "ANSOPENVINOOD.h"
|
||||
#include "Utility.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
namespace ANSCENTER
|
||||
{
|
||||
bool OPENVINOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||
@@ -437,8 +438,11 @@ namespace ANSCENTER
|
||||
ov::Core core;
|
||||
// Step 2: Get Available Devices and Log
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
// Define device priority: NPU > GPU > CPU
|
||||
std::vector<std::string> priority_devices = { "NPU", "GPU" };
|
||||
// Define device priority: NPU > GPU > CPU. NPU gated by
|
||||
// OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
|
||||
std::vector<std::string> priority_devices;
|
||||
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||
priority_devices.push_back("GPU");
|
||||
bool device_found = false;
|
||||
|
||||
// Iterate over prioritized devices
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "ANSOVSEG.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
namespace ANSCENTER {
|
||||
bool ANSOVSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
@@ -493,8 +494,11 @@ namespace ANSCENTER {
|
||||
ov::Core core;
|
||||
// Step 2: Get Available Devices and Log
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
// Define device priority: NPU > GPU > CPU
|
||||
std::vector<std::string> priority_devices = { "NPU", "GPU" };
|
||||
// Define device priority: NPU > GPU > CPU. NPU gated by
|
||||
// OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
|
||||
std::vector<std::string> priority_devices;
|
||||
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||
priority_devices.push_back("GPU");
|
||||
bool device_found = false;
|
||||
|
||||
// Iterate over prioritized devices
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "ANSYOLO12OD.h"
|
||||
#include "EPLoader.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#ifdef USEONNXOV
|
||||
#endif
|
||||
|
||||
@@ -365,20 +366,26 @@ namespace ANSCENTER {
|
||||
const std::string numberOfThreads = "8";
|
||||
const std::string numberOfStreams = "8";
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||
if (IsOpenVINONpuEnabled()) {
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
}
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
||||
};
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
|
||||
for (const auto& config : try_configs) {
|
||||
try {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "ANSYOLOOD.h"
|
||||
#include "Utility.h"
|
||||
#include "EPLoader.h"
|
||||
#include "OpenVINODeviceConfig.h"
|
||||
#include "ANSGpuFrameRegistry.h"
|
||||
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
||||
#ifdef USEONNXOV
|
||||
@@ -303,20 +304,26 @@ namespace ANSCENTER
|
||||
const std::string numberOfThreads = "8";
|
||||
const std::string numberOfStreams = "8";
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||
if (IsOpenVINONpuEnabled()) {
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
}
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
try_configs.push_back(
|
||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
||||
};
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||
|
||||
for (const auto& config : try_configs) {
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user