Enable log information. Disable NPU in U9
This commit is contained in:
@@ -13,7 +13,18 @@
|
|||||||
"Bash(powershell.exe -NoProfile -Command \"[System.Environment]::GetEnvironmentVariable\\('PATH','Machine'\\) -split ';' | Select-String -Pattern 'ANSCENTER|Shared'\")",
|
"Bash(powershell.exe -NoProfile -Command \"[System.Environment]::GetEnvironmentVariable\\('PATH','Machine'\\) -split ';' | Select-String -Pattern 'ANSCENTER|Shared'\")",
|
||||||
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" 2>&1 | findstr /i \"junction symlink\"')",
|
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" 2>&1 | findstr /i \"junction symlink\"')",
|
||||||
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\"')",
|
"Bash(cmd.exe //c 'dir /AL \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\"')",
|
||||||
"PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)"
|
"PowerShell(Get-ChildItem \"C:\\\\Program Files\\\\ANSCENTER\\\\ANSVIS\\\\data\" -Force | Where-Object { $_.LinkType } | Select-Object Name, LinkType, Target | Format-Table -AutoSize)",
|
||||||
|
"Bash(awk '{print \"start: \"$2\"s\"}')",
|
||||||
|
"Bash(awk '{print \"end: \"$2\"s\"}')",
|
||||||
|
"Bash(awk '{ *)",
|
||||||
|
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"count=%d\\\\nmedian=%.1fms\\\\np90=%.1fms\\\\np95=%.1fms\\\\np99=%.1fms\\\\nmax=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[int\\(n*0.9\\)], v[int\\(n*0.95\\)], v[int\\(n*0.99\\)], v[n]}')",
|
||||||
|
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); printf \"slow_inf_count=%d \\(over %d total inferences = %.1f%%\\)\\\\nmedian=%.1fms max=%.1fms\\\\n\", n, 10456, 100.0*n/10456, v[int\\(n*0.5\\)], v[n]}')",
|
||||||
|
"Bash(awk '{v[NR]=$1} END {asort\\(v\\); n=length\\(v\\); if\\(n>0\\){printf \"slow_getImage_count=%d median=%.1fms max=%.1fms\\\\n\", n, v[int\\(n*0.5\\)], v[n]}}')",
|
||||||
|
"Bash(awk -F= '{print $2}')",
|
||||||
|
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short engines/ONNXEngine/ONNXEngine.cpp)",
|
||||||
|
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" diff engines/ONNXEngine/ONNXEngine.cpp)",
|
||||||
|
"Bash(git -C \"C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\" status --short)",
|
||||||
|
"Bash(grep -E \"\\\\.\\(cpp|h\\)$\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include "lock.h"
|
#include "lock.h"
|
||||||
#include "media_codec.h"
|
#include "media_codec.h"
|
||||||
#include "media_parse.h"
|
#include "media_parse.h"
|
||||||
|
#include <atomic>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||||
@@ -14,6 +15,16 @@ extern "C" {
|
|||||||
#include "libavutil/mem.h"
|
#include "libavutil/mem.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Leak diagnostics — exported counters for media allocation balance.
|
||||||
|
// Incremented in allocation sites, decremented in free paths. If (alloc -
|
||||||
|
// free) climbs monotonically over time, the allocator is leaking.
|
||||||
|
// Read by the MEDIA_Leak heartbeat in video_player.cpp (every 60 s).
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
std::atomic<int64_t> g_contiguousAllocs{0};
|
||||||
|
std::atomic<int64_t> g_contiguousFrees{0};
|
||||||
|
std::atomic<int64_t> g_contiguousBytesInFlight{0}; // sum(total) of unfreed buffers
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Contiguous YUV420P allocator — trims per-call malloc overhead and enables
|
// Contiguous YUV420P allocator — trims per-call malloc overhead and enables
|
||||||
// the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the
|
// the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the
|
||||||
@@ -23,7 +34,20 @@ extern "C" {
|
|||||||
// single-block layout still improves cache behaviour for the bulk memcpy.)
|
// single-block layout still improves cache behaviour for the bulk memcpy.)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
namespace {
|
namespace {
|
||||||
void anscore_contiguous_free(void* /*opaque*/, uint8_t* data) {
|
// Opaque payload stored in AVBufferRef so the free callback can account
|
||||||
|
// for the exact byte count being returned (no global lookup needed).
|
||||||
|
struct ContiguousOpaque {
|
||||||
|
size_t bytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
void anscore_contiguous_free(void* opaque, uint8_t* data) {
|
||||||
|
if (opaque) {
|
||||||
|
auto* o = static_cast<ContiguousOpaque*>(opaque);
|
||||||
|
g_contiguousBytesInFlight.fetch_sub(static_cast<int64_t>(o->bytes),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
delete o;
|
||||||
|
}
|
||||||
|
g_contiguousFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
av_free(data);
|
av_free(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -77,13 +101,24 @@ int CVideoDecoder::contiguousGetBuffer2(AVCodecContext* s, AVFrame* frame, int f
|
|||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
auto* opaque = new (std::nothrow) ContiguousOpaque{total};
|
||||||
anscore_contiguous_free, nullptr, 0);
|
if (!opaque) {
|
||||||
if (!ref) {
|
|
||||||
av_free(buf);
|
av_free(buf);
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
||||||
|
anscore_contiguous_free, opaque, 0);
|
||||||
|
if (!ref) {
|
||||||
|
delete opaque;
|
||||||
|
av_free(buf);
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_contiguousAllocs.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
g_contiguousBytesInFlight.fetch_add(static_cast<int64_t>(total),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
|
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
|
||||||
frame->buf[i] = nullptr;
|
frame->buf[i] = nullptr;
|
||||||
frame->data[i] = nullptr;
|
frame->data[i] = nullptr;
|
||||||
|
|||||||
@@ -37,6 +37,22 @@ extern "C"
|
|||||||
|
|
||||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Leak diagnostics — definitions for counters declared extern in header.
|
||||||
|
// Also references counters defined in video_decoder.cpp so the heartbeat
|
||||||
|
// below can report media allocator balance in a single line.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
std::atomic<int64_t> g_queueClones{0};
|
||||||
|
std::atomic<int64_t> g_queueFrees{0};
|
||||||
|
std::atomic<int64_t> g_nv12Clones{0};
|
||||||
|
std::atomic<int64_t> g_nv12Frees{0};
|
||||||
|
std::atomic<int64_t> g_cudaHWClones{0};
|
||||||
|
std::atomic<int64_t> g_cudaHWFrees{0};
|
||||||
|
|
||||||
|
extern std::atomic<int64_t> g_contiguousAllocs;
|
||||||
|
extern std::atomic<int64_t> g_contiguousFrees;
|
||||||
|
extern std::atomic<int64_t> g_contiguousBytesInFlight;
|
||||||
|
|
||||||
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
|
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
|
||||||
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
|
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
|
||||||
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
|
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
|
||||||
@@ -1629,10 +1645,12 @@ void CVideoPlayer::close()
|
|||||||
closeAudio();
|
closeAudio();
|
||||||
if (m_currentNV12Frame) {
|
if (m_currentNV12Frame) {
|
||||||
av_frame_free(&m_currentNV12Frame);
|
av_frame_free(&m_currentNV12Frame);
|
||||||
|
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||||
m_currentNV12Frame = nullptr;
|
m_currentNV12Frame = nullptr;
|
||||||
}
|
}
|
||||||
if (m_currentCudaHWFrame) {
|
if (m_currentCudaHWFrame) {
|
||||||
av_frame_free(&m_currentCudaHWFrame);
|
av_frame_free(&m_currentCudaHWFrame);
|
||||||
|
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
m_currentCudaHWFrame = nullptr;
|
m_currentCudaHWFrame = nullptr;
|
||||||
}
|
}
|
||||||
if (m_pSnapFrame)
|
if (m_pSnapFrame)
|
||||||
@@ -2329,8 +2347,12 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
|
|||||||
// and we can safely clone the CUDA frame without deadlock risk.
|
// and we can safely clone the CUDA frame without deadlock risk.
|
||||||
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
|
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
|
||||||
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
|
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
|
||||||
if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame);
|
if (m_currentCudaHWFrame) {
|
||||||
|
av_frame_free(&m_currentCudaHWFrame);
|
||||||
|
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
}
|
||||||
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
|
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
|
||||||
|
if (m_currentCudaHWFrame) g_cudaHWClones.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track how many clean frames have arrived since keyframe
|
// Track how many clean frames have arrived since keyframe
|
||||||
@@ -2455,8 +2477,12 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
|||||||
(frameToProcess->format == AV_PIX_FMT_NV12 ||
|
(frameToProcess->format == AV_PIX_FMT_NV12 ||
|
||||||
frameToProcess->format == AV_PIX_FMT_YUV420P ||
|
frameToProcess->format == AV_PIX_FMT_YUV420P ||
|
||||||
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
|
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
|
||||||
if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
|
if (m_currentNV12Frame) {
|
||||||
|
av_frame_free(&m_currentNV12Frame);
|
||||||
|
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
}
|
||||||
m_currentNV12Frame = av_frame_clone(frameToProcess);
|
m_currentNV12Frame = av_frame_clone(frameToProcess);
|
||||||
|
if (m_currentNV12Frame) g_nv12Clones.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
width = m_currentImage.cols;
|
width = m_currentImage.cols;
|
||||||
@@ -2466,6 +2492,49 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
av_frame_free(&frameToProcess);
|
av_frame_free(&frameToProcess);
|
||||||
|
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
|
||||||
|
// Leak diagnostics — one heartbeat every 60 s across the whole process.
|
||||||
|
// Each counter pair (allocs, frees) should stay balanced. A monotonic
|
||||||
|
// rise in (allocs - frees) identifies the leaking pool. Bytes field
|
||||||
|
// covers the ~12 MB/frame contiguous YUV420P buffers specifically —
|
||||||
|
// watch for steady climb while the counters look balanced (refcount
|
||||||
|
// leak in a held clone would show that shape).
|
||||||
|
{
|
||||||
|
using clk = std::chrono::steady_clock;
|
||||||
|
static std::atomic<long long> s_nextLeakLogTick{0};
|
||||||
|
const long long tick = clk::now().time_since_epoch().count();
|
||||||
|
long long expected = s_nextLeakLogTick.load(std::memory_order_relaxed);
|
||||||
|
if (tick >= expected) {
|
||||||
|
const long long deadline = tick +
|
||||||
|
std::chrono::duration_cast<clk::duration>(
|
||||||
|
std::chrono::seconds(60)).count();
|
||||||
|
// Claim the next window — first writer wins so only one thread logs.
|
||||||
|
if (s_nextLeakLogTick.compare_exchange_strong(
|
||||||
|
expected, deadline, std::memory_order_relaxed)) {
|
||||||
|
const int64_t qA = g_queueClones.load(std::memory_order_relaxed);
|
||||||
|
const int64_t qF = g_queueFrees.load(std::memory_order_relaxed);
|
||||||
|
const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed);
|
||||||
|
const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed);
|
||||||
|
const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed);
|
||||||
|
const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed);
|
||||||
|
const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed);
|
||||||
|
const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed);
|
||||||
|
const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed);
|
||||||
|
ANS_DBG("MEDIA_Leak",
|
||||||
|
"queue(C=%lld F=%lld net=%lld depth=%zu) "
|
||||||
|
"nv12(C=%lld F=%lld net=%lld) "
|
||||||
|
"cudaHW(C=%lld F=%lld net=%lld) "
|
||||||
|
"contig(A=%lld F=%lld net=%lld bytesMB=%.1f)",
|
||||||
|
(long long)qA, (long long)qF, (long long)(qA - qF),
|
||||||
|
g_frameQueue.size(),
|
||||||
|
(long long)nvA, (long long)nvF, (long long)(nvA - nvF),
|
||||||
|
(long long)cuA, (long long)cuF, (long long)(cuA - cuF),
|
||||||
|
(long long)cgA, (long long)cgF, (long long)(cgA - cgF),
|
||||||
|
(double)cgB / (1024.0 * 1024.0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Emit timing breakdown. Throttled so DebugView / stderr stay readable.
|
// Emit timing breakdown. Throttled so DebugView / stderr stay readable.
|
||||||
{
|
{
|
||||||
@@ -2540,11 +2609,13 @@ std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
|
|||||||
catch (const std::exception& e) {
|
catch (const std::exception& e) {
|
||||||
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
|
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
|
||||||
av_frame_free(&frameToProcess);
|
av_frame_free(&frameToProcess);
|
||||||
|
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
return m_lastJpegImage;
|
return m_lastJpegImage;
|
||||||
}
|
}
|
||||||
const auto t3 = clk::now();
|
const auto t3 = clk::now();
|
||||||
|
|
||||||
av_frame_free(&frameToProcess);
|
av_frame_free(&frameToProcess);
|
||||||
|
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
|
||||||
if (m_pts < INT64_MAX) {
|
if (m_pts < INT64_MAX) {
|
||||||
m_pts++;
|
m_pts++;
|
||||||
|
|||||||
@@ -15,8 +15,18 @@
|
|||||||
#include <opencv2/highgui.hpp>
|
#include <opencv2/highgui.hpp>
|
||||||
#include <opencv2/opencv.hpp>
|
#include <opencv2/opencv.hpp>
|
||||||
#include <turbojpeg.h>
|
#include <turbojpeg.h>
|
||||||
|
#include <atomic>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
|
// Leak diagnostics — net counters surfaced in MEDIA_Leak heartbeat.
|
||||||
|
// Defined in video_player.cpp; also incremented from FrameQueue here.
|
||||||
|
extern std::atomic<int64_t> g_queueClones; // av_frame_clone from FrameQueue
|
||||||
|
extern std::atomic<int64_t> g_queueFrees; // av_frame_free from FrameQueue
|
||||||
|
extern std::atomic<int64_t> g_nv12Clones; // m_currentNV12Frame = av_frame_clone
|
||||||
|
extern std::atomic<int64_t> g_nv12Frees; // av_frame_free(&m_currentNV12Frame)
|
||||||
|
extern std::atomic<int64_t> g_cudaHWClones; // m_currentCudaHWFrame = clone
|
||||||
|
extern std::atomic<int64_t> g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame)
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint32 SyncTimestamp;
|
uint32 SyncTimestamp;
|
||||||
@@ -46,6 +56,7 @@ public:
|
|||||||
std::cerr << "Failed to clone AVFrame!" << std::endl;
|
std::cerr << "Failed to clone AVFrame!" << std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
|
||||||
frameQueue.push(frameCopy);
|
frameQueue.push(frameCopy);
|
||||||
m_frameSeq++; // New frame arrived
|
m_frameSeq++; // New frame arrived
|
||||||
@@ -55,6 +66,7 @@ public:
|
|||||||
AVFrame* oldFrame = frameQueue.front();
|
AVFrame* oldFrame = frameQueue.front();
|
||||||
frameQueue.pop();
|
frameQueue.pop();
|
||||||
av_frame_free(&oldFrame);
|
av_frame_free(&oldFrame);
|
||||||
|
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,7 +85,15 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Clone the latest frame before returning it
|
// Clone the latest frame before returning it
|
||||||
return av_frame_clone(frameQueue.back());
|
AVFrame* clone = av_frame_clone(frameQueue.back());
|
||||||
|
if (clone) g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current depth — snapshot used by the leak heartbeat.
|
||||||
|
size_t size() {
|
||||||
|
std::lock_guard<std::mutex> lock(queueMutex);
|
||||||
|
return frameQueue.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve and remove the oldest frame from the queue
|
// Retrieve and remove the oldest frame from the queue
|
||||||
@@ -102,6 +122,7 @@ public:
|
|||||||
AVFrame* frame = frameQueue.front();
|
AVFrame* frame = frameQueue.front();
|
||||||
frameQueue.pop();
|
frameQueue.pop();
|
||||||
av_frame_free(&frame);
|
av_frame_free(&frame);
|
||||||
|
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
m_frameSeq = 0;
|
m_frameSeq = 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
#include "ONNXEngine.h"
|
#include "ONNXEngine.h"
|
||||||
#include "EPLoader.h"
|
#include "EPLoader.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@@ -318,8 +320,9 @@ namespace ANSCENTER {
|
|||||||
|
|
||||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||||
|
|
||||||
// Only try NPU if it hasn't been probed yet or was previously available
|
// NPU is disabled by default — see OpenVINODeviceConfig.h. Opt in via
|
||||||
if (!s_npuProbed || s_npuAvailable) {
|
// OPENVINO_ENABLE_NPU=1. Even when enabled, skip if a prior probe failed.
|
||||||
|
if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
|
||||||
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
||||||
}
|
}
|
||||||
try_configs.push_back(makeConfig("GPU.0"));
|
try_configs.push_back(makeConfig("GPU.0"));
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "ONNXSAM3.h"
|
#include "ONNXSAM3.h"
|
||||||
#include "ONNXEngine.h" // OrtCompatiableGetInputName/OutputName helpers
|
#include "ONNXEngine.h" // OrtCompatiableGetInputName/OutputName helpers
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@@ -73,11 +74,13 @@ namespace ANSCENTER
|
|||||||
|
|
||||||
bool ONNXSAM3::TryAppendOpenVINO(Ort::SessionOptions& session_options)
|
bool ONNXSAM3::TryAppendOpenVINO(Ort::SessionOptions& session_options)
|
||||||
{
|
{
|
||||||
std::vector<std::unordered_map<std::string, std::string>> configs = {
|
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||||
{{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}},
|
std::vector<std::unordered_map<std::string, std::string>> configs;
|
||||||
{{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}},
|
if (IsOpenVINONpuEnabled()) {
|
||||||
{{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}}
|
configs.push_back({{"device_type","AUTO:NPU,GPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||||
};
|
}
|
||||||
|
configs.push_back({{"device_type","GPU.0"}, {"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||||
|
configs.push_back({{"device_type","AUTO:GPU,CPU"},{"precision","FP16"},{"num_of_threads","4"},{"num_streams","4"}});
|
||||||
for (const auto& config : configs) {
|
for (const auto& config : configs) {
|
||||||
try {
|
try {
|
||||||
session_options.AppendExecutionProvider_OpenVINO_V2(config);
|
session_options.AppendExecutionProvider_OpenVINO_V2(config);
|
||||||
|
|||||||
38
engines/ONNXEngine/OpenVINODeviceConfig.h
Normal file
38
engines/ONNXEngine/OpenVINODeviceConfig.h
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Shared runtime switch for enabling the Intel NPU in OpenVINO code paths.
|
||||||
|
//
|
||||||
|
// NPU is DISABLED BY DEFAULT because the NPU plugin on some Intel platforms
|
||||||
|
// (observed: Core Ultra 9 285K / Arrow Lake) crashes inside
|
||||||
|
// ov::Core::compile_model or Ort::Session construction when compiling
|
||||||
|
// multiple ONNX models in quick succession. That failure mode cannot be
|
||||||
|
// caught by the surrounding try/catch (it fires on a plugin worker thread)
|
||||||
|
// and takes down the host process.
|
||||||
|
//
|
||||||
|
// To opt into NPU (e.g. on a machine with a known-good NPU driver), set the
|
||||||
|
// environment variable OPENVINO_ENABLE_NPU to 1 / true / yes / on before
|
||||||
|
// launching the host process.
|
||||||
|
//
|
||||||
|
// Every OpenVINO device-selection site in this codebase consults this helper
|
||||||
|
// rather than probing NPU unconditionally.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace ANSCENTER {
|
||||||
|
|
||||||
|
/// Reports whether the Intel NPU may be used by OpenVINO device selection.
///
/// The OPENVINO_ENABLE_NPU environment variable is read once, on the first
/// call, and the answer is cached in a function-local static; changes made to
/// the variable after that first call are not observed.
///
/// @return true when the value — ignoring case and surrounding whitespace —
///         is "1", "true", "yes" or "on"; false when the variable is unset,
///         empty, or holds anything else.
inline bool IsOpenVINONpuEnabled() {
    static const bool enabled = [] {
        const char* v = std::getenv("OPENVINO_ENABLE_NPU");
        if (!v || !*v) return false;
        std::string s(v);
        // Trim surrounding whitespace before comparing. In cmd.exe,
        // `set OPENVINO_ENABLE_NPU=1 && app` stores "1 " (trailing space),
        // which would otherwise be silently treated as "disabled".
        const auto notSpace = [](unsigned char c) { return std::isspace(c) == 0; };
        s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace));
        s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end());
        // Case-insensitive match; unsigned char avoids UB in tolower for
        // bytes outside the basic character set.
        std::transform(s.begin(), s.end(), s.begin(),
            [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return s == "1" || s == "true" || s == "yes" || s == "on";
    }();
    return enabled;
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
#include "ANSFR.h"
|
#include "ANSFR.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include <opencv2/imgcodecs.hpp>
|
#include <opencv2/imgcodecs.hpp>
|
||||||
#include "ANSOVFaceDetector.h"
|
#include "ANSOVFaceDetector.h"
|
||||||
#include "SCRFDFaceDetector.h"
|
#include "SCRFDFaceDetector.h"
|
||||||
@@ -2695,8 +2696,12 @@ namespace ANSCENTER {
|
|||||||
for (const auto& d : available_devices) {
|
for (const auto& d : available_devices) {
|
||||||
ANS_DBG("ANSFR", " OpenVINO device: %s", d.c_str());
|
ANS_DBG("ANSFR", " OpenVINO device: %s", d.c_str());
|
||||||
}
|
}
|
||||||
// Prioritize devices: NPU > GPU > CPU
|
// Prioritize devices: NPU > GPU > CPU. NPU gated behind runtime switch
|
||||||
std::vector<std::string> priority_devices = { "NPU","GPU","CPU" };
|
// (OPENVINO_ENABLE_NPU=1) — see OpenVINODeviceConfig.h.
|
||||||
|
std::vector<std::string> priority_devices;
|
||||||
|
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||||
|
priority_devices.push_back("GPU");
|
||||||
|
priority_devices.push_back("CPU");
|
||||||
for (const auto& device : priority_devices) {
|
for (const auto& device : priority_devices) {
|
||||||
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
|
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
|
||||||
ANS_DBG("ANSFR", "GetOpenVINODevice: selected %s", device.c_str());
|
ANS_DBG("ANSFR", "GetOpenVINODevice: selected %s", device.c_str());
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
#include "ANSLPR_CPU.h"
|
#include "ANSLPR_CPU.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include "ANSYOLOV10OVOD.h"
|
#include "ANSYOLOV10OVOD.h"
|
||||||
#include "ANSOPENVINOOD.h"
|
#include "ANSOPENVINOOD.h"
|
||||||
#include "ANSTENSORRTOD.h"
|
#include "ANSTENSORRTOD.h"
|
||||||
@@ -119,8 +120,10 @@ namespace ANSCENTER {
|
|||||||
std::vector<std::string> available_devices = _core.get_available_devices();
|
std::vector<std::string> available_devices = _core.get_available_devices();
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
std::string deviceName = "CPU";
|
std::string deviceName = "CPU";
|
||||||
// Search for NPU
|
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
auto it = IsOpenVINONpuEnabled()
|
||||||
|
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||||
|
: available_devices.end();
|
||||||
if (it != available_devices.end()) {
|
if (it != available_devices.end()) {
|
||||||
_core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
_core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
_core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
_core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
#include "BYTETracker.h"
|
#include "BYTETracker.h"
|
||||||
|
#include "ANSLicense.h" // ANS_DBG for tracker-state-size diagnostic
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
@@ -322,6 +323,24 @@ std::vector<ByteTrack::BYTETracker::STrackPtr> ByteTrack::BYTETracker::update(co
|
|||||||
tracked_stracks_ = tracked_stracks_out;
|
tracked_stracks_ = tracked_stracks_out;
|
||||||
lost_stracks_ = lost_stracks_out;
|
lost_stracks_ = lost_stracks_out;
|
||||||
|
|
||||||
|
// Diagnostic: report tracker state size at most once every 60 s per thread (the throttle is a thread_local static, so trackers updated on the same thread share one window).
|
||||||
|
// removed_stracks_ is append-only in this implementation — watch it grow.
|
||||||
|
{
|
||||||
|
static thread_local std::chrono::steady_clock::time_point s_nextLog{};
|
||||||
|
auto now = std::chrono::steady_clock::now();
|
||||||
|
if (now >= s_nextLog) {
|
||||||
|
s_nextLog = now + std::chrono::seconds(60);
|
||||||
|
ANS_DBG("ANSMOT",
|
||||||
|
"BYTETracker state this=%p frame=%zu nextId=%zu tracked=%zu lost=%zu removed=%zu",
|
||||||
|
(void*)this,
|
||||||
|
frame_id_,
|
||||||
|
track_id_count_,
|
||||||
|
tracked_stracks_.size(),
|
||||||
|
lost_stracks_.size(),
|
||||||
|
removed_stracks_.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<STrackPtr> output_stracks;
|
std::vector<STrackPtr> output_stracks;
|
||||||
for (const auto &track : tracked_stracks_)
|
for (const auto &track : tracked_stracks_)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "ANSODEngine.h"
|
#include "ANSODEngine.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include "ANSYOLOOD.h"
|
#include "ANSYOLOOD.h"
|
||||||
#include "ANSTENSORRTOD.h"
|
#include "ANSTENSORRTOD.h"
|
||||||
#include "ANSTENSORRTCL.h"
|
#include "ANSTENSORRTCL.h"
|
||||||
@@ -333,8 +334,10 @@ namespace ANSCENTER
|
|||||||
std::vector<std::string> available_devices = core.get_available_devices();
|
std::vector<std::string> available_devices = core.get_available_devices();
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
std::string deviceName = "CPU";
|
std::string deviceName = "CPU";
|
||||||
// Search for NPU
|
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
auto it = IsOpenVINONpuEnabled()
|
||||||
|
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||||
|
: available_devices.end();
|
||||||
if (it != available_devices.end()) {
|
if (it != available_devices.end()) {
|
||||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
@@ -1414,7 +1417,7 @@ namespace ANSCENTER
|
|||||||
};
|
};
|
||||||
|
|
||||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||||
if (!s_npuProbed || s_npuAvailable) {
|
if (IsOpenVINONpuEnabled() && (!s_npuProbed || s_npuAvailable)) {
|
||||||
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
||||||
}
|
}
|
||||||
try_configs.push_back(makeConfig("GPU.0"));
|
try_configs.push_back(makeConfig("GPU.0"));
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <json.hpp>
|
#include <json.hpp>
|
||||||
#include "ANSODEngine.h"
|
#include "ANSODEngine.h"
|
||||||
#include "ANSLicense.h" // ANS_DBG macro
|
#include "ANSLicense.h" // ANS_DBG macro
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include "ANSYOLOOD.h"
|
#include "ANSYOLOOD.h"
|
||||||
#include "ANSTENSORRTOD.h"
|
#include "ANSTENSORRTOD.h"
|
||||||
#include "ANSTENSORRTCL.h"
|
#include "ANSTENSORRTCL.h"
|
||||||
@@ -354,8 +355,10 @@ namespace ANSCENTER
|
|||||||
std::vector<std::string> available_devices = core.get_available_devices();
|
std::vector<std::string> available_devices = core.get_available_devices();
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
std::string deviceName = "CPU";
|
std::string deviceName = "CPU";
|
||||||
// Search for NPU
|
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
auto it = IsOpenVINONpuEnabled()
|
||||||
|
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||||
|
: available_devices.end();
|
||||||
if (it != available_devices.end()) {
|
if (it != available_devices.end()) {
|
||||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include"ANSONNXCL.h"
|
#include"ANSONNXCL.h"
|
||||||
#include "EPLoader.h"
|
#include "EPLoader.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
namespace ANSCENTER
|
namespace ANSCENTER
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -143,20 +144,26 @@ namespace ANSCENTER
|
|||||||
const std::string numberOfThreads = "1";
|
const std::string numberOfThreads = "1";
|
||||||
const std::string numberOfStreams = "1";
|
const std::string numberOfStreams = "1";
|
||||||
|
|
||||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||||
|
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||||
|
if (IsOpenVINONpuEnabled()) {
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||||
|
}
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} }
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","False"} });
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto& config : try_configs) {
|
for (const auto& config : try_configs) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "ANSOPENVINOCL.h"
|
#include "ANSOPENVINOCL.h"
|
||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
namespace ANSCENTER
|
namespace ANSCENTER
|
||||||
{
|
{
|
||||||
bool OPENVINOCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
bool OPENVINOCL::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||||
@@ -369,8 +370,10 @@ namespace ANSCENTER
|
|||||||
std::vector<std::string> available_devices = core.get_available_devices();
|
std::vector<std::string> available_devices = core.get_available_devices();
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
|
|
||||||
// Search for NPU
|
// Search for NPU (gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h)
|
||||||
auto it = std::find(available_devices.begin(), available_devices.end(), "NPU");
|
auto it = IsOpenVINONpuEnabled()
|
||||||
|
? std::find(available_devices.begin(), available_devices.end(), "NPU")
|
||||||
|
: available_devices.end();
|
||||||
if (it != available_devices.end()) {
|
if (it != available_devices.end()) {
|
||||||
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("NPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "ANSOPENVINOOD.h"
|
#include "ANSOPENVINOOD.h"
|
||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
namespace ANSCENTER
|
namespace ANSCENTER
|
||||||
{
|
{
|
||||||
bool OPENVINOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
bool OPENVINOOD::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||||
@@ -437,8 +438,11 @@ namespace ANSCENTER
|
|||||||
ov::Core core;
|
ov::Core core;
|
||||||
// Step 2: Get Available Devices and Log
|
// Step 2: Get Available Devices and Log
|
||||||
std::vector<std::string> available_devices = core.get_available_devices();
|
std::vector<std::string> available_devices = core.get_available_devices();
|
||||||
// Define device priority: NPU > GPU > CPU
|
// Define device priority: NPU > GPU > CPU. NPU gated by
|
||||||
std::vector<std::string> priority_devices = { "NPU", "GPU" };
|
// OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
|
||||||
|
std::vector<std::string> priority_devices;
|
||||||
|
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||||
|
priority_devices.push_back("GPU");
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
|
|
||||||
// Iterate over prioritized devices
|
// Iterate over prioritized devices
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
#include "ANSOVSEG.h"
|
#include "ANSOVSEG.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
namespace ANSCENTER {
|
namespace ANSCENTER {
|
||||||
bool ANSOVSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
bool ANSOVSEG::OptimizeModel(bool fp16, std::string& optimizedModelFolder) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
@@ -493,8 +494,11 @@ namespace ANSCENTER {
|
|||||||
ov::Core core;
|
ov::Core core;
|
||||||
// Step 2: Get Available Devices and Log
|
// Step 2: Get Available Devices and Log
|
||||||
std::vector<std::string> available_devices = core.get_available_devices();
|
std::vector<std::string> available_devices = core.get_available_devices();
|
||||||
// Define device priority: NPU > GPU > CPU
|
// Define device priority: NPU > GPU > CPU. NPU gated by
|
||||||
std::vector<std::string> priority_devices = { "NPU", "GPU" };
|
// OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h.
|
||||||
|
std::vector<std::string> priority_devices;
|
||||||
|
if (IsOpenVINONpuEnabled()) priority_devices.push_back("NPU");
|
||||||
|
priority_devices.push_back("GPU");
|
||||||
bool device_found = false;
|
bool device_found = false;
|
||||||
|
|
||||||
// Iterate over prioritized devices
|
// Iterate over prioritized devices
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "ANSYOLO12OD.h"
|
#include "ANSYOLO12OD.h"
|
||||||
#include "EPLoader.h"
|
#include "EPLoader.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#ifdef USEONNXOV
|
#ifdef USEONNXOV
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -365,20 +366,26 @@ namespace ANSCENTER {
|
|||||||
const std::string numberOfThreads = "8";
|
const std::string numberOfThreads = "8";
|
||||||
const std::string numberOfStreams = "8";
|
const std::string numberOfStreams = "8";
|
||||||
|
|
||||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||||
|
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||||
|
if (IsOpenVINONpuEnabled()) {
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
}
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto& config : try_configs) {
|
for (const auto& config : try_configs) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "ANSYOLOOD.h"
|
#include "ANSYOLOOD.h"
|
||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
#include "EPLoader.h"
|
#include "EPLoader.h"
|
||||||
|
#include "OpenVINODeviceConfig.h"
|
||||||
#include "ANSGpuFrameRegistry.h"
|
#include "ANSGpuFrameRegistry.h"
|
||||||
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
#include "NV12PreprocessHelper.h" // tl_currentGpuFrame()
|
||||||
#ifdef USEONNXOV
|
#ifdef USEONNXOV
|
||||||
@@ -303,20 +304,26 @@ namespace ANSCENTER
|
|||||||
const std::string numberOfThreads = "8";
|
const std::string numberOfThreads = "8";
|
||||||
const std::string numberOfStreams = "8";
|
const std::string numberOfStreams = "8";
|
||||||
|
|
||||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||||
|
// NPU gated by OPENVINO_ENABLE_NPU — see OpenVINODeviceConfig.h
|
||||||
|
if (IsOpenVINONpuEnabled()) {
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
}
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
|
try_configs.push_back(
|
||||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} });
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto& config : try_configs) {
|
for (const auto& config : try_configs) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
Reference in New Issue
Block a user