Enable log information. Disable NPU in U9

This commit is contained in:
2026-04-21 15:48:27 +10:00
parent 00f6e2f852
commit 97d814936d
18 changed files with 301 additions and 54 deletions

View File

@@ -37,6 +37,22 @@ extern "C"
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
// ===========================================================================
// Leak-diagnostic counters.
//
// Every pool is tracked by a pair of monotonically increasing totals; the
// difference of a pair is the number of objects currently outstanding.
// The queue / NV12 / CUDA-HW pairs are defined in this file (their extern
// declarations live in the header). The contiguous-buffer counters are
// defined in video_decoder.cpp and are only referenced here so that the
// periodic heartbeat can print the media allocator balance on one line.
// ===========================================================================
namespace {
// File-local shorthand only; the counters below keep external linkage and
// have exactly the type std::atomic<int64_t>.
using LeakCounter = std::atomic<int64_t>;
}  // namespace

// Frames handed out of / released from the frame queue.
LeakCounter g_queueClones{0};
LeakCounter g_queueFrees{0};

// Clones of the most recent NV12 / YUV420P software frame.
LeakCounter g_nv12Clones{0};
LeakCounter g_nv12Frees{0};

// Clones of the most recent CUDA hardware frame.
LeakCounter g_cudaHWClones{0};
LeakCounter g_cudaHWFrees{0};

// Contiguous-buffer allocator statistics (defined in video_decoder.cpp).
extern LeakCounter g_contiguousAllocs;
extern LeakCounter g_contiguousFrees;
extern LeakCounter g_contiguousBytesInFlight;
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
@@ -1629,10 +1645,12 @@ void CVideoPlayer::close()
closeAudio();
if (m_currentNV12Frame) {
av_frame_free(&m_currentNV12Frame);
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
m_currentNV12Frame = nullptr;
}
if (m_currentCudaHWFrame) {
av_frame_free(&m_currentCudaHWFrame);
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
m_currentCudaHWFrame = nullptr;
}
if (m_pSnapFrame)
@@ -2329,8 +2347,12 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
// and we can safely clone the CUDA frame without deadlock risk.
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame);
if (m_currentCudaHWFrame) {
av_frame_free(&m_currentCudaHWFrame);
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
}
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
if (m_currentCudaHWFrame) g_cudaHWClones.fetch_add(1, std::memory_order_relaxed);
}
// Track how many clean frames have arrived since keyframe
@@ -2455,8 +2477,12 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
(frameToProcess->format == AV_PIX_FMT_NV12 ||
frameToProcess->format == AV_PIX_FMT_YUV420P ||
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
if (m_currentNV12Frame) {
av_frame_free(&m_currentNV12Frame);
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
}
m_currentNV12Frame = av_frame_clone(frameToProcess);
if (m_currentNV12Frame) g_nv12Clones.fetch_add(1, std::memory_order_relaxed);
}
width = m_currentImage.cols;
@@ -2466,6 +2492,49 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
}
av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
// Leak diagnostics — one heartbeat every 60 s across the whole process.
// Each counter pair (allocs, frees) should stay balanced. A monotonic
// rise in (allocs - frees) identifies the leaking pool. Bytes field
// covers the ~12 MB/frame contiguous YUV420P buffers specifically —
// watch for steady climb while the counters look balanced (refcount
// leak in a held clone would show that shape).
//
// The heartbeat is only evaluated when execution reaches this point, so
// the 60 s interval is a minimum spacing between log lines, not a timer.
{
using clk = std::chrono::steady_clock;
// Earliest steady_clock tick at which the next line may be emitted.
// Function-local static, so the throttle is shared by every caller of
// this code path. Starts at 0, so the first pass normally logs at once.
static std::atomic<long long> s_nextLeakLogTick{0};
const long long tick = clk::now().time_since_epoch().count();
long long expected = s_nextLeakLogTick.load(std::memory_order_relaxed);
if (tick >= expected) {
// Next permitted log time: now + 60 s, expressed in clock ticks.
const long long deadline = tick +
std::chrono::duration_cast<clk::duration>(
std::chrono::seconds(60)).count();
// Claim the next window — first writer wins so only one thread logs.
// A failed exchange means another thread already moved the deadline;
// this thread then skips logging for the current window.
if (s_nextLeakLogTick.compare_exchange_strong(
expected, deadline, std::memory_order_relaxed)) {
// Each counter is read with its own relaxed load, so the values are
// not a single consistent snapshot; "net" figures can be off by
// operations that are in flight on other threads.
const int64_t qA = g_queueClones.load(std::memory_order_relaxed);
const int64_t qF = g_queueFrees.load(std::memory_order_relaxed);
const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed);
const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed);
const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed);
const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed);
const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed);
const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed);
const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed);
// Counters are cast to long long to match %lld on every platform.
// The bytes value is divided by 1024*1024 before printing.
// NOTE(review): %zu requires g_frameQueue.size() to return size_t, and
// g_frameQueue is declared outside this view — confirm the return type
// and that calling size() here is safe without the queue's own lock.
ANS_DBG("MEDIA_Leak",
"queue(C=%lld F=%lld net=%lld depth=%zu) "
"nv12(C=%lld F=%lld net=%lld) "
"cudaHW(C=%lld F=%lld net=%lld) "
"contig(A=%lld F=%lld net=%lld bytesMB=%.1f)",
(long long)qA, (long long)qF, (long long)(qA - qF),
g_frameQueue.size(),
(long long)nvA, (long long)nvF, (long long)(nvA - nvF),
(long long)cuA, (long long)cuF, (long long)(cuA - cuF),
(long long)cgA, (long long)cgF, (long long)(cgA - cgF),
(double)cgB / (1024.0 * 1024.0));
}
}
}
// Emit timing breakdown. Throttled so DebugView / stderr stay readable.
{
@@ -2540,11 +2609,13 @@ std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
catch (const std::exception& e) {
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
return m_lastJpegImage;
}
const auto t3 = clk::now();
av_frame_free(&frameToProcess);
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
if (m_pts < INT64_MAX) {
m_pts++;