Enable log information. Disable NPU in U9
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
#include "lock.h"
|
||||
#include "media_codec.h"
|
||||
#include "media_parse.h"
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||
@@ -14,6 +15,16 @@ extern "C" {
|
||||
#include "libavutil/mem.h"
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leak diagnostics — exported counters for media allocation balance.
|
||||
// Alloc/free counts only ever increase; the byte total is added to on allocation and subtracted on free. If (alloc -
|
||||
// free) climbs monotonically over time, the allocator is leaking.
|
||||
// Read by the MEDIA_Leak heartbeat in video_player.cpp (every 60 s).
|
||||
// ---------------------------------------------------------------------------
|
||||
std::atomic<int64_t> g_contiguousAllocs{0};
|
||||
std::atomic<int64_t> g_contiguousFrees{0};
|
||||
std::atomic<int64_t> g_contiguousBytesInFlight{0}; // sum(total) of unfreed buffers
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Contiguous YUV420P allocator — trims per-call malloc overhead and enables
|
||||
// the zero-copy fast path in avframeYUV420PToCvMat for resolutions where the
|
||||
@@ -23,7 +34,20 @@ extern "C" {
|
||||
// single-block layout still improves cache behaviour for the bulk memcpy.)
|
||||
// ---------------------------------------------------------------------------
|
||||
namespace {
|
||||
void anscore_contiguous_free(void* /*opaque*/, uint8_t* data) {
|
||||
// Opaque payload attached to the AVBufferRef created for a contiguous buffer.
// It carries the buffer's byte count so the free callback can subtract exactly
// that amount from the in-flight byte total without any global lookup.
struct ContiguousOpaque {
    size_t bytes;  // byte count supplied when the payload is constructed
};
|
||||
|
||||
void anscore_contiguous_free(void* opaque, uint8_t* data) {
|
||||
if (opaque) {
|
||||
auto* o = static_cast<ContiguousOpaque*>(opaque);
|
||||
g_contiguousBytesInFlight.fetch_sub(static_cast<int64_t>(o->bytes),
|
||||
std::memory_order_relaxed);
|
||||
delete o;
|
||||
}
|
||||
g_contiguousFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
av_free(data);
|
||||
}
|
||||
}
|
||||
@@ -77,13 +101,24 @@ int CVideoDecoder::contiguousGetBuffer2(AVCodecContext* s, AVFrame* frame, int f
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
||||
anscore_contiguous_free, nullptr, 0);
|
||||
if (!ref) {
|
||||
auto* opaque = new (std::nothrow) ContiguousOpaque{total};
|
||||
if (!opaque) {
|
||||
av_free(buf);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
AVBufferRef* ref = av_buffer_create(buf, (int)total,
|
||||
anscore_contiguous_free, opaque, 0);
|
||||
if (!ref) {
|
||||
delete opaque;
|
||||
av_free(buf);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
g_contiguousAllocs.fetch_add(1, std::memory_order_relaxed);
|
||||
g_contiguousBytesInFlight.fetch_add(static_cast<int64_t>(total),
|
||||
std::memory_order_relaxed);
|
||||
|
||||
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
|
||||
frame->buf[i] = nullptr;
|
||||
frame->data[i] = nullptr;
|
||||
|
||||
@@ -37,6 +37,22 @@ extern "C"
|
||||
|
||||
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leak diagnostics — definitions for counters declared extern in header.
|
||||
// Also references counters defined in video_decoder.cpp so the heartbeat
|
||||
// below can report media allocator balance in a single line.
|
||||
// ---------------------------------------------------------------------------
|
||||
std::atomic<int64_t> g_queueClones{0};
|
||||
std::atomic<int64_t> g_queueFrees{0};
|
||||
std::atomic<int64_t> g_nv12Clones{0};
|
||||
std::atomic<int64_t> g_nv12Frees{0};
|
||||
std::atomic<int64_t> g_cudaHWClones{0};
|
||||
std::atomic<int64_t> g_cudaHWFrees{0};
|
||||
|
||||
extern std::atomic<int64_t> g_contiguousAllocs;
|
||||
extern std::atomic<int64_t> g_contiguousFrees;
|
||||
extern std::atomic<int64_t> g_contiguousBytesInFlight;
|
||||
|
||||
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
|
||||
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
|
||||
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
|
||||
@@ -1629,10 +1645,12 @@ void CVideoPlayer::close()
|
||||
closeAudio();
|
||||
if (m_currentNV12Frame) {
|
||||
av_frame_free(&m_currentNV12Frame);
|
||||
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||
m_currentNV12Frame = nullptr;
|
||||
}
|
||||
if (m_currentCudaHWFrame) {
|
||||
av_frame_free(&m_currentCudaHWFrame);
|
||||
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
m_currentCudaHWFrame = nullptr;
|
||||
}
|
||||
if (m_pSnapFrame)
|
||||
@@ -2329,8 +2347,12 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
|
||||
// and we can safely clone the CUDA frame without deadlock risk.
|
||||
// cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
|
||||
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
|
||||
if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame);
|
||||
if (m_currentCudaHWFrame) {
|
||||
av_frame_free(&m_currentCudaHWFrame);
|
||||
g_cudaHWFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
|
||||
if (m_currentCudaHWFrame) g_cudaHWClones.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Track how many clean frames have arrived since keyframe
|
||||
@@ -2455,8 +2477,12 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
||||
(frameToProcess->format == AV_PIX_FMT_NV12 ||
|
||||
frameToProcess->format == AV_PIX_FMT_YUV420P ||
|
||||
frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
|
||||
if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
|
||||
if (m_currentNV12Frame) {
|
||||
av_frame_free(&m_currentNV12Frame);
|
||||
g_nv12Frees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_currentNV12Frame = av_frame_clone(frameToProcess);
|
||||
if (m_currentNV12Frame) g_nv12Clones.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
width = m_currentImage.cols;
|
||||
@@ -2466,6 +2492,49 @@ cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
|
||||
}
|
||||
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Leak diagnostics — one heartbeat every 60 s across the whole process.
|
||||
// Each counter pair (allocs, frees) should stay balanced. A monotonic
|
||||
// rise in (allocs - frees) identifies the leaking pool. Bytes field
|
||||
// covers the ~12 MB/frame contiguous YUV420P buffers specifically —
|
||||
// watch for steady climb while the counters look balanced (refcount
|
||||
// leak in a held clone would show that shape).
|
||||
{
|
||||
using clk = std::chrono::steady_clock;
|
||||
static std::atomic<long long> s_nextLeakLogTick{0};
|
||||
const long long tick = clk::now().time_since_epoch().count();
|
||||
long long expected = s_nextLeakLogTick.load(std::memory_order_relaxed);
|
||||
if (tick >= expected) {
|
||||
const long long deadline = tick +
|
||||
std::chrono::duration_cast<clk::duration>(
|
||||
std::chrono::seconds(60)).count();
|
||||
// Claim the next window — first writer wins so only one thread logs.
|
||||
if (s_nextLeakLogTick.compare_exchange_strong(
|
||||
expected, deadline, std::memory_order_relaxed)) {
|
||||
const int64_t qA = g_queueClones.load(std::memory_order_relaxed);
|
||||
const int64_t qF = g_queueFrees.load(std::memory_order_relaxed);
|
||||
const int64_t nvA = g_nv12Clones.load(std::memory_order_relaxed);
|
||||
const int64_t nvF = g_nv12Frees.load(std::memory_order_relaxed);
|
||||
const int64_t cuA = g_cudaHWClones.load(std::memory_order_relaxed);
|
||||
const int64_t cuF = g_cudaHWFrees.load(std::memory_order_relaxed);
|
||||
const int64_t cgA = g_contiguousAllocs.load(std::memory_order_relaxed);
|
||||
const int64_t cgF = g_contiguousFrees.load(std::memory_order_relaxed);
|
||||
const int64_t cgB = g_contiguousBytesInFlight.load(std::memory_order_relaxed);
|
||||
ANS_DBG("MEDIA_Leak",
|
||||
"queue(C=%lld F=%lld net=%lld depth=%zu) "
|
||||
"nv12(C=%lld F=%lld net=%lld) "
|
||||
"cudaHW(C=%lld F=%lld net=%lld) "
|
||||
"contig(A=%lld F=%lld net=%lld bytesMB=%.1f)",
|
||||
(long long)qA, (long long)qF, (long long)(qA - qF),
|
||||
g_frameQueue.size(),
|
||||
(long long)nvA, (long long)nvF, (long long)(nvA - nvF),
|
||||
(long long)cuA, (long long)cuF, (long long)(cuA - cuF),
|
||||
(long long)cgA, (long long)cgF, (long long)(cgA - cgF),
|
||||
(double)cgB / (1024.0 * 1024.0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit timing breakdown. Throttled so DebugView / stderr stay readable.
|
||||
{
|
||||
@@ -2540,11 +2609,13 @@ std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
return m_lastJpegImage;
|
||||
}
|
||||
const auto t3 = clk::now();
|
||||
|
||||
av_frame_free(&frameToProcess);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
if (m_pts < INT64_MAX) {
|
||||
m_pts++;
|
||||
|
||||
@@ -15,8 +15,18 @@
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <turbojpeg.h>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
// Leak diagnostics — net counters surfaced in MEDIA_Leak heartbeat.
|
||||
// Defined in video_player.cpp; also incremented from FrameQueue here.
|
||||
extern std::atomic<int64_t> g_queueClones; // av_frame_clone from FrameQueue
|
||||
extern std::atomic<int64_t> g_queueFrees; // av_frame_free from FrameQueue
|
||||
extern std::atomic<int64_t> g_nv12Clones; // m_currentNV12Frame = av_frame_clone
|
||||
extern std::atomic<int64_t> g_nv12Frees; // av_frame_free(&m_currentNV12Frame)
|
||||
extern std::atomic<int64_t> g_cudaHWClones; // m_currentCudaHWFrame = clone
|
||||
extern std::atomic<int64_t> g_cudaHWFrees; // av_frame_free(&m_currentCudaHWFrame)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 SyncTimestamp;
|
||||
@@ -46,6 +56,7 @@ public:
|
||||
std::cerr << "Failed to clone AVFrame!" << std::endl;
|
||||
return;
|
||||
}
|
||||
g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
frameQueue.push(frameCopy);
|
||||
m_frameSeq++; // New frame arrived
|
||||
@@ -55,6 +66,7 @@ public:
|
||||
AVFrame* oldFrame = frameQueue.front();
|
||||
frameQueue.pop();
|
||||
av_frame_free(&oldFrame);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +85,15 @@ public:
|
||||
}
|
||||
|
||||
// Clone the latest frame before returning it
|
||||
return av_frame_clone(frameQueue.back());
|
||||
AVFrame* clone = av_frame_clone(frameQueue.back());
|
||||
if (clone) g_queueClones.fetch_add(1, std::memory_order_relaxed);
|
||||
return clone;
|
||||
}
|
||||
|
||||
// Current depth — snapshot used by the leak heartbeat.
|
||||
size_t size() {
|
||||
std::lock_guard<std::mutex> lock(queueMutex);
|
||||
return frameQueue.size();
|
||||
}
|
||||
|
||||
// Retrieve and remove the oldest frame from the queue
|
||||
@@ -102,6 +122,7 @@ public:
|
||||
AVFrame* frame = frameQueue.front();
|
||||
frameQueue.pop();
|
||||
av_frame_free(&frame);
|
||||
g_queueFrees.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
m_frameSeq = 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user