Use software decoder by default

This commit is contained in:
2026-04-04 20:19:54 +11:00
parent 3a21026790
commit e134ebdf15
24 changed files with 693 additions and 215 deletions

View File

@@ -1275,6 +1275,90 @@ cv::Mat CVideoPlayer::avframeNV12ToCvMat(const AVFrame* frame)
return cv::Mat();
}
}
// Convert a decoded YUV420P/YUVJ420P AVFrame into a BGR cv::Mat.
//
// YUV420P stores three separate planes: Y at full resolution, U and V at
// half resolution in each dimension. OpenCV's cvtColor(COLOR_YUV2BGR_I420)
// expects a single contiguous single-channel buffer of height H*3/2:
//   [Y: W x H] [U: W/2 x H/2] [V: W/2 x H/2]
//
// Fast path: when FFmpeg's planes are already packed back-to-back with
// stride == width, the frame buffer is wrapped zero-copy. Otherwise the
// planes are copied row-by-row into a contiguous staging buffer first
// (linesize may include alignment padding).
//
// @param frame  decoded frame; may be null. Returns an empty Mat on any
//               invalid input or on exception.
// @return BGR image, optionally range-expanded (see m_nImageQuality).
cv::Mat CVideoPlayer::avframeYUV420PToCvMat(const AVFrame* frame) {
    try {
        // Reject null frames, degenerate dimensions, and missing planes
        // before any pointer arithmetic below.
        if (!frame || frame->width <= 0 || frame->height <= 0 ||
            !frame->data[0] || !frame->data[1] || !frame->data[2]) {
            return cv::Mat();
        }
        const int width    = frame->width;
        const int height   = frame->height;
        const int yStride  = frame->linesize[0];
        const int uStride  = frame->linesize[1];
        const int vStride  = frame->linesize[2];
        const int uvWidth  = width / 2;
        const int uvHeight = height / 2;

        cv::Mat yuv; // single-channel I420 layout: H*3/2 rows of W bytes

        if (yStride == width && uStride == uvWidth && vStride == uvWidth &&
            frame->data[1] == frame->data[0] + width * height &&
            frame->data[2] == frame->data[1] + uvWidth * uvHeight) {
            // Contiguous I420 — wrap the frame buffer directly, zero copy.
            // Safe: cvtColor below reads it before this function returns,
            // so the Mat never outlives the AVFrame.
            yuv = cv::Mat(height * 3 / 2, width, CV_8UC1, frame->data[0]);
        } else {
            // Padded planes (linesize > width) — copy into a contiguous buffer.
            yuv.create(height * 3 / 2, width, CV_8UC1);

            // Copy one plane, collapsing the stride==width case to a single memcpy.
            auto copyPlane = [](uint8_t* dstPtr, const uint8_t* src,
                                int stride, int w, int h) {
                if (stride == w) {
                    std::memcpy(dstPtr, src, static_cast<size_t>(w) * h);
                } else {
                    for (int row = 0; row < h; ++row) {
                        std::memcpy(dstPtr + static_cast<size_t>(row) * w,
                                    src + static_cast<size_t>(row) * stride, w);
                    }
                }
            };

            uint8_t* dst = yuv.data;
            copyPlane(dst, frame->data[0], yStride, width, height);
            dst += width * height;
            copyPlane(dst, frame->data[1], uStride, uvWidth, uvHeight);
            dst += uvWidth * uvHeight;
            copyPlane(dst, frame->data[2], vStride, uvWidth, uvHeight);
        }

        cv::Mat bgrImage;
        cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420);
        if (m_nImageQuality == 1) {
            // Expand limited (studio) range [16..235] to full range [0..255]:
            // out = (in - 16) * 255/219.
            bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
        }
        return bgrImage;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUV420PToCvMat: " << e.what() << std::endl;
        return cv::Mat();
    }
}
cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
@@ -1287,8 +1371,9 @@ cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
switch (pFrame->format) {
case AV_PIX_FMT_NV12:
return avframeNV12ToCvMat(pFrame);
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
return avframeAnyToCvmat(pFrame);
return avframeYUV420PToCvMat(pFrame);
default:
return avframeAnyToCvmat(pFrame);
@@ -1305,7 +1390,7 @@ CVideoPlayer::CVideoPlayer() :
, m_bAudioInited(FALSE)
, m_bPlaying(FALSE)
, m_bPaused(FALSE)
, m_nHWDecoding(HW_DECODING_AUTO)//(HW_DECODING_AUTO)// HW_DECODING_D3D11 //HW_DECODING_DISABLE
, m_nHWDecoding(HW_DECODING_DISABLE)// Software decode by default — saves VRAM (no NVDEC DPB surfaces)
, m_bUpdown(FALSE)
, m_bSnapshot(FALSE)
, m_nSnapVideoFmt(AV_PIX_FMT_YUVJ420P)
@@ -1740,6 +1825,13 @@ void CVideoPlayer::StopVideoDecoder() {
// Flush decoder to drain and discard any buffered frames,
// so stale reference frames don't corrupt the next session
decoder->flush();
// Free NVDEC decoder context and all GPU surfaces (DPB buffers).
// Stopped cameras should not hold VRAM — with 100 cameras created
// but only 5 running, the 95 idle decoders would consume ~5-10 GB.
// The decoder will be re-initialized automatically when the next
// video packet arrives after Start() is called.
decoder->uninit();
m_bVideoInited = FALSE;
}
// Clear queue but KEEP m_currentImage and m_lastJpegImage —
// getImage()/getJpegImage() will return the last good frame while decoder stabilizes
@@ -1842,6 +1934,13 @@ void CVideoPlayer::setTargetFPS(double intervalMs)
m_targetIntervalMs = intervalMs;
m_targetFPSInitialized = false; // reset timing on change
}
// Wall-clock age, in milliseconds, of the most recent frame produced by
// the decoder. Returns 0.0 if no frame timestamp has been recorded yet.
// Thread-safe: takes the player's recursive mutex while reading state.
double CVideoPlayer::getLastFrameAgeMs()
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!m_lastDecoderFrameTimeSet) {
        return 0.0;
    }
    const auto elapsed = std::chrono::steady_clock::now() - m_lastDecoderFrameTime;
    return std::chrono::duration<double, std::milli>(elapsed).count();
}
void CVideoPlayer::playVideo(uint8* data, int len, uint32 ts, uint16 seq)
{
if (m_bRecording)
@@ -2061,6 +2160,11 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
}
}
// Record wall-clock time of every decoded frame (even rate-limited ones).
// Used by getLastFrameAgeMs() to detect truly stale cameras.
m_lastDecoderFrameTime = std::chrono::steady_clock::now();
m_lastDecoderFrameTimeSet = true;
// --- Frame rate limiting ---
// Skip post-decode processing (clone, queue push, CUDA clone) if not enough
// time has elapsed since the last processed frame. The decode itself still