Use software decoder by default

This commit is contained in:
2026-04-04 20:19:54 +11:00
parent 3a21026790
commit e134ebdf15
24 changed files with 693 additions and 215 deletions

View File

@@ -258,7 +258,15 @@ void CRtspPlayer::stop()
// Set flags BEFORE stopping decoder so TCP rx thread stops calling decode()
m_bPlaying = FALSE;
m_bPaused = FALSE;
CVideoPlayer::StopVideoDecoder(); // Stop the video decoder
CVideoPlayer::StopVideoDecoder(); // Stop the video decoder + uninit (free VRAM)
// Close RTSP connection and shut down RX threads.
// Without this, stopped cameras keep TCP/UDP threads running,
// sockets open, and receiving network data — wasting CPU and
// network resources. With 100 cameras and only 5 running,
// 95 idle threads would consume CPU for no purpose.
// Start() → Setup() → open() will reconnect when needed.
m_rtsp.rtsp_close();
}
BOOL CRtspPlayer::pause()

View File

@@ -1275,6 +1275,90 @@ cv::Mat CVideoPlayer::avframeNV12ToCvMat(const AVFrame* frame)
return cv::Mat();
}
}
// Convert a planar YUV420P/YUVJ420P AVFrame to a BGR cv::Mat using OpenCV
// only (no sws_scale). YUV420P has 3 separate planes: Y (full res), U and V
// (half res each). OpenCV's cvtColor(COLOR_YUV2BGR_I420) expects a single
// contiguous buffer laid out as [Y: W×H][U: W/2 × H/2][V: W/2 × H/2],
// i.e. height = H*3/2, width = W, single channel.
// Returns an empty Mat on null/invalid frame or on exception.
cv::Mat CVideoPlayer::avframeYUV420PToCvMat(const AVFrame* frame) {
    try {
        if (!frame || frame->width <= 0 || frame->height <= 0) {
            return cv::Mat();
        }
        const int width = frame->width;
        const int height = frame->height;
        const int uvWidth = width / 2;
        const int uvHeight = height / 2;
        const int yStride = frame->linesize[0];
        const int uStride = frame->linesize[1];
        const int vStride = frame->linesize[2];

        cv::Mat yuv;
        if (yStride == width && uStride == uvWidth && vStride == uvWidth &&
            frame->data[1] == frame->data[0] + width * height &&
            frame->data[2] == frame->data[1] + uvWidth * uvHeight) {
            // Fast path: all three planes are packed contiguously with
            // stride == width — wrap the decoder buffer directly, zero copy.
            yuv = cv::Mat(height * 3 / 2, width, CV_8UC1, frame->data[0]);
        } else {
            // Slow path: planes have padding (linesize > width) or are not
            // adjacent in memory — copy each plane into a contiguous buffer.
            yuv.create(height * 3 / 2, width, CV_8UC1);
            // Copies one plane; memcpy the whole plane when unpadded,
            // otherwise row by row to strip the stride padding.
            auto copyPlane = [](uint8_t* out, const uint8_t* src,
                                int srcStride, int w, int h) {
                if (srcStride == w) {
                    std::memcpy(out, src, static_cast<size_t>(w) * h);
                } else {
                    for (int row = 0; row < h; ++row) {
                        std::memcpy(out + row * w, src + row * srcStride, w);
                    }
                }
            };
            uint8_t* dst = yuv.data;
            copyPlane(dst, frame->data[0], yStride, width, height);
            dst += width * height;
            copyPlane(dst, frame->data[1], uStride, uvWidth, uvHeight);
            dst += uvWidth * uvHeight;
            copyPlane(dst, frame->data[2], vStride, uvWidth, uvHeight);
        }

        // Single conversion tail shared by both paths (the original
        // duplicated this block in the fast and slow paths).
        cv::Mat bgrImage;
        cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420);
        if (m_nImageQuality == 1) {
            // Quality mode: expand limited-range luma (16-235) to full
            // range (0-255): out = (in - 16) * 255/219.
            bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
        }
        return bgrImage;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUV420PToCvMat: " << e.what() << std::endl;
        return cv::Mat();
    }
}
cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
@@ -1287,8 +1371,9 @@ cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
switch (pFrame->format) {
case AV_PIX_FMT_NV12:
return avframeNV12ToCvMat(pFrame);
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
return avframeAnyToCvmat(pFrame);
return avframeYUV420PToCvMat(pFrame);
default:
return avframeAnyToCvmat(pFrame);
@@ -1305,7 +1390,7 @@ CVideoPlayer::CVideoPlayer() :
, m_bAudioInited(FALSE)
, m_bPlaying(FALSE)
, m_bPaused(FALSE)
, m_nHWDecoding(HW_DECODING_AUTO)//(HW_DECODING_AUTO)// HW_DECODING_D3D11 //HW_DECODING_DISABLE
, m_nHWDecoding(HW_DECODING_DISABLE)// Software decode by default — saves VRAM (no NVDEC DPB surfaces)
, m_bUpdown(FALSE)
, m_bSnapshot(FALSE)
, m_nSnapVideoFmt(AV_PIX_FMT_YUVJ420P)
@@ -1740,6 +1825,13 @@ void CVideoPlayer::StopVideoDecoder() {
// Flush decoder to drain and discard any buffered frames,
// so stale reference frames don't corrupt the next session
decoder->flush();
// Free NVDEC decoder context and all GPU surfaces (DPB buffers).
// Stopped cameras should not hold VRAM — with 100 cameras created
// but only 5 running, the 95 idle decoders would consume ~5-10 GB.
// The decoder will be re-initialized automatically when the next
// video packet arrives after Start() is called.
decoder->uninit();
m_bVideoInited = FALSE;
}
// Clear queue but KEEP m_currentImage and m_lastJpegImage —
// getImage()/getJpegImage() will return the last good frame while decoder stabilizes
@@ -1842,6 +1934,13 @@ void CVideoPlayer::setTargetFPS(double intervalMs)
m_targetIntervalMs = intervalMs;
m_targetFPSInitialized = false; // reset timing on change
}
// Milliseconds elapsed since the decoder delivered its most recent frame.
// Returns 0.0 until the first frame has been recorded.
double CVideoPlayer::getLastFrameAgeMs()
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!m_lastDecoderFrameTimeSet) {
        return 0.0;  // no frame seen yet
    }
    const auto elapsed = std::chrono::steady_clock::now() - m_lastDecoderFrameTime;
    return std::chrono::duration<double, std::milli>(elapsed).count();
}
void CVideoPlayer::playVideo(uint8* data, int len, uint32 ts, uint16 seq)
{
if (m_bRecording)
@@ -2061,6 +2160,11 @@ void CVideoPlayer::onVideoFrame(AVFrame* frame)
}
}
// Record wall-clock time of every decoded frame (even rate-limited ones).
// Used by getLastFrameAgeMs() to detect truly stale cameras.
m_lastDecoderFrameTime = std::chrono::steady_clock::now();
m_lastDecoderFrameTimeSet = true;
// --- Frame rate limiting ---
// Skip post-decode processing (clone, queue push, CUDA clone) if not enough
// time has elapsed since the last processed frame. The decode itself still

View File

@@ -148,6 +148,7 @@ public:
// Image quality mode: 0=fast (OpenCV BT.601, ~2ms), 1=quality (sws BT.709+range, ~12ms)
virtual void setImageQuality(int mode) { m_nImageQuality = mode; }
void setTargetFPS(double intervalMs); // Set minimum interval between processed frames in ms (0 = no limit, 100 = ~10 FPS)
double getLastFrameAgeMs(); // Milliseconds since last frame arrived from decoder (0 if no frame yet)
virtual void setRtpMulticast(BOOL flag) {}
virtual void setRtpOverUdp(BOOL flag) {}
@@ -223,6 +224,7 @@ protected:
cv::Mat avframeAnyToCvmat(const AVFrame* frame);
cv::Mat avframeNV12ToCvMat(const AVFrame* frame);
cv::Mat avframeYUV420PToCvMat(const AVFrame* frame); // YUV420P/YUVJ420P → BGR (OpenCV, no sws_scale)
cv::Mat avframeYUVJ420PToCvmat(const AVFrame* frame);
cv::Mat avframeToCVMat(const AVFrame* frame);
@@ -273,6 +275,12 @@ protected:
std::chrono::steady_clock::time_point m_lastProcessedTime; // timestamp of last processed frame
bool m_targetFPSInitialized = false; // first-frame flag
// Wall-clock timestamp of last frame received from the decoder (NOT from getImage).
// Updated in onVideoFrame() for EVERY decoded frame, even rate-limited ones.
// Used by LabVIEW to detect truly stale cameras vs rate-limited ones.
std::chrono::steady_clock::time_point m_lastDecoderFrameTime;
bool m_lastDecoderFrameTimeSet = false;
BOOL m_bPlaying;
BOOL m_bPaused;