Files
ANSCORE/MediaClient/media/video_player.cpp
Tuan Nghia Nguyen 9f0a10a4c8 Improve ANSCV with software decoder:
Thread-local staging Mat (video_player.cpp:1400-1407) — single biggest win. Eliminates the 12 MB per-call malloc/free cycle.
Contiguous get_buffer2 allocator (video_decoder.cpp:35-102) — keeps the 3 bulk memcpys cache-friendly. Would also enable FAST/zero-copy for resolutions where visible_h % 64 == 0.
SW-decoder thread config (video_decoder.cpp:528-540) — thread_count=0, thread_type=FRAME|SLICE. FRAME is downgraded to SLICE-only by AV_CODEC_FLAG_LOW_DELAY, but decode throughput is sufficient for your input rate.
SetTargetFPS(100) delivery throttle (already there) — caps onVideoFrame post-decode work at 10 FPS. Keeps the caller path warm-cached.
Instrumentation — [MEDIA_DecInit] / [MEDIA_Convert] / [MEDIA_SWDec] / [MEDIA_Timing] / [MEDIA_JpegTiming] — always-on regression detector, zero cost when ANSCORE_DEBUGVIEW=OFF.
2026-04-20 12:18:43 +10:00

2601 lines
78 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "sys_inc.h"
#include "media_util.h"
#include "media_parse.h"
#include "media_codec.h"
#include "h264.h"
#include "h265.h"
#include "video_player.h"
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavutil/intreadwrite.h>
#include <libavutil/avstring.h>
#include <libavutil/base64.h>
#include <libavutil/imgutils.h>
}
#if __WINDOWS_OS__
#include "audio_play_win.h"
#elif defined(IOS)
#include "video_render_sdl.h"
#include "audio_play_mac.h"
#elif __LINUX_OS__
#include "video_render_sdl.h"
#include "audio_play_qt.h"
#endif
#include <string>
#include <vector>
#include <chrono>
#include <atomic>
#include <libswscale/swscale.h>
#if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64)
#include <emmintrin.h>
#define HAS_SSE2 1
#endif
#include "ANSLicense.h" // ANS_DBG macro (gated by ANSCORE_DEBUGVIEW)
// libyuv: SIMD-accelerated YUV↔RGB conversion with native strided-plane input.
// Replaces the memcpy-into-staging + cv::cvtColor(COLOR_YUV2BGR_I420) chain
// in avframeYUV420PToCvMat with a direct I420→RGB24 (== OpenCV BGR memory
// order) call. When the submodule isn't checked out, ANSCORE_HAS_LIBYUV is
// not defined and we fall back to the pre-libyuv path.
#if defined(ANSCORE_HAS_LIBYUV) && ANSCORE_HAS_LIBYUV
#include "libyuv/convert_argb.h" // libyuv::I420ToRGB24
#endif
// Trampoline registered with the decoder: forwards each decoded video
// frame to the owning CVideoPlayer instance carried in `userdata`.
void VideoDecoderCallback(AVFrame* frame, void* userdata)
{
    auto* player = static_cast<CVideoPlayer*>(userdata);
    player->onVideoFrame(frame);
}
// Trampoline registered with the decoder: forwards each decoded audio
// frame to the owning CVideoPlayer instance carried in `userdata`.
void AudioDecoderCallback(AVFrame* frame, void* userdata)
{
    auto* player = static_cast<CVideoPlayer*>(userdata);
    player->onAudioFrame(frame);
}
// Store the crop bounding box (consumed later by cropFrame) under the player lock.
void CVideoPlayer::setBbox(cv::Rect bbox) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    m_Bbox = bbox;
}
// Enable/disable cropping (consumed later by cropFrame) under the player lock.
void CVideoPlayer::setCrop(bool crop) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    m_bCrop = crop;
}
// Build a zero-copy "view" frame whose plane pointers alias srcFrame's pixel
// data, offset to the (clamped) bounding box. No pixel buffers are allocated
// and no refs are taken, so the returned frame is only valid while srcFrame
// is alive; free it with av_frame_free (releases only the AVFrame shell).
// Returns nullptr when cropping is disabled, playback has stopped, the source
// is null, or the clamped box is degenerate (<= 10 px in either dimension) —
// callers treat nullptr as "use the original frame".
AVFrame* CVideoPlayer::cropFrame(const AVFrame* srcFrame, cv::Rect bBox, bool cropFlag) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
// Validate prerequisites
if (!cropFlag || !srcFrame || !m_bPlaying) {
return nullptr;
}
// Ensure the bounding box is within the source frame's boundaries
// (x/y clamped first, then width/height clamped to what remains).
bBox.x = std::clamp(bBox.x, 0, srcFrame->width);
bBox.y = std::clamp(bBox.y, 0, srcFrame->height);
bBox.width = std::clamp(bBox.width, 0, srcFrame->width - bBox.x);
bBox.height = std::clamp(bBox.height, 0, srcFrame->height - bBox.y);
// Validate the bounding box dimensions
if (bBox.width <= 10 || bBox.height <= 10) {
std::cerr << "Invalid bounding box dimensions for cropping." << std::endl;
return nullptr;
}
// Allocate memory for the cropped frame
AVFrame* croppedFrame = av_frame_alloc();
if (!croppedFrame) {
std::cerr << "Failed to allocate memory for the cropped frame." << std::endl;
return nullptr;
}
// Set cropped frame attributes
croppedFrame->format = srcFrame->format;
croppedFrame->width = bBox.width;
croppedFrame->height = bBox.height;
// *** REMOVED: Don't allocate buffer since we're only setting pointers ***
// The cropFrameData() function will set pointers to the original frame's data
// Crop the frame based on its format
if (!cropFrameData(srcFrame, croppedFrame, bBox)) {
av_frame_free(&croppedFrame);
return nullptr;
}
return croppedFrame;
}
catch (const std::exception& e) {
std::cerr << "Exception in CVideoPlayer::cropFrame: " << e.what() << std::endl;
return nullptr;
}
}
// Redirect croppedFrame's plane pointers into srcFrame according to the
// format's chroma subsampling. Returns false for unsupported pixel formats.
bool CVideoPlayer::cropFrameData(const AVFrame* srcFrame, AVFrame* croppedFrame, const cv::Rect& bBox) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    try {
        const int x = bBox.x;
        const int y = bBox.y;
        switch (srcFrame->format) {
        case AV_PIX_FMT_YUVJ444P:
        case AV_PIX_FMT_YUV444P:
            // 4:4:4 — all three planes are full resolution.
            cropPlane(srcFrame, croppedFrame, 0, x, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 1, x, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 2, x, y, 1, 1);
            return true;
        case AV_PIX_FMT_YUVJ422P:
        case AV_PIX_FMT_YUV422P:
            // 4:2:2 — chroma halved horizontally only.
            cropPlane(srcFrame, croppedFrame, 0, x, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 1, x / 2, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 2, x / 2, y, 1, 1);
            return true;
        case AV_PIX_FMT_YUVJ420P:
        case AV_PIX_FMT_YUV420P:
            // 4:2:0 — chroma halved in both directions.
            cropPlane(srcFrame, croppedFrame, 0, x, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 1, x / 2, y / 2, 1, 1);
            cropPlane(srcFrame, croppedFrame, 2, x / 2, y / 2, 1, 1);
            return true;
        case AV_PIX_FMT_NV12:
            // NV12 — single interleaved UV plane; stepX=2 converts the
            // chroma-sample offset into a byte offset within the pair row.
            cropPlane(srcFrame, croppedFrame, 0, x, y, 1, 1);
            cropPlane(srcFrame, croppedFrame, 1, x / 2, y / 2, 2, 1);
            return true;
        default:
            std::cerr << "Unsupported pixel format: " << av_get_pix_fmt_name((AVPixelFormat)srcFrame->format) << std::endl;
            return false;
        }
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in cropFrameData: " << e.what() << std::endl;
        return false;
    }
}
// Alias one plane of croppedFrame onto the cropped region of srcFrame.
// No pixels are copied; the destination keeps the source stride.
// offsetX is pre-scaled to chroma samples by the caller; subsampleX turns it
// into a byte offset (2 for NV12's interleaved UV pairs). subsampleY is
// currently unused but kept for interface symmetry.
void CVideoPlayer::cropPlane(const AVFrame* srcFrame, AVFrame* croppedFrame, int planeIndex, int offsetX, int offsetY, int subsampleX, int subsampleY) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    try {
        uint8_t* base = srcFrame->data[planeIndex];
        const int stride = srcFrame->linesize[planeIndex];
        croppedFrame->data[planeIndex] = base + offsetY * stride + offsetX * subsampleX;
        croppedFrame->linesize[planeIndex] = stride;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in cropPlane: " << e.what() << std::endl;
    }
}
// Convert an NV12 AVFrame into a newly allocated, owned YUVJ420P frame.
// The Y plane is copied row by row (source/destination strides may differ);
// the interleaved NV12 UV plane is deinterleaved into planar U and V using
// the same SSE2 kernel as encodeNV12ToJPEG_TurboJPEG, with a scalar tail.
// Returns nullptr on invalid input or allocation failure; caller frees the
// result with av_frame_free.
AVFrame* CVideoPlayer::convertNV12ToYUVJ420P(const AVFrame* nv12Frame) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    AVFrame* yuvjFrame = nullptr;
    try {
        if (!nv12Frame || !nv12Frame->data[0] || nv12Frame->width <= 10 || nv12Frame->height <= 10) {
            std::cerr << "Invalid or empty NV12 frame data, or invalid dimensions." << std::endl;
            return nullptr;
        }
        int width = nv12Frame->width;
        int height = nv12Frame->height;
        // Allocate the destination frame shell.
        yuvjFrame = av_frame_alloc();
        if (!yuvjFrame) {
            std::cerr << "Failed to allocate YUVJ420P frame" << std::endl;
            return nullptr;
        }
        yuvjFrame->format = AV_PIX_FMT_YUVJ420P;
        yuvjFrame->width = width;
        yuvjFrame->height = height;
        // YUVJ420P is full-range by definition; tag it so downstream
        // consumers (MJPEG encoder, sws) don't assume limited range.
        yuvjFrame->color_range = AVCOL_RANGE_JPEG;
        // Allocate the pixel buffers (32-byte aligned).
        if (av_frame_get_buffer(yuvjFrame, 32) < 0) {
            std::cerr << "Failed to allocate buffer for YUVJ420P" << std::endl;
            av_frame_free(&yuvjFrame);
            return nullptr;
        }
        // Copy the luma plane row by row (strides may differ between frames).
        for (int j = 0; j < height; ++j) {
            memcpy(yuvjFrame->data[0] + j * yuvjFrame->linesize[0],
                nv12Frame->data[0] + j * nv12Frame->linesize[0], width);
        }
        // Deinterleave the NV12 UV plane into planar U and V.
        uint8_t* nv12_uv = nv12Frame->data[1];
        uint8_t* yuvj_u = yuvjFrame->data[1];
        uint8_t* yuvj_v = yuvjFrame->data[2];
        int uvWidth = width / 2;
        int uvHeight = height / 2;
        for (int j = 0; j < uvHeight; ++j) {
            uint8_t* nv12Row = nv12_uv + j * nv12Frame->linesize[1];
            uint8_t* uRow = yuvj_u + j * yuvjFrame->linesize[1];
            uint8_t* vRow = yuvj_v + j * yuvjFrame->linesize[2];
            int i = 0;
#ifdef HAS_SSE2
            // SSE2 fast path: 16 UV pairs (32 bytes) per iteration — same
            // kernel as the deinterleave in encodeNV12ToJPEG_TurboJPEG.
            for (; i + 15 < uvWidth; i += 16) {
                __m128i uv0 = _mm_loadu_si128((__m128i*)(nv12Row + i * 2));
                __m128i uv1 = _mm_loadu_si128((__m128i*)(nv12Row + i * 2 + 16));
                // Even bytes = U, odd bytes = V.
                __m128i mask = _mm_set1_epi16(0x00FF);
                __m128i u0 = _mm_and_si128(uv0, mask);
                __m128i u1 = _mm_and_si128(uv1, mask);
                __m128i v0 = _mm_srli_epi16(uv0, 8);
                __m128i v1 = _mm_srli_epi16(uv1, 8);
                _mm_storeu_si128((__m128i*)(uRow + i), _mm_packus_epi16(u0, u1));
                _mm_storeu_si128((__m128i*)(vRow + i), _mm_packus_epi16(v0, v1));
            }
#endif
            // Scalar tail (and full path on non-SSE2 builds).
            for (; i < uvWidth; ++i) {
                uRow[i] = nv12Row[i * 2];     // Extract U
                vRow[i] = nv12Row[i * 2 + 1]; // Extract V
            }
        }
        return yuvjFrame;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in convertNV12ToYUVJ420P: " << e.what() << std::endl;
        // Prevent a leak of the partially built frame.
        if (yuvjFrame) {
            av_frame_free(&yuvjFrame);
        }
        return nullptr;
    }
}
std::string CVideoPlayer::avframeYUVJ420PToJpegStringUsingFFMpeg(const AVFrame* pFrame) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
try {
if (!m_bPlaying) {
return "";
}
if (!pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
return "";
}
AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
if (!jpegCodec) {
std::cerr << "Failed to find MJPEG encoder." << std::endl;
return "";
}
AVCodecContext* jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) {
std::cerr << "Failed to allocate codec context." << std::endl;
return "";
}
int imageSize = std::max(pFrame->width, pFrame->height);
AVPixelFormat pixFmt = AV_PIX_FMT_YUVJ420P;// Fix to use YUVJ420P for all resolutions
jpegContext->pix_fmt = pixFmt;
jpegContext->time_base.num = 1;
jpegContext->time_base.den = 30;
jpegContext->compression_level = 10;
jpegContext->flags |= AV_CODEC_FLAG_QSCALE; // Enable quality scale
jpegContext->global_quality = 90 * FF_QP2LAMBDA; // Adjust quality (90 is near lossless)
AVFrame* convertedFrame = nullptr;
AVPacket packet;
av_init_packet(&packet);
packet.data = nullptr;
packet.size = 0;
bool isSuccess = false;
std::string jpegData;
// Determine if conversion is needed based on the pixel format
if ((pFrame->format == AV_PIX_FMT_YUVJ420P) ||
(pFrame->format == AV_PIX_FMT_YUV420P))
{
jpegContext->width = pFrame->width;
jpegContext->height = pFrame->height;
if (avcodec_open2(jpegContext, jpegCodec, NULL) >= 0) {
if (avcodec_send_frame(jpegContext, pFrame) >= 0) {
if (avcodec_receive_packet(jpegContext, &packet) >= 0) {
jpegData.assign(reinterpret_cast<char*>(packet.data), packet.size);
m_Width = pFrame->width;
m_Height = pFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
}
}
}
else {
// Conversion is needed to AV_PIX_FMT_YUVJ420P
initSwsContext(pFrame->width, pFrame->height, static_cast<AVPixelFormat>(pFrame->format));
convertedFrame = av_frame_alloc();
if (convertedFrame) {
convertedFrame->format = pixFmt;
convertedFrame->width = pFrame->width;
convertedFrame->height = pFrame->height;
convertedFrame->color_range = AVCOL_RANGE_JPEG;
if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
convertedFrame->data, convertedFrame->linesize);
jpegContext->width = convertedFrame->width;
jpegContext->height = convertedFrame->height;
if (avcodec_open2(jpegContext, jpegCodec, NULL) >= 0) {
if (avcodec_send_frame(jpegContext, convertedFrame) >= 0) {
if (avcodec_receive_packet(jpegContext, &packet) >= 0) {
// Successfully encoded to JPEG
jpegData.assign(reinterpret_cast<char*>(packet.data), packet.size);
m_Width = convertedFrame->width;
m_Height = convertedFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
}
}
}
}
av_frame_free(&convertedFrame); // Free the converted frame if allocated
}
// Cleanup
av_packet_unref(&packet); // Free the packet data
avcodec_free_context(&jpegContext); // Free the codec context
// Return the JPEG data as a string if successful, otherwise an empty string
return isSuccess ? jpegData : "";
}
catch (const std::exception& e) {
std::cerr << "Exception in avframeToJpegString: " << e.what() << std::endl;
return ""; // Return empty string on error
}
}
// Compress a planar YUV(J)420P frame to JPEG via TurboJPEG (quality 85,
// fast DCT). Uses a thread-local output buffer to avoid per-call malloc.
// Returns the JPEG bytes, or "" on invalid input / compression failure.
std::string CVideoPlayer::avframeYUVJ420PToJpegStringUsingTurboJPEG(const AVFrame* pFrame) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!m_bPlaying || !pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            return "";
        }
        // Ensure TurboJPEG instance is valid.
        if (!_tjInstance) {
            return "";
        }
        unsigned char* yuvPlanes[3] = { pFrame->data[0], pFrame->data[1], pFrame->data[2] };
        int strides[3] = { pFrame->linesize[0], pFrame->linesize[1], pFrame->linesize[2] };
        int width = pFrame->width;
        int height = pFrame->height;
        constexpr int subsampling = TJSAMP_420;
        constexpr int quality = 85;
        // Thread-local output buffer, grown on demand, to avoid malloc/free
        // overhead on every frame. (The previously declared yuvBuffer was
        // never used and has been removed.)
        static thread_local std::vector<unsigned char> jpegBuffer;
        unsigned long jpegBufferSize = tjBufSize(width, height, subsampling);
        if (jpegBuffer.size() < jpegBufferSize) {
            jpegBuffer.resize(jpegBufferSize);
        }
        unsigned char* jpegDataPtr = jpegBuffer.data();
        // BUGFIX: TurboJPEG only reuses a caller-supplied buffer when *jpegSize
        // carries its capacity on entry; passing 0 made it allocate a fresh
        // buffer on every call, defeating the thread-local optimization.
        unsigned long jpegSize = jpegBufferSize;
        // Convert YUV planes to JPEG using TurboJPEG.
        int ret = tjCompressFromYUVPlanes(
            _tjInstance,
            (const unsigned char**)yuvPlanes,
            width,
            strides,
            height,
            subsampling,
            &jpegDataPtr, // Using preallocated buffer
            &jpegSize,
            quality,
            TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE
        );
        if (ret < 0) {
            return "";
        }
        // If TurboJPEG still reallocated (shouldn't happen with the capacity
        // set, but defensively), copy out and free its buffer.
        if (jpegDataPtr != jpegBuffer.data()) {
            std::string jpegString(reinterpret_cast<char*>(jpegDataPtr), jpegSize);
            tjFree(jpegDataPtr);
            return jpegString;
        }
        // Convert to std::string (single copy out of the reused buffer).
        return std::string(reinterpret_cast<char*>(jpegDataPtr), jpegSize);
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeToJpegString: " << e.what() << std::endl;
        return ""; // Return empty string on error
    }
}
// Encode a YUVJ420P frame to JPEG with FFmpeg's MJPEG encoder using optimal
// Huffman tables. `quality` maps to the encoder's qscale (higher = better).
// Returns the JPEG bytes, or "" on any failure.
std::string CVideoPlayer::encodeYUVJ420PToJPEG(AVFrame* frame, int quality) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!frame || frame->format != AV_PIX_FMT_YUVJ420P) {
            std::cerr << "Invalid frame format (must be YUVJ420P)" << std::endl;
            return "";
        }
        // Find MJPEG encoder.
        AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
        if (!codec) {
            std::cerr << "JPEG encoder not found" << std::endl;
            return "";
        }
        // Allocate codec context.
        AVCodecContext* codecCtx = avcodec_alloc_context3(codec);
        if (!codecCtx) {
            std::cerr << "Failed to allocate codec context" << std::endl;
            return "";
        }
        // Set encoding parameters.
        codecCtx->pix_fmt = AV_PIX_FMT_YUVJ420P; // Use full-range YUV for better quality
        codecCtx->width = frame->width;
        codecCtx->height = frame->height;
        codecCtx->time_base.num = 1;
        codecCtx->time_base.den = 30;
        codecCtx->gop_size = 1;
        codecCtx->max_b_frames = 0;
        codecCtx->compression_level = 10; // Increase quality
        codecCtx->flags |= AV_CODEC_FLAG_QSCALE; // Enable quality scale
        codecCtx->global_quality = quality * FF_QP2LAMBDA; // Adjust quality (90 is near lossless)
        // Enable optimal Huffman tables.
        AVDictionary* opts = nullptr;
        av_dict_set(&opts, "huffman", "optimal", 0);
        // Open codec.
        if (avcodec_open2(codecCtx, codec, &opts) < 0) {
            std::cerr << "Failed to open JPEG encoder" << std::endl;
            av_dict_free(&opts); // BUGFIX: opts was leaked on this path
            avcodec_free_context(&codecCtx);
            return "";
        }
        // BUGFIX: free the dictionary right after open — avcodec_open2 leaves
        // unconsumed entries in it, and the original code leaked it on every
        // send/receive error return below.
        av_dict_free(&opts);
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = nullptr;
        pkt.size = 0;
        // Send frame to encoder.
        if (avcodec_send_frame(codecCtx, frame) < 0) {
            std::cerr << "Failed to send frame for encoding" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }
        // Receive encoded packet.
        if (avcodec_receive_packet(codecCtx, &pkt) < 0) {
            std::cerr << "Failed to receive encoded packet" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }
        // Convert to string and clean up.
        std::string jpegString(reinterpret_cast<char*>(pkt.data), pkt.size);
        av_packet_unref(&pkt);
        avcodec_free_context(&codecCtx);
        return jpegString;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in encodeYUVJ420PToJPEG: " << e.what() << std::endl;
        return ""; // Return empty string on error
    }
}
// Top-level "frame → JPEG" path for planar YUV pipelines.
// Normalizes the input (NV12 → YUVJ420P, other formats → YUVJ420P via sws),
// optionally crops to m_Bbox, then compresses with TurboJPEG. On success
// updates m_Width/m_Height and bumps m_pts. Returns "" on any failure.
// Ownership notes: croppedFrame may alias pFrame/convertedFrame (cropFrame
// returns nullptr on failure and we then reuse the source), so the frees
// below are guarded by pointer-identity checks — preserve their order.
std::string CVideoPlayer::avframeYUVJ420PToJpegString(const AVFrame* spFrame) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
AVFrame* croppedFrame = nullptr;
AVFrame* convertedFrame = nullptr;
AVFrame* convertedNV12Frame = nullptr;
AVFrame* pFrame = const_cast<AVFrame*>(spFrame); // Default to original frame
bool isSuccess = false;
std::string jpegData;
try {
if (!m_bPlaying) {
return "";
}
if (!spFrame || !spFrame->data[0] || spFrame->width <= 10 || spFrame->height <= 10) {
std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
return "";
}
// Convert NV12 to YUVJ420P if needed; on conversion failure pFrame stays
// NV12 and falls through to the sws conversion branch below.
if (pFrame->format == AV_PIX_FMT_NV12) {
convertedNV12Frame = convertNV12ToYUVJ420P(spFrame);
if (convertedNV12Frame) {
pFrame = convertedNV12Frame; // Use the converted frame
}
}
// Process the frame if it's already in YUVJ420P or YUV420P.
if ((pFrame->format == AV_PIX_FMT_YUVJ420P) || (pFrame->format == AV_PIX_FMT_YUV420P)) {
croppedFrame = cropFrame(pFrame, m_Bbox, m_bCrop);
if (!croppedFrame) {
croppedFrame = pFrame; // Use original frame if cropping failed
}
// TurboJPEG handles all resolutions efficiently
jpegData = avframeYUVJ420PToJpegStringUsingTurboJPEG(croppedFrame);
if (!jpegData.empty()) {
m_Width = croppedFrame->width;
m_Height = croppedFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
// Free cropped frame only when it is a distinct crop view (not an alias).
if (croppedFrame != pFrame) {
av_frame_free(&croppedFrame);
croppedFrame = nullptr;
}
}
else { // Convert non-YUVJ420P frames via the shared SwsContext.
initSwsContext(pFrame->width, pFrame->height, static_cast<AVPixelFormat>(pFrame->format));
convertedFrame = av_frame_alloc();
if (convertedFrame) {
convertedFrame->format = AV_PIX_FMT_YUVJ420P;
convertedFrame->width = pFrame->width;
convertedFrame->height = pFrame->height;
convertedFrame->color_range = AVCOL_RANGE_JPEG;
if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
convertedFrame->data, convertedFrame->linesize);
croppedFrame = cropFrame(convertedFrame, m_Bbox, m_bCrop);
if (!croppedFrame) {
croppedFrame = convertedFrame; // Use converted frame if cropping failed
}
// TurboJPEG handles all resolutions efficiently
jpegData = avframeYUVJ420PToJpegStringUsingTurboJPEG(croppedFrame);
if (!jpegData.empty()) {
m_Width = croppedFrame->width;
m_Height = croppedFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
// Free cropped frame only when distinct from the converted frame.
if (croppedFrame != convertedFrame) {
av_frame_free(&croppedFrame);
croppedFrame = nullptr;
}
}
}
// Free converted frame if allocated.
if (convertedFrame) {
av_frame_free(&convertedFrame);
convertedFrame = nullptr;
}
}
// Free the NV12 converted frame if used (pFrame may alias it — freed last).
if (convertedNV12Frame) {
av_frame_free(&convertedNV12Frame);
convertedNV12Frame = nullptr;
}
return isSuccess ? jpegData : "";
}
catch (const std::exception& e) {
std::cerr << "Exception in avframeYUVJ420PToJpegString: " << e.what() << std::endl;
// Ensure all allocated frames are freed in case of an exception
// (identity checks avoid double-freeing aliased pointers).
if (croppedFrame && croppedFrame != pFrame && croppedFrame != convertedFrame) {
av_frame_free(&croppedFrame);
}
if (convertedFrame) {
av_frame_free(&convertedFrame);
}
if (convertedNV12Frame) {
av_frame_free(&convertedNV12Frame);
}
return "";
}
}
// Direct NV12 → JPEG using TurboJPEG.
// The interleaved NV12 UV plane is deinterleaved (SSE2 fast path + scalar
// tail) into thread-local planar U/V buffers, then compressed with
// tjCompressFromYUVPlanes. Returns the JPEG bytes, or "" on error.
std::string CVideoPlayer::encodeNV12ToJPEG_TurboJPEG(const AVFrame* pFrame, int quality) {
    // NOTE: caller (avframeToJpegString) already holds _mutex — no lock needed here
    try {
        if (!m_bPlaying || !pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            return "";
        }
        // Ensure TurboJPEG instance is valid.
        if (!_tjInstance) {
            std::cerr << "TurboJPEG instance is not initialized." << std::endl;
            return "";
        }
        // Ensure the frame format is NV12.
        if (pFrame->format != AV_PIX_FMT_NV12) {
            std::cerr << "Unsupported format! Expected NV12, got: "
                << av_get_pix_fmt_name((AVPixelFormat)pFrame->format) << std::endl;
            return "";
        }
        int width = pFrame->width;
        int height = pFrame->height;
        // NV12 has interleaved UV, but TurboJPEG requires separate U and V planes.
        unsigned char* yuvPlanes[3];
        int strides[3];
        yuvPlanes[0] = pFrame->data[0]; // Y plane (full resolution)
        strides[0] = pFrame->linesize[0];
        int uvWidth = width / 2;
        int uvHeight = height / 2;
        int uvSize = uvWidth * uvHeight;
        // BUGFIX: these buffers were previously constructed as vector(uvSize),
        // which sizes them ONLY on the first call per thread — a later, larger
        // frame then wrote past the end (heap overflow). Grow on demand.
        static thread_local std::vector<unsigned char> uPlane;
        static thread_local std::vector<unsigned char> vPlane;
        if (uPlane.size() < static_cast<size_t>(uvSize)) uPlane.resize(uvSize);
        if (vPlane.size() < static_cast<size_t>(uvSize)) vPlane.resize(uvSize);
        // Deinterleave NV12 UV plane into separate U and V planes.
        unsigned char* uvData = pFrame->data[1];
        int uvStride = pFrame->linesize[1];
        for (int j = 0; j < uvHeight; j++) {
            const unsigned char* uvRow = uvData + j * uvStride;
            unsigned char* uRow = uPlane.data() + j * uvWidth;
            unsigned char* vRow = vPlane.data() + j * uvWidth;
            int i = 0;
#ifdef HAS_SSE2
            // SSE2: process 16 UV pairs (32 bytes) at a time.
            for (; i + 15 < uvWidth; i += 16) {
                __m128i uv0 = _mm_loadu_si128((__m128i*)(uvRow + i * 2));
                __m128i uv1 = _mm_loadu_si128((__m128i*)(uvRow + i * 2 + 16));
                // Deinterleave: even bytes = U, odd bytes = V.
                __m128i mask = _mm_set1_epi16(0x00FF);
                __m128i u0 = _mm_and_si128(uv0, mask);
                __m128i u1 = _mm_and_si128(uv1, mask);
                __m128i v0 = _mm_srli_epi16(uv0, 8);
                __m128i v1 = _mm_srli_epi16(uv1, 8);
                __m128i uPacked = _mm_packus_epi16(u0, u1);
                __m128i vPacked = _mm_packus_epi16(v0, v1);
                _mm_storeu_si128((__m128i*)(uRow + i), uPacked);
                _mm_storeu_si128((__m128i*)(vRow + i), vPacked);
            }
#endif
            // Scalar fallback for remaining pixels.
            for (; i < uvWidth; i++) {
                uRow[i] = uvRow[i * 2];
                vRow[i] = uvRow[i * 2 + 1];
            }
        }
        // Assign separate planes to TurboJPEG input.
        yuvPlanes[1] = uPlane.data();
        yuvPlanes[2] = vPlane.data();
        strides[1] = uvWidth;
        strides[2] = uvWidth;
        // Thread-local JPEG output buffer to avoid malloc/free overhead.
        static thread_local std::vector<unsigned char> jpegBuffer;
        unsigned long jpegBufferSize = tjBufSize(width, height, TJSAMP_420);
        if (jpegBuffer.size() < jpegBufferSize) {
            jpegBuffer.resize(jpegBufferSize);
        }
        unsigned char* jpegDataPtr = jpegBuffer.data();
        // BUGFIX: *jpegSize must carry the capacity of a caller-supplied
        // buffer on entry; passing 0 made TurboJPEG reallocate every call.
        unsigned long jpegSize = jpegBufferSize;
        // Convert NV12 (separated into YUV420P planes) to JPEG using TurboJPEG.
        int ret = tjCompressFromYUVPlanes(
            _tjInstance,
            (const unsigned char**)yuvPlanes,
            width,
            strides,
            height,
            TJSAMP_420, // Explicitly define subsampling format for NV12
            &jpegDataPtr, // Preallocated buffer
            &jpegSize,
            quality,
            TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE
        );
        if (ret < 0) {
            std::cerr << "TurboJPEG compression failed: " << tjGetErrorStr() << std::endl;
            return "";
        }
        // If TurboJPEG allocated a new buffer, free it after copying.
        if (jpegDataPtr != jpegBuffer.data()) {
            std::string jpegString(reinterpret_cast<char*>(jpegDataPtr), jpegSize);
            tjFree(jpegDataPtr);
            return jpegString;
        }
        // Convert to std::string without extra allocations.
        return std::string(reinterpret_cast<char*>(jpegDataPtr), jpegSize);
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeNV12ToJpegStringUsingTurboJPEG: " << e.what() << std::endl;
        return ""; // Return empty string on error
    }
}
// NV12 → JPEG via FFmpeg's MJPEG encoder: deinterleaves NV12 into a fresh
// YUVJ420P frame, then encodes one packet. Returns JPEG bytes or "".
std::string CVideoPlayer::encodeNV12ToJPEG_FFmpeg(const AVFrame* nv12Frame, int quality) {
    // NOTE: caller (avframeToJpegString) already holds _mutex — no lock needed here
    AVCodecContext* codecCtx = nullptr;
    AVFrame* yuvjFrame = nullptr;
    AVPacket pkt;
    // BUGFIX: initialize the packet up front — the catch-path av_packet_unref
    // below previously ran on an uninitialized AVPacket (undefined behavior)
    // when an exception fired before the mid-function av_init_packet.
    av_init_packet(&pkt);
    pkt.data = nullptr;
    pkt.size = 0;
    try {
        if (!m_bPlaying || !nv12Frame || !nv12Frame->data[0] || nv12Frame->width <= 10 || nv12Frame->height <= 10) {
            return "";
        }
        if (nv12Frame->format != AV_PIX_FMT_NV12) {
            std::cerr << "Invalid frame format! Expected NV12." << std::endl;
            return "";
        }
        int width = nv12Frame->width;
        int height = nv12Frame->height;
        // Find and allocate MJPEG encoder.
        AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
        if (!jpegCodec) {
            std::cerr << "MJPEG encoder not found!" << std::endl;
            return "";
        }
        codecCtx = avcodec_alloc_context3(jpegCodec);
        if (!codecCtx) {
            std::cerr << "Failed to allocate codec context!" << std::endl;
            return "";
        }
        // Set encoding parameters.
        codecCtx->pix_fmt = AV_PIX_FMT_YUVJ420P;
        codecCtx->width = width;
        codecCtx->height = height;
        codecCtx->time_base = { 1, 25 };
        codecCtx->gop_size = 1;
        codecCtx->max_b_frames = 0;
        codecCtx->compression_level = 10;
        codecCtx->flags |= AV_CODEC_FLAG_QSCALE;
        codecCtx->global_quality = quality * FF_QP2LAMBDA;
        if (avcodec_open2(codecCtx, jpegCodec, nullptr) < 0) {
            std::cerr << "Failed to open MJPEG encoder!" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }
        // Allocate the intermediate YUVJ420P frame.
        yuvjFrame = av_frame_alloc();
        if (!yuvjFrame) {
            std::cerr << "Failed to allocate YUVJ420P frame!" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }
        yuvjFrame->format = AV_PIX_FMT_YUVJ420P;
        yuvjFrame->width = width;
        yuvjFrame->height = height;
        if (av_frame_get_buffer(yuvjFrame, 32) < 0) {
            std::cerr << "Failed to allocate buffer for YUVJ420P frame!" << std::endl;
            av_frame_free(&yuvjFrame);
            avcodec_free_context(&codecCtx);
            return "";
        }
        // Copy Y plane row by row (strides may differ between frames).
        for (int j = 0; j < height; ++j) {
            memcpy(yuvjFrame->data[0] + j * yuvjFrame->linesize[0],
                nv12Frame->data[0] + j * nv12Frame->linesize[0], width);
        }
        // Deinterleave the NV12 UV plane into planar U and V.
        uint8_t* nv12_uv = nv12Frame->data[1];
        uint8_t* yuvj_u = yuvjFrame->data[1];
        uint8_t* yuvj_v = yuvjFrame->data[2];
        int uvWidth = width / 2;
        int uvHeight = height / 2;
        for (int j = 0; j < uvHeight; ++j) {
            uint8_t* nv12Row = nv12_uv + j * nv12Frame->linesize[1];
            uint8_t* uRow = yuvj_u + j * yuvjFrame->linesize[1];
            uint8_t* vRow = yuvj_v + j * yuvjFrame->linesize[2];
            for (int i = 0; i < uvWidth; ++i) {
                uRow[i] = nv12Row[i * 2]; // Extract U
                vRow[i] = nv12Row[i * 2 + 1]; // Extract V
            }
        }
        // Encode frame to JPEG (pkt already initialized above).
        bool isSuccess = false;
        std::string jpegData;
        if (avcodec_send_frame(codecCtx, yuvjFrame) >= 0) {
            if (avcodec_receive_packet(codecCtx, &pkt) >= 0) {
                jpegData.assign(reinterpret_cast<char*>(pkt.data), pkt.size);
                isSuccess = true;
            }
        }
        // Cleanup.
        av_packet_unref(&pkt);
        av_frame_free(&yuvjFrame);
        avcodec_free_context(&codecCtx);
        return isSuccess ? jpegData : "";
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in encodeNV12ToJPEG_FFmpeg: " << e.what() << std::endl;
    }
    // Ensure memory cleanup in case of exceptions (pkt is always initialized).
    if (yuvjFrame) av_frame_free(&yuvjFrame);
    if (codecCtx) avcodec_free_context(&codecCtx);
    av_packet_unref(&pkt);
    return ""; // Return empty string on error
}
// Top-level "frame → JPEG" path for the NV12 pipeline.
// NV12 frames are (optionally) cropped and compressed directly with
// TurboJPEG; every other format is first converted to NV12 through the
// shared SwsContext. On success updates m_Width/m_Height and bumps m_pts.
// Returns "" on any failure.
// Ownership notes: croppedFrame may alias pFrame/convertedFrame (cropFrame
// returns nullptr on failure and we then reuse the source), so the frees
// below are guarded by pointer-identity checks — preserve their order.
std::string CVideoPlayer::avframeToJpegString(const AVFrame* spFrame) {
std::lock_guard<std::recursive_mutex> lock(_mutex);
AVFrame* croppedFrame = nullptr;
AVFrame* convertedFrame = nullptr;
AVFrame* pFrame = const_cast<AVFrame*>(spFrame);
bool isSuccess = false;
std::string jpegData;
try {
if (!m_bPlaying) {
return "";
}
if (!spFrame || !spFrame->data[0] || spFrame->width <= 10 || spFrame->height <= 10) {
std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
return "";
}
// Process NV12 frames directly (TurboJPEG path; quality uses the
// encodeNV12ToJPEG_TurboJPEG default).
if (pFrame->format == AV_PIX_FMT_NV12) {
croppedFrame = cropFrame(pFrame, m_Bbox, m_bCrop);
if (!croppedFrame) {
croppedFrame = pFrame; // Use original frame if cropping failed
}
// TurboJPEG handles all resolutions — no need for slow FFmpeg MJPEG path
jpegData = encodeNV12ToJPEG_TurboJPEG(croppedFrame);
if (!jpegData.empty()) {
m_Width = croppedFrame->width;
m_Height = croppedFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
// Free cropped frame only when it is a distinct crop view (not an alias).
if (croppedFrame != pFrame) {
av_frame_free(&croppedFrame);
croppedFrame = nullptr;
}
}
else { // Convert other formats to NV12 before processing.
initSwsContext(pFrame->width, pFrame->height, static_cast<AVPixelFormat>(pFrame->format), AV_PIX_FMT_NV12);
convertedFrame = av_frame_alloc();
if (convertedFrame) {
convertedFrame->format = AV_PIX_FMT_NV12;
convertedFrame->width = pFrame->width;
convertedFrame->height = pFrame->height;
convertedFrame->color_range = AVCOL_RANGE_JPEG;
if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
convertedFrame->data, convertedFrame->linesize);
croppedFrame = cropFrame(convertedFrame, m_Bbox, m_bCrop);
if (!croppedFrame) {
croppedFrame = convertedFrame; // Use converted frame if cropping failed
}
// TurboJPEG handles all resolutions
jpegData = encodeNV12ToJPEG_TurboJPEG(croppedFrame);
if (!jpegData.empty()) {
m_Width = croppedFrame->width;
m_Height = croppedFrame->height;
m_pts = m_pts + 1;
isSuccess = true;
}
// Free cropped frame only when distinct from the converted frame.
if (croppedFrame != convertedFrame) {
av_frame_free(&croppedFrame);
croppedFrame = nullptr;
}
}
}
// Free converted frame if allocated.
if (convertedFrame) {
av_frame_free(&convertedFrame);
convertedFrame = nullptr;
}
}
return isSuccess ? jpegData : "";
}
catch (const std::exception& e) {
std::cerr << "Exception in avframeToJpegString: " << e.what() << std::endl;
// Cleanup memory in case of exceptions
// (identity checks avoid double-freeing aliased pointers).
if (croppedFrame && croppedFrame != pFrame && croppedFrame != convertedFrame) {
av_frame_free(&croppedFrame);
}
if (convertedFrame) {
av_frame_free(&convertedFrame);
}
return "";
}
}
// Stride-aware pixel comparison of two frames.
// Returns false on null input or mismatched geometry/format. Compares the
// luma plane always; chroma is compared for NV12, YUVJ420P and YUV420P.
// For other formats only the Y plane is checked (legacy behavior).
bool CVideoPlayer::areFramesIdentical(AVFrame* frame1, AVFrame* frame2) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!frame1 || !frame2) return false;
        // Ensure the frames have the same width, height, and format.
        if (frame1->width != frame2->width || frame1->height != frame2->height || frame1->format != frame2->format) {
            return false;
        }
        int height = frame1->height;
        int width = frame1->width;
        // Compare Y plane (Luma), row by row to respect each frame's stride.
        for (int y = 0; y < height; y++) {
            if (std::memcmp(frame1->data[0] + y * frame1->linesize[0],
                frame2->data[0] + y * frame2->linesize[0],
                width) != 0) {
                return false;
            }
        }
        if (frame1->format == AV_PIX_FMT_NV12) {
            // Interleaved UV plane: each row holds `width` bytes (width/2 UV pairs).
            int chromaHeight = height / 2;
            for (int y = 0; y < chromaHeight; y++) {
                if (std::memcmp(frame1->data[1] + y * frame1->linesize[1],
                    frame2->data[1] + y * frame2->linesize[1],
                    width) != 0) {
                    return false;
                }
            }
        }
        // BUGFIX: YUV420P shares YUVJ420P's plane layout but previously fell
        // through to `return true` with only the Y plane compared, so frames
        // differing only in chroma were falsely reported as identical.
        else if (frame1->format == AV_PIX_FMT_YUVJ420P || frame1->format == AV_PIX_FMT_YUV420P) {
            // Compare U and V planes separately.
            int chromaWidth = width / 2;
            int chromaHeight = height / 2;
            for (int y = 0; y < chromaHeight; y++) {
                if (std::memcmp(frame1->data[1] + y * frame1->linesize[1], // U
                    frame2->data[1] + y * frame2->linesize[1],
                    chromaWidth) != 0) {
                    return false;
                }
                if (std::memcmp(frame1->data[2] + y * frame1->linesize[2], // V
                    frame2->data[2] + y * frame2->linesize[2],
                    chromaWidth) != 0) {
                    return false;
                }
            }
        }
        return true; // All compared planes match.
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in areFramesIdentical: " << e.what() << std::endl;
        return false;
    }
}
// Lazily (re)create the shared SwsContext for a 1:1 pixel-format conversion.
// The context is cached and rebuilt only when the geometry or either pixel
// format changes since the previous call. On failure swsCtx is left null and
// the cached parameters are not updated.
void CVideoPlayer::initSwsContext(int width, int height, AVPixelFormat pixFmt, AVPixelFormat outputPixFmt) {
    std::lock_guard<std::recursive_mutex> guard(_mutex);
    try {
        // Reject degenerate requests up front.
        if (width <= 0 || height <= 0 || pixFmt == AV_PIX_FMT_NONE) {
            std::cerr << "Invalid parameters: width=" << width
                << ", height=" << height
                << ", pixFmt=" << pixFmt << std::endl;
            return;
        }
        // Cache hit: nothing changed since the last successful init.
        const bool upToDate = (swsCtx != nullptr) &&
            width == lastWidth && height == lastHeight &&
            pixFmt == lastPixFmt && outputPixFmt == lastOutPixFmt;
        if (upToDate) {
            return;
        }
        // Drop the stale context before building a new one.
        if (swsCtx) {
            sws_freeContext(swsCtx);
            swsCtx = nullptr;
        }
        // Bilinear: fast scaling — LANCZOS is too slow for real-time.
        swsCtx = sws_getContext(width, height, pixFmt,
            width, height, outputPixFmt,
            SWS_BILINEAR,
            nullptr, nullptr, nullptr);
        if (!swsCtx) {
            std::cerr << "Failed to create SwsContext: width=" << width
                << ", height=" << height
                << ", inputPixFmt=" << pixFmt
                << ", outputPixFmt=" << outputPixFmt << std::endl;
            return;
        }
        // Remember the parameters of the context we just built.
        lastWidth = width;
        lastHeight = height;
        lastPixFmt = pixFmt;
        lastOutPixFmt = outputPixFmt;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in initSwsContext: " << e.what() << std::endl;
    }
    catch (...) {
        std::cerr << "Unknown exception in initSwsContext." << std::endl;
    }
}
// Convert an arbitrary-format AVFrame to a BGR24 cv::Mat via the cached
// SwsContext. Used as the generic fallback for pixel formats that have no
// dedicated fast path.
// @param frame decoded AVFrame (any sws-supported format); must be >10x10
// @return BGR cv::Mat, or an empty Mat on any failure
cv::Mat CVideoPlayer::avframeAnyToCvmat(const AVFrame* frame) {
    std::lock_guard<std::recursive_mutex> lock(_mutex); // Protect swsCtx + last* cache
    try {
        if (!frame || !frame->data[0] || frame->width <= 10 || frame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return cv::Mat();
        }
        // (Re)build the cached context for this geometry/format pair.
        initSwsContext(frame->width, frame->height, static_cast<AVPixelFormat>(frame->format), AV_PIX_FMT_BGR24);
        // BUGFIX: initSwsContext can fail and leave swsCtx == nullptr
        // (e.g. unsupported format). Previously sws_scale was called anyway,
        // dereferencing a null context.
        if (!swsCtx) {
            std::cerr << "SwsContext unavailable; cannot convert frame." << std::endl;
            return cv::Mat();
        }
        // Destination: packed BGR, single plane, OpenCV-owned storage.
        cv::Mat image(frame->height, frame->width, CV_8UC3);
        uint8_t* dst[1] = { image.data };
        int dstStride[1] = { static_cast<int>(image.step[0]) }; // OpenCV's stride
        int result = sws_scale(swsCtx, frame->data, frame->linesize, 0, frame->height, dst, dstStride);
        if (result < 0) {
            std::cerr << "Failed to scale the frame." << std::endl;
            return cv::Mat();
        }
        return image;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeToCvmat: " << e.what() << std::endl;
        return cv::Mat();
    }
}
// Convert a full-range (JPEG) YUV420P AVFrame to a BGR cv::Mat with a
// scalar per-pixel YUV→RGB transform (BT.601 full-range coefficients).
// @param frame decoded YUVJ420P frame; must be >10x10 with valid planes
// @return BGR cv::Mat, or an empty Mat on failure
cv::Mat CVideoPlayer::avframeYUVJ420PToCvmat(const AVFrame* frame) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        if (!frame || !frame->data[0] || frame->width <= 10 || frame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return cv::Mat();
        }
        // Output: 8-bit, 3 channels (OpenCV's default BGR ordering).
        cv::Mat image(frame->height, frame->width, CV_8UC3);
        // Plane pointers and strides from the AVFrame.
        uint8_t* yPlane = frame->data[0]; // Y plane (luminance)
        uint8_t* uPlane = frame->data[1]; // U plane (chrominance)
        uint8_t* vPlane = frame->data[2]; // V plane (chrominance)
        int yStride = frame->linesize[0];
        int uStride = frame->linesize[1];
        int vStride = frame->linesize[2];
        // Walk row by row; U/V are subsampled 2x2, so chroma rows advance
        // every other luma row. Row pointers replace the previous per-pixel
        // Mat::at<> access (bounds-checked in debug builds, slower), and the
        // unused uvWidth/uvHeight locals are gone.
        for (int y = 0; y < frame->height; ++y) {
            const uint8_t* yRow = yPlane + y * yStride;
            const uint8_t* uRow = uPlane + (y / 2) * uStride;
            const uint8_t* vRow = vPlane + (y / 2) * vStride;
            cv::Vec3b* outRow = image.ptr<cv::Vec3b>(y);
            for (int x = 0; x < frame->width; ++x) {
                int yVal = yRow[x];
                int uVal = uRow[x / 2];
                int vVal = vRow[x / 2];
                // Center chroma around zero.
                int uDiff = uVal - 128;
                int vDiff = vVal - 128;
                // Same truncating double arithmetic as before — results are
                // bit-identical to the original per-pixel conversion.
                int r = yVal + (1.402 * vDiff);
                int g = yVal - (0.344136 * uDiff) - (0.714136 * vDiff);
                int b = yVal + (1.772 * uDiff);
                // Clamp to the valid 8-bit range.
                r = std::clamp(r, 0, 255);
                g = std::clamp(g, 0, 255);
                b = std::clamp(b, 0, 255);
                outRow[x] = cv::Vec3b(b, g, r); // OpenCV uses BGR by default
            }
        }
        return image;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeToCvmatYUVJ420P: " << e.what() << std::endl;
        return cv::Mat();
    }
}
// Initialize a dedicated SwsContext for NV12→BGR with correct color space.
// Picks BT.601/709/2020 coefficients from frame metadata (falling back to a
// resolution heuristic) and configures limited/full range conversion.
void CVideoPlayer::initNV12SwsContext(const AVFrame* frame) {
    int width = frame->width;
    int height = frame->height;
    // Detect color space from frame metadata (BT.709 for HD/4K, BT.601 for SD)
    int colorspace = SWS_CS_ITU709; // Default to BT.709 for HD/4K
    if (frame->colorspace == AVCOL_SPC_BT470BG || frame->colorspace == AVCOL_SPC_SMPTE170M) {
        colorspace = SWS_CS_ITU601;
    }
    else if (frame->colorspace == AVCOL_SPC_BT2020_NCL || frame->colorspace == AVCOL_SPC_BT2020_CL) {
        colorspace = SWS_CS_BT2020;
    }
    else if (frame->colorspace == AVCOL_SPC_BT709) {
        colorspace = SWS_CS_ITU709;
    }
    else if (width >= 1280 || height >= 720) {
        // Auto-detect: HD and above → BT.709 (most common for IP cameras)
        colorspace = SWS_CS_ITU709;
    }
    else {
        colorspace = SWS_CS_ITU601; // SD content
    }
    // Detect color range: limited (16-235) vs full (0-255)
    int srcRange = (frame->color_range == AVCOL_RANGE_JPEG) ? 1 : 0; // 0=limited, 1=full
    int dstRange = 1; // Output always full range (0-255) for display/AI processing
    // Check if reinit needed
    if (m_nv12SwsCtx && width == m_nv12LastWidth && height == m_nv12LastHeight
        && colorspace == m_nv12LastColorspace && srcRange == m_nv12LastRange) {
        return; // Already configured
    }
    // Free old context
    if (m_nv12SwsCtx) {
        sws_freeContext(m_nv12SwsCtx);
        m_nv12SwsCtx = nullptr;
    }
    // Create context: NV12 → BGR24, same dimensions (no scaling)
    // SWS_BILINEAR + SWS_FULL_CHR_H_INT: good quality chroma upsampling (~12ms for 4K)
    // SWS_ACCURATE_RND: better rounding for color precision
    // Note: SWS_LANCZOS gives VLC-matching quality but costs 50-80ms — too slow.
    // VLC achieves its quality via GPU shaders, not CPU processing.
    m_nv12SwsCtx = sws_getContext(width, height, AV_PIX_FMT_NV12,
        width, height, AV_PIX_FMT_BGR24,
        SWS_BILINEAR | SWS_ACCURATE_RND | SWS_FULL_CHR_H_INT,
        nullptr, nullptr, nullptr);
    if (!m_nv12SwsCtx) {
        std::cerr << "Failed to create NV12 SwsContext" << std::endl;
        return;
    }
    // Configure correct color space and range.
    const int* coefficients = sws_getCoefficients(colorspace);
    int* inv_table = nullptr; int* table = nullptr;
    int curSrcRange = 0, curDstRange = 0;
    // BUGFIX: brightness/contrast/saturation were previously left
    // uninitialized when sws_getColorspaceDetails failed (it returns a
    // negative value for non-YUV contexts), and the garbage values were then
    // fed into sws_setColorspaceDetails. Seed neutral defaults (0 brightness,
    // 1.0 contrast/saturation in 16.16 fixed point) and only apply the
    // override when the query succeeded.
    int brightness = 0, contrast = 1 << 16, saturation = 1 << 16;
    if (sws_getColorspaceDetails(m_nv12SwsCtx, &inv_table, &curSrcRange, &table, &curDstRange,
        &brightness, &contrast, &saturation) >= 0) {
        sws_setColorspaceDetails(m_nv12SwsCtx, coefficients, srcRange, coefficients, dstRange,
            brightness, contrast, saturation);
    }
    m_nv12LastWidth = width;
    m_nv12LastHeight = height;
    m_nv12LastColorspace = colorspace;
    m_nv12LastRange = srcRange;
}
// Convert an NV12 AVFrame (HW-decode output) to a BGR cv::Mat using OpenCV's
// two-plane converter. Non-NV12 frames fall through to the generic sws path.
// @param frame decoded frame; NV12 expected, any format tolerated
// @return BGR cv::Mat, or an empty Mat on failure
cv::Mat CVideoPlayer::avframeNV12ToCvMat(const AVFrame* frame)
{
    try {
        if (!frame || frame->width <= 0 || frame->height <= 0) {
            std::cerr << "Invalid frame! Either null, incorrect format, or zero dimensions." << std::endl;
            return cv::Mat();
        }
        // Software decode handler
        if (frame->format != AV_PIX_FMT_NV12) return avframeAnyToCvmat(frame);
        int width = frame->width;
        int height = frame->height;
        // Store original NV12 dimensions for inference coordinate mapping.
        // (Previously assigned twice back-to-back; the duplicate was removed.)
        m_nv12OrigWidth = width;
        m_nv12OrigHeight = height;
        // Return full-resolution BGR image.
        // No forced downscale — LabVIEW manages display resolution via SetDisplayResolution().
        // If the caller needs a specific display size, SetDisplayResolution(w, h) applies
        // resizing in GetImage() at the ANSRTSP/ANS*Client level after this returns.
        cv::Mat yPlane(height, width, CV_8UC1, frame->data[0], frame->linesize[0]);
        cv::Mat uvPlane(height / 2, width / 2, CV_8UC2, frame->data[1], frame->linesize[1]);
        cv::Mat bgrImage;
        cv::cvtColorTwoPlane(yPlane, uvPlane, bgrImage, cv::COLOR_YUV2BGR_NV12);
        // Quality mode 1: expand limited range (16-235) to full range (0-255).
        if (m_nImageQuality == 1) {
            bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
        }
        return bgrImage;
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeNV12ToCvMat: " << e.what() << std::endl;
        return cv::Mat();
    }
}
// Convert a software-decoded YUV420P/YUVJ420P AVFrame to a BGR cv::Mat.
// Three paths: libyuv SIMD (when built in), zero-copy wrap for contiguous
// I420, and a staged copy + cvtColor for padded/non-adjacent planes.
// @param frame decoded planar YUV frame
// @return BGR cv::Mat, or an empty Mat on failure
cv::Mat CVideoPlayer::avframeYUV420PToCvMat(const AVFrame* frame) {
    try {
        if (!frame || frame->width <= 0 || frame->height <= 0) {
            return cv::Mat();
        }
        const int width = frame->width;
        const int height = frame->height;
        // Debug: confirm this SW-decode conversion is actually hit.
        // Throttled to ~1 log/sec at 30 fps to keep DebugView readable.
        // Gated by ANSCORE_DEBUGVIEW — compiles to nothing in production.
        {
            static std::atomic<uint64_t> s_swCallCount{0};
            uint64_t n = s_swCallCount.fetch_add(1, std::memory_order_relaxed);
            if ((n % 30) == 0) {
                const char* fmtName = av_get_pix_fmt_name((AVPixelFormat)frame->format);
                const bool contig =
                    (frame->linesize[0] == width &&
                     frame->linesize[1] == width / 2 &&
                     frame->linesize[2] == width / 2 &&
                     frame->data[1] == frame->data[0] + width * height &&
                     frame->data[2] == frame->data[1] + (width / 2) * (height / 2));
                // Report the codec's allocated Y-plane height (inferred from
                // the Y/U pointer spacing and Y stride). Lets us see whether
                // our custom get_buffer2 achieved alloc_h == visible_h.
                const int yStrideDbg = frame->linesize[0] > 0 ? frame->linesize[0] : 1;
                const int alloc_h_y = (int)((frame->data[1] - frame->data[0]) / yStrideDbg);
#if defined(ANSCORE_HAS_LIBYUV) && ANSCORE_HAS_LIBYUV
                const char* pathLabel = "LIBYUV/I420ToRGB24";
#else
                const char* pathLabel =
                    contig ? "FAST/zero-copy" :
                    (frame->linesize[0] == width) ? "SLOW/bulk-memcpy" :
                    "SLOW/per-row-copy";
#endif
                (void)contig; // silence unused warning when libyuv is on
                ANS_DBG("MEDIA_SWDec",
                    "avframeYUV420PToCvMat ENTRY call#%llu fmt=%s visible=%dx%d alloc_h_y=%d "
                    "linesize=[%d,%d,%d] path=%s (this=%p)",
                    (unsigned long long)n,
                    fmtName ? fmtName : "?",
                    width, height, alloc_h_y,
                    frame->linesize[0], frame->linesize[1], frame->linesize[2],
                    pathLabel,
                    (void*)this);
            }
        }
#if defined(ANSCORE_HAS_LIBYUV) && ANSCORE_HAS_LIBYUV
        // libyuv path: direct I420 (3 strided planes) → RGB24 (== BGR in memory
        // order for libyuv, matches cv::Mat CV_8UC3 default). No staging buffer,
        // no memcpy, no cv::cvtColor — one SIMD-optimized sweep.
        //
        // libyuv's "RGB24" is B,G,R per pixel in memory (see RGB24ToARGBRow_C
        // in libyuv/source/row_common.cc where src[0]=b, src[1]=g, src[2]=r).
        // That matches OpenCV's BGR layout — safe to wrap in CV_8UC3.
        cv::Mat bgrImage(height, width, CV_8UC3);
        int ret = libyuv::I420ToRGB24(
            frame->data[0], frame->linesize[0],
            frame->data[1], frame->linesize[1],
            frame->data[2], frame->linesize[2],
            bgrImage.data, static_cast<int>(bgrImage.step),
            width, height);
        if (ret != 0) {
            std::cerr << "libyuv::I420ToRGB24 failed with ret=" << ret << std::endl;
            return cv::Mat();
        }
        if (m_nImageQuality == 1) {
            bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
        }
        return bgrImage;
#else
        // YUV420P has 3 separate planes: Y (full res), U (half), V (half).
        // OpenCV's cvtColor(COLOR_YUV2BGR_I420) expects a single contiguous buffer
        // with Y on top (H rows) and U,V stacked below (H/2 rows total).
        // Layout: [Y: W×H] [U: W/2 × H/2] [V: W/2 × H/2]
        // Total height = H * 3/2, width = W, single channel.
        // If all planes are contiguous with matching strides, wrap directly
        const int yStride = frame->linesize[0];
        const int uStride = frame->linesize[1];
        const int vStride = frame->linesize[2];
        // Fast path: planes are packed contiguously with stride == width
        if (yStride == width && uStride == width / 2 && vStride == width / 2 &&
            frame->data[1] == frame->data[0] + width * height &&
            frame->data[2] == frame->data[1] + (width / 2) * (height / 2)) {
            // Contiguous I420 — wrap directly, zero copy
            cv::Mat yuv(height * 3 / 2, width, CV_8UC1, frame->data[0]);
            cv::Mat bgrImage;
            cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420);
            if (m_nImageQuality == 1) {
                bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
            }
            return bgrImage;
        }
        // Slow path: planes have padding (linesize > width) OR Y/U/V live in
        // non-adjacent buffers. Copy into a single I420-layout staging buffer
        // so cvtColor(COLOR_YUV2BGR_I420) can process it in one SIMD sweep.
        // (The previously computed `totalSize` local was unused since the
        // staging-Mat refactor and has been removed.)
        const int uvWidth = width / 2;
        const int uvHeight = height / 2;
        // Thread-local staging Mat — reused across calls to avoid a 12 MB malloc
        // on every 4K frame. Each decoder runs on its own worker thread, so
        // thread_local is the right granularity (no cross-thread sharing, no
        // locking). The Mat reallocates only when dimensions change.
        static thread_local cv::Mat s_yuvStaging;
        if (s_yuvStaging.rows != height * 3 / 2 ||
            s_yuvStaging.cols != width ||
            s_yuvStaging.type() != CV_8UC1 ||
            !s_yuvStaging.isContinuous()) {
            s_yuvStaging.create(height * 3 / 2, width, CV_8UC1);
        }
        cv::Mat& yuv = s_yuvStaging;
        uint8_t* dst = yuv.data;
        // Copy Y plane (line by line if stride != width)
        if (yStride == width) {
            std::memcpy(dst, frame->data[0], width * height);
        } else {
            for (int row = 0; row < height; ++row) {
                std::memcpy(dst + row * width, frame->data[0] + row * yStride, width);
            }
        }
        dst += width * height;
        // Copy U plane
        if (uStride == uvWidth) {
            std::memcpy(dst, frame->data[1], uvWidth * uvHeight);
        } else {
            for (int row = 0; row < uvHeight; ++row) {
                std::memcpy(dst + row * uvWidth, frame->data[1] + row * uStride, uvWidth);
            }
        }
        dst += uvWidth * uvHeight;
        // Copy V plane
        if (vStride == uvWidth) {
            std::memcpy(dst, frame->data[2], uvWidth * uvHeight);
        } else {
            for (int row = 0; row < uvHeight; ++row) {
                std::memcpy(dst + row * uvWidth, frame->data[2] + row * vStride, uvWidth);
            }
        }
        cv::Mat bgrImage;
        cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420);
        if (m_nImageQuality == 1) {
            bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0);
        }
        return bgrImage;
#endif // ANSCORE_HAS_LIBYUV
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUV420PToCvMat: " << e.what() << std::endl;
        return cv::Mat();
    }
}
// Dispatch a decoded AVFrame to the appropriate pixel-format-specific
// BGR conversion routine.
// @param pFrame decoded frame; must be >10x10 with a valid data[0]
// @return BGR cv::Mat, or an empty Mat on failure
cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
    // No _mutex here: caller (getImage) releases the mutex before invoking this
    // so the expensive NV12/YUV420P→BGR conversion does not block onVideoFrame.
    // NV12/YUV420P paths touch only the caller-owned AVFrame clone and benign
    // member reads. avframeAnyToCvmat() takes its own lock for swsCtx.
    try {
        // 1. Validate input frame
        if (!pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return cv::Mat();
        }
        // One-shot diagnostic: print the pixel format the first time through so
        // we can see which branch of the switch below is taken. Remove after use.
        // BUGFIX: this function runs without _mutex and can be entered from
        // multiple decoder threads concurrently, so the one-shot latch must be
        // atomic — a plain static bool was a data race (and could log twice).
        static std::atomic<bool> s_loggedFmt{false};
        if (!s_loggedFmt.exchange(true, std::memory_order_relaxed)) {
            const char* name = av_get_pix_fmt_name((AVPixelFormat)pFrame->format);
            fprintf(stderr, "[avframeToCVMat] first frame format=%d (%s) %dx%d\n",
                pFrame->format, name ? name : "?", pFrame->width, pFrame->height);
            ANS_DBG("MEDIA_Convert",
                "avframeToCVMat FIRST-FRAME fmt=%d(%s) %dx%d HWDecoding=%d (this=%p)",
                pFrame->format, name ? name : "?",
                pFrame->width, pFrame->height,
                m_nHWDecoding, (void*)this);
        }
        // Per-branch throttled trace so we can see the dispatch at runtime.
        // Gated by ANSCORE_DEBUGVIEW — zero overhead in production.
        static std::atomic<uint64_t> s_dispatchCount{0};
        const uint64_t dispN = s_dispatchCount.fetch_add(1, std::memory_order_relaxed);
        const bool logThis = ((dispN % 30) == 0);
        switch (pFrame->format) {
        case AV_PIX_FMT_NV12:
            if (logThis) {
                ANS_DBG("MEDIA_Convert",
                    "DISPATCH call#%llu fmt=NV12 %dx%d -> avframeNV12ToCvMat (HW-decode path)",
                    (unsigned long long)dispN, pFrame->width, pFrame->height);
            }
            return avframeNV12ToCvMat(pFrame);
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUVJ420P:
            if (logThis) {
                ANS_DBG("MEDIA_Convert",
                    "DISPATCH call#%llu fmt=%s %dx%d -> avframeYUV420PToCvMat (SW-decode path)",
                    (unsigned long long)dispN,
                    (pFrame->format == AV_PIX_FMT_YUVJ420P) ? "YUVJ420P" : "YUV420P",
                    pFrame->width, pFrame->height);
            }
            return avframeYUV420PToCvMat(pFrame);
        default:
            if (logThis) {
                const char* name = av_get_pix_fmt_name((AVPixelFormat)pFrame->format);
                ANS_DBG("MEDIA_Convert",
                    "DISPATCH call#%llu fmt=%d(%s) %dx%d -> avframeAnyToCvmat (sws_scale fallback)",
                    (unsigned long long)dispN,
                    pFrame->format, name ? name : "?",
                    pFrame->width, pFrame->height);
            }
            return avframeAnyToCvmat(pFrame);
        }
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in avframeToCvMat: " << e.what() << std::endl;
        return cv::Mat(); // Return an empty matrix on error
    }
}
// Constructor: establish safe defaults for all playback/recording state.
// Heavy resources (decoders, audio backend, SwsContexts) are created lazily
// in openVideo()/openAudio(); only the record mutex and the TurboJPEG
// compressor handle are allocated here.
CVideoPlayer::CVideoPlayer() :
    m_bVideoInited(FALSE)
    , m_bAudioInited(FALSE)
    , m_bPlaying(FALSE)
    , m_bPaused(FALSE)
    , m_nHWDecoding(HW_DECODING_DISABLE)// Software decode by default — saves VRAM (no NVDEC DPB surfaces)
    , m_bUpdown(FALSE)
    , m_bSnapshot(FALSE)
    , m_nSnapVideoFmt(AV_PIX_FMT_YUVJ420P)
    , m_nVideoCodec(VIDEO_CODEC_NONE)
    , m_nAudioCodec(AUDIO_CODEC_NONE)
    , m_nSampleRate(0)
    , m_nChannel(0)
    , m_nBitPerSample(0)
    , m_pSnapFrame(NULL)
    , m_bRecording(FALSE)
    , m_bNalFlag(FALSE)
    , m_pAviCtx(NULL)
    , m_pAudioListMutex(NULL)
    , m_audioPlayFlag(FALSE)
    //, m_audioPlayThread(0)
    , m_pVideoListMutex(NULL)
    , m_videoPlayFlag(FALSE)
    //, m_videoPlayThread(0)
    , m_nLastAudioPts(AV_NOPTS_VALUE)
    , m_lastAudioTS(0)
{
    // No crop region until the caller configures one.
    m_Bbox.x = 0;
    m_Bbox.y = 0;
    m_Bbox.width = 0;
    m_Bbox.height = 0;
    m_bCrop = false;
    // Serializes recordVideo()/recordAudio() against stopRecord().
    m_pRecordMutex = sys_os_create_mutex();
    m_lastJpegImage = "";
    m_jpegImage = "";
    m_pts = 0;
    // Zero the H.26x parameter-set cache and the A/V sync clocks.
    memset(&m_h26XParamSets, 0, sizeof(H26XParamSets));
    memset(&m_audioClock, 0, sizeof(HTCLOCK));
    memset(&m_videoClock, 0, sizeof(HTCLOCK));
    // TurboJPEG compressor used by the JPEG snapshot/streaming path.
    this->_tjInstance = tjInitCompress();
}
// Destructor: stop the decode pipeline, then release cached conversion
// contexts and the TurboJPEG handle. Teardown order matters — close() must
// run first so no decoder callback can race the frees below.
CVideoPlayer::~CVideoPlayer()
{
    // Lock to ensure no other thread is mid-operation (getImage, getJpegImage, onVideoFrame)
    // before we free resources. close() stops the decoder which prevents new callbacks.
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        close(); // Stop decoder first — prevents new onVideoFrame callbacks
        g_frameQueue.clearQueue();
        if (swsCtx != nullptr) {
            sws_freeContext(swsCtx);
            swsCtx = nullptr;
        }
        if (m_nv12SwsCtx != nullptr) {
            sws_freeContext(m_nv12SwsCtx);
            m_nv12SwsCtx = nullptr;
        }
        if (this->_tjInstance) {
            tjDestroy(this->_tjInstance);
            this->_tjInstance = nullptr;
        }
    }
    // _mutex is destroyed after this block — no other thread should be accessing this object
}
BOOL CVideoPlayer::open(std::string fileName)
{
    // Remember the media source; actual setup happens lazily on first packet.
    m_sFileName = std::move(fileName);
    return TRUE;
}
BOOL CVideoPlayer::open(std::string _username, std::string _password, std::string _url)
{
    // Store credentials and the stream URL for the RTSP session.
    m_acct = std::move(_username);
    m_pass = std::move(_password);
    m_sFileName = std::move(_url);
    return TRUE;
}
AVFrame* CVideoPlayer::getNV12Frame() {
    // Hand out a CLONE so multiple consumers (tasks sharing the same stream)
    // each own an independent reference; m_currentNV12Frame remains valid
    // until the next getImage() call overwrites it.
    // (Previously used ownership transfer — only the first caller got NV12,
    // and the second caller fell back to BGR.)
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!m_currentNV12Frame) {
        return nullptr;
    }
    return av_frame_clone(m_currentNV12Frame);
}
AVFrame* CVideoPlayer::getCudaHWFrame() {
    // Clone (not ownership transfer) the CUDA HW frame captured by
    // onVideoFrame(): multiple callers may ask for it between updates
    // (e.g. during warmup when GetRTSPCVImage outpaces the decode rate).
    // extra_hw_frames=2 in the decoder provides surface pool headroom
    // for the 3 concurrent clones (decoder + player + registry).
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!m_currentCudaHWFrame) {
        return nullptr;
    }
    return av_frame_clone(m_currentCudaHWFrame);
}
bool CVideoPlayer::isCudaHWAccel() const {
    // True only when a decoder exists and it reports CUDA HW acceleration.
    if (!m_pVideoDecoder) {
        return false;
    }
    return m_pVideoDecoder->isCudaHWAccel();
}
void CVideoPlayer::close()
{
    // Tear down the elementary-stream pipelines first so no decode callback
    // can touch the frames we release below.
    closeVideo();
    closeAudio();
    // av_frame_free() nulls the pointer it receives; the explicit resets keep
    // the members unambiguously cleared regardless.
    if (m_currentNV12Frame) {
        av_frame_free(&m_currentNV12Frame);
        m_currentNV12Frame = nullptr;
    }
    if (m_currentCudaHWFrame) {
        av_frame_free(&m_currentCudaHWFrame);
        m_currentCudaHWFrame = nullptr;
    }
    if (m_pSnapFrame) {
        av_frame_free(&m_pSnapFrame);
        m_pSnapFrame = nullptr;
    }
    // Finalize any in-progress AVI recording, then drop its mutex.
    stopRecord();
    if (m_pRecordMutex) {
        sys_os_destroy_sig_mutex(m_pRecordMutex);
        m_pRecordMutex = NULL;
    }
}
void CVideoPlayer::setVolume(int volume)
{
    // Forward to the platform audio backend when one is active.
    if (!m_pAudioPlay)
    {
        return;
    }
    m_pAudioPlay->setVolume(volume);
}
void CVideoPlayer::snapshot(int videofmt)
{
    // Record the requested output format, then arm the snapshot latch;
    // the decode callback consumes it on the next frame.
    m_nSnapVideoFmt = videofmt;
    m_bSnapshot = TRUE;
}
BOOL CVideoPlayer::record(std::string baseName)
{
    // Already recording — treat as success, don't reopen the file.
    if (m_bRecording)
    {
        return TRUE;
    }
    //std::string path = getRecordPath();
    std::string aviPath = baseName;// path + "/" + getTempFile(baseName, ".avi");
    m_sBaseName = baseName;
    // Open the AVI container; failure aborts the whole request.
    m_pAviCtx = avi_write_open(aviPath.c_str());
    if (NULL == m_pAviCtx)
    {
        log_print(HT_LOG_ERR, "%s, avi_write_open failed. %s\r\n",
            __FUNCTION__, aviPath.c_str());
        return FALSE;
    }
    // Let the subclass set up stream parameters; roll back on refusal.
    if (!onRecord())
    {
        avi_write_close(m_pAviCtx);
        m_pAviCtx = NULL;
        return FALSE;
    }
    m_bRecording = TRUE;
    return m_bRecording;
}
void CVideoPlayer::stopRecord()
{
    // Serialize against recordVideo()/recordAudio() running on the rx thread.
    sys_os_mutex_enter(m_pRecordMutex);
    m_bRecording = FALSE;
    m_bNalFlag = FALSE;
    // Forget cached VPS/SPS/PPS; the next session re-extracts them.
    memset(&m_h26XParamSets, 0, sizeof(m_h26XParamSets));
    if (m_pAviCtx != NULL)
    {
        avi_write_close(m_pAviCtx);
        m_pAviCtx = NULL;
    }
    sys_os_mutex_leave(m_pRecordMutex);
}
void CVideoPlayer::recordVideo(uint8* data, int len, uint32 ts, uint16 seq)
{
    // Resolve the codec from the AVI fourcc chosen at record() time.
    int codec = VIDEO_CODEC_NONE;
    if (memcmp(m_pAviCtx->v_fcc, "H264", 4) == 0)
    {
        codec = VIDEO_CODEC_H264;
    }
    else if (memcmp(m_pAviCtx->v_fcc, "H265", 4) == 0)
    {
        codec = VIDEO_CODEC_H265;
    }
    // For H.26x, write VPS/SPS/PPS once before the first payload so the
    // file is decodable from its first frame.
    const bool isH26x = (VIDEO_CODEC_H264 == codec || VIDEO_CODEC_H265 == codec);
    if (isH26x && !m_bNalFlag)
    {
        if (avc_get_h26x_paramsets(data, len, codec, &m_h26XParamSets))
        {
            avi_write_nalu(m_pAviCtx,
                m_h26XParamSets.vps, m_h26XParamSets.vps_size,
                m_h26XParamSets.sps, m_h26XParamSets.sps_size,
                m_h26XParamSets.pps, m_h26XParamSets.pps_size);
            m_bNalFlag = 1;
        }
    }
    recordVideoEx(data, len, ts, seq);
    // Rotate the output file when size/duration limits were crossed.
    if (recordSwitchCheck())
    {
        recordFileSwitch();
    }
}
// Write one video payload into the AVI file: lazily discovers the stream
// dimensions, classifies keyframes per codec, and appends the sample.
// @param data elementary-stream payload (start-code/length-prefixed for H.26x)
// @param len  payload size in bytes
// @param ts   RTP/media timestamp
// @param seq  sequence number (unused here)
void CVideoPlayer::recordVideoEx(uint8* data, int len, uint32 ts, uint16 seq)
{
    AVICTX* p_avictx = m_pAviCtx;
    // The AVI header can only be finalized once width/height are known;
    // parse them out of the bitstream on the first opportunity.
    if (p_avictx->v_width == 0 || p_avictx->v_height == 0)
    {
        int codec = VIDEO_CODEC_NONE;
        if (memcmp(p_avictx->v_fcc, "H264", 4) == 0)
        {
            codec = VIDEO_CODEC_H264;
        }
        else if (memcmp(p_avictx->v_fcc, "H265", 4) == 0)
        {
            codec = VIDEO_CODEC_H265;
        }
        else if (memcmp(p_avictx->v_fcc, "JPEG", 4) == 0)
        {
            codec = VIDEO_CODEC_JPEG;
        }
        else if (memcmp(p_avictx->v_fcc, "MP4V", 4) == 0)
        {
            codec = VIDEO_CODEC_MP4;
        }
        avc_parse_video_size(codec, data, len, &p_avictx->v_width, &p_avictx->v_height);
        if (p_avictx->v_width && p_avictx->v_height)
        {
            avi_update_header(p_avictx);
        }
    }
    int key = 0;
    if (memcmp(p_avictx->v_fcc, "H264", 4) == 0)
    {
        // BUGFIX: the NAL header byte lives at data[4] (after the 4-byte
        // start code); previously it was read without checking that the
        // payload is long enough — an out-of-bounds read on short/malformed
        // packets. Such packets are now simply treated as non-key.
        if (data != NULL && len >= 5)
        {
            uint8 nalu_t = (data[4] & 0x1F);
            key = (nalu_t == 5 || nalu_t == 7 || nalu_t == 8);
        }
    }
    else if (memcmp(p_avictx->v_fcc, "H265", 4) == 0)
    {
        // Same guard as the H.264 branch (see BUGFIX note above).
        if (data != NULL && len >= 5)
        {
            uint8 nalu_t = (data[4] >> 1) & 0x3F;
            key = ((nalu_t >= 16 && nalu_t <= 21) || nalu_t == 32 || nalu_t == 33 || nalu_t == 34);
        }
    }
    else if (memcmp(p_avictx->v_fcc, "MP4V", 4) == 0)
    {
        key = 1; // every MP4V sample treated as a sync point
    }
    else if (memcmp(p_avictx->v_fcc, "JPEG", 4) == 0)
    {
        key = 1; // MJPEG: every frame is independently decodable
    }
    avi_write_video(p_avictx, data, len, ts, key);
}
void CVideoPlayer::recordAudio(uint8* data, int len, uint32 ts, uint16 seq)
{
    // seq is unused here; the signature mirrors recordVideo() for symmetry.
    (void)seq;
    avi_write_audio(m_pAviCtx, data, len, ts);
    // Rotate the output file when size/duration limits were crossed.
    if (recordSwitchCheck())
    {
        recordFileSwitch();
    }
}
BOOL CVideoPlayer::recordSwitchCheck()
{
    // Current file length (bytes) and recorded media time (ms).
    const uint64 tlen = avi_get_file_length(m_pAviCtx);
    const uint32 mtime = avi_get_media_time(m_pAviCtx);
    // Size limit in KB; fall back to 1 GiB when unconfigured.
    uint32 recordSize = 0;// getRecordSize();
    if (recordSize == 0)
    {
        recordSize = 1048576; // max 1G file size
    }
    // Switch according to the recording size
    if (tlen > recordSize * 1024)
    {
        return TRUE;
    }
    // Duration limit in seconds; 0 disables time-based switching.
    uint32 recordTime = 0;// getRecordTime();
    // Switch according to the recording duration
    if (recordTime > 0 && mtime > recordTime * 1000)
    {
        return TRUE;
    }
    return FALSE;
}
// Rotate the AVI recording: open a new file, carry over stream configuration
// from the old context, close the old file, and re-emit the cached H.26x
// parameter sets so the new file starts decodable. The old context stays
// live until the new one is fully configured — statement order is critical.
void CVideoPlayer::recordFileSwitch()
{
    AVICTX* p_ctx;
    AVICTX* p_oldctx = m_pAviCtx;
    //std::string path = getRecordPath();
    std::string file = m_sBaseName;// path + "/" + getTempFile(m_sBaseName, ".avi");
    p_ctx = avi_write_open(file.c_str());
    if (NULL == p_ctx)
    {
        // Could not open the replacement — keep writing into the old file.
        return;
    }
    // Propagate which streams (video/audio) the old context carried.
    p_ctx->ctxf_video = p_oldctx->ctxf_video;
    p_ctx->ctxf_audio = p_oldctx->ctxf_audio;
    if (p_ctx->ctxf_video)
    {
        // Recompute fps from the old file's actual sample timing first.
        avi_calc_fps(p_oldctx);
        avi_set_video_info(p_ctx, p_oldctx->v_fps, p_oldctx->v_width, p_oldctx->v_height, p_oldctx->v_fcc);
        avi_set_video_extra_info(p_ctx, p_oldctx->v_extra, p_oldctx->v_extra_len);
    }
    if (p_ctx->ctxf_audio)
    {
        avi_set_audio_info(p_ctx, p_oldctx->a_chns, p_oldctx->a_rate, p_oldctx->a_fmt);
        avi_set_audio_extra_info(p_ctx, p_oldctx->a_extra, p_oldctx->a_extra_len);
    }
    // Finalize the old file only after its settings have been copied out.
    avi_write_close(p_oldctx);
    avi_update_header(p_ctx);
    m_pAviCtx = p_ctx;
    // Re-emit cached VPS/SPS/PPS so the new file decodes from frame one.
    if (m_h26XParamSets.vps_size > 0 ||
        m_h26XParamSets.sps_size > 0 ||
        m_h26XParamSets.pps_size > 0)
    {
        avi_write_nalu(m_pAviCtx,
            m_h26XParamSets.vps, m_h26XParamSets.vps_size,
            m_h26XParamSets.sps, m_h26XParamSets.sps_size,
            m_h26XParamSets.pps, m_h26XParamSets.pps_size);
    }
}
BOOL CVideoPlayer::openVideo(enum AVCodecID codec, uint8* extradata, int extradata_size)
{
    // Idempotent: a second call while initialized is a no-op success.
    if (m_bVideoInited)
    {
        return TRUE;
    }
    // Initialize the decoder with the configured HW-decode mode and GPU.
    if (m_pVideoDecoder)
    {
        m_bVideoInited = m_pVideoDecoder->init(codec, extradata, extradata_size, m_nHWDecoding, m_nPreferredGpu);
    }
    // On success wire the decoded-frame callback and the play-path state.
    if (m_bVideoInited)
    {
        m_pVideoDecoder->setCallback(VideoDecoderCallback, this);
        m_pVideoListMutex = sys_os_create_mutex();
        m_videoPlayFlag = TRUE;
        //m_videoPlayThread = sys_os_create_thread((void*)VideoPlayThread, this);
    }
    // Record the codec in portable form regardless of the init outcome.
    m_nVideoCodec = to_video_codec(codec);
    return m_bVideoInited;
}
BOOL CVideoPlayer::openVideo(int codec, uint8* extradata, int extradata_size)
{
    // Map the portable codec id onto FFmpeg's enum and delegate.
    const enum AVCodecID avid = to_video_avcodecid(codec);
    return openVideo(avid, extradata, extradata_size);
}
// Shut down the video decode path. The decoder is stopped with _mutex
// RELEASED to avoid the lock-ordering deadlock documented in
// StopVideoDecoder(); resources are then cleaned up under the lock.
void CVideoPlayer::closeVideo()
{
    // Stop decoder outside the player lock to avoid the same lock-ordering
    // deadlock as StopVideoDecoder() (see comment there).
    CVideoDecoder* decoder = nullptr;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        decoder = m_pVideoDecoder.get();
    }
    if (decoder)
    {
        decoder->Stop();
        decoder->flush();
    }
    // Now clean up resources under the lock
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    m_videoPlayFlag = FALSE;
    if (m_pVideoListMutex)
    {
        sys_os_destroy_sig_mutex(m_pVideoListMutex);
        m_pVideoListMutex = NULL;
    }
    // Drop any decoded frames still queued for display.
    if (!g_frameQueue.isEmpty())g_frameQueue.clearQueue();
    m_bVideoInited = FALSE;
}
void CVideoPlayer::StartVideoDecoder() {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    // Drop stale queued frames but KEEP m_currentImage — it holds the last
    // good frame, served while the decoder stabilizes after restart.
    g_frameQueue.clearQueue();
    m_lastFrameSeq = 0;
    m_bWaitingForKeyframe = true; // Skip frames until first keyframe
    m_cleanFrameCount = 0;        // Reset settle counter
    if (m_pVideoDecoder != nullptr)
    {
        m_pVideoDecoder->Start();
    }
}
// Stop and fully uninitialize the video decoder, releasing its GPU surfaces,
// while carefully avoiding a cross-lock deadlock with the decode callback.
void CVideoPlayer::StopVideoDecoder() {
    // Get decoder pointer under lock, then release BEFORE calling decoder methods.
    // This avoids a lock-ordering deadlock:
    //   Thread 1 (here): CVideoPlayer::_mutex -> CVideoDecoder::_mutex
    //   Thread 2 (TCP rx decode -> onVideoFrame callback): CVideoDecoder::_mutex -> CVideoPlayer::_mutex
    CVideoDecoder* decoder = nullptr;
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        decoder = m_pVideoDecoder.get();
    }
    if (decoder)
    {
        decoder->Stop();
        // Flush decoder to drain and discard any buffered frames,
        // so stale reference frames don't corrupt the next session
        decoder->flush();
        // Free NVDEC decoder context and all GPU surfaces (DPB buffers).
        // Stopped cameras should not hold VRAM — with 100 cameras created
        // but only 5 running, the 95 idle decoders would consume ~5-10 GB.
        // The decoder will be re-initialized automatically when the next
        // video packet arrives after Start() is called.
        decoder->uninit();
        // NOTE(review): m_bVideoInited is written here without _mutex held —
        // presumably tolerated because it is a simple flag; confirm.
        m_bVideoInited = FALSE;
    }
    // Clear queue but KEEP m_currentImage and m_lastJpegImage —
    // getImage()/getJpegImage() will return the last good frame while decoder stabilizes
    {
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        g_frameQueue.clearQueue();
        m_lastFrameSeq = 0;
    }
}
// Initialize the audio decode + render pipeline.
// @param codec        FFmpeg codec id of the incoming audio stream
// @param samplerate   samples per second
// @param channels     channel count
// @param bitpersample bits per sample
// @return TRUE when the decoder initialized (or already was)
BOOL CVideoPlayer::openAudio(enum AVCodecID codec, int samplerate, int channels, int bitpersample)
{
    // Idempotent: a second call while initialized is a no-op success.
    if (m_bAudioInited)
    {
        return TRUE;
    }
    if (m_pAudioDecoder)
    {
        m_bAudioInited = m_pAudioDecoder->init(codec, samplerate, channels, bitpersample);
    }
    if (m_bAudioInited)
    {
        m_pAudioDecoder->setCallback(AudioDecoderCallback, this);
        // Platform-specific audio render backend.
#if __WINDOWS_OS__
        m_pAudioPlay = std::make_unique<CWAudioPlay>();/// new CWAudioPlay();
#elif defined(IOS)
        m_pAudioPlay = std::make_unique<CMAudioPlay>();
#elif __LINUX_OS__
        m_pAudioPlay = std::make_unique<CQAudioPlay>();
#endif
        if (m_pAudioPlay)
        {
            m_pAudioPlay->startPlay(samplerate, channels);
        }
        m_pAudioListMutex = sys_os_create_mutex();
        m_audioPlayFlag = FALSE;//disable by default
        //m_audioPlayThread = sys_os_create_thread((void*)AudioPlayThread, this);
    }
    // Record stream parameters in portable form regardless of the outcome.
    m_nAudioCodec = to_audio_codec(codec);
    m_nSampleRate = samplerate;
    m_nChannel = channels;
    m_nBitPerSample = bitpersample;
    return m_bAudioInited;
}
void CVideoPlayer::enableAudio(bool status) {
    // Gate audio rendering; decoding continues either way.
    m_audioPlayFlag = status ? TRUE : FALSE;
}
BOOL CVideoPlayer::openAudio(int codec, int samplerate, int channels, int bitpersample)
{
    // Map the portable codec id onto FFmpeg's enum and delegate.
    const enum AVCodecID avid = to_audio_avcodecid(codec);
    return openAudio(avid, samplerate, channels, bitpersample);
}
void CVideoPlayer::closeAudio()
{
    // Stop feeding the render path before tearing down its resources.
    m_audioPlayFlag = FALSE;
    if (m_pAudioListMutex)
    {
        sys_os_destroy_sig_mutex(m_pAudioListMutex);
        m_pAudioListMutex = NULL;
    }
    // Drop any queued-but-unplayed audio frames.
    if (!a_frameQueue.isEmpty())
    {
        a_frameQueue.clearQueue();
    }
    m_bAudioInited = FALSE;
}
int CVideoPlayer::getVideoWidth()
{
    // 0 until a decoder exists and has seen the stream geometry.
    return m_pVideoDecoder ? m_pVideoDecoder->getWidth() : 0;
}
int CVideoPlayer::getVideoHeight()
{
    // 0 until a decoder exists and has seen the stream geometry.
    return m_pVideoDecoder ? m_pVideoDecoder->getHeight() : 0;
}
double CVideoPlayer::getFrameRate()
{
    // 0 until a decoder exists and has measured the stream's frame rate.
    return m_pVideoDecoder ? m_pVideoDecoder->getFrameRate() : 0;
}
void CVideoPlayer::setTargetFPS(double intervalMs)
{
    // Update the delivery throttle interval and restart its timing baseline.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    m_targetIntervalMs = intervalMs;
    m_targetFPSInitialized = false; // reset timing on change
}
double CVideoPlayer::getLastFrameAgeMs()
{
    // Milliseconds since the decoder last delivered a frame;
    // 0.0 before the first frame arrives.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (!m_lastDecoderFrameTimeSet) {
        return 0.0;
    }
    const auto elapsed = std::chrono::steady_clock::now() - m_lastDecoderFrameTime;
    return std::chrono::duration<double, std::milli>(elapsed).count();
}
void CVideoPlayer::playVideo(uint8* data, int len, uint32 ts, uint16 seq)
{
    // Mirror the elementary stream into the AVI file while recording.
    if (m_bRecording)
    {
        sys_os_mutex_enter(m_pRecordMutex);
        recordVideo(data, len, ts, seq);
        sys_os_mutex_leave(m_pRecordMutex);
    }
    // Advance the RTP-timestamp → wall-clock mapping for this stream.
    updateClock(&m_videoClock, ts, getVideoClock());
    if (!m_bVideoInited || !m_bPlaying)
    {
        return;
    }
    // Present the packet with its synchronized timestamp in microseconds.
    m_pVideoDecoder->decode(data, len, m_videoClock.SyncTime.tv_sec * 1000000 + m_videoClock.SyncTime.tv_usec);
}
void CVideoPlayer::playAudio(uint8* data, int len, uint32 ts, uint16 seq)
{
    // Mirror the elementary stream into the AVI file while recording.
    if (m_bRecording)
    {
        sys_os_mutex_enter(m_pRecordMutex);
        recordAudio(data, len, ts, seq);
        sys_os_mutex_leave(m_pRecordMutex);
    }
    // Advance the RTP-timestamp → wall-clock mapping for this stream.
    updateClock(&m_audioClock, ts, getAudioClock());
    if (!m_bAudioInited)
    {
        return;
    }
    // Decode with the synchronized timestamp in microseconds.
    m_pAudioDecoder->decode(data, len, m_audioClock.SyncTime.tv_sec * 1000000 + m_audioClock.SyncTime.tv_usec);
}
// Advance a stream clock: map RTP timestamp `ts` onto wall-clock time using
// the stream's `frequency` (ticks per second), updating clock->SyncTime /
// SyncTimestamp. Mirrors the RTCP-less sync scheme used by live555-style
// players. The unsigned subtraction below intentionally wraps, so timestamp
// wraparound still yields a small signed delta.
void CVideoPlayer::updateClock(HTCLOCK* clock, uint32 ts, int frequency)
{
    // ts == 0 is used as "no timestamp" — ignore.
    if (ts == 0)
    {
        return;
    }
    // First sample: anchor the clock at "now".
    if (clock->SyncTime.tv_sec == 0 && clock->SyncTime.tv_usec == 0)
    {
        clock->SyncTimestamp = ts;
        gettimeofday(&clock->SyncTime, NULL);
    }
    // uint32 difference converted to int: gives a signed delta that is
    // correct even across timestamp wraparound.
    int timestampDiff = ts - clock->SyncTimestamp;
    // Divide this by the timestamp frequency to get real time:
    double timeDiff = timestampDiff / (double)frequency;
    uint32 const million = 1000000;
    uint32 seconds, uSeconds;
    if (timeDiff >= 0.0)
    {
        // Add whole seconds plus the fractional part in microseconds,
        // normalizing any microsecond carry.
        seconds = clock->SyncTime.tv_sec + (uint32)(timeDiff);
        uSeconds = clock->SyncTime.tv_usec + (uint32)((timeDiff - (uint32)timeDiff) * million);
        if (uSeconds >= million)
        {
            uSeconds -= million;
            ++seconds;
        }
    }
    else
    {
        // Negative delta (out-of-order timestamp): subtract with borrow.
        timeDiff = -timeDiff;
        seconds = clock->SyncTime.tv_sec - (uint32)(timeDiff);
        uSeconds = clock->SyncTime.tv_usec - (uint32)((timeDiff - (uint32)timeDiff) * million);
        if ((int)uSeconds < 0)
        {
            uSeconds += million;
            --seconds;
        }
    }
    // Save these as the new synchronization timestamp & time:
    clock->SyncTimestamp = ts;
    clock->SyncTime.tv_sec = seconds;
    clock->SyncTime.tv_usec = uSeconds;
}
// Lazily (re)allocate `frame` so it matches the requested geometry and
// pixel format. An existing frame with identical width/height/format is
// reused untouched; any mismatch frees it and allocates a fresh buffer.
//
// @param frame   In/out frame pointer; may be NULL on entry.
// @param width   Target width in pixels (0 rejected).
// @param height  Target height in pixels (0 rejected).
// @param pixfmt  Target pixel format (AV_PIX_FMT_NONE rejected).
// @return TRUE when `frame` is valid and matches the request, else FALSE
//         (on failure `frame` is left NULL).
BOOL CVideoPlayer::initFrame(AVFrame*& frame, int width, int height, AVPixelFormat pixfmt)
{
    if (0 == width || 0 == height || AV_PIX_FMT_NONE == pixfmt)
    {
        return FALSE;
    }

    // Fast path: current frame already matches — nothing to do.
    const bool matches = (frame != NULL) &&
                         frame->width == width &&
                         frame->height == height &&
                         frame->format == pixfmt;
    if (matches)
    {
        return TRUE;
    }

    if (frame != NULL)
    {
        av_frame_free(&frame);
    }

    frame = av_frame_alloc();
    if (frame == NULL)
    {
        return FALSE;
    }
    frame->format = pixfmt;
    frame->width = width;
    frame->height = height;

    if (av_frame_get_buffer(frame, 0) != 0)
    {
        av_frame_free(&frame);
        return FALSE;
    }
    av_frame_make_writable(frame);
    return TRUE;
}
// Convert one decoded frame into m_pSnapFrame using the configured
// snapshot pixel format (m_nSnapVideoFmt).
//
// @param frame  Decoded source frame; NULL is rejected (previously it was
//               dereferenced unchecked via frame->width).
// @return TRUE on success, FALSE on allocation/conversion failure.
BOOL CVideoPlayer::doSnapshot(AVFrame* frame)
{
    if (NULL == frame)
    {
        return FALSE;
    }
    // initFrame() already reuses m_pSnapFrame when width/height/format are
    // unchanged and reallocates otherwise. The previous unconditional
    // av_frame_free() here defeated that reuse and forced a fresh buffer
    // allocation on every snapshot.
    if (!initFrame(m_pSnapFrame,
        frame->width,
        frame->height,
        to_avpixelformat(m_nSnapVideoFmt)))
    {
        return FALSE;
    }
    if (NULL == convertFrame(frame, m_pSnapFrame, FALSE))
    {
        return FALSE;
    }
    return TRUE;
}
// Convert srcframe into dstframe's pixel format (same dimensions) via
// libswscale, optionally flipping vertically.
//
// @param srcframe  Source frame (not modified).
// @param dstframe  Pre-allocated destination frame; its format drives the
//                  conversion target.
// @param updown    When TRUE the image is flipped vertically using negative
//                  strides. Plane layout of YUV420-style formats is assumed
//                  (full-height plane 0, half-height planes 1/2) — matches
//                  the formats this player decodes to.
// @return dstframe on success (pts/pkt_dts copied from source), NULL on
//         failure.
AVFrame* CVideoPlayer::convertFrame(AVFrame* srcframe, AVFrame* dstframe, BOOL updown)
{
    if (!srcframe || !dstframe) {
        return NULL;
    }
    SwsContext* swsctx = sws_getContext(srcframe->width,
                                        srcframe->height,
                                        (enum AVPixelFormat)srcframe->format,
                                        srcframe->width,
                                        srcframe->height,
                                        (enum AVPixelFormat)dstframe->format,
                                        SWS_BICUBIC, NULL, NULL, NULL);
    if (!swsctx) {
        return NULL;
    }

    // BUGFIX: build local plane pointers/strides for the vertical flip
    // instead of rewriting srcframe->data[]/linesize[] in place. The old
    // code never restored them, so every updown call permanently corrupted
    // the caller's frame (and compounded if the frame was converted again).
    const uint8_t* srcData[AV_NUM_DATA_POINTERS];
    int srcLinesize[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
        srcData[i] = srcframe->data[i];
        srcLinesize[i] = srcframe->linesize[i];
    }
    if (updown) {
        // Point each plane at its last row and walk upward (negative stride).
        srcData[0] = srcframe->data[0] + srcframe->linesize[0] * (srcframe->height - 1);
        srcLinesize[0] = -srcframe->linesize[0];
        srcData[1] = srcframe->data[1] + srcframe->linesize[1] * (srcframe->height / 2 - 1);
        srcLinesize[1] = -srcframe->linesize[1];
        srcData[2] = srcframe->data[2] + srcframe->linesize[2] * (srcframe->height / 2 - 1);
        srcLinesize[2] = -srcframe->linesize[2];
    }

    int ret = sws_scale(swsctx,
                        srcData,
                        srcLinesize, 0,
                        srcframe->height,
                        dstframe->data,
                        dstframe->linesize);
    sws_freeContext(swsctx); // Free context after scaling attempt
    if (ret > 0) {
        dstframe->pts = srcframe->pts;
        dstframe->pkt_dts = srcframe->pkt_dts;
        return dstframe;
    }
    log_print(HT_LOG_ERR, "%s, sws_scale failed\r\n", __FUNCTION__);
    return NULL;
}
// Decoder callback invoked for every successfully decoded video frame.
//
// Runs with player._mutex held for the whole body; per the comments below
// it is also called from inside the decoder's own lock scope, so this is
// the one place where both locks are held together.
//
// Responsibilities, in order:
//   1. Serve a pending snapshot request.
//   2. Drop frames flagged with decode errors.
//   3. After (re)start, drop everything until the first keyframe, then
//      "settle" for SETTLE_FRAME_COUNT frames before frames are delivered.
//   4. Record the frame arrival time for staleness detection.
//   5. Rate-limit post-decode work (clone/queue/CUDA capture) to the
//      configured target FPS — the decode itself still ran for every
//      packet so the reference-frame chain stays intact.
//   6. Publish the frame to g_frameQueue and capture the CUDA HW frame.
//
// @param frame  Decoded frame owned by the decoder; cloned before storage.
void CVideoPlayer::onVideoFrame(AVFrame* frame)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex); // Protect against concurrent access
    if (!frame) return; // Check for null pointer
    // Snapshot is served from the raw decoded frame, before any drop logic,
    // so a pending snapshot succeeds even while frames are rate-limited.
    if (m_bSnapshot)
    {
        if (doSnapshot(frame))
        {
            m_bSnapshot = FALSE;
        }
    }
    if (m_bPlaying && m_videoPlayFlag) {
        // Drop any frame with decode errors (corrupted reference frames, etc.)
        if (frame->decode_error_flags != 0) {
            fprintf(stderr, "[HWDecode] Dropping frame with decode errors (flags=0x%x)\n", frame->decode_error_flags);
            return;
        }
        // After start/restart, skip corrupted frames until first keyframe (IDR) arrives.
        // HEVC/H.264 P/B frames received before the first I-frame will produce visual
        // corruption ("Could not find ref with POC", green/grey artifacts).
        if (m_bWaitingForKeyframe) {
            if (frame->key_frame || frame->pict_type == AV_PICTURE_TYPE_I) {
                m_bWaitingForKeyframe = false;
                m_cleanFrameCount = 0;
                fprintf(stderr, "[HWDecode] First keyframe received, settling for %d frames\n", SETTLE_FRAME_COUNT);
            } else {
                return; // Drop this frame — not yet safe to decode
            }
        }
        // Record wall-clock time of every decoded frame (even rate-limited ones).
        // Used by getLastFrameAgeMs() to detect truly stale cameras.
        m_lastDecoderFrameTime = std::chrono::steady_clock::now();
        m_lastDecoderFrameTimeSet = true;
        // --- Frame rate limiting ---
        // Skip post-decode processing (clone, queue push, CUDA clone) if not enough
        // time has elapsed since the last processed frame. The decode itself still
        // runs for every packet to maintain the H.264/H.265 reference frame chain.
        if (m_targetIntervalMs > 0.0) {
            auto now = std::chrono::steady_clock::now();
            if (!m_targetFPSInitialized) {
                // First frame after SetTargetFPS(): start the interval timer
                // but never drop this frame.
                m_lastProcessedTime = now;
                m_targetFPSInitialized = true;
            } else {
                auto elapsed = std::chrono::duration<double, std::milli>(now - m_lastProcessedTime).count();
                if (elapsed < m_targetIntervalMs) {
                    return; // Skip this frame — too soon
                }
            }
            m_lastProcessedTime = now;
        }
        // --- End frame rate limiting ---
        // Push frame to queue; during settle period getImage() will ignore the queue
        // and keep returning the last good cached image
        g_frameQueue.pushFrame(frame); // pushFrame() clones the frame internally
        // Capture CUDA HW frame for zero-copy inference.
        // We're inside decode()'s lock scope (decoder._mutex held) AND onVideoFrame
        // holds player._mutex — so this is the ONE place where both locks are held
        // and we can safely clone the CUDA frame without deadlock risk.
        // cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held.
        if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) {
            if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame);
            m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked();
        }
        // Track how many clean frames have arrived since keyframe
        if (m_cleanFrameCount < SETTLE_FRAME_COUNT) {
            m_cleanFrameCount++;
            if (m_cleanFrameCount == SETTLE_FRAME_COUNT) {
                fprintf(stderr, "[HWDecode] Settle complete, delivering new frames\n");
            }
        }
    }
}
// Decoder callback invoked for every successfully decoded audio frame.
// Clones the frame into the audio frame queue when playback is active.
//
// NOTE: unlike onVideoFrame(), no snapshot attempt is made here. The old
// code called doSnapshot() on audio frames, which can never succeed (audio
// frames carry no video planes, so width/height are 0 and initFrame()
// rejects them) — its only effect was to needlessly free m_pSnapFrame on
// every audio frame while a snapshot was pending.
//
// @param frame  Decoded audio frame owned by the decoder; cloned by
//               pushFrame() before storage.
void CVideoPlayer::onAudioFrame(AVFrame* frame)
{
    // Support for audio playback
    std::lock_guard<std::recursive_mutex> lock(_mutex); // Protect against concurrent access
    if (!frame) return; // Check for null pointer
    if (m_bPlaying && m_audioPlayFlag) {
        a_frameQueue.pushFrame(frame); // pushFrame() clones the frame internally
    }
}
// Fetch the most recent decoded frame as a BGR cv::Mat.
//
// Two-phase locking protocol (statement order matters — do not reshuffle):
//   Phase 1 (under _mutex): inspect playback/settle state, check the frame
//     queue's sequence number for the fast path, and clone the latest frame.
//   Unlocked: run the expensive YUV->BGR conversion so the decoder callback
//     and CUDA capture path can proceed concurrently.
//   Phase 2 (under _mutex): publish the converted image, sequence number,
//     and the preserved raw YUV/NV12 frame for GPU fast-path inference.
//
// @param[out] width   Width of the returned image (cached value on fallback).
// @param[out] height  Height of the returned image.
// @param[out] pts     Monotonic frame counter (incremented per new frame).
// @return Shallow (refcounted) cv::Mat of the latest frame; the cached
//         image while stopped/settling; an empty Mat when no frame is
//         available or on conversion failure/exception.
cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
    try {
        AVFrame* frameToProcess = nullptr;
        uint64_t currentSeq = 0;
        // Timing breakdown — gated by ANSCORE_DEBUGVIEW (zero overhead in production).
        // t0 = entry, t1 = after pulling frame from queue, t2 = after YUV->BGR,
        // t3 = after publish. Throttled to every 30 full-path calls (~1/sec @30fps).
        using clk = std::chrono::steady_clock;
        const auto t0 = clk::now();
        // --- Phase 1: short locked section — examine state, pull latest frame ---
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            if (!m_bPlaying) {
                width = m_currentImage.cols;
                height = m_currentImage.rows;
                pts = m_pts;
                return m_currentImage; // Shallow copy (reference counted)
            }
            // While waiting for keyframe or during settle period after restart,
            // return the last good cached image to avoid showing corrupted frames
            if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT) {
                width = m_currentImage.cols;
                height = m_currentImage.rows;
                pts = m_pts;
                return m_currentImage;
            }
            // Fast path: same frame as last call — skip clone + BGR conversion
            currentSeq = g_frameQueue.getSequence();
            if (currentSeq == m_lastFrameSeq && !m_currentImage.empty()) {
                width = m_currentImage.cols;
                height = m_currentImage.rows;
                pts = m_pts;
                return m_currentImage;
            }
            if (g_frameQueue.isEmpty()) {
                width = m_currentImage.cols;
                height = m_currentImage.rows;
                pts = m_pts;
                std::cerr << "No frame available in getImage()" << std::endl;
                return cv::Mat();
            }
            // getLatestFrame() clones the AVFrame — we own it from here
            frameToProcess = g_frameQueue.getLatestFrame();
            if (!frameToProcess) {
                width = m_currentImage.cols;
                height = m_currentImage.rows;
                pts = m_pts;
                return cv::Mat();
            }
        }
        // --- _mutex released here ---
        // At 4K NV12, cvtColorTwoPlane takes ~100300 ms on CPU; during that
        // window the decoder callback (onVideoFrame) is free to push the next
        // frame and the CUDA HW capture path can run in parallel.
        const auto t1 = clk::now();
        cv::Mat converted;
        try {
            converted = avframeToCVMat(frameToProcess);
        }
        catch (const std::exception& e) {
            std::cerr << "Exception while converting AVFrame to cv::Mat: " << e.what() << std::endl;
        }
        const auto t2 = clk::now();
        // --- Phase 2: short locked section — publish new frame state ---
        cv::Mat result; // Snapshot taken under the lock, returned after release.
        {
            std::lock_guard<std::recursive_mutex> lock(_mutex);
            // Only publish on successful conversion; otherwise the previous
            // cached image and sequence number remain authoritative.
            if (!converted.empty()) {
                m_currentImage = converted;
                m_pts++;
                m_lastFrameSeq = currentSeq;
            }
            // Preserve raw YUV/NV12 frame for GPU fast-path inference
            // (NV12 from HW decode, YUV420P/YUVJ420P from SW decode)
            if (frameToProcess &&
                (frameToProcess->format == AV_PIX_FMT_NV12 ||
                 frameToProcess->format == AV_PIX_FMT_YUV420P ||
                 frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
                if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
                m_currentNV12Frame = av_frame_clone(frameToProcess);
            }
            width = m_currentImage.cols;
            height = m_currentImage.rows;
            pts = m_pts;
            result = m_currentImage; // Shallow copy under lock — refcount keeps buffer alive
        }
        // Free our clone only after phase 2 — m_currentNV12Frame was cloned from it.
        av_frame_free(&frameToProcess);
        // Emit timing breakdown. Throttled so DebugView / stderr stay readable.
        {
            static std::atomic<uint64_t> s_timingCount{0};
            const uint64_t n = s_timingCount.fetch_add(1, std::memory_order_relaxed);
            if ((n % 30) == 0) {
                const auto t3 = clk::now();
                auto ms = [](clk::time_point a, clk::time_point b) {
                    return std::chrono::duration<double, std::milli>(b - a).count();
                };
                ANS_DBG("MEDIA_Timing",
                    "getImage call#%llu pull=%.2fms convert=%.2fms publish=%.2fms total=%.2fms "
                    "size=%dx%d seq=%llu (this=%p)",
                    (unsigned long long)n,
                    ms(t0, t1), ms(t1, t2), ms(t2, t3), ms(t0, t3),
                    width, height,
                    (unsigned long long)currentSeq,
                    (void*)this);
            }
        }
        return result;
    }
    catch (const std::exception& e) {
        std::cerr << "Unexpected exception in getImage(): " << e.what() << std::endl;
        return cv::Mat(); // Return an empty cv::Mat if an exception occurs
    }
    catch (...) {
        std::cerr << "Unknown exception in getImage()" << std::endl;
        return cv::Mat(); // Return an empty cv::Mat if an exception occurs
    }
}
// Fetch the most recent decoded frame encoded as a JPEG byte string.
//
// Unlike getImage(), the whole body runs under _mutex (the recursive mutex
// allows the nested avframeToJpegString path to re-enter it). On any
// failure — pending keyframe/settle, empty queue, or encode exception —
// the last successfully encoded JPEG (m_lastJpegImage) is returned so
// callers always get a displayable image once one has ever been produced.
//
// @param[out] width   Cached stream width (m_Width). NOTE(review): this is
//                     member state, not the pulled frame's width — presumably
//                     kept in sync elsewhere; confirm against the decoder
//                     init path.
// @param[out] height  Cached stream height (m_Height); same caveat as width.
// @param[out] pts     Monotonic counter, incremented per call here and
//                     wrapped to 0 at INT64_MAX.
// @return JPEG bytes of the latest frame, or the last good JPEG (possibly
//         empty before the first successful encode).
std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts) {
    try {
        // Timing breakdown — gated by ANSCORE_DEBUGVIEW (zero overhead in production).
        using clk = std::chrono::steady_clock;
        const auto t0 = clk::now();
        // Use same _mutex as getImage() to protect shared state consistently
        // recursive_mutex allows nested calls to avframeToJpegString → _mutex
        std::lock_guard<std::recursive_mutex> lock(_mutex);
        const auto t1 = clk::now();
        // While waiting for keyframe or during settle period after restart,
        // return the last good cached JPEG to avoid showing corrupted frames
        if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT) {
            width = m_Width;
            height = m_Height;
            pts = m_pts;
            return m_lastJpegImage; // Last good JPEG (may be empty on first-ever start)
        }
        AVFrame* frameToProcess = g_frameQueue.getLatestFrame(); // Get a safe copy
        if (!frameToProcess) {
            return m_lastJpegImage; // Return the last valid JPEG image if no frame is available
        }
        const auto t2 = clk::now();
        // Capture format/size before the frame may be freed — used in the
        // throttled timing log below.
        const int frameFmt = frameToProcess->format;
        const int frameW = frameToProcess->width;
        const int frameH = frameToProcess->height;
        try {
            // HW decode delivers NV12; SW decode delivers YUV420P/YUVJ420P —
            // each has its own JPEG encode path.
            if (frameToProcess->format == AV_PIX_FMT_NV12) {
                m_jpegImage = avframeToJpegString(frameToProcess); // Convert frame to JPEG from NV12
            }
            else {
                m_jpegImage = avframeYUVJ420PToJpegString(frameToProcess); // Convert frame to JPEG from YUVJ420P
            }
        }
        catch (const std::exception& e) {
            std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
            av_frame_free(&frameToProcess);
            return m_lastJpegImage;
        }
        const auto t3 = clk::now();
        av_frame_free(&frameToProcess);
        if (m_pts < INT64_MAX) {
            m_pts++;
        }
        else {
            m_pts = 0; // Reset to zero when max is reached
        }
        // Update the width, height, and pts
        width = m_Width;
        height = m_Height;
        pts = m_pts;
        // Empty encode result leaves the previous good JPEG in place.
        if (!m_jpegImage.empty()) {
            m_lastJpegImage = std::move(m_jpegImage); // Move instead of copy
        }
        // Throttled timing breakdown for the JPEG hot path.
        {
            static std::atomic<uint64_t> s_jpegTimingCount{0};
            const uint64_t n = s_jpegTimingCount.fetch_add(1, std::memory_order_relaxed);
            if ((n % 30) == 0) {
                const auto t4 = clk::now();
                auto ms = [](clk::time_point a, clk::time_point b) {
                    return std::chrono::duration<double, std::milli>(b - a).count();
                };
                const char* fmtName = av_get_pix_fmt_name((AVPixelFormat)frameFmt);
                ANS_DBG("MEDIA_JpegTiming",
                    "getJpegImage call#%llu lock=%.2fms pull=%.2fms encode=%.2fms publish=%.2fms "
                    "total=%.2fms src_fmt=%s %dx%d jpeg_bytes=%zu (this=%p)",
                    (unsigned long long)n,
                    ms(t0, t1), ms(t1, t2), ms(t2, t3), ms(t3, t4), ms(t0, t4),
                    fmtName ? fmtName : "?",
                    frameW, frameH,
                    m_lastJpegImage.size(),
                    (void*)this);
            }
        }
        // Return the most recent valid JPEG image
        return m_lastJpegImage;
    }
    catch (const std::exception& e) {
        std::cerr << "Unexpected exception in getJpegImage(): " << e.what() << std::endl;
    }
    catch (...) {
        std::cerr << "Unknown exception in getJpegImage()" << std::endl;
    }
    // If any exception occurs, return the last valid JPEG image
    return m_lastJpegImage;
}