#include "sys_inc.h"
#include "media_util.h"
#include "media_parse.h"
#include "media_codec.h"
#include "h264.h"
#include "h265.h"
#include "video_player.h"

// NOTE(review): the header names inside <> were lost when this file was
// extracted. The FFmpeg/stdlib headers below are reconstructed from the APIs
// used in this translation unit (avcodec_*, sws_*, av_get_pix_fmt_name, ...)
// — verify against the original build. TurboJPEG (tjCompressFromYUVPlanes)
// and OpenCV (cv::Mat/cv::Rect) are presumably pulled in by a project header.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}

#if __WINDOWS_OS__
#include "audio_play_win.h"
#elif defined(IOS)
#include "video_render_sdl.h"
#include "audio_play_mac.h"
#elif __LINUX_OS__
#include "video_render_sdl.h"
#include "audio_play_qt.h"
#endif

#include <iostream>
#include <algorithm>
#include <vector>
#include <cstring>

#if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64)
#include <emmintrin.h>
#define HAS_SSE2 1
#endif

// ---------------------------------------------------------------------------
// Locking discipline (review fix):
// _mutex appears to be a non-recursive std::mutex (the existing comment in
// encodeNV12ToJPEG_TurboJPEG — "caller already holds _mutex" — shows the
// author avoids re-locking). The original code nevertheless re-locked _mutex
// inside helpers that are only ever reached while the lock is already held
// (cropFrame -> cropFrameData -> cropPlane, convertNV12ToYUVJ420P,
// avframeYUVJ420PToJpegStringUsingTurboJPEG, initSwsContext), which would
// self-deadlock on the first frame. Those helpers now document the
// "caller holds _mutex" precondition instead of locking.
// TODO(review): confirm _mutex is not std::recursive_mutex in the header.
// ---------------------------------------------------------------------------

// C-style decoder callback: forward the decoded video frame to the player.
void VideoDecoderCallback(AVFrame* frame, void* userdata)
{
    CVideoPlayer* pPlayer = static_cast<CVideoPlayer*>(userdata);
    pPlayer->onVideoFrame(frame);
}

// C-style decoder callback: forward the decoded audio frame to the player.
void AudioDecoderCallback(AVFrame* frame, void* userdata)
{
    CVideoPlayer* pPlayer = static_cast<CVideoPlayer*>(userdata);
    pPlayer->onAudioFrame(frame);
}

// Set the crop bounding box used by avframe*ToJpegString (thread-safe).
void CVideoPlayer::setBbox(cv::Rect bbox)
{
    std::lock_guard<std::mutex> lock(_mutex);
    m_Bbox = bbox;
}

// Enable/disable cropping (thread-safe).
void CVideoPlayer::setCrop(bool crop)
{
    std::lock_guard<std::mutex> lock(_mutex);
    m_bCrop = crop;
}

// Build a "view" frame whose plane pointers ALIAS srcFrame's storage,
// restricted to bBox — no pixel copy, no buffer allocation.
// The returned frame must not outlive srcFrame and is released with
// av_frame_free() (which frees only the AVFrame struct here, not the pixels).
// Returns nullptr when cropping is disabled, input is invalid, not playing,
// or the clamped box is degenerate (<= 10 px in either dimension).
// PRECONDITION: caller holds _mutex.
AVFrame* CVideoPlayer::cropFrame(const AVFrame* srcFrame, cv::Rect bBox, bool cropFlag)
{
    try {
        // Validate prerequisites
        if (!cropFlag || !srcFrame || !m_bPlaying) {
            return nullptr;
        }

        // Clamp the bounding box to the source frame's boundaries.
        bBox.x = std::clamp(bBox.x, 0, srcFrame->width);
        bBox.y = std::clamp(bBox.y, 0, srcFrame->height);
        bBox.width = std::clamp(bBox.width, 0, srcFrame->width - bBox.x);
        bBox.height = std::clamp(bBox.height, 0, srcFrame->height - bBox.y);

        // Reject degenerate boxes (also guards the "/2" chroma math below).
        if (bBox.width <= 10 || bBox.height <= 10) {
            std::cerr << "Invalid bounding box dimensions for cropping." << std::endl;
            return nullptr;
        }

        AVFrame* croppedFrame = av_frame_alloc();
        if (!croppedFrame) {
            std::cerr << "Failed to allocate memory for the cropped frame." << std::endl;
            return nullptr;
        }

        croppedFrame->format = srcFrame->format;
        croppedFrame->width = bBox.width;
        croppedFrame->height = bBox.height;

        // Intentionally no av_frame_get_buffer(): cropFrameData() points the
        // planes into the original frame's data.
        if (!cropFrameData(srcFrame, croppedFrame, bBox)) {
            av_frame_free(&croppedFrame);
            return nullptr;
        }

        return croppedFrame;
    } catch (const std::exception& e) {
        std::cerr << "Exception in CVideoPlayer::cropFrame: " << e.what() << std::endl;
        return nullptr;
    }
}

// Helper: set croppedFrame's plane pointers/strides for the given format.
// Supports 4:4:4, 4:2:2, 4:2:0 planar (J and non-J) and NV12.
// PRECONDITION: caller holds _mutex (reached only via cropFrame).
bool CVideoPlayer::cropFrameData(const AVFrame* srcFrame, AVFrame* croppedFrame, const cv::Rect& bBox)
{
    try {
        switch (srcFrame->format) {
        case AV_PIX_FMT_YUVJ444P:
        case AV_PIX_FMT_YUV444P:
            // Full chroma resolution (no subsampling).
            cropPlane(srcFrame, croppedFrame, 0, bBox.x, bBox.y, 1, 1);      // Y
            cropPlane(srcFrame, croppedFrame, 1, bBox.x, bBox.y, 1, 1);      // U
            cropPlane(srcFrame, croppedFrame, 2, bBox.x, bBox.y, 1, 1);      // V
            break;
        case AV_PIX_FMT_YUVJ422P:
        case AV_PIX_FMT_YUV422P:
            // Horizontal chroma subsampling (chroma halved in X).
            cropPlane(srcFrame, croppedFrame, 0, bBox.x, bBox.y, 1, 1);      // Y
            cropPlane(srcFrame, croppedFrame, 1, bBox.x / 2, bBox.y, 1, 1);  // U
            cropPlane(srcFrame, croppedFrame, 2, bBox.x / 2, bBox.y, 1, 1);  // V
            break;
        case AV_PIX_FMT_YUVJ420P:
        case AV_PIX_FMT_YUV420P:
            // Chroma halved in both X and Y (1/4 of Y resolution).
            cropPlane(srcFrame, croppedFrame, 0, bBox.x, bBox.y, 1, 1);          // Y
            cropPlane(srcFrame, croppedFrame, 1, bBox.x / 2, bBox.y / 2, 1, 1);  // U
            cropPlane(srcFrame, croppedFrame, 2, bBox.x / 2, bBox.y / 2, 1, 1);  // V
            break;
        case AV_PIX_FMT_NV12:
            // NV12 has a single interleaved UV plane.
            cropPlane(srcFrame, croppedFrame, 0, bBox.x, bBox.y, 1, 1);          // Y
            cropPlane(srcFrame, croppedFrame, 1, bBox.x / 2, bBox.y / 2, 2, 1);  // UV (interleaved, stepX=2)
            break;
        default:
            std::cerr << "Unsupported pixel format: "
                      << av_get_pix_fmt_name(static_cast<AVPixelFormat>(srcFrame->format))
                      << std::endl;
            return false;
        }
        return true;
    } catch (const std::exception& e) {
        std::cerr << "Exception in cropFrameData: " << e.what() << std::endl;
        return false;
    }
}

// Helper: alias one plane of srcFrame at (offsetX, offsetY).
// subsampleX is the byte step per pixel in X (2 for the interleaved NV12 UV
// plane, 1 otherwise); the source stride is kept so row addressing stays valid.
// PRECONDITION: caller holds _mutex (reached only via cropFrameData).
void CVideoPlayer::cropPlane(const AVFrame* srcFrame, AVFrame* croppedFrame, int planeIndex,
                             int offsetX, int offsetY, int subsampleX, int subsampleY)
{
    try {
        croppedFrame->data[planeIndex] =
            srcFrame->data[planeIndex]
            + offsetY * srcFrame->linesize[planeIndex]
            + offsetX * subsampleX;
        croppedFrame->linesize[planeIndex] = srcFrame->linesize[planeIndex];
    } catch (const std::exception& e) {
        std::cerr << "Exception in cropPlane: " << e.what() << std::endl;
    }
}

// Convert an NV12 AVFrame to a newly allocated YUVJ420P AVFrame.
// Copies the Y plane row by row and de-interleaves the UV plane into
// separate U and V planes. Caller owns the returned frame (av_frame_free).
// Returns nullptr on invalid input or allocation failure.
// PRECONDITION: caller holds _mutex (called from avframeYUVJ420PToJpegString
// while locked).
AVFrame* CVideoPlayer::convertNV12ToYUVJ420P(const AVFrame* nv12Frame)
{
    AVFrame* yuvjFrame = nullptr;
    try {
        if (!nv12Frame || !nv12Frame->data[0] || nv12Frame->width <= 10 || nv12Frame->height <= 10) {
            std::cerr << "Invalid or empty NV12 frame data, or invalid dimensions." << std::endl;
            return nullptr;
        }

        int width = nv12Frame->width;
        int height = nv12Frame->height;

        yuvjFrame = av_frame_alloc();
        if (!yuvjFrame) {
            std::cerr << "Failed to allocate YUVJ420P frame" << std::endl;
            return nullptr;
        }
        yuvjFrame->format = AV_PIX_FMT_YUVJ420P;
        yuvjFrame->width = width;
        yuvjFrame->height = height;

        if (av_frame_get_buffer(yuvjFrame, 32) < 0) {
            std::cerr << "Failed to allocate buffer for YUVJ420P" << std::endl;
            av_frame_free(&yuvjFrame);
            return nullptr;
        }

        // Copy Y plane row by row (strides of src and dst may differ).
        for (int j = 0; j < height; ++j) {
            memcpy(yuvjFrame->data[0] + j * yuvjFrame->linesize[0],
                   nv12Frame->data[0] + j * nv12Frame->linesize[0],
                   width);
        }

        // De-interleave NV12's UV plane into separate U and V planes.
        uint8_t* nv12_uv = nv12Frame->data[1];
        uint8_t* yuvj_u = yuvjFrame->data[1];
        uint8_t* yuvj_v = yuvjFrame->data[2];
        int uvWidth = width / 2;
        int uvHeight = height / 2;
        for (int j = 0; j < uvHeight; ++j) {
            uint8_t* nv12Row = nv12_uv + j * nv12Frame->linesize[1];
            uint8_t* uRow = yuvj_u + j * yuvjFrame->linesize[1];
            uint8_t* vRow = yuvj_v + j * yuvjFrame->linesize[2];
            for (int i = 0; i < uvWidth; ++i) {
                uRow[i] = nv12Row[i * 2];     // Extract U
                vRow[i] = nv12Row[i * 2 + 1]; // Extract V
            }
        }

        return yuvjFrame;
    } catch (const std::exception& e) {
        std::cerr << "Exception in convertNV12ToYUVJ420P: " << e.what() << std::endl;
        // Prevent a leak if the exception fired after allocation.
        if (yuvjFrame) {
            av_frame_free(&yuvjFrame);
        }
        return nullptr;
    }
}

// Encode a YUV(J)420P frame to JPEG via FFmpeg's MJPEG encoder.
// Frames in other pixel formats are first converted with swscale.
// Updates m_Width/m_Height/m_pts on success. Returns "" on failure.
// Thread-safe entry point (takes _mutex).
std::string CVideoPlayer::avframeYUVJ420PToJpegStringUsingFFMpeg(const AVFrame* pFrame)
{
    std::lock_guard<std::mutex> lock(_mutex);
    try {
        if (!m_bPlaying) {
            return "";
        }
        if (!pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return "";
        }

        AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
        if (!jpegCodec) {
            std::cerr << "Failed to find MJPEG encoder." << std::endl;
            return "";
        }
        AVCodecContext* jpegContext = avcodec_alloc_context3(jpegCodec);
        if (!jpegContext) {
            std::cerr << "Failed to allocate codec context." << std::endl;
            return "";
        }

        AVPixelFormat pixFmt = AV_PIX_FMT_YUVJ420P; // Fixed: YUVJ420P for all resolutions
        jpegContext->pix_fmt = pixFmt;
        jpegContext->time_base.num = 1;
        jpegContext->time_base.den = 30;
        jpegContext->compression_level = 10;
        jpegContext->flags |= AV_CODEC_FLAG_QSCALE;          // Enable quality scale
        jpegContext->global_quality = 90 * FF_QP2LAMBDA;     // ~near-lossless

        AVFrame* convertedFrame = nullptr;
        AVPacket packet;
        av_init_packet(&packet);
        packet.data = nullptr;
        packet.size = 0;
        bool isSuccess = false;
        std::string jpegData;

        if ((pFrame->format == AV_PIX_FMT_YUVJ420P) || (pFrame->format == AV_PIX_FMT_YUV420P)) {
            // Already in a directly encodable format.
            jpegContext->width = pFrame->width;
            jpegContext->height = pFrame->height;
            if (avcodec_open2(jpegContext, jpegCodec, NULL) >= 0) {
                if (avcodec_send_frame(jpegContext, pFrame) >= 0) {
                    if (avcodec_receive_packet(jpegContext, &packet) >= 0) {
                        jpegData.assign(reinterpret_cast<const char*>(packet.data), packet.size);
                        m_Width = pFrame->width;
                        m_Height = pFrame->height;
                        m_pts = m_pts + 1;
                        isSuccess = true;
                    }
                }
            }
        } else {
            // Convert to AV_PIX_FMT_YUVJ420P first.
            initSwsContext(pFrame->width, pFrame->height,
                           static_cast<AVPixelFormat>(pFrame->format));
            convertedFrame = av_frame_alloc();
            if (convertedFrame) {
                convertedFrame->format = pixFmt;
                convertedFrame->width = pFrame->width;
                convertedFrame->height = pFrame->height;
                convertedFrame->color_range = AVCOL_RANGE_JPEG;
                if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
                    sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
                              convertedFrame->data, convertedFrame->linesize);
                    jpegContext->width = convertedFrame->width;
                    jpegContext->height = convertedFrame->height;
                    if (avcodec_open2(jpegContext, jpegCodec, NULL) >= 0) {
                        if (avcodec_send_frame(jpegContext, convertedFrame) >= 0) {
                            if (avcodec_receive_packet(jpegContext, &packet) >= 0) {
                                jpegData.assign(reinterpret_cast<const char*>(packet.data), packet.size);
                                m_Width = convertedFrame->width;
                                m_Height = convertedFrame->height;
                                m_pts = m_pts + 1;
                                isSuccess = true;
                            }
                        }
                    }
                }
            }
            av_frame_free(&convertedFrame); // Safe on nullptr
        }

        av_packet_unref(&packet);
        avcodec_free_context(&jpegContext);
        return isSuccess ? jpegData : "";
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUVJ420PToJpegStringUsingFFMpeg: " << e.what() << std::endl;
        return "";
    }
}

// Encode a YUV(J)420P planar frame to JPEG with TurboJPEG (fast path).
// Uses thread-local buffers to avoid per-frame allocation.
// PRECONDITION: caller holds _mutex (both visible callers lock first;
// re-locking here would deadlock).
std::string CVideoPlayer::avframeYUVJ420PToJpegStringUsingTurboJPEG(const AVFrame* pFrame)
{
    try {
        if (!m_bPlaying || !pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            return "";
        }
        if (!_tjInstance) {
            return "";
        }

        unsigned char* yuvPlanes[3] = { pFrame->data[0], pFrame->data[1], pFrame->data[2] };
        int strides[3] = { pFrame->linesize[0], pFrame->linesize[1], pFrame->linesize[2] };
        int width = pFrame->width;
        int height = pFrame->height;
        constexpr int subsampling = TJSAMP_420;
        constexpr int quality = 85;

        // Thread-local buffers avoid malloc/free overhead per frame.
        static thread_local std::vector<unsigned char> jpegBuffer;
        unsigned long jpegBufferSize = tjBufSize(width, height, subsampling);
        if (jpegBuffer.size() < jpegBufferSize) {
            jpegBuffer.resize(jpegBufferSize);
        }

        unsigned char* jpegDataPtr = jpegBuffer.data();
        unsigned long jpegSize = 0;

        int ret = tjCompressFromYUVPlanes(
            _tjInstance,
            (const unsigned char**)yuvPlanes,
            width,
            strides,
            height,
            subsampling,
            &jpegDataPtr,    // Pre-allocated; TurboJPEG may still realloc
            &jpegSize,
            quality,
            TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE);

        if (ret < 0) {
            return "";
        }

        // If TurboJPEG allocated its own buffer, copy out and free it.
        if (jpegDataPtr != jpegBuffer.data()) {
            std::string jpegString(reinterpret_cast<const char*>(jpegDataPtr), jpegSize);
            tjFree(jpegDataPtr);
            return jpegString;
        }
        return std::string(reinterpret_cast<const char*>(jpegDataPtr), jpegSize);
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUVJ420PToJpegStringUsingTurboJPEG: " << e.what() << std::endl;
        return "";
    }
}

// Encode a YUVJ420P frame to JPEG via FFmpeg MJPEG with optimal Huffman
// tables and caller-selected quality. Thread-safe entry point (takes _mutex).
// Returns "" on any failure.
std::string CVideoPlayer::encodeYUVJ420PToJPEG(AVFrame* frame, int quality)
{
    std::lock_guard<std::mutex> lock(_mutex);
    try {
        if (!frame || frame->format != AV_PIX_FMT_YUVJ420P) {
            std::cerr << "Invalid frame format (must be YUVJ420P)" << std::endl;
            return "";
        }

        AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
        if (!codec) {
            std::cerr << "JPEG encoder not found" << std::endl;
            return "";
        }
        AVCodecContext* codecCtx = avcodec_alloc_context3(codec);
        if (!codecCtx) {
            std::cerr << "Failed to allocate codec context" << std::endl;
            return "";
        }

        codecCtx->pix_fmt = AV_PIX_FMT_YUVJ420P; // Full-range YUV for better quality
        codecCtx->width = frame->width;
        codecCtx->height = frame->height;
        codecCtx->time_base.num = 1;
        codecCtx->time_base.den = 30;
        codecCtx->gop_size = 1;
        codecCtx->max_b_frames = 0;
        codecCtx->compression_level = 10;
        codecCtx->flags |= AV_CODEC_FLAG_QSCALE;
        codecCtx->global_quality = quality * FF_QP2LAMBDA;

        // Enable optimal Huffman tables.
        AVDictionary* opts = nullptr;
        av_dict_set(&opts, "huffman", "optimal", 0);

        if (avcodec_open2(codecCtx, codec, &opts) < 0) {
            std::cerr << "Failed to open JPEG encoder" << std::endl;
            av_dict_free(&opts); // Fixed: was leaked on error paths
            avcodec_free_context(&codecCtx);
            return "";
        }

        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = nullptr;
        pkt.size = 0;

        if (avcodec_send_frame(codecCtx, frame) < 0) {
            std::cerr << "Failed to send frame for encoding" << std::endl;
            av_dict_free(&opts);
            avcodec_free_context(&codecCtx);
            return "";
        }
        if (avcodec_receive_packet(codecCtx, &pkt) < 0) {
            std::cerr << "Failed to receive encoded packet" << std::endl;
            av_dict_free(&opts);
            avcodec_free_context(&codecCtx);
            return "";
        }

        std::string jpegString(reinterpret_cast<const char*>(pkt.data), pkt.size);
        av_packet_unref(&pkt);
        avcodec_free_context(&codecCtx);
        av_dict_free(&opts);
        return jpegString;
    } catch (const std::exception& e) {
        std::cerr << "Exception in encodeYUVJ420PToJPEG: " << e.what() << std::endl;
        return "";
    }
}

// Main JPEG snapshot path for planar YUV input. NV12 is first converted to
// YUVJ420P; other formats are converted with swscale; optional crop is
// applied; TurboJPEG does the actual encode. Updates m_Width/m_Height/m_pts.
// Thread-safe entry point (takes _mutex).
std::string CVideoPlayer::avframeYUVJ420PToJpegString(const AVFrame* spFrame)
{
    std::lock_guard<std::mutex> lock(_mutex);
    AVFrame* croppedFrame = nullptr;
    AVFrame* convertedFrame = nullptr;
    AVFrame* convertedNV12Frame = nullptr;
    AVFrame* pFrame = const_cast<AVFrame*>(spFrame); // Default to original frame
    bool isSuccess = false;
    std::string jpegData;
    try {
        if (!m_bPlaying) {
            return "";
        }
        if (!spFrame || !spFrame->data[0] || spFrame->width <= 10 || spFrame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return "";
        }

        // Convert NV12 to YUVJ420P if needed.
        if (pFrame->format == AV_PIX_FMT_NV12) {
            convertedNV12Frame = convertNV12ToYUVJ420P(spFrame);
            if (convertedNV12Frame) {
                pFrame = convertedNV12Frame;
            }
        }

        if ((pFrame->format == AV_PIX_FMT_YUVJ420P) || (pFrame->format == AV_PIX_FMT_YUV420P)) {
            croppedFrame = cropFrame(pFrame, m_Bbox, m_bCrop);
            if (!croppedFrame) {
                croppedFrame = pFrame; // Use original frame if cropping failed
            }
            jpegData = avframeYUVJ420PToJpegStringUsingTurboJPEG(croppedFrame);
            if (!jpegData.empty()) {
                m_Width = croppedFrame->width;
                m_Height = croppedFrame->height;
                m_pts = m_pts + 1;
                isSuccess = true;
            }
            if (croppedFrame != pFrame) {
                av_frame_free(&croppedFrame);
                croppedFrame = nullptr;
            }
        } else {
            // Convert remaining formats with swscale.
            initSwsContext(pFrame->width, pFrame->height,
                           static_cast<AVPixelFormat>(pFrame->format));
            convertedFrame = av_frame_alloc();
            if (convertedFrame) {
                convertedFrame->format = AV_PIX_FMT_YUVJ420P;
                convertedFrame->width = pFrame->width;
                convertedFrame->height = pFrame->height;
                convertedFrame->color_range = AVCOL_RANGE_JPEG;
                if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
                    sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
                              convertedFrame->data, convertedFrame->linesize);
                    croppedFrame = cropFrame(convertedFrame, m_Bbox, m_bCrop);
                    if (!croppedFrame) {
                        croppedFrame = convertedFrame;
                    }
                    jpegData = avframeYUVJ420PToJpegStringUsingTurboJPEG(croppedFrame);
                    if (!jpegData.empty()) {
                        m_Width = croppedFrame->width;
                        m_Height = croppedFrame->height;
                        m_pts = m_pts + 1;
                        isSuccess = true;
                    }
                    if (croppedFrame != convertedFrame) {
                        av_frame_free(&croppedFrame);
                        croppedFrame = nullptr;
                    }
                }
            }
            if (convertedFrame) {
                av_frame_free(&convertedFrame);
                convertedFrame = nullptr;
            }
        }

        if (convertedNV12Frame) {
            av_frame_free(&convertedNV12Frame);
            convertedNV12Frame = nullptr;
        }
        return isSuccess ? jpegData : "";
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUVJ420PToJpegString: " << e.what() << std::endl;
        // Free everything still owned; croppedFrame may alias pFrame/convertedFrame.
        if (croppedFrame && croppedFrame != pFrame && croppedFrame != convertedFrame) {
            av_frame_free(&croppedFrame);
        }
        if (convertedFrame) {
            av_frame_free(&convertedFrame);
        }
        if (convertedNV12Frame) {
            av_frame_free(&convertedNV12Frame);
        }
        return "";
    }
}

// Encode an NV12 frame to JPEG with TurboJPEG, de-interleaving UV into
// temporary U/V planes first (SSE2-accelerated when available).
// NOTE: caller (avframeToJpegString) already holds _mutex — no lock here.
std::string CVideoPlayer::encodeNV12ToJPEG_TurboJPEG(const AVFrame* pFrame, int quality)
{
    try {
        if (!m_bPlaying || !pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
            return "";
        }
        if (!_tjInstance) {
            std::cerr << "TurboJPEG instance is not initialized." << std::endl;
            return "";
        }
        if (pFrame->format != AV_PIX_FMT_NV12) {
            std::cerr << "Unsupported format! Expected NV12, got: "
                      << av_get_pix_fmt_name(static_cast<AVPixelFormat>(pFrame->format))
                      << std::endl;
            return "";
        }

        int width = pFrame->width;
        int height = pFrame->height;

        // TurboJPEG needs separate U and V planes, NV12 has them interleaved.
        unsigned char* yuvPlanes[3];
        int strides[3];
        yuvPlanes[0] = pFrame->data[0]; // Y plane (full resolution)
        strides[0] = pFrame->linesize[0];

        int uvWidth = width / 2;
        int uvHeight = height / 2;
        int uvSize = uvWidth * uvHeight;

        // Fixed: the original constructed these thread_locals once with the
        // FIRST frame's uvSize; a later, larger frame overflowed the buffers.
        // Grow on demand instead.
        static thread_local std::vector<unsigned char> uPlane;
        static thread_local std::vector<unsigned char> vPlane;
        if (uPlane.size() < static_cast<size_t>(uvSize)) {
            uPlane.resize(uvSize);
            vPlane.resize(uvSize);
        }

        // De-interleave the NV12 UV plane into U and V.
        unsigned char* uvData = pFrame->data[1];
        int uvStride = pFrame->linesize[1];
        for (int j = 0; j < uvHeight; j++) {
            const unsigned char* uvRow = uvData + j * uvStride;
            unsigned char* uRow = uPlane.data() + j * uvWidth;
            unsigned char* vRow = vPlane.data() + j * uvWidth;
            int i = 0;
#ifdef HAS_SSE2
            // SSE2: process 16 UV pairs (32 bytes) at a time.
            for (; i + 15 < uvWidth; i += 16) {
                __m128i uv0 = _mm_loadu_si128((__m128i*)(uvRow + i * 2));
                __m128i uv1 = _mm_loadu_si128((__m128i*)(uvRow + i * 2 + 16));
                // Even bytes = U, odd bytes = V.
                __m128i mask = _mm_set1_epi16(0x00FF);
                __m128i u0 = _mm_and_si128(uv0, mask);
                __m128i u1 = _mm_and_si128(uv1, mask);
                __m128i v0 = _mm_srli_epi16(uv0, 8);
                __m128i v1 = _mm_srli_epi16(uv1, 8);
                __m128i uPacked = _mm_packus_epi16(u0, u1);
                __m128i vPacked = _mm_packus_epi16(v0, v1);
                _mm_storeu_si128((__m128i*)(uRow + i), uPacked);
                _mm_storeu_si128((__m128i*)(vRow + i), vPacked);
            }
#endif
            // Scalar tail.
            for (; i < uvWidth; i++) {
                uRow[i] = uvRow[i * 2];
                vRow[i] = uvRow[i * 2 + 1];
            }
        }

        yuvPlanes[1] = uPlane.data();
        yuvPlanes[2] = vPlane.data();
        strides[1] = uvWidth;
        strides[2] = uvWidth;

        // Thread-local output buffer avoids malloc/free overhead.
        static thread_local std::vector<unsigned char> jpegBuffer;
        unsigned long jpegBufferSize = tjBufSize(width, height, TJSAMP_420);
        if (jpegBuffer.size() < jpegBufferSize) {
            jpegBuffer.resize(jpegBufferSize);
        }

        unsigned char* jpegDataPtr = jpegBuffer.data();
        unsigned long jpegSize = 0;

        int ret = tjCompressFromYUVPlanes(
            _tjInstance,
            (const unsigned char**)yuvPlanes,
            width,
            strides,
            height,
            TJSAMP_420, // NV12 chroma layout is 4:2:0
            &jpegDataPtr,
            &jpegSize,
            quality,
            TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE);

        if (ret < 0) {
            std::cerr << "TurboJPEG compression failed: " << tjGetErrorStr() << std::endl;
            return "";
        }

        // If TurboJPEG allocated a new buffer, copy out and free it.
        if (jpegDataPtr != jpegBuffer.data()) {
            std::string jpegString(reinterpret_cast<const char*>(jpegDataPtr), jpegSize);
            tjFree(jpegDataPtr);
            return jpegString;
        }
        return std::string(reinterpret_cast<const char*>(jpegDataPtr), jpegSize);
    } catch (const std::exception& e) {
        std::cerr << "Exception in encodeNV12ToJPEG_TurboJPEG: " << e.what() << std::endl;
        return "";
    }
}

// Encode an NV12 frame to JPEG via FFmpeg's MJPEG encoder (slow fallback):
// de-interleave NV12 into a temporary YUVJ420P frame, then encode.
// NOTE: caller (avframeToJpegString) already holds _mutex — no lock here.
std::string CVideoPlayer::encodeNV12ToJPEG_FFmpeg(const AVFrame* nv12Frame, int quality)
{
    AVCodecContext* codecCtx = nullptr;
    AVFrame* yuvjFrame = nullptr;
    // Fixed: initialize the packet at declaration — the catch path below calls
    // av_packet_unref(&pkt), which was undefined behavior when an exception
    // fired before the original (late) av_init_packet call.
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = nullptr;
    pkt.size = 0;
    try {
        if (!m_bPlaying || !nv12Frame || !nv12Frame->data[0] || nv12Frame->width <= 10 || nv12Frame->height <= 10) {
            return "";
        }
        if (nv12Frame->format != AV_PIX_FMT_NV12) {
            std::cerr << "Invalid frame format! Expected NV12." << std::endl;
            return "";
        }

        int width = nv12Frame->width;
        int height = nv12Frame->height;

        AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
        if (!jpegCodec) {
            std::cerr << "MJPEG encoder not found!" << std::endl;
            return "";
        }
        codecCtx = avcodec_alloc_context3(jpegCodec);
        if (!codecCtx) {
            std::cerr << "Failed to allocate codec context!" << std::endl;
            return "";
        }

        codecCtx->pix_fmt = AV_PIX_FMT_YUVJ420P;
        codecCtx->width = width;
        codecCtx->height = height;
        codecCtx->time_base = { 1, 25 };
        codecCtx->gop_size = 1;
        codecCtx->max_b_frames = 0;
        codecCtx->compression_level = 10;
        codecCtx->flags |= AV_CODEC_FLAG_QSCALE;
        codecCtx->global_quality = quality * FF_QP2LAMBDA;

        if (avcodec_open2(codecCtx, jpegCodec, nullptr) < 0) {
            std::cerr << "Failed to open MJPEG encoder!" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }

        yuvjFrame = av_frame_alloc();
        if (!yuvjFrame) {
            std::cerr << "Failed to allocate YUVJ420P frame!" << std::endl;
            avcodec_free_context(&codecCtx);
            return "";
        }
        yuvjFrame->format = AV_PIX_FMT_YUVJ420P;
        yuvjFrame->width = width;
        yuvjFrame->height = height;
        if (av_frame_get_buffer(yuvjFrame, 32) < 0) {
            std::cerr << "Failed to allocate buffer for YUVJ420P frame!" << std::endl;
            av_frame_free(&yuvjFrame);
            avcodec_free_context(&codecCtx);
            return "";
        }

        // Copy Y plane row by row (strides may differ).
        for (int j = 0; j < height; ++j) {
            memcpy(yuvjFrame->data[0] + j * yuvjFrame->linesize[0],
                   nv12Frame->data[0] + j * nv12Frame->linesize[0],
                   width);
        }

        // De-interleave NV12 UV into U and V planes.
        uint8_t* nv12_uv = nv12Frame->data[1];
        uint8_t* yuvj_u = yuvjFrame->data[1];
        uint8_t* yuvj_v = yuvjFrame->data[2];
        int uvWidth = width / 2;
        int uvHeight = height / 2;
        for (int j = 0; j < uvHeight; ++j) {
            uint8_t* nv12Row = nv12_uv + j * nv12Frame->linesize[1];
            uint8_t* uRow = yuvj_u + j * yuvjFrame->linesize[1];
            uint8_t* vRow = yuvj_v + j * yuvjFrame->linesize[2];
            for (int i = 0; i < uvWidth; ++i) {
                uRow[i] = nv12Row[i * 2];     // Extract U
                vRow[i] = nv12Row[i * 2 + 1]; // Extract V
            }
        }

        bool isSuccess = false;
        std::string jpegData;
        if (avcodec_send_frame(codecCtx, yuvjFrame) >= 0) {
            if (avcodec_receive_packet(codecCtx, &pkt) >= 0) {
                jpegData.assign(reinterpret_cast<const char*>(pkt.data), pkt.size);
                isSuccess = true;
            }
        }

        av_packet_unref(&pkt);
        av_frame_free(&yuvjFrame);
        avcodec_free_context(&codecCtx);
        return isSuccess ? jpegData : "";
    } catch (const std::exception& e) {
        std::cerr << "Exception in encodeNV12ToJPEG_FFmpeg: " << e.what() << std::endl;
    }
    // Cleanup after an exception; pkt is always initialized (see above).
    if (yuvjFrame) av_frame_free(&yuvjFrame);
    if (codecCtx) avcodec_free_context(&codecCtx);
    av_packet_unref(&pkt);
    return "";
}

// Main JPEG snapshot path for NV12 input. Non-NV12 frames are converted to
// NV12 with swscale, optional crop applied, then TurboJPEG encodes.
// Updates m_Width/m_Height/m_pts. Thread-safe entry point (takes _mutex).
std::string CVideoPlayer::avframeToJpegString(const AVFrame* spFrame)
{
    std::lock_guard<std::mutex> lock(_mutex);
    AVFrame* croppedFrame = nullptr;
    AVFrame* convertedFrame = nullptr;
    AVFrame* pFrame = const_cast<AVFrame*>(spFrame);
    bool isSuccess = false;
    std::string jpegData;
    try {
        if (!m_bPlaying) {
            return "";
        }
        if (!spFrame || !spFrame->data[0] || spFrame->width <= 10 || spFrame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return "";
        }

        if (pFrame->format == AV_PIX_FMT_NV12) {
            // NV12 frames go straight to TurboJPEG.
            croppedFrame = cropFrame(pFrame, m_Bbox, m_bCrop);
            if (!croppedFrame) {
                croppedFrame = pFrame; // Use original frame if cropping failed
            }
            jpegData = encodeNV12ToJPEG_TurboJPEG(croppedFrame);
            if (!jpegData.empty()) {
                m_Width = croppedFrame->width;
                m_Height = croppedFrame->height;
                m_pts = m_pts + 1;
                isSuccess = true;
            }
            if (croppedFrame != pFrame) {
                av_frame_free(&croppedFrame);
                croppedFrame = nullptr;
            }
        } else {
            // Convert other formats to NV12 before processing.
            initSwsContext(pFrame->width, pFrame->height,
                           static_cast<AVPixelFormat>(pFrame->format), AV_PIX_FMT_NV12);
            convertedFrame = av_frame_alloc();
            if (convertedFrame) {
                convertedFrame->format = AV_PIX_FMT_NV12;
                convertedFrame->width = pFrame->width;
                convertedFrame->height = pFrame->height;
                convertedFrame->color_range = AVCOL_RANGE_JPEG;
                if (av_frame_get_buffer(convertedFrame, 32) >= 0) {
                    sws_scale(swsCtx, pFrame->data, pFrame->linesize, 0, pFrame->height,
                              convertedFrame->data, convertedFrame->linesize);
                    croppedFrame = cropFrame(convertedFrame, m_Bbox, m_bCrop);
                    if (!croppedFrame) {
                        croppedFrame = convertedFrame;
                    }
                    jpegData = encodeNV12ToJPEG_TurboJPEG(croppedFrame);
                    if (!jpegData.empty()) {
                        m_Width = croppedFrame->width;
                        m_Height = croppedFrame->height;
                        m_pts = m_pts + 1;
                        isSuccess = true;
                    }
                    if (croppedFrame != convertedFrame) {
                        av_frame_free(&croppedFrame);
                        croppedFrame = nullptr;
                    }
                }
            }
            if (convertedFrame) {
                av_frame_free(&convertedFrame);
                convertedFrame = nullptr;
            }
        }
        return isSuccess ? jpegData : "";
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeToJpegString: " << e.what() << std::endl;
        if (croppedFrame && croppedFrame != pFrame && croppedFrame != convertedFrame) {
            av_frame_free(&croppedFrame);
        }
        if (convertedFrame) {
            av_frame_free(&convertedFrame);
        }
        return "";
    }
}

// Pixel-exact comparison of two frames. Compares Y always; chroma is compared
// for NV12 (interleaved UV) and YUVJ420P (separate U/V) only — for any other
// matching format, only the Y plane decides. Thread-safe (takes _mutex).
bool CVideoPlayer::areFramesIdentical(AVFrame* frame1, AVFrame* frame2)
{
    std::lock_guard<std::mutex> lock(_mutex);
    try {
        if (!frame1 || !frame2) return false;
        if (frame1->width != frame2->width ||
            frame1->height != frame2->height ||
            frame1->format != frame2->format) {
            return false;
        }

        int height = frame1->height;
        int width = frame1->width;

        // Compare Y plane (luma) row by row — strides may differ.
        for (int y = 0; y < height; y++) {
            if (std::memcmp(frame1->data[0] + y * frame1->linesize[0],
                            frame2->data[0] + y * frame2->linesize[0],
                            width) != 0) {
                return false;
            }
        }

        if (frame1->format == AV_PIX_FMT_NV12) {
            // Interleaved UV plane: each row holds `width` bytes (U+V pairs).
            int chromaHeight = height / 2;
            for (int y = 0; y < chromaHeight; y++) {
                if (std::memcmp(frame1->data[1] + y * frame1->linesize[1],
                                frame2->data[1] + y * frame2->linesize[1],
                                width) != 0) {
                    return false;
                }
            }
        } else if (frame1->format == AV_PIX_FMT_YUVJ420P) {
            // Separate half-resolution U and V planes.
            int chromaWidth = width / 2;
            int chromaHeight = height / 2;
            for (int y = 0; y < chromaHeight; y++) {
                if (std::memcmp(frame1->data[1] + y * frame1->linesize[1],
                                frame2->data[1] + y * frame2->linesize[1],
                                chromaWidth) != 0) {
                    return false;
                }
                if (std::memcmp(frame1->data[2] + y * frame1->linesize[2],
                                frame2->data[2] + y * frame2->linesize[2],
                                chromaWidth) != 0) {
                    return false;
                }
            }
        }
        return true; // All compared planes match
    } catch (const std::exception& e) {
        std::cerr << "Exception in areFramesIdentical: " << e.what() << std::endl;
        return false;
    }
}

// (Re)create the shared swsCtx for width x height, pixFmt -> outputPixFmt.
// No-op when the cached context already matches. Uses SWS_BILINEAR: fast
// enough for real-time (LANCZOS is too slow).
// PRECONDITION: caller holds _mutex (every visible caller locks first;
// re-locking here would deadlock).
void CVideoPlayer::initSwsContext(int width, int height, AVPixelFormat pixFmt, AVPixelFormat outputPixFmt)
{
    try {
        if (width <= 0 || height <= 0 || pixFmt == AV_PIX_FMT_NONE) {
            std::cerr << "Invalid parameters: width=" << width
                      << ", height=" << height
                      << ", pixFmt=" << pixFmt << std::endl;
            return;
        }

        bool needsReinit = (swsCtx == nullptr) ||
                           (width != lastWidth || height != lastHeight ||
                            pixFmt != lastPixFmt || outputPixFmt != lastOutPixFmt);
        if (!needsReinit) {
            return; // Cached context is still valid
        }

        if (swsCtx) {
            sws_freeContext(swsCtx);
            swsCtx = nullptr;
        }

        int scalingFlags = SWS_BILINEAR;
        swsCtx = sws_getContext(width, height, pixFmt,
                                width, height, outputPixFmt,
                                scalingFlags, nullptr, nullptr, nullptr);
        if (!swsCtx) {
            std::cerr << "Failed to create SwsContext: width=" << width
                      << ", height=" << height
                      << ", inputPixFmt=" << pixFmt
                      << ", outputPixFmt=" << outputPixFmt << std::endl;
            return;
        }

        lastWidth = width;
        lastHeight = height;
        lastPixFmt = pixFmt;
        lastOutPixFmt = outputPixFmt;
    } catch (const std::exception& e) {
        std::cerr << "Exception in initSwsContext: " << e.what() << std::endl;
    } catch (...) {
        std::cerr << "Unknown exception in initSwsContext." << std::endl;
    }
}

// Convert any supported AVFrame to a BGR24 cv::Mat via swscale.
// Returns an empty Mat on failure. Thread-safe entry point (takes _mutex).
cv::Mat CVideoPlayer::avframeAnyToCvmat(const AVFrame* frame)
{
    std::lock_guard<std::mutex> lock(_mutex);
    try {
        if (!frame || !frame->data[0] || frame->width <= 10 || frame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return cv::Mat();
        }

        initSwsContext(frame->width, frame->height,
                       static_cast<AVPixelFormat>(frame->format), AV_PIX_FMT_BGR24);

        cv::Mat image(frame->height, frame->width, CV_8UC3);
        uint8_t* dst[1] = { image.data };
        int dstStride[1] = { static_cast<int>(image.step[0]) }; // OpenCV's stride

        int result = sws_scale(swsCtx, frame->data, frame->linesize, 0, frame->height,
                               dst, dstStride);
        if (result < 0) {
            std::cerr << "Failed to scale the frame." << std::endl;
            return cv::Mat();
        }
        return image;
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeAnyToCvmat: " << e.what() << std::endl;
        return cv::Mat();
    }
}

// Convert a YUVJ420P frame to a BGR cv::Mat with a scalar per-pixel
// BT.601-style conversion (no swscale). Slow path; kept for parity with the
// original. Thread-safe entry point (takes _mutex).
cv::Mat CVideoPlayer::avframeYUVJ420PToCvmat(const AVFrame* frame)
{
    std::lock_guard<std::mutex> lock(_mutex);
    try {
        if (!frame || !frame->data[0] || frame->width <= 10 || frame->height <= 10) {
            std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl;
            return cv::Mat();
        }

        cv::Mat image(frame->height, frame->width, CV_8UC3); // 8-bit, 3 channels (BGR)

        uint8_t* yPlane = frame->data[0]; // luma
        uint8_t* uPlane = frame->data[1]; // chroma U
        uint8_t* vPlane = frame->data[2]; // chroma V
        int yStride = frame->linesize[0];
        int uStride = frame->linesize[1];
        int vStride = frame->linesize[2];

        for (int y = 0; y < frame->height; ++y) {
            for (int x = 0; x < frame->width; ++x) {
                int yVal = yPlane[y * yStride + x];
                // Chroma is half-resolution in both directions (4:2:0).
                int uVal = uPlane[(y / 2) * uStride + (x / 2)];
                int vVal = vPlane[(y / 2) * vStride + (x / 2)];

                int uDiff = uVal - 128;
                int vDiff = vVal - 128;

                int r = yVal + (1.402 * vDiff);
                int g = yVal - (0.344136 * uDiff) - (0.714136 * vDiff);
                int b = yVal + (1.772 * uDiff);

                r = std::clamp(r, 0, 255);
                g = std::clamp(g, 0, 255);
                b = std::clamp(b, 0, 255);

                // OpenCV stores BGR by default.
                image.at<cv::Vec3b>(y, x) = cv::Vec3b(b, g, r);
            }
        }
        return image;
    } catch (const std::exception& e) {
        std::cerr << "Exception in avframeYUVJ420PToCvmat: " << e.what() << std::endl;
        return cv::Mat();
    }
}

// Initialize a dedicated SwsContext for NV12 -> BGR24 with the correct color
// space (BT.601/709/2020 from frame metadata, with an HD-resolution fallback)
// and color range. Cached until dimensions/colorspace/range change.
// NOTE(review): no lock here in the original; assumed to be called from a
// single decode thread — confirm.
void CVideoPlayer::initNV12SwsContext(const AVFrame* frame)
{
    int width = frame->width;
    int height = frame->height;

    // Detect color space from frame metadata (BT.709 for HD/4K, BT.601 for SD).
    int colorspace = SWS_CS_ITU709; // Default to BT.709
    if (frame->colorspace == AVCOL_SPC_BT470BG || frame->colorspace == AVCOL_SPC_SMPTE170M) {
        colorspace = SWS_CS_ITU601;
    } else if (frame->colorspace == AVCOL_SPC_BT2020_NCL || frame->colorspace == AVCOL_SPC_BT2020_CL) {
        colorspace = SWS_CS_BT2020;
    } else if (frame->colorspace == AVCOL_SPC_BT709) {
        colorspace = SWS_CS_ITU709;
    } else if (width >= 1280 || height >= 720) {
        // Auto-detect: HD and above -> BT.709 (most common for IP cameras)
        colorspace = SWS_CS_ITU709;
    } else {
        colorspace = SWS_CS_ITU601; // SD content
    }

    // Color range: limited (16-235) vs full (0-255).
    int srcRange = (frame->color_range == AVCOL_RANGE_JPEG) ? 1 : 0; // 0=limited, 1=full
    int dstRange = 1; // Always output full range for display/AI processing

    // Reuse the cached context when nothing relevant changed.
    if (m_nv12SwsCtx && width == m_nv12LastWidth && height == m_nv12LastHeight &&
        colorspace == m_nv12LastColorspace && srcRange == m_nv12LastRange) {
        return;
    }

    if (m_nv12SwsCtx) {
        sws_freeContext(m_nv12SwsCtx);
        m_nv12SwsCtx = nullptr;
    }

    // NV12 -> BGR24, same dimensions (no scaling).
    // SWS_BILINEAR + SWS_FULL_CHR_H_INT: good chroma upsampling (~12ms for 4K);
    // SWS_ACCURATE_RND: better rounding. SWS_LANCZOS would match VLC quality
    // but costs 50-80ms — VLC gets there via GPU shaders, not CPU.
    m_nv12SwsCtx = sws_getContext(width, height, AV_PIX_FMT_NV12,
                                  width, height, AV_PIX_FMT_BGR24,
                                  SWS_BILINEAR | SWS_ACCURATE_RND | SWS_FULL_CHR_H_INT,
                                  nullptr, nullptr, nullptr);
    if (!m_nv12SwsCtx) {
        std::cerr << "Failed to create NV12 SwsContext" << std::endl;
        return;
    }

    // Apply the detected color space and range.
    const int* coefficients = sws_getCoefficients(colorspace);
    int* inv_table;
    int* table;
    int curSrcRange, curDstRange, brightness, contrast, saturation;
    sws_getColorspaceDetails(m_nv12SwsCtx, &inv_table, &curSrcRange, &table, &curDstRange,
                             &brightness, &contrast, &saturation);
    sws_setColorspaceDetails(m_nv12SwsCtx, coefficients, srcRange, coefficients, dstRange,
                             brightness, contrast, saturation);

    m_nv12LastWidth = width;
    m_nv12LastHeight = height;
    m_nv12LastColorspace = colorspace;
    m_nv12LastRange = srcRange;
}

cv::Mat CVideoPlayer::avframeNV12ToCvMat(const AVFrame* frame)
{
    try {
        if (!frame || frame->width <= 0 || frame->height <= 0) {
            std::cerr << "Invalid frame! Either null, incorrect format, or zero dimensions." << std::endl;
            return cv::Mat();
        }
        // Software decode handler
        if (frame->format != AV_PIX_FMT_NV12)
            return avframeAnyToCvmat(frame);

        int width = frame->width;
        int height = frame->height;
        // Store original NV12 dimensions for inference coordinate mapping
        m_nv12OrigWidth = width;
        m_nv12OrigHeight = height;
        // Return full-resolution BGR image.
        // No forced downscale — LabVIEW manages display resolution via SetDisplayResolution().
        // If the caller needs a specific display size, SetDisplayResolution(w, h) applies
        // resizing in GetImage() at the ANSRTSP/ANS*Client level after this returns.
// Store original NV12 dimensions for inference coordinate mapping m_nv12OrigWidth = width; m_nv12OrigHeight = height; cv::Mat yPlane(height, width, CV_8UC1, frame->data[0], frame->linesize[0]); cv::Mat uvPlane(height / 2, width / 2, CV_8UC2, frame->data[1], frame->linesize[1]); cv::Mat bgrImage; cv::cvtColorTwoPlane(yPlane, uvPlane, bgrImage, cv::COLOR_YUV2BGR_NV12); if (m_nImageQuality == 1) { bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0); } return bgrImage; } catch (const std::exception& e) { std::cerr << "Exception in avframeNV12ToCvMat: " << e.what() << std::endl; return cv::Mat(); } } cv::Mat CVideoPlayer::avframeYUV420PToCvMat(const AVFrame* frame) { try { if (!frame || frame->width <= 0 || frame->height <= 0) { return cv::Mat(); } const int width = frame->width; const int height = frame->height; // YUV420P has 3 separate planes: Y (full res), U (half), V (half). // OpenCV's cvtColor(COLOR_YUV2BGR_I420) expects a single contiguous buffer // with Y on top (H rows) and U,V stacked below (H/2 rows total). // Layout: [Y: W×H] [U: W/2 × H/2] [V: W/2 × H/2] // Total height = H * 3/2, width = W, single channel. 
// If all planes are contiguous with matching strides, wrap directly const int yStride = frame->linesize[0]; const int uStride = frame->linesize[1]; const int vStride = frame->linesize[2]; // Fast path: planes are packed contiguously with stride == width if (yStride == width && uStride == width / 2 && vStride == width / 2 && frame->data[1] == frame->data[0] + width * height && frame->data[2] == frame->data[1] + (width / 2) * (height / 2)) { // Contiguous I420 — wrap directly, zero copy cv::Mat yuv(height * 3 / 2, width, CV_8UC1, frame->data[0]); cv::Mat bgrImage; cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420); if (m_nImageQuality == 1) { bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0); } return bgrImage; } // Slow path: planes have padding (linesize > width) — copy to contiguous buffer const int uvWidth = width / 2; const int uvHeight = height / 2; const int totalSize = width * height + uvWidth * uvHeight * 2; cv::Mat yuv(height * 3 / 2, width, CV_8UC1); uint8_t* dst = yuv.data; // Copy Y plane (line by line if stride != width) if (yStride == width) { std::memcpy(dst, frame->data[0], width * height); } else { for (int row = 0; row < height; ++row) { std::memcpy(dst + row * width, frame->data[0] + row * yStride, width); } } dst += width * height; // Copy U plane if (uStride == uvWidth) { std::memcpy(dst, frame->data[1], uvWidth * uvHeight); } else { for (int row = 0; row < uvHeight; ++row) { std::memcpy(dst + row * uvWidth, frame->data[1] + row * uStride, uvWidth); } } dst += uvWidth * uvHeight; // Copy V plane if (vStride == uvWidth) { std::memcpy(dst, frame->data[2], uvWidth * uvHeight); } else { for (int row = 0; row < uvHeight; ++row) { std::memcpy(dst + row * uvWidth, frame->data[2] + row * vStride, uvWidth); } } cv::Mat bgrImage; cv::cvtColor(yuv, bgrImage, cv::COLOR_YUV2BGR_I420); if (m_nImageQuality == 1) { bgrImage.convertTo(bgrImage, -1, 255.0 / 219.0, -16.0 * 255.0 / 219.0); } return bgrImage; } catch (const std::exception& e) 
{ std::cerr << "Exception in avframeYUV420PToCvMat: " << e.what() << std::endl; return cv::Mat(); } } cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) { std::lock_guard lock(_mutex); try { // 1. Validate input frame if (!pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) { std::cerr << "Invalid or empty frame data, or invalid dimensions." << std::endl; return cv::Mat(); } switch (pFrame->format) { case AV_PIX_FMT_NV12: return avframeNV12ToCvMat(pFrame); case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUVJ420P: return avframeYUV420PToCvMat(pFrame); default: return avframeAnyToCvmat(pFrame); } } catch (const std::exception& e) { std::cerr << "Exception in avframeToCvMat: " << e.what() << std::endl; return cv::Mat(); // Return an empty matrix on error } } CVideoPlayer::CVideoPlayer() : m_bVideoInited(FALSE) , m_bAudioInited(FALSE) , m_bPlaying(FALSE) , m_bPaused(FALSE) , m_nHWDecoding(HW_DECODING_DISABLE)// Software decode by default — saves VRAM (no NVDEC DPB surfaces) , m_bUpdown(FALSE) , m_bSnapshot(FALSE) , m_nSnapVideoFmt(AV_PIX_FMT_YUVJ420P) , m_nVideoCodec(VIDEO_CODEC_NONE) , m_nAudioCodec(AUDIO_CODEC_NONE) , m_nSampleRate(0) , m_nChannel(0) , m_nBitPerSample(0) , m_pSnapFrame(NULL) , m_bRecording(FALSE) , m_bNalFlag(FALSE) , m_pAviCtx(NULL) , m_pAudioListMutex(NULL) , m_audioPlayFlag(FALSE) //, m_audioPlayThread(0) , m_pVideoListMutex(NULL) , m_videoPlayFlag(FALSE) //, m_videoPlayThread(0) , m_nLastAudioPts(AV_NOPTS_VALUE) , m_lastAudioTS(0) { m_Bbox.x = 0; m_Bbox.y = 0; m_Bbox.width = 0; m_Bbox.height = 0; m_bCrop = false; m_pRecordMutex = sys_os_create_mutex(); m_lastJpegImage = ""; m_jpegImage = ""; m_pts = 0; memset(&m_h26XParamSets, 0, sizeof(H26XParamSets)); memset(&m_audioClock, 0, sizeof(HTCLOCK)); memset(&m_videoClock, 0, sizeof(HTCLOCK)); this->_tjInstance = tjInitCompress(); } CVideoPlayer::~CVideoPlayer() { // Lock to ensure no other thread is mid-operation (getImage, getJpegImage, onVideoFrame) // before we free 
// resources. close() stops the decoder which prevents new callbacks.
    {
        std::lock_guard lock(_mutex);
        close(); // Stop decoder first — prevents new onVideoFrame callbacks
        g_frameQueue.clearQueue();
        // Release both scaler contexts (the generic one and the dedicated NV12 one)
        if (swsCtx != nullptr)
        {
            sws_freeContext(swsCtx);
            swsCtx = nullptr;
        }
        if (m_nv12SwsCtx != nullptr)
        {
            sws_freeContext(m_nv12SwsCtx);
            m_nv12SwsCtx = nullptr;
        }
        // TurboJPEG compressor handle created in the constructor
        if (this->_tjInstance)
        {
            tjDestroy(this->_tjInstance);
            this->_tjInstance = nullptr;
        }
    }
    // _mutex is destroyed after this block — no other thread should be accessing this object
}

// Remember a file name / URL to play later. Always succeeds.
BOOL CVideoPlayer::open(std::string fileName)
{
    m_sFileName = fileName;
    return TRUE;
}

// Remember credentials and the stream URL. Always succeeds.
BOOL CVideoPlayer::open(std::string _username, std::string _password, std::string _url)
{
    m_acct = _username;
    m_pass = _password;
    m_sFileName = _url;
    return TRUE;
}

// Caller owns the returned clone and must av_frame_free() it.
AVFrame* CVideoPlayer::getNV12Frame()
{
    // Return a CLONE so multiple consumers (tasks sharing the same stream)
    // each get their own copy. The original m_currentNV12Frame stays valid
    // until the next getImage() call overwrites it.
    // (Previously used ownership transfer — only the first caller got NV12,
    // and the second caller fell back to BGR.)
    std::lock_guard lock(_mutex);
    return m_currentNV12Frame ? av_frame_clone(m_currentNV12Frame) : nullptr;
}

// Caller owns the returned clone and must av_frame_free() it.
AVFrame* CVideoPlayer::getCudaHWFrame()
{
    // Return a clone of the CUDA HW frame captured by onVideoFrame().
    // Clone (not ownership transfer) because multiple callers may request
    // the frame between onVideoFrame updates (e.g., during warmup when
    // GetRTSPCVImage is called faster than the decode rate).
    // extra_hw_frames=2 in the decoder provides surface pool headroom
    // for the 3 concurrent clones (decoder + player + registry).
    std::lock_guard lock(_mutex);
    return m_currentCudaHWFrame ? av_frame_clone(m_currentCudaHWFrame) : nullptr;
}

// True when the underlying decoder is running with CUDA hardware acceleration.
bool CVideoPlayer::isCudaHWAccel() const
{
    return m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel();
}

// Tear down video/audio pipelines, cached frames and recording state.
void CVideoPlayer::close()
{
    closeVideo();
    closeAudio();
    if (m_currentNV12Frame)
    {
        av_frame_free(&m_currentNV12Frame);
        m_currentNV12Frame = nullptr;
    }
    if (m_currentCudaHWFrame)
    {
        av_frame_free(&m_currentCudaHWFrame);
        m_currentCudaHWFrame = nullptr;
    }
    if (m_pSnapFrame)
    {
        av_frame_free(&m_pSnapFrame);
        m_pSnapFrame = nullptr;
    }
    stopRecord(); // uses m_pRecordMutex, so the mutex is destroyed only afterwards
    if (m_pRecordMutex)
    {
        sys_os_destroy_sig_mutex(m_pRecordMutex);
        m_pRecordMutex = NULL;
    }
}

// Forward the requested volume to the platform audio backend, if one exists.
void CVideoPlayer::setVolume(int volume)
{
    if (m_pAudioPlay)
    {
        m_pAudioPlay->setVolume(volume);
    }
}

// Arm a one-shot snapshot; the next frame seen by onVideoFrame() is captured
// in the requested pixel format.
void CVideoPlayer::snapshot(int videofmt)
{
    m_bSnapshot = TRUE;
    m_nSnapVideoFmt = videofmt;
}

// Begin AVI recording into `baseName`. Returns TRUE if already recording.
BOOL CVideoPlayer::record(std::string baseName)
{
    if (m_bRecording)
    {
        return TRUE;
    }
    //std::string path = getRecordPath();
    std::string file = baseName;// path + "/" + getTempFile(baseName, ".avi");
    m_sBaseName = baseName;
    m_pAviCtx = avi_write_open(file.c_str());
    if (NULL == m_pAviCtx)
    {
        log_print(HT_LOG_ERR, "%s, avi_write_open failed.
%s\r\n", __FUNCTION__, file.c_str()); return FALSE; } if (!onRecord()) { avi_write_close(m_pAviCtx); m_pAviCtx = NULL; return FALSE; } m_bRecording = TRUE; return m_bRecording; } void CVideoPlayer::stopRecord() { sys_os_mutex_enter(m_pRecordMutex); m_bRecording = FALSE; m_bNalFlag = FALSE; memset(&m_h26XParamSets, 0, sizeof(H26XParamSets)); if (m_pAviCtx) { avi_write_close(m_pAviCtx); m_pAviCtx = NULL; } sys_os_mutex_leave(m_pRecordMutex); } void CVideoPlayer::recordVideo(uint8* data, int len, uint32 ts, uint16 seq) { int codec = VIDEO_CODEC_NONE; if (!memcmp(m_pAviCtx->v_fcc, "H264", 4)) { codec = VIDEO_CODEC_H264; } else if (!memcmp(m_pAviCtx->v_fcc, "H265", 4)) { codec = VIDEO_CODEC_H265; } if ((VIDEO_CODEC_H264 == codec || VIDEO_CODEC_H265 == codec) && !m_bNalFlag) { if (avc_get_h26x_paramsets(data, len, codec, &m_h26XParamSets)) { avi_write_nalu(m_pAviCtx, m_h26XParamSets.vps, m_h26XParamSets.vps_size, m_h26XParamSets.sps, m_h26XParamSets.sps_size, m_h26XParamSets.pps, m_h26XParamSets.pps_size); m_bNalFlag = 1; } } recordVideoEx(data, len, ts, seq); if (recordSwitchCheck()) { recordFileSwitch(); } } void CVideoPlayer::recordVideoEx(uint8* data, int len, uint32 ts, uint16 seq) { AVICTX* p_avictx = m_pAviCtx; if (p_avictx->v_width == 0 || p_avictx->v_height == 0) { int codec = VIDEO_CODEC_NONE; if (memcmp(p_avictx->v_fcc, "H264", 4) == 0) { codec = VIDEO_CODEC_H264; } else if (memcmp(p_avictx->v_fcc, "H265", 4) == 0) { codec = VIDEO_CODEC_H265; } else if (memcmp(p_avictx->v_fcc, "JPEG", 4) == 0) { codec = VIDEO_CODEC_JPEG; } else if (memcmp(p_avictx->v_fcc, "MP4V", 4) == 0) { codec = VIDEO_CODEC_MP4; } avc_parse_video_size(codec, data, len, &p_avictx->v_width, &p_avictx->v_height); if (p_avictx->v_width && p_avictx->v_height) { avi_update_header(p_avictx); } } int key = 0; if (memcmp(p_avictx->v_fcc, "H264", 4) == 0) { uint8 nalu_t = (data[4] & 0x1F); key = (nalu_t == 5 || nalu_t == 7 || nalu_t == 8); } else if (memcmp(p_avictx->v_fcc, "H265", 4) == 0) { uint8 
nalu_t = (data[4] >> 1) & 0x3F; key = ((nalu_t >= 16 && nalu_t <= 21) || nalu_t == 32 || nalu_t == 33 || nalu_t == 34); } else if (memcmp(p_avictx->v_fcc, "MP4V", 4) == 0) { key = 1; } else if (memcmp(p_avictx->v_fcc, "JPEG", 4) == 0) { key = 1; } avi_write_video(p_avictx, data, len, ts, key); } void CVideoPlayer::recordAudio(uint8* data, int len, uint32 ts, uint16 seq) { AVICTX* p_avictx = m_pAviCtx; avi_write_audio(p_avictx, data, len, ts); if (recordSwitchCheck()) { recordFileSwitch(); } } BOOL CVideoPlayer::recordSwitchCheck() { uint64 tlen = avi_get_file_length(m_pAviCtx); uint32 mtime = avi_get_media_time(m_pAviCtx); uint32 recordSize = 0;// getRecordSize(); if (recordSize == 0) { recordSize = 1048576; // max 1G file size } // Switch according to the recording size if (tlen > recordSize * 1024) { return TRUE; } uint32 recordTime = 0;// getRecordTime(); // Switch according to the recording duration if (recordTime > 0 && mtime > recordTime * 1000) { return TRUE; } return FALSE; } void CVideoPlayer::recordFileSwitch() { AVICTX* p_ctx; AVICTX* p_oldctx = m_pAviCtx; //std::string path = getRecordPath(); std::string file = m_sBaseName;// path + "/" + getTempFile(m_sBaseName, ".avi"); p_ctx = avi_write_open(file.c_str()); if (NULL == p_ctx) { return; } p_ctx->ctxf_video = p_oldctx->ctxf_video; p_ctx->ctxf_audio = p_oldctx->ctxf_audio; if (p_ctx->ctxf_video) { avi_calc_fps(p_oldctx); avi_set_video_info(p_ctx, p_oldctx->v_fps, p_oldctx->v_width, p_oldctx->v_height, p_oldctx->v_fcc); avi_set_video_extra_info(p_ctx, p_oldctx->v_extra, p_oldctx->v_extra_len); } if (p_ctx->ctxf_audio) { avi_set_audio_info(p_ctx, p_oldctx->a_chns, p_oldctx->a_rate, p_oldctx->a_fmt); avi_set_audio_extra_info(p_ctx, p_oldctx->a_extra, p_oldctx->a_extra_len); } avi_write_close(p_oldctx); avi_update_header(p_ctx); m_pAviCtx = p_ctx; if (m_h26XParamSets.vps_size > 0 || m_h26XParamSets.sps_size > 0 || m_h26XParamSets.pps_size > 0) { avi_write_nalu(m_pAviCtx, m_h26XParamSets.vps, 
m_h26XParamSets.vps_size,
                       m_h26XParamSets.sps, m_h26XParamSets.sps_size,
                       m_h26XParamSets.pps, m_h26XParamSets.pps_size);
    }
}

// Initialize the video decoder for the given codec. Idempotent: returns TRUE
// immediately if already initialized. On success, registers the decode
// callback and enables frame delivery.
BOOL CVideoPlayer::openVideo(enum AVCodecID codec, uint8* extradata, int extradata_size)
{
    if (m_bVideoInited)
    {
        return TRUE;
    }
    if (m_pVideoDecoder)
    {
        m_bVideoInited = m_pVideoDecoder->init(codec, extradata, extradata_size, m_nHWDecoding, m_nPreferredGpu);
    }
    if (m_bVideoInited)
    {
        m_pVideoDecoder->setCallback(VideoDecoderCallback, this);
        m_pVideoListMutex = sys_os_create_mutex();
        m_videoPlayFlag = TRUE;
        //m_videoPlayThread = sys_os_create_thread((void*)VideoPlayThread, this);
    }
    m_nVideoCodec = to_video_codec(codec);
    return m_bVideoInited;
}

// Convenience overload taking the project's codec enum instead of AVCodecID.
BOOL CVideoPlayer::openVideo(int codec, uint8* extradata, int extradata_size)
{
    return openVideo(to_video_avcodecid(codec), extradata, extradata_size);
}

void CVideoPlayer::closeVideo()
{
    // Stop decoder outside the player lock to avoid the same lock-ordering
    // deadlock as StopVideoDecoder() (see comment there).
    CVideoDecoder* decoder = nullptr;
    {
        std::lock_guard lock(_mutex);
        decoder = m_pVideoDecoder.get();
    }
    if (decoder)
    {
        decoder->Stop();
        decoder->flush();
    }
    // Now clean up resources under the lock
    std::lock_guard lock(_mutex);
    m_videoPlayFlag = FALSE;
    if (m_pVideoListMutex)
    {
        sys_os_destroy_sig_mutex(m_pVideoListMutex);
        m_pVideoListMutex = NULL;
    }
    if (!g_frameQueue.isEmpty())g_frameQueue.clearQueue();
    m_bVideoInited = FALSE;
}

// (Re)start decoding: reset keyframe/settle tracking so corrupted pre-IDR
// frames are dropped until the stream stabilizes.
void CVideoPlayer::StartVideoDecoder()
{
    std::lock_guard lock(_mutex);
    // Clear queue but KEEP m_currentImage — it holds the last good frame
    // which we'll return while the decoder stabilizes after restart
    g_frameQueue.clearQueue();
    m_lastFrameSeq = 0;
    m_bWaitingForKeyframe = true; // Skip frames until first keyframe
    m_cleanFrameCount = 0;        // Reset settle counter
    if (m_pVideoDecoder)
    {
        m_pVideoDecoder->Start();
    }
}

void CVideoPlayer::StopVideoDecoder()
{
    // Get decoder pointer under lock, then release BEFORE calling decoder methods.
    // This avoids a lock-ordering deadlock:
    // Thread 1 (here): CVideoPlayer::_mutex -> CVideoDecoder::_mutex
    // Thread 2 (TCP rx decode -> onVideoFrame callback): CVideoDecoder::_mutex -> CVideoPlayer::_mutex
    CVideoDecoder* decoder = nullptr;
    {
        std::lock_guard lock(_mutex);
        decoder = m_pVideoDecoder.get();
    }
    if (decoder)
    {
        decoder->Stop();
        // Flush decoder to drain and discard any buffered frames,
        // so stale reference frames don't corrupt the next session
        decoder->flush();
        // Free NVDEC decoder context and all GPU surfaces (DPB buffers).
        // Stopped cameras should not hold VRAM — with 100 cameras created
        // but only 5 running, the 95 idle decoders would consume ~5-10 GB.
        // The decoder will be re-initialized automatically when the next
        // video packet arrives after Start() is called.
        decoder->uninit();
        m_bVideoInited = FALSE;
    }
    // Clear queue but KEEP m_currentImage and m_lastJpegImage —
    // getImage()/getJpegImage() will return the last good frame while decoder stabilizes
    {
        std::lock_guard lock(_mutex);
        g_frameQueue.clearQueue();
        m_lastFrameSeq = 0;
    }
}

// Initialize the audio decoder + platform playback backend. Idempotent.
// Playback is created but left disabled (m_audioPlayFlag = FALSE) by default.
BOOL CVideoPlayer::openAudio(enum AVCodecID codec, int samplerate, int channels, int bitpersample)
{
    if (m_bAudioInited)
    {
        return TRUE;
    }
    if (m_pAudioDecoder)
    {
        m_bAudioInited = m_pAudioDecoder->init(codec, samplerate, channels, bitpersample);
    }
    if (m_bAudioInited)
    {
        m_pAudioDecoder->setCallback(AudioDecoderCallback, this);
        // NOTE(review): the make_unique template arguments appear to have been
        // lost in this copy of the file — presumably the platform audio-play
        // classes (e.g. CWAudioPlay per the trailing comment); confirm against VCS.
#if __WINDOWS_OS__
        m_pAudioPlay = std::make_unique();/// new CWAudioPlay();
#elif defined(IOS)
        m_pAudioPlay = std::make_unique();
#elif __LINUX_OS__
        m_pAudioPlay = std::make_unique();
#endif
        if (m_pAudioPlay)
        {
            m_pAudioPlay->startPlay(samplerate, channels);
        }
        m_pAudioListMutex = sys_os_create_mutex();
        m_audioPlayFlag = FALSE;//disable by default
        //m_audioPlayThread = sys_os_create_thread((void*)AudioPlayThread, this);
    }
    m_nAudioCodec = to_audio_codec(codec);
    m_nSampleRate = samplerate;
    m_nChannel = channels;
    m_nBitPerSample = bitpersample;
    return m_bAudioInited;
}

void
CVideoPlayer::enableAudio(bool status)
{
    // Toggle audible playback of decoded audio frames (see onAudioFrame()).
    if (status)m_audioPlayFlag = TRUE;
    else m_audioPlayFlag = FALSE;
}

// Convenience overload taking the project's codec enum instead of AVCodecID.
BOOL CVideoPlayer::openAudio(int codec, int samplerate, int channels, int bitpersample)
{
    return openAudio(to_audio_avcodecid(codec), samplerate, channels, bitpersample);
}

// Disable playback and release audio-side resources.
void CVideoPlayer::closeAudio()
{
    m_audioPlayFlag = FALSE;
    if (m_pAudioListMutex)
    {
        sys_os_destroy_sig_mutex(m_pAudioListMutex);
        m_pAudioListMutex = NULL;
    }
    if (!a_frameQueue.isEmpty())a_frameQueue.clearQueue();
    m_bAudioInited = FALSE;
}

// Decoded stream width in pixels, or 0 when no decoder exists.
int CVideoPlayer::getVideoWidth()
{
    if (m_pVideoDecoder)
    {
        return m_pVideoDecoder->getWidth();
    }
    return 0;
}

// Decoded stream height in pixels, or 0 when no decoder exists.
int CVideoPlayer::getVideoHeight()
{
    if (m_pVideoDecoder)
    {
        return m_pVideoDecoder->getHeight();
    }
    return 0;
}

// Decoder-reported frame rate, or 0 when no decoder exists.
double CVideoPlayer::getFrameRate()
{
    if (m_pVideoDecoder)
    {
        return m_pVideoDecoder->getFrameRate();
    }
    return 0;
}

// Set the minimum interval (ms) between frames delivered to consumers;
// onVideoFrame() skips post-decode processing for frames arriving sooner.
void CVideoPlayer::setTargetFPS(double intervalMs)
{
    std::lock_guard lock(_mutex);
    m_targetIntervalMs = intervalMs;
    m_targetFPSInitialized = false; // reset timing on change
}

// Age in milliseconds of the most recently decoded frame; 0 if none seen yet.
double CVideoPlayer::getLastFrameAgeMs()
{
    std::lock_guard lock(_mutex);
    if (!m_lastDecoderFrameTimeSet) return 0.0;
    auto now = std::chrono::steady_clock::now();
    // NOTE(review): duration template arguments appear lost in this copy —
    // presumably duration<double, std::milli> given the Ms naming; confirm.
    return std::chrono::duration(now - m_lastDecoderFrameTime).count();
}

// Feed one video packet: record it if recording, advance the video clock,
// then hand it to the decoder (timestamp in microseconds).
void CVideoPlayer::playVideo(uint8* data, int len, uint32 ts, uint16 seq)
{
    if (m_bRecording)
    {
        sys_os_mutex_enter(m_pRecordMutex);
        recordVideo(data, len, ts, seq);
        sys_os_mutex_leave(m_pRecordMutex);
    }
    updateClock(&m_videoClock, ts, getVideoClock());
    if (m_bVideoInited)
    {
        if (m_bPlaying)
        {
            m_pVideoDecoder->decode(data, len, m_videoClock.SyncTime.tv_sec * 1000000 + m_videoClock.SyncTime.tv_usec);
        }
    }
}

// Feed one audio packet: record it if recording, advance the audio clock,
// then hand it to the decoder (timestamp in microseconds).
void CVideoPlayer::playAudio(uint8* data, int len, uint32 ts, uint16 seq)
{
    if (m_bRecording)
    {
        sys_os_mutex_enter(m_pRecordMutex);
        recordAudio(data, len, ts, seq);
        sys_os_mutex_leave(m_pRecordMutex);
    }
    updateClock(&m_audioClock, ts, getAudioClock());
    if (m_bAudioInited)
    {
        m_pAudioDecoder->decode(data, len,
            m_audioClock.SyncTime.tv_sec * 1000000 + m_audioClock.SyncTime.tv_usec);
    }
}

// Advance a media clock: translate the RTP timestamp delta (at `frequency`
// ticks/second) into wall-clock seconds/microseconds, keeping SyncTimestamp
// and SyncTime as the new synchronization pair. ts == 0 is ignored.
void CVideoPlayer::updateClock(HTCLOCK* clock, uint32 ts, int frequency)
{
    if (ts == 0)
    {
        return;
    }
    // First sample: anchor the timestamp to the current wall-clock time
    if (clock->SyncTime.tv_sec == 0 && clock->SyncTime.tv_usec == 0)
    {
        clock->SyncTimestamp = ts;
        gettimeofday(&clock->SyncTime, NULL);
    }
    // Signed difference handles timestamp wraparound
    int timestampDiff = ts - clock->SyncTimestamp;
    // Divide this by the timestamp frequency to get real time:
    double timeDiff = timestampDiff / (double)frequency;
    uint32 const million = 1000000;
    uint32 seconds, uSeconds;
    if (timeDiff >= 0.0)
    {
        seconds = clock->SyncTime.tv_sec + (uint32)(timeDiff);
        uSeconds = clock->SyncTime.tv_usec + (uint32)((timeDiff - (uint32)timeDiff) * million);
        if (uSeconds >= million)
        {
            uSeconds -= million;
            ++seconds;
        }
    }
    else
    {
        timeDiff = -timeDiff;
        seconds = clock->SyncTime.tv_sec - (uint32)(timeDiff);
        uSeconds = clock->SyncTime.tv_usec - (uint32)((timeDiff - (uint32)timeDiff) * million);
        if ((int)uSeconds < 0)
        {
            uSeconds += million;
            --seconds;
        }
    }
    // Save these as the new synchronization timestamp & time:
    clock->SyncTimestamp = ts;
    clock->SyncTime.tv_sec = seconds;
    clock->SyncTime.tv_usec = uSeconds;
}

// (Re)allocate `frame` with the requested geometry/format if it does not
// already match; existing matching frames are reused untouched.
BOOL CVideoPlayer::initFrame(AVFrame*& frame, int width, int height, AVPixelFormat pixfmt)
{
    if (width == 0 || height == 0 || pixfmt == AV_PIX_FMT_NONE)
    {
        return FALSE;
    }
    if (NULL == frame || frame->width != width || frame->height != height || frame->format != pixfmt)
    {
        if (frame)
        {
            av_frame_free(&frame);
        }
        frame = av_frame_alloc();
        if (NULL == frame)
        {
            return FALSE;
        }
        frame->format = pixfmt;
        frame->width = width;
        frame->height = height;
        if (0 != av_frame_get_buffer(frame, 0))
        {
            av_frame_free(&frame);
            return FALSE;
        }
        // NOTE(review): return value ignored — freshly allocated buffers are
        // normally already writable, but consider checking it.
        av_frame_make_writable(frame);
    }
    return TRUE;
}

// Capture the current frame into m_pSnapFrame, converted to the snapshot format.
BOOL CVideoPlayer::doSnapshot(AVFrame* frame)
{
    if (m_pSnapFrame)
    {
        av_frame_free(&m_pSnapFrame); // Free the previous snapshot frame if it exists
    }
    if (!initFrame(m_pSnapFrame, frame->width, frame->height, to_avpixelformat(m_nSnapVideoFmt)))
    {
        return FALSE;
    }
    if (NULL ==
convertFrame(frame, m_pSnapFrame, FALSE)) { return FALSE; } return TRUE; } AVFrame* CVideoPlayer::convertFrame(AVFrame* srcframe, AVFrame* dstframe, BOOL updown) { if (!srcframe || !dstframe) { return NULL; } SwsContext* _swsctx = sws_getContext(srcframe->width, srcframe->height, (enum AVPixelFormat)srcframe->format, srcframe->width, srcframe->height, (enum AVPixelFormat)dstframe->format, SWS_BICUBIC, NULL, NULL, NULL); if (!_swsctx) { return NULL; } if (updown) { srcframe->data[0] += srcframe->linesize[0] * (srcframe->height - 1); srcframe->linesize[0] *= -1; srcframe->data[1] += srcframe->linesize[1] * (srcframe->height / 2 - 1); srcframe->linesize[1] *= -1; srcframe->data[2] += srcframe->linesize[2] * (srcframe->height / 2 - 1); srcframe->linesize[2] *= -1; } int ret = sws_scale(_swsctx, srcframe->data, srcframe->linesize, 0, srcframe->height, dstframe->data, dstframe->linesize); sws_freeContext(_swsctx); // Free context after scaling attempt if (ret > 0) { dstframe->pts = srcframe->pts; dstframe->pkt_dts = srcframe->pkt_dts; return dstframe; } else { log_print(HT_LOG_ERR, "%s, sws_scale failed\r\n", __FUNCTION__); return NULL; } } void CVideoPlayer::onVideoFrame(AVFrame* frame) { std::lock_guard lock(_mutex); // Protect against concurrent access if (!frame) return; // Check for null pointer if (m_bSnapshot) { if (doSnapshot(frame)) { m_bSnapshot = FALSE; } } if (m_bPlaying && m_videoPlayFlag) { // Drop any frame with decode errors (corrupted reference frames, etc.) if (frame->decode_error_flags != 0) { fprintf(stderr, "[HWDecode] Dropping frame with decode errors (flags=0x%x)\n", frame->decode_error_flags); return; } // After start/restart, skip corrupted frames until first keyframe (IDR) arrives. // HEVC/H.264 P/B frames received before the first I-frame will produce visual // corruption ("Could not find ref with POC", green/grey artifacts). 
if (m_bWaitingForKeyframe) { if (frame->key_frame || frame->pict_type == AV_PICTURE_TYPE_I) { m_bWaitingForKeyframe = false; m_cleanFrameCount = 0; fprintf(stderr, "[HWDecode] First keyframe received, settling for %d frames\n", SETTLE_FRAME_COUNT); } else { return; // Drop this frame — not yet safe to decode } } // Record wall-clock time of every decoded frame (even rate-limited ones). // Used by getLastFrameAgeMs() to detect truly stale cameras. m_lastDecoderFrameTime = std::chrono::steady_clock::now(); m_lastDecoderFrameTimeSet = true; // --- Frame rate limiting --- // Skip post-decode processing (clone, queue push, CUDA clone) if not enough // time has elapsed since the last processed frame. The decode itself still // runs for every packet to maintain the H.264/H.265 reference frame chain. if (m_targetIntervalMs > 0.0) { auto now = std::chrono::steady_clock::now(); if (!m_targetFPSInitialized) { m_lastProcessedTime = now; m_targetFPSInitialized = true; } else { auto elapsed = std::chrono::duration(now - m_lastProcessedTime).count(); if (elapsed < m_targetIntervalMs) { return; // Skip this frame — too soon } } m_lastProcessedTime = now; } // --- End frame rate limiting --- // Push frame to queue; during settle period getImage() will ignore the queue // and keep returning the last good cached image g_frameQueue.pushFrame(frame); // pushFrame() clones the frame internally // Capture CUDA HW frame for zero-copy inference. // We're inside decode()'s lock scope (decoder._mutex held) AND onVideoFrame // holds player._mutex — so this is the ONE place where both locks are held // and we can safely clone the CUDA frame without deadlock risk. // cloneCudaHWFrame_unlocked() is safe because decoder._mutex is already held. 
if (m_pVideoDecoder && m_pVideoDecoder->isCudaHWAccel()) { if (m_currentCudaHWFrame) av_frame_free(&m_currentCudaHWFrame); m_currentCudaHWFrame = m_pVideoDecoder->cloneCudaHWFrame_unlocked(); } // Track how many clean frames have arrived since keyframe if (m_cleanFrameCount < SETTLE_FRAME_COUNT) { m_cleanFrameCount++; if (m_cleanFrameCount == SETTLE_FRAME_COUNT) { fprintf(stderr, "[HWDecode] Settle complete, delivering new frames\n"); } } } } void CVideoPlayer::onAudioFrame(AVFrame* frame) { // Support for audio playback std::lock_guard lock(_mutex); // Protect against concurrent access if (!frame) return; // Check for null pointer if (m_bSnapshot) { if (doSnapshot(frame)) { m_bSnapshot = FALSE; } } if (m_bPlaying && m_audioPlayFlag) { a_frameQueue.pushFrame(frame); // pushFrame() clones the frame internally } } cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) { try { // Lock the mutex using RAII (ensures unlock even in exceptions) std::lock_guard lock(_mutex); // Protect against concurrent access if (!m_bPlaying) { // Return the last valid frame if playback is stopped width = m_currentImage.cols; height = m_currentImage.rows; pts = m_pts; return m_currentImage; // Shallow copy (reference counted, safe under mutex) } // While waiting for keyframe or during settle period after restart, // return the last good cached image to avoid showing corrupted frames if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT) { width = m_currentImage.cols; height = m_currentImage.rows; pts = m_pts; return m_currentImage; // Last good frame (may be empty on first-ever start) } // Fast path: check if a new frame has arrived using sequence counter // This avoids expensive av_frame_clone + NV12→BGR conversion when frame hasn't changed uint64_t currentSeq = g_frameQueue.getSequence(); if (currentSeq == m_lastFrameSeq && !m_currentImage.empty()) { width = m_currentImage.cols; height = m_currentImage.rows; pts = m_pts; return m_currentImage; // Same 
        // frame, skip all conversion
        }
        // Get latest frame from queue
        if (g_frameQueue.isEmpty())
        {
            // NOTE(review): width/height describe the cached image while an
            // empty Mat is returned — callers must treat empty as "no new frame".
            width = m_currentImage.cols;
            height = m_currentImage.rows;
            pts = m_pts;
            std::cerr << "No frame available in getImage()" << std::endl;
            return cv::Mat(); // Return an empty cv::Mat() if no frame is available
        }
        AVFrame* frameToProcess = g_frameQueue.getLatestFrame();
        if (!frameToProcess)
        {
            // If no frame available, return last valid image
            width = m_currentImage.cols;
            height = m_currentImage.rows;
            pts = m_pts;
            return cv::Mat(); // Return an empty cv::Mat() if no frame is available
        }
        try
        {
            // Convert AVFrame to cv::Mat
            m_currentImage = avframeToCVMat(frameToProcess);
            // Update timestamp and sequence if conversion is successful
            if (!m_currentImage.empty())
            {
                m_pts++;
                m_lastFrameSeq = currentSeq; // Mark this sequence as processed
            }
        }
        catch (const std::exception& e)
        {
            std::cerr << "Exception while converting AVFrame to cv::Mat: " << e.what() << std::endl;
        }
        // Preserve raw YUV/NV12 frame for GPU fast-path inference
        // (NV12 from HW decode, YUV420P/YUVJ420P from SW decode)
        if (frameToProcess && (frameToProcess->format == AV_PIX_FMT_NV12 ||
            frameToProcess->format == AV_PIX_FMT_YUV420P ||
            frameToProcess->format == AV_PIX_FMT_YUVJ420P))
        {
            if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
            m_currentNV12Frame = av_frame_clone(frameToProcess);
        }
        // Free the cloned frame to avoid memory leaks
        av_frame_free(&frameToProcess);
        // Update frame dimensions and PTS
        width = m_currentImage.cols;
        height = m_currentImage.rows;
        pts = m_pts;
        // Return the processed image (shallow copy — caller gets reference-counted Mat)
        return m_currentImage;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Unexpected exception in getImage(): " << e.what() << std::endl;
        return cv::Mat(); // Return an empty cv::Mat if an exception occurs
    }
    catch (...)
    {
        std::cerr << "Unknown exception in getImage()" << std::endl;
        return cv::Mat(); // Return an empty cv::Mat if an exception occurs
    }
}

// Return the latest decoded frame as a JPEG-encoded string, caching the last
// good JPEG for use while the decoder is restarting or no frame is available.
// NOTE(review): unlike getImage(), there is no m_bPlaying early-out here —
// confirm whether that asymmetry is intentional.
std::string CVideoPlayer::getJpegImage(int& width, int& height, int64_t& pts)
{
    try
    {
        // Use same _mutex as getImage() to protect shared state consistently
        // recursive_mutex allows nested calls to avframeToJpegString → _mutex
        std::lock_guard lock(_mutex);
        // While waiting for keyframe or during settle period after restart,
        // return the last good cached JPEG to avoid showing corrupted frames
        if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT)
        {
            width = m_Width;
            height = m_Height;
            pts = m_pts;
            return m_lastJpegImage; // Last good JPEG (may be empty on first-ever start)
        }
        AVFrame* frameToProcess = g_frameQueue.getLatestFrame(); // Get a safe copy
        if (!frameToProcess)
        {
            return m_lastJpegImage; // Return the last valid JPEG image if no frame is available
        }
        try
        {
            if (frameToProcess->format == AV_PIX_FMT_NV12)
            {
                m_jpegImage = avframeToJpegString(frameToProcess); // Convert frame to JPEG from NV12
            }
            else
            {
                m_jpegImage = avframeYUVJ420PToJpegString(frameToProcess); // Convert frame to JPEG from YUVJ420P
            }
        }
        catch (const std::exception& e)
        {
            std::cerr << "Exception while converting AVFrame to JPEG string: " << e.what() << std::endl;
            av_frame_free(&frameToProcess);
            return m_lastJpegImage;
        }
        av_frame_free(&frameToProcess);
        if (m_pts < INT64_MAX)
        {
            m_pts++;
        }
        else
        {
            m_pts = 0; // Reset to zero when max is reached
        }
        // Update the width, height, and pts
        width = m_Width;
        height = m_Height;
        pts = m_pts;
        if (!m_jpegImage.empty())
        {
            m_lastJpegImage = std::move(m_jpegImage); // Move instead of copy
        }
        // Return the most recent valid JPEG image
        return m_lastJpegImage;
    }
    catch (const std::exception& e)
    {
        std::cerr << "Unexpected exception in getJpegImage(): " << e.what() << std::endl;
    }
    catch (...)
    {
        std::cerr << "Unknown exception in getJpegImage()" << std::endl;
    }
    // If any exception occurs, return the last valid JPEG image
    return m_lastJpegImage;
}