diff --git a/MediaClient/media/video_player.cpp b/MediaClient/media/video_player.cpp
index 2560c47..02d8839 100644
--- a/MediaClient/media/video_player.cpp
+++ b/MediaClient/media/video_player.cpp
@@ -1360,7 +1360,10 @@ cv::Mat CVideoPlayer::avframeYUV420PToCvMat(const AVFrame* frame) {
 }
 
 cv::Mat CVideoPlayer::avframeToCVMat(const AVFrame* pFrame) {
-	std::lock_guard<std::recursive_mutex> lock(_mutex);
+	// Deliberately does not lock _mutex: the caller (getImage) releases the mutex
+	// before invoking this, so the expensive NV12/YUV420P→BGR conversion does not
+	// block onVideoFrame. The NV12/YUV420P paths touch only the caller-owned AVFrame
+	// clone and benign member reads; avframeAnyToCvmat() takes its own lock for swsCtx.
 	try {
 		// 1. Validate input frame
 		if (!pFrame || !pFrame->data[0] || pFrame->width <= 10 || pFrame->height <= 10) {
@@ -2233,87 +2236,98 @@ void CVideoPlayer::onAudioFrame(AVFrame* frame)
 
 cv::Mat CVideoPlayer::getImage(int& width, int& height, int64_t& pts) {
 	try {
-		// Lock the mutex using RAII (ensures unlock even in exceptions)
-		std::lock_guard<std::recursive_mutex> lock(_mutex);  // Protect against concurrent access
+		AVFrame* frameToProcess = nullptr;
+		uint64_t currentSeq = 0;
 
-		if (!m_bPlaying) {
-			// Return the last valid frame if playback is stopped
-			width = m_currentImage.cols;
-			height = m_currentImage.rows;
-			pts = m_pts;
-			return m_currentImage;  // Shallow copy (reference counted, safe under mutex)
-		}
+		// --- Phase 1: short locked section — examine state, pull latest frame ---
+		{
+			std::lock_guard<std::recursive_mutex> lock(_mutex);
 
-		// While waiting for keyframe or during settle period after restart,
-		// return the last good cached image to avoid showing corrupted frames
-		if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT) {
-			width = m_currentImage.cols;
-			height = m_currentImage.rows;
-			pts = m_pts;
-			return m_currentImage;  // Last good frame (may be empty on first-ever start)
-		}
-
-		// Fast path: check if a new frame has arrived using sequence counter
-		// This avoids expensive av_frame_clone + NV12→BGR conversion when frame hasn't changed
-		uint64_t currentSeq = g_frameQueue.getSequence();
-		if (currentSeq == m_lastFrameSeq && !m_currentImage.empty()) {
-			width = m_currentImage.cols;
-			height = m_currentImage.rows;
-			pts = m_pts;
-			return m_currentImage;  // Same frame, skip all conversion
-		}
-
-		// Get latest frame from queue
-		if (g_frameQueue.isEmpty()) {
-			width = m_currentImage.cols;
-			height = m_currentImage.rows;
-			pts = m_pts;
-			std::cerr << "No frame available in getImage()" << std::endl;
-			return cv::Mat();  // Return an empty cv::Mat() if no frame is available
-		}
-		AVFrame* frameToProcess = g_frameQueue.getLatestFrame();
-		if (!frameToProcess) {
-			// If no frame available, return last valid image
-			width = m_currentImage.cols;
-			height = m_currentImage.rows;
-			pts = m_pts;
-			return cv::Mat();  // Return an empty cv::Mat() if no frame is available
-		}
-
-		try {
-			// Convert AVFrame to cv::Mat
-			m_currentImage = avframeToCVMat(frameToProcess);
-
-			// Update timestamp and sequence if conversion is successful
-			if (!m_currentImage.empty()) {
-				m_pts++;
-				m_lastFrameSeq = currentSeq;  // Mark this sequence as processed
+			if (!m_bPlaying) {
+				width = m_currentImage.cols;
+				height = m_currentImage.rows;
+				pts = m_pts;
+				return m_currentImage;  // Shallow copy (reference counted)
 			}
+
+			// While waiting for keyframe or during settle period after restart,
+			// return the last good cached image to avoid showing corrupted frames
+			if (m_bWaitingForKeyframe || m_cleanFrameCount < SETTLE_FRAME_COUNT) {
+				width = m_currentImage.cols;
+				height = m_currentImage.rows;
+				pts = m_pts;
+				return m_currentImage;
+			}
+
+			// Fast path: same frame as last call — skip clone + BGR conversion
+			currentSeq = g_frameQueue.getSequence();
+			if (currentSeq == m_lastFrameSeq && !m_currentImage.empty()) {
+				width = m_currentImage.cols;
+				height = m_currentImage.rows;
+				pts = m_pts;
+				return m_currentImage;
+			}
+
+			if (g_frameQueue.isEmpty()) {
+				width = m_currentImage.cols;
+				height = m_currentImage.rows;
+				pts = m_pts;
+				std::cerr << "No frame available in getImage()" << std::endl;
+				return cv::Mat();
+			}
+
+			// getLatestFrame() clones the AVFrame — we own it from here
+			frameToProcess = g_frameQueue.getLatestFrame();
+			if (!frameToProcess) {
+				width = m_currentImage.cols;
+				height = m_currentImage.rows;
+				pts = m_pts;
+				return cv::Mat();
+			}
+		}
+		// --- _mutex released here ---
+		// At 4K NV12, cvtColorTwoPlane takes ~100–300 ms on CPU; during that
+		// window the decoder callback (onVideoFrame) is free to push the next
+		// frame and the CUDA HW capture path can run in parallel.
+
+		cv::Mat converted;
+		try {
+			converted = avframeToCVMat(frameToProcess);
 		}
 		catch (const std::exception& e) {
 			std::cerr << "Exception while converting AVFrame to cv::Mat: " << e.what() << std::endl;
 		}
 
-		// Preserve raw YUV/NV12 frame for GPU fast-path inference
-		// (NV12 from HW decode, YUV420P/YUVJ420P from SW decode)
-		if (frameToProcess &&
-			(frameToProcess->format == AV_PIX_FMT_NV12 ||
-			 frameToProcess->format == AV_PIX_FMT_YUV420P ||
-			 frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
-			if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
-			m_currentNV12Frame = av_frame_clone(frameToProcess);
+		// --- Phase 2: short locked section — publish new frame state ---
+		cv::Mat result;  // Snapshot taken under the lock, returned after release.
+		{
+			std::lock_guard<std::recursive_mutex> lock(_mutex);
+
+			if (!converted.empty()) {
+				m_currentImage = converted;
+				m_pts++;
+				m_lastFrameSeq = currentSeq;
+			}
+
+			// Preserve raw YUV/NV12 frame for GPU fast-path inference
+			// (NV12 from HW decode, YUV420P/YUVJ420P from SW decode)
+			if (frameToProcess &&
+				(frameToProcess->format == AV_PIX_FMT_NV12 ||
+				 frameToProcess->format == AV_PIX_FMT_YUV420P ||
+				 frameToProcess->format == AV_PIX_FMT_YUVJ420P)) {
+				if (m_currentNV12Frame) av_frame_free(&m_currentNV12Frame);
+				m_currentNV12Frame = av_frame_clone(frameToProcess);
+			}
+
+			width = m_currentImage.cols;
+			height = m_currentImage.rows;
+			pts = m_pts;
+			result = m_currentImage;  // Shallow copy under lock — refcount keeps buffer alive
 		}
 
-		// Free the cloned frame to avoid memory leaks
 		av_frame_free(&frameToProcess);
 
-		// Update frame dimensions and PTS
-		width = m_currentImage.cols;
-		height = m_currentImage.rows;
-		pts = m_pts;
-
-		// Return the processed image (shallow copy — caller gets reference-counted Mat)
-		return m_currentImage;
+		return result;
 	}
 	catch (const std::exception& e) {
 		std::cerr << "Unexpected exception in getImage(): " << e.what() << std::endl;