Initial setup for CLion
This commit is contained in:
172
MediaClient/media/video_decoder.h
Normal file
172
MediaClient/media/video_decoder.h
Normal file
@@ -0,0 +1,172 @@
|
||||
#ifndef VIDEO_DECODER_H
|
||||
#define VIDEO_DECODER_H
|
||||
#include "sys_inc.h"
|
||||
#include "media_format.h"
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
extern "C"
|
||||
{
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavutil/avutil.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libavformat/avformat.h"
|
||||
#include <libavutil/opt.h>
|
||||
}
|
||||
|
||||
// Hardware decoding backend selectors. Passed as the `hwMode` argument to
// CVideoDecoder::init() (default HW_DECODING_AUTO). Values are part of the
// public interface — callers compare/pass these integers directly.
#define HW_DECODING_AUTO 0 // automatic select video acceleration hardware
#define HW_DECODING_D3D11 1 // D3D11 video acceleration
#define HW_DECODING_DXVA 2 // DXVA video acceleration
#define HW_DECODING_VAAPI 3 // VAAPI video acceleration
#define HW_DECODING_OPENCL 4 // OPENCL video acceleration
#define HW_DECODING_VIDEOTOOLBOX 5 // VideoToolBox video acceleration
#define HW_DECODING_MEDIACODEC 6 // MediaCodec video acceleration
#define HW_DECODING_CUDA 7 // CUDA/NVDEC — decoded NV12 stays in GPU VRAM
#define HW_DECODING_DISABLE -1 // disable video acceleration

// Legacy global limit (default: 4). Still works if HWDecoderPool is not configured.
// NOTE(review): definition (and the actual default) lives in the .cpp — confirm there.
extern uint32 g_hw_decoder_max;
|
||||
|
||||
// ---------------------------------------------------------------------------
// HWDecoderPool -- per-GPU hardware decoder session manager
//
// Tracks active HW decoder sessions per GPU and distributes new sessions
// to the GPU with the fewest active decoders (least-loaded).
//
// Usage:
//   // Auto-configure from outside (e.g., ANSRTSP):
//   HWDecoderPool::instance().configure(numGpus, maxSessionsPerGpu);
//
//   // Or leave unconfigured -- falls back to legacy g_hw_decoder_max behaviour.
//
// Declaration only: implementations live in the .cpp. The m_mutex member
// suggests internal locking — presumably all public methods are thread-safe;
// confirm against the implementation.
// ---------------------------------------------------------------------------
class HWDecoderPool {
public:
    // Process-wide singleton accessor.
    static HWDecoderPool& instance();

    // Configure uniform per-GPU limits. Call once at startup before creating decoders.
    void configure(int numGpus, int maxPerGpu);

    // Configure per-GPU limits individually (different GPUs may have different capabilities).
    // maxPerGpuList[i] is the session cap for GPU i.
    void configure(const std::vector<int>& maxPerGpuList);

    // Is the pool configured with per-GPU tracking?
    // When false, callers fall back to the legacy g_hw_decoder_max limit.
    bool isConfigured() const;

    // Try to acquire a HW decoder slot. Returns the GPU index to use,
    // or -1 if all GPUs are at capacity.
    // If preferredGpu >= 0, prefer that GPU (e.g. to match inference GPU for zero-copy).
    // Falls back to least-loaded if preferred GPU is at capacity.
    int acquireSlot(int preferredGpu = -1);

    // Release a HW decoder slot on the given GPU.
    // Pair each successful acquireSlot() with exactly one releaseSlot().
    void releaseSlot(int gpuIndex);

    // Get total max sessions across all GPUs.
    int getTotalMax() const;

    // Get number of active sessions across all GPUs.
    int getTotalActive() const;

private:
    // Singleton: construct only through instance().
    HWDecoderPool() = default;

    std::mutex m_mutex;              // guards the counters below
    bool m_configured = false;       // true once configure() has been called
    std::vector<int> m_maxPerGpu;    // max session limit per GPU
    std::vector<int> m_activePerGpu; // active session count per GPU
};
|
||||
|
||||
// ---------------------------------------------------------------------------
// SharedHWDeviceCtx -- per-GPU shared AVHWDeviceContext cache
//
// NVIDIA recommends sharing CUDA contexts across decode sessions to reduce
// GPU memory overhead. This cache creates one AVHWDeviceContext per GPU
// and shares it (via av_buffer_ref) across all decoder sessions on that GPU.
//
// Thread-safe: all methods lock internally.
// ---------------------------------------------------------------------------
class SharedHWDeviceCtx {
public:
    // Process-wide singleton accessor.
    static SharedHWDeviceCtx& instance();

    // Get (or create) a shared HW device context for the given GPU index and device type.
    // Returns a new av_buffer_ref to the shared context (caller must av_buffer_unref).
    // Returns nullptr on failure.
    AVBufferRef* acquire(int gpuIndex, AVHWDeviceType type);

    // Release all cached contexts (call at shutdown).
    // NOTE(review): decoders must have dropped their refs first — confirm
    // shutdown ordering in the implementation.
    void releaseAll();

private:
    // Singleton: construct/destroy only through instance() / process teardown.
    SharedHWDeviceCtx() = default;
    ~SharedHWDeviceCtx();

    // One cached device context entry per GPU.
    struct GpuCtx {
        AVBufferRef* ctx = nullptr;                  // owning ref to the shared AVHWDeviceContext
        AVHWDeviceType type = AV_HWDEVICE_TYPE_NONE; // device type the context was created for
    };

    std::mutex m_mutex;          // guards m_cache
    std::vector<GpuCtx> m_cache; // presumably indexed by GPU index — confirm in acquire()
};
|
||||
|
||||
typedef void (*VDCB)(AVFrame* frame, void* pUserdata);
|
||||
|
||||
// FFmpeg-based video decoder with optional hardware acceleration.
// Feed compressed data via decode(); decoded frames are delivered through the
// VDCB callback registered with setCallback().
// Declaration only — behavior notes below are grounded in the visible inline
// bodies and comments; everything else should be confirmed in the .cpp.
class CVideoDecoder
{
public:
    CVideoDecoder();
    ~CVideoDecoder();
public:
    // Initialize with a codec id (int overload forwards a project codec code;
    // enum overload takes FFmpeg's AVCodecID directly — presumably, confirm in .cpp).
    // extradata/extradata_size: out-of-band codec config (e.g. SPS/PPS), may be NULL.
    // hwMode: one of the HW_DECODING_* selectors (see above).
    // preferredGpu: GPU hint forwarded to hwDecoderInit (-1 = no preference).
    BOOL init(int codec, uint8* extradata = NULL, int extradata_size = 0, int hwMode = HW_DECODING_AUTO, int preferredGpu = -1);
    BOOL init(enum AVCodecID codec, uint8* extradata = NULL, int extradata_size = 0, int hwMode = HW_DECODING_AUTO, int preferredGpu = -1);
    // Tear down codec context and frames; safe counterpart to init().
    void uninit();
    int getWidth();
    int getHeight();
    double getFrameRate();

    // Decode one compressed buffer; pts defaults to "unknown" (AV_NOPTS_VALUE).
    BOOL decode(uint8* data, int len, int64_t pts = AV_NOPTS_VALUE);
    // Decode a pre-built packet (timestamps/flags preserved).
    BOOL decode(AVPacket* pkt);
    // Register the frame-delivery callback and its opaque user pointer.
    // NOTE(review): plain assignments, no lock — presumably called before
    // decoding starts; confirm if it may race with decode().
    void setCallback(VDCB pCallback, void* pUserdata) { m_pCallback = pCallback; m_pUserdata = pUserdata; }
    // FFmpeg get_format-style negotiation: pick a HW pixel format from pix_fmts into *dst.
    BOOL getHWFormat(AVCodecContext* ctx, const AVPixelFormat* pix_fmts, AVPixelFormat* dst);
    // Map an HW_DECODING_* mode to the platform's FFmpeg hwdevice type name.
    bool getHardwareTypeForPlatform(int hwMode, std::string& hwtype);
    // Does the selected codec expose a config for this hwdevice type?
    bool findHwConfigForDeviceType(AVHWDeviceType type);
    // Diagnostic: log the hwdevice types this build/codec supports.
    void logSupportedHwTypes();
    BOOL isHardwareDecoderEnabled() const { return m_bHardwareDecoderEnabled; }
    // GPU index assigned by HWDecoderPool (-1 = legacy mode).
    int getHWGpuIndex() const { return m_hwGpuIndex; }
    // True when decoding via AV_HWDEVICE_TYPE_CUDA (frames stay in VRAM).
    bool isCudaHWAccel() const { return m_bCudaHWAccel; }
    // Returns the CUDA HW frame (device pointers). Caller takes ownership.
    AVFrame* takeCudaHWFrame();
    // Clone CUDA HW frame without locking — caller MUST already hold _mutex
    // (used by onVideoFrame callback which runs inside decode()'s lock scope).
    AVFrame* cloneCudaHWFrame_unlocked();
    void Start();
    void Stop();
    // Drain the codec's buffered frames (end-of-stream / seek).
    void flush();
    // Raw access to the underlying codec context (non-owning).
    // NOTE(review): name has a typo ("Code" vs "Codec") — kept as-is because
    // callers depend on it; consider a deprecating alias in a future change.
    AVCodecContext* getAVCodeContext() {
        return m_pContext;
    }
private:
    // Pull decoded frames out of the codec (receive loop).
    BOOL readFrame();
    // Deliver/convert one decoded frame.
    int render(AVFrame* frame);
    // Set up the hardware device context for ctx; see HW_DECODING_* for hwMode.
    int hwDecoderInit(AVCodecContext* ctx, int hwMode, int preferredGpu = -1);
private:
    BOOL m_bInited;
    BOOL m_bRunning;
    BOOL m_bHardwareDecoderEnabled; // Track if hardware decoder is enabled
    bool m_bCudaHWAccel; // true when using AV_HWDEVICE_TYPE_CUDA
    int m_hwGpuIndex; // GPU index assigned by HWDecoderPool (-1 = legacy)
    AVFrame* m_pCudaHWFrame; // Cloned CUDA HW frame (device ptrs) for inference
    const AVCodec* m_pCodec; // selected decoder (non-owning, FFmpeg-managed)
    AVCodecContext* m_pContext; // codec context (owned; released in uninit() — confirm)
    AVFrame* m_pFrame; // scratch frame for receive
    AVFrame* m_pSoftFrame; // scratch frame for HW->system-memory transfer — presumably; confirm
    VDCB m_pCallback; // frame-delivery callback (may be NULL)
    void* m_pUserdata; // opaque pointer handed back to m_pCallback
    AVPixelFormat m_hwPixFmt; // negotiated HW pixel format (see getHWFormat)
    AVBufferRef* m_pHWDeviceCtx; // ref to (possibly shared) AVHWDeviceContext
    std::recursive_mutex _mutex; // guards decode path; recursive so callbacks may re-enter
};
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user