diff --git a/modules/ANSCV/ANSOpenCV.cpp b/modules/ANSCV/ANSOpenCV.cpp
index b7d1362..55b4b3f 100644
--- a/modules/ANSCV/ANSOpenCV.cpp
+++ b/modules/ANSCV/ANSOpenCV.cpp
@@ -246,7 +246,31 @@ namespace ANSCENTER
         return jpegStr;
     }
 
-    // ── NvJpegPool: fixed pool of 4 GPU encoders, lock-free acquire ──
+    // ── NvJpegPool: VRAM-scaled pool of GPU encoders, lock-free acquire ──
+
+    // Derives the pool size from the total VRAM of CUDA device 0: one encoder
+    // per 2 GB, never fewer than one. Returns 0 when the device count or the
+    // device properties cannot be queried, which leaves the pool disabled.
+    int NvJpegPool::detectPoolSize() {
+        int deviceCount = 0;
+        if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount <= 0)
+            return 0;
+
+        cudaDeviceProp prop{};
+        if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess)
+            return 0;
+
+        // totalGlobalMem typically reports slightly less than the nominal size
+        // (an "8 GB" card comes back as ~7.9 GiB), so round to the nearest GiB.
+        // Truncating would give 7 -> 3 encoders instead of the documented 8GB -> 4.
+        constexpr size_t kBytesPerGiB = 1024ULL * 1024ULL * 1024ULL;
+        const size_t vramGB = (prop.totalGlobalMem + kBytesPerGiB / 2) / kBytesPerGiB;
+        const int scaled = static_cast<int>(vramGB / 2);
+        const int pool = scaled < 1 ? 1 : scaled;
+
+        ANS_DBG("ANSCV", "NvJpegPool: GPU=%s, VRAM=%zuGB, poolSize=%d", prop.name, vramGB, pool);
+        return pool;
+    }
 
     NvJpegPool& NvJpegPool::Instance() {
         static NvJpegPool instance;
@@ -256,7 +280,13 @@ namespace ANSCENTER
     NvJpegPool::NvJpegPool() {
         if (!anscv_vendor_gate::IsNvidiaGpuAvailable()) return;
 
-        for (int i = 0; i < kPoolSize; ++i) {
+        _poolSize = detectPoolSize();
+        if (_poolSize <= 0) return;
+
+        _encoders.resize(_poolSize);
+        _inUse = std::make_unique<std::atomic<bool>[]>(_poolSize);
+
+        for (int i = 0; i < _poolSize; ++i) {
             _inUse[i].store(false, std::memory_order_relaxed);
             _encoders[i] = std::make_unique<NvJpegCompressor>();
             if (!_encoders[i]->isValid()) {
@@ -264,16 +294,18 @@ namespace ANSCENTER
             }
         }
         // Pool is available if at least one encoder initialized
-        for (int i = 0; i < kPoolSize; ++i) {
+        for (int i = 0; i < _poolSize; ++i) {
             if (_encoders[i]) { _available = true; break; }
         }
+
+        ANS_DBG("ANSCV", "NvJpegPool: initialized %d encoder(s), available=%d", _poolSize, _available ? 1 : 0);
     }
 
     std::string NvJpegPool::tryCompress(const cv::Mat& image, int quality) {
         if (!_available) return "";
 
         // Lock-free slot acquisition: try each slot with compare_exchange
-        for (int i = 0; i < kPoolSize; ++i) {
+        for (int i = 0; i < _poolSize; ++i) {
             if (!_encoders[i]) continue;
             bool expected = false;
             if (_inUse[i].compare_exchange_strong(expected, true, std::memory_order_acquire)) {
diff --git a/modules/ANSCV/ANSOpenCV.h b/modules/ANSCV/ANSOpenCV.h
index 8b1cd53..ab41f51 100644
--- a/modules/ANSCV/ANSOpenCV.h
+++ b/modules/ANSCV/ANSOpenCV.h
@@ -69,22 +69,27 @@ namespace ANSCENTER
 
-    // Fixed-size pool of NvJpegCompressors (~40MB VRAM each).
+    // Pool of NvJpegCompressors (~40MB VRAM each); its size is chosen once, when the
+    // singleton is constructed, from GPU VRAM: poolSize = VRAM_GB / 2 with VRAM rounded
+    // to the nearest GB (min 1, e.g. 2GB→1, 4GB→2, 8GB→4, 10GB→5).
     // Threads that can't acquire an encoder fall back to TurboJPEG.
     class NvJpegPool {
     public:
-        static constexpr int kPoolSize = 4;
         static NvJpegPool& Instance();
         // Try to compress with nvJPEG. Returns empty string if no encoder
         // available or on non-NVIDIA hardware — caller should fall back.
         [[nodiscard]] std::string tryCompress(const cv::Mat& image, int quality);
         [[nodiscard]] bool isAvailable() const noexcept { return _available; }
+        // Number of encoder slots in the pool (0 when no NVIDIA GPU / CUDA device was detected).
+        [[nodiscard]] int poolSize() const noexcept { return _poolSize; }
     private:
         NvJpegPool();
         ~NvJpegPool() = default;
         NvJpegPool(const NvJpegPool&) = delete;
         NvJpegPool& operator=(const NvJpegPool&) = delete;
-        bool _available = false;
-        std::array<std::unique_ptr<NvJpegCompressor>, kPoolSize> _encoders;
-        std::array<std::atomic<bool>, kPoolSize> _inUse; // lock-free slot flags
+        static int detectPoolSize();
+        bool _available = false;
+        int _poolSize = 0;
+        std::vector<std::unique_ptr<NvJpegCompressor>> _encoders;
+        std::unique_ptr<std::atomic<bool>[]> _inUse; // lock-free slot flags; can't use vector<atomic> — atomic is non-copyable
     };
     /// <summary>
     /// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.