From 6c72751a14c1870f1c4689a73a04c7f1e658f901 Mon Sep 17 00:00:00 2001
From: Tuan Nghia Nguyen
Date: Thu, 16 Apr 2026 08:33:17 +1000
Subject: [PATCH] Add NvJpegPool (4 encoders) and JPEG passthrough in BmpToJpeg

- NvJpegPool: singleton pool of 4 NvJpegCompressor instances with
  lock-free slot acquisition (~160MB VRAM). Threads that can't grab
  a slot fall back to TurboJPEG with zero wait.
- NvJpegPool::tryCompress releases its slot through a scope guard, so
  an exception escaping compress() cannot leave an encoder marked busy.
- JPEG passthrough: BmpToJpeg now checks if input is already JPEG
  (FF D8 FF magic) and copies directly without re-encoding.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 modules/ANSCV/ANSOpenCV.cpp | 75 ++++++++++++++++++++++++++++++++-----
 modules/ANSCV/ANSOpenCV.h   | 23 +++++++++++++
 2 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/modules/ANSCV/ANSOpenCV.cpp b/modules/ANSCV/ANSOpenCV.cpp
index 6cbe5f5..b7d1362 100644
--- a/modules/ANSCV/ANSOpenCV.cpp
+++ b/modules/ANSCV/ANSOpenCV.cpp
@@ -246,17 +246,61 @@ namespace ANSCENTER
         return jpegStr;
     }
 
-    // ── Unified entry point: nvJPEG on NVIDIA, TurboJPEG otherwise ──
+    // ── NvJpegPool: fixed pool of 4 GPU encoders, lock-free acquire ──
+
+    NvJpegPool& NvJpegPool::Instance() {
+        static NvJpegPool instance;
+        return instance;
+    }
+
+    NvJpegPool::NvJpegPool() {
+        if (!anscv_vendor_gate::IsNvidiaGpuAvailable()) return;
+
+        for (int i = 0; i < kPoolSize; ++i) {
+            _inUse[i].store(false, std::memory_order_relaxed);
+            _encoders[i] = std::make_unique<NvJpegCompressor>();
+            if (!_encoders[i]->isValid()) {
+                _encoders[i].reset();
+            }
+        }
+        // Pool is available if at least one encoder initialized
+        for (int i = 0; i < kPoolSize; ++i) {
+            if (_encoders[i]) { _available = true; break; }
+        }
+    }
+
+    std::string NvJpegPool::tryCompress(const cv::Mat& image, int quality) {
+        if (!_available) return "";
+
+        // Scope guard: frees the slot when it goes out of scope, so an
+        // exception escaping compress() cannot leave the encoder marked
+        // busy forever.
+        struct SlotRelease {
+            std::atomic<bool>& flag;
+            ~SlotRelease() { flag.store(false, std::memory_order_release); }
+        };
+
+        // Lock-free slot acquisition: try each slot with compare_exchange
+        for (int i = 0; i < kPoolSize; ++i) {
+            if (!_encoders[i]) continue;
+            bool expected = false;
+            if (_inUse[i].compare_exchange_strong(expected, true, std::memory_order_acquire)) {
+                const SlotRelease release{ _inUse[i] };
+                // may be empty on encode failure — caller falls back
+                return _encoders[i]->compress(image, quality);
+            }
+        }
+        return ""; // All slots busy — caller falls back to TurboJPEG
+    }
+
+    // ── Unified entry point: nvJPEG pool on NVIDIA, TurboJPEG otherwise ──
 
     std::string CompressJpegToString(const cv::Mat& image, int quality) {
-        if (anscv_vendor_gate::IsNvidiaGpuAvailable()) {
-            static thread_local NvJpegCompressor nvCompressor;
-            if (nvCompressor.isValid()) {
-                std::string result = nvCompressor.compress(image, quality);
-                if (!result.empty()) return result;
-            }
-            // Fall through to TurboJPEG on failure
-        }
+        // Try GPU path first (returns "" if non-NVIDIA, pool full, or encode fails)
+        std::string result = NvJpegPool::Instance().tryCompress(image, quality);
+        if (!result.empty()) return result;
+
+        // CPU fallback — always available
         static thread_local TurboJpegCompressor compressor;
         return compressor.compress(image, quality);
     }
@@ -6633,6 +6677,19 @@ extern "C" __declspec(dllexport) int ANSCV_BmpToJpeg(LStrHandle bmpInput, int qu
     int bmpSize = (*bmpInput)->cnt;
     unsigned char* raw = reinterpret_cast<unsigned char*>((*bmpInput)->str);
 
+    // ── Passthrough: input is already JPEG (starts with FF D8 FF) ──
+    if (bmpSize >= 3 && raw[0] == 0xFF && raw[1] == 0xD8 && raw[2] == 0xFF) {
+        MgErr error = DSSetHandleSize(jpegOutput, sizeof(int32) + bmpSize * sizeof(uChar));
+        if (error != noErr) {
+            ANS_DBG("ANSCV", "BmpToJpeg: DSSetHandleSize failed (passthrough) - err=%d", error);
+            return -4;
+        }
+        (*jpegOutput)->cnt = bmpSize;
+        memcpy((*jpegOutput)->str, raw, bmpSize);
+        ANS_DBG("ANSCV", "BmpToJpeg: PASSTHROUGH - input is already JPEG (%d bytes)", bmpSize);
+        return 1;
+    }
+
     // ── Fast path: parse BMP header directly, zero-copy ──
     // Minimum BMP = file header (14) + info header (40) + some pixels
     constexpr int kMinBmpSize = sizeof(BmpFileHeader) + sizeof(BmpInfoHeader) + 1;
diff --git a/modules/ANSCV/ANSOpenCV.h b/modules/ANSCV/ANSOpenCV.h
index 137d32c..8b1cd53 100644
--- a/modules/ANSCV/ANSOpenCV.h
+++ b/modules/ANSCV/ANSOpenCV.h
@@ -5,6 +5,9 @@
 #include "ANSLicense.h"
 #include "LabVIEWHeader/extcode.h"
 #include
+#include <array>
+#include <atomic>
+#include <memory>
 #include
 
 // Forward declaration for NI Vision IMAQ Image (avoids nivision.h dependency for consumers)
@@ -63,6 +66,26 @@ namespace ANSCENTER
         unsigned char* _gpuBuffer = nullptr; // reusable device memory
         size_t _gpuBufferSize = 0;
     };
+
+    // Fixed-size pool of NvJpegCompressors (~40MB VRAM each).
+    // Threads that can't acquire an encoder fall back to TurboJPEG.
+    class NvJpegPool {
+    public:
+        static constexpr int kPoolSize = 4;
+        static NvJpegPool& Instance();
+        // Try to compress with nvJPEG. Returns empty string if no encoder
+        // available or on non-NVIDIA hardware — caller should fall back.
+        [[nodiscard]] std::string tryCompress(const cv::Mat& image, int quality);
+        [[nodiscard]] bool isAvailable() const noexcept { return _available; }
+    private:
+        NvJpegPool();
+        ~NvJpegPool() = default;
+        NvJpegPool(const NvJpegPool&) = delete;
+        NvJpegPool& operator=(const NvJpegPool&) = delete;
+        bool _available = false;
+        std::array<std::unique_ptr<NvJpegCompressor>, kPoolSize> _encoders;
+        std::array<std::atomic<bool>, kPoolSize> _inUse; // lock-free slot flags
+    };
     ///
     /// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.
     ///