Add NvJpegPool (4 encoders) and JPEG passthrough in BmpToJpeg

- NvJpegPool: singleton pool of 4 NvJpegCompressor instances with lock-free slot acquisition (~160MB VRAM). Threads that can't grab a slot fall back to TurboJPEG with zero wait.
- JPEG passthrough: BmpToJpeg now checks if input is already JPEG (FF D8 FF magic) and copies directly without re-encoding.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 08:33:17 +10:00
parent 53a82da74a
commit 6c72751a14
2 changed files with 81 additions and 9 deletions
--- a/modules/ANSCV/ANSOpenCV.cpp
+++ b/modules/ANSCV/ANSOpenCV.cpp
@@ -246,17 +246,53 @@ namespace ANSCENTER
 		return jpegStr;
 	}

-	// ── Unified entry point: nvJPEG on NVIDIA, TurboJPEG otherwise ──
+	// ── NvJpegPool: fixed pool of 4 GPU encoders, lock-free acquire ──
+
+	// Accessor for the single shared pool. The pool is a function-local static,
+	// so it is constructed the first time this function is called.
+	NvJpegPool& NvJpegPool::Instance() {
+		static NvJpegPool sharedPool;
+		return sharedPool;
+	}
+
+	// Builds up to kPoolSize encoders. On a machine without an NVIDIA GPU nothing
+	// is created and the pool stays unavailable. An encoder that reports itself
+	// invalid is discarded, leaving its slot null; the pool counts as available
+	// as soon as at least one encoder is usable.
+	NvJpegPool::NvJpegPool() {
+		if (!anscv_vendor_gate::IsNvidiaGpuAvailable()) return;
+
+		for (int slot = 0; slot < kPoolSize; ++slot) {
+			_inUse[slot].store(false, std::memory_order_relaxed);
+
+			auto& encoder = _encoders[slot];
+			encoder = std::make_unique<NvJpegCompressor>();
+			if (encoder->isValid()) {
+				_available = true;
+			} else {
+				encoder.reset();
+			}
+		}
+	}
+
+	// Attempts to JPEG-encode `image` on one of the pooled GPU encoders.
+	// Returns the encoder's output, or "" when the pool is unavailable, every
+	// usable slot is busy, or the encode produced nothing — in all of those cases
+	// the caller is expected to fall back to TurboJPEG. Never waits for a slot.
+	std::string NvJpegPool::tryCompress(const cv::Mat& image, int quality) {
+		if (!_available) return "";
+
+		// Hands the slot back on every exit path. Without this, an exception
+		// escaping compress() would leave the flag set and retire that encoder
+		// for the rest of the process.
+		struct SlotRelease {
+			std::atomic<bool>& flag;
+			~SlotRelease() { flag.store(false, std::memory_order_release); }
+		};
+
+		// Lock-free slot acquisition: try each slot with compare_exchange
+		for (int i = 0; i < kPoolSize; ++i) {
+			if (!_encoders[i]) continue;  // this slot's encoder failed to initialize
+			bool expected = false;
+			if (_inUse[i].compare_exchange_strong(expected, true, std::memory_order_acquire)) {
+				SlotRelease release{ _inUse[i] };
+				return _encoders[i]->compress(image, quality);  // may be empty on encode failure — caller falls back
+			}
+		}
+		return "";  // All slots busy — caller falls back to TurboJPEG
+	}
+
+	// ── Unified entry point: nvJPEG pool on NVIDIA, TurboJPEG otherwise ──

 	std::string CompressJpegToString(const cv::Mat& image, int quality) {
-		if (anscv_vendor_gate::IsNvidiaGpuAvailable()) {
-			static thread_local NvJpegCompressor nvCompressor;
-			if (nvCompressor.isValid()) {
-				std::string result = nvCompressor.compress(image, quality);
-				if (!result.empty()) return result;
-			}
-			// Fall through to TurboJPEG on failure
-		}
+		// GPU path first: tryCompress() yields "" when the pool is unavailable, all slots are busy, or the encoder returned nothing
+		std::string result = NvJpegPool::Instance().tryCompress(image, quality);
+		if (!result.empty()) return result;  // non-empty output means the GPU encode succeeded

+		// CPU fallback — always attempted when the GPU path gave no output; one TurboJpegCompressor per thread, reused across calls
 		static thread_local TurboJpegCompressor compressor;
 		return compressor.compress(image, quality);
 	}
@@ -6633,6 +6669,19 @@ extern "C" __declspec(dllexport) int ANSCV_BmpToJpeg(LStrHandle bmpInput, int qu
 		int bmpSize = (*bmpInput)->cnt;
 		unsigned char* raw = reinterpret_cast<unsigned char*>((*bmpInput)->str);

+		// ── Passthrough: input is already JPEG (starts with FF D8 FF) ──
+		if (bmpSize >= 3 && raw[0] == 0xFF && raw[1] == 0xD8 && raw[2] == 0xFF) {
+			MgErr error = DSSetHandleSize(jpegOutput, sizeof(int32) + bmpSize * sizeof(uChar));
+			if (error != noErr) {
+				ANS_DBG("ANSCV", "BmpToJpeg: DSSetHandleSize failed (passthrough) - err=%d", error);
+				return -4;
+			}
+			(*jpegOutput)->cnt = bmpSize;
+			memcpy((*jpegOutput)->str, raw, bmpSize);
+			ANS_DBG("ANSCV", "BmpToJpeg: PASSTHROUGH - input is already JPEG (%d bytes)", bmpSize);
+			return 1;
+		}
+
 		// ── Fast path: parse BMP header directly, zero-copy ──
 		// Minimum BMP = file header (14) + info header (40) + some pixels
 		constexpr int kMinBmpSize = sizeof(BmpFileHeader) + sizeof(BmpInfoHeader) + 1;
--- a/modules/ANSCV/ANSOpenCV.h
+++ b/modules/ANSCV/ANSOpenCV.h
@@ -5,6 +5,9 @@
 #include "ANSLicense.h"
 #include "LabVIEWHeader/extcode.h"
 #include <vector>
+#include <array>
+#include <atomic>
+#include <memory>
 #include <opencv2/opencv.hpp>

 // Forward declaration for NI Vision IMAQ Image (avoids nivision.h dependency for consumers)
@@ -63,6 +66,26 @@ namespace ANSCENTER
 		unsigned char*  _gpuBuffer = nullptr;     // reusable device memory
 		size_t          _gpuBufferSize = 0;
 	};
+
+	// Fixed-size pool of NvJpegCompressors (~40MB VRAM each), shared process-wide
+	// through Instance(). Threads that can't acquire an encoder get an empty
+	// string back and are expected to fall back to TurboJPEG.
+	class NvJpegPool {
+	public:
+		static constexpr int kPoolSize = 4;  // number of encoder slots

+		// Returns the single shared pool.
+		static NvJpegPool& Instance();

+		// Try to compress with nvJPEG. Returns empty string if no encoder
+		// available or on non-NVIDIA hardware — caller should fall back.
+		[[nodiscard]] std::string tryCompress(const cv::Mat& image, int quality);

+		// True when at least one encoder initialized successfully.
+		[[nodiscard]] bool isAvailable() const noexcept { return _available; }

+	private:
+		NvJpegPool();
+		~NvJpegPool() = default;
+		NvJpegPool(const NvJpegPool&) = delete;
+		NvJpegPool& operator=(const NvJpegPool&) = delete;

+		bool _available = false;  // set by the constructor once any encoder is usable
+		std::array<std::unique_ptr<NvJpegCompressor>, kPoolSize> _encoders;  // null = slot has no usable encoder
+		// Lock-free slot flags (true = encoder currently claimed). Value-initialized
+		// to false so they are never indeterminate, including when the constructor
+		// returns early without creating any encoder.
+		std::array<std::atomic<bool>, kPoolSize> _inUse{};
+	};
    /// <summary>
 	/// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.
    /// </summary>