Add NvJpegPool (4 encoders) and JPEG passthrough in BmpToJpeg

- NvJpegPool: singleton pool of 4 NvJpegCompressor instances (~160MB
  VRAM in total) with lock-free slot acquisition. Threads that can't
  grab a slot fall back to TurboJPEG with zero wait.
- JPEG passthrough: BmpToJpeg now checks if input is already JPEG
  (FF D8 FF magic) and copies directly without re-encoding.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 08:33:17 +10:00
parent 53a82da74a
commit 6c72751a14
2 changed files with 81 additions and 9 deletions

View File

@@ -246,17 +246,53 @@ namespace ANSCENTER
return jpegStr;
}
// ── Unified entry point: nvJPEG on NVIDIA, TurboJPEG otherwise ──
// ── NvJpegPool: fixed pool of 4 GPU encoders, lock-free acquire ──

// Accessor for the single process-wide pool.
// The pool is a function-local static, so it is constructed the first time
// this function is called and the same object is returned on every call.
NvJpegPool& NvJpegPool::Instance() {
    static NvJpegPool pool;
    return pool;
}
// Populates the encoder pool.
//
// When the vendor gate reports no NVIDIA GPU, nothing is created and
// _available is left untouched. Otherwise each slot is marked free and given
// a newly constructed NvJpegCompressor; a compressor that reports !isValid()
// is destroyed again so its slot stays empty. The pool is flagged available
// as soon as at least one slot holds a valid encoder.
NvJpegPool::NvJpegPool() {
    if (!anscv_vendor_gate::IsNvidiaGpuAvailable()) {
        return;
    }

    for (int slot = 0; slot < kPoolSize; ++slot) {
        _inUse[slot].store(false, std::memory_order_relaxed);

        _encoders[slot] = std::make_unique<NvJpegCompressor>();
        if (_encoders[slot]->isValid()) {
            // One usable encoder is enough for the pool to be offered to callers.
            _available = true;
        } else {
            _encoders[slot].reset();
        }
    }
}
// Tries to JPEG-encode `image` on one of the pooled encoders without waiting.
//
// Parameters:
//   image   - frame to encode; passed unchanged to NvJpegCompressor::compress.
//   quality - JPEG quality; passed unchanged to NvJpegCompressor::compress.
//
// Returns the encoder's output, or an empty string when the pool is not
// available, when every populated slot is currently claimed by another
// caller, or when the encoder itself returned an empty string. Callers treat
// "" as the signal to fall back to TurboJPEG.
//
// If compress() throws, the exception propagates to the caller, but the
// claimed slot is released first so the encoder is not lost to the pool.
std::string NvJpegPool::tryCompress(const cv::Mat& image, int quality) {
    if (!_available) return "";

    // Scope guard: hands the claimed slot back on every exit path, including
    // stack unwinding. Without it, a throwing compress() would leave the flag
    // set forever and permanently shrink the pool.
    struct SlotRelease {
        std::atomic<bool>& flag;
        ~SlotRelease() { flag.store(false, std::memory_order_release); }
    };

    // Lock-free slot acquisition: try each slot with compare_exchange
    for (int i = 0; i < kPoolSize; ++i) {
        if (!_encoders[i]) continue;  // slot has no valid encoder

        bool expected = false;
        if (!_inUse[i].compare_exchange_strong(expected, true, std::memory_order_acquire)) {
            continue;  // slot is busy — try the next one instead of waiting
        }

        const SlotRelease release{_inUse[i]};
        // The return value is built before `release` is destroyed, so the slot
        // stays claimed for the whole encode.
        return _encoders[i]->compress(image, quality);  // may be empty on encode failure — caller falls back
    }
    return ""; // All slots busy — caller falls back to TurboJPEG
}
// ── Unified entry point: nvJPEG pool on NVIDIA, TurboJPEG otherwise ──
// Encodes `image` as JPEG and returns the encoded bytes as a std::string.
//
// Parameters:
//   image   - frame to encode; forwarded unchanged to the chosen compressor.
//   quality - JPEG quality; forwarded unchanged to the chosen compressor.
//
// The shared NvJpegPool is the only GPU path. This function deliberately
// keeps no per-thread NvJpegCompressor: one GPU encoder per calling thread
// would bypass the pool's fixed encoder count. Whenever the pool yields an
// empty string, the per-thread TurboJPEG compressor is used instead and its
// result is returned as-is.
std::string CompressJpegToString(const cv::Mat& image, int quality) {
    // Try GPU path first (returns "" if non-NVIDIA, pool full, or encode fails)
    std::string result = NvJpegPool::Instance().tryCompress(image, quality);
    if (!result.empty()) return result;

    // CPU fallback — always available
    static thread_local TurboJpegCompressor compressor;
    return compressor.compress(image, quality);
}
@@ -6633,6 +6669,19 @@ extern "C" __declspec(dllexport) int ANSCV_BmpToJpeg(LStrHandle bmpInput, int qu
int bmpSize = (*bmpInput)->cnt;
unsigned char* raw = reinterpret_cast<unsigned char*>((*bmpInput)->str);
// ── Passthrough: input is already JPEG (starts with FF D8 FF) ──
if (bmpSize >= 3 && raw[0] == 0xFF && raw[1] == 0xD8 && raw[2] == 0xFF) {
MgErr error = DSSetHandleSize(jpegOutput, sizeof(int32) + bmpSize * sizeof(uChar));
if (error != noErr) {
ANS_DBG("ANSCV", "BmpToJpeg: DSSetHandleSize failed (passthrough) - err=%d", error);
return -4;
}
(*jpegOutput)->cnt = bmpSize;
memcpy((*jpegOutput)->str, raw, bmpSize);
ANS_DBG("ANSCV", "BmpToJpeg: PASSTHROUGH - input is already JPEG (%d bytes)", bmpSize);
return 1;
}
// ── Fast path: parse BMP header directly, zero-copy ──
// Minimum BMP = file header (14) + info header (40) + some pixels
constexpr int kMinBmpSize = sizeof(BmpFileHeader) + sizeof(BmpInfoHeader) + 1;

View File

@@ -5,6 +5,9 @@
#include "ANSLicense.h"
#include "LabVIEWHeader/extcode.h"
#include <vector>
#include <array>
#include <atomic>
#include <memory>
#include <opencv2/opencv.hpp>
// Forward declaration for NI Vision IMAQ Image (avoids nivision.h dependency for consumers)
@@ -63,6 +66,26 @@ namespace ANSCENTER
unsigned char* _gpuBuffer = nullptr; // reusable device memory
size_t _gpuBufferSize = 0;
};
// Fixed-size pool of NvJpegCompressors (~40MB VRAM each).
// Threads that can't acquire an encoder fall back to TurboJPEG.
//
// The pool is a singleton reached through Instance(); it cannot be copied or
// constructed from outside the class.
class NvJpegPool {
public:
    // Number of encoder slots in the pool.
    static constexpr int kPoolSize = 4;

    // Returns the single shared pool.
    static NvJpegPool& Instance();

    // Try to compress with nvJPEG. Returns empty string if no encoder
    // available or on non-NVIDIA hardware — caller should fall back.
    [[nodiscard]] std::string tryCompress(const cv::Mat& image, int quality);

    // True when at least one encoder slot was successfully initialized.
    [[nodiscard]] bool isAvailable() const noexcept { return _available; }

private:
    NvJpegPool();
    ~NvJpegPool() = default;
    NvJpegPool(const NvJpegPool&) = delete;
    NvJpegPool& operator=(const NvJpegPool&) = delete;

    bool _available = false;

    // One entry per slot; an entry is null when that slot has no valid encoder.
    std::array<std::unique_ptr<NvJpegCompressor>, kPoolSize> _encoders;

    // Lock-free slot flags (true = slot currently claimed by a caller).
    // Value-initialized so every flag starts as false even when the
    // constructor returns early: before C++20 a default-initialized
    // std::atomic holds an indeterminate value.
    std::array<std::atomic<bool>, kPoolSize> _inUse{};
};
/// <summary>
/// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.
/// </summary>