Add NvJpegPool (4 encoders) and JPEG passthrough in BmpToJpeg
- NvJpegPool: singleton pool of 4 NvJpegCompressor instances with lock-free slot acquisition (~160MB VRAM total). Threads that can't grab a slot fall back to TurboJPEG without waiting.
- JPEG passthrough: BmpToJpeg now checks whether the input is already JPEG (FF D8 FF magic bytes) and copies it directly without re-encoding.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -246,17 +246,53 @@ namespace ANSCENTER
|
||||
return jpegStr;
|
||||
}
|
||||
|
||||
// ── Unified entry point: nvJPEG on NVIDIA, TurboJPEG otherwise ──
|
||||
// ── NvJpegPool: fixed pool of 4 GPU encoders, lock-free acquire ──
|
||||
|
||||
/// Returns the single process-wide NvJpegPool.
/// The pool is a function-local static, so it is constructed on the first
/// call and the same object is returned on every later call.
NvJpegPool& NvJpegPool::Instance() {
    static NvJpegPool sharedPool;
    return sharedPool;
}
|
||||
|
||||
/// Builds the encoder pool.
/// On hosts where the vendor gate reports no NVIDIA GPU nothing is created
/// and the pool stays unavailable. Otherwise each of the kPoolSize slots is
/// marked free and given its own NvJpegCompressor; a compressor that reports
/// !isValid() is discarded and its slot is left empty. The pool counts as
/// available as soon as one slot holds a valid encoder.
NvJpegPool::NvJpegPool() {
    if (!anscv_vendor_gate::IsNvidiaGpuAvailable()) {
        return;
    }

    for (int slot = 0; slot < kPoolSize; ++slot) {
        _inUse[slot].store(false, std::memory_order_relaxed);

        auto candidate = std::make_unique<NvJpegCompressor>();
        if (candidate->isValid()) {
            // Keep only encoders that initialized successfully.
            _encoders[slot] = std::move(candidate);
            _available = true;
        }
        // An invalid candidate is destroyed here, before the next slot is tried.
    }
}
|
||||
|
||||
/// Tries to JPEG-encode `image` on one of the pooled nvJPEG encoders.
///
/// @param image   Image handed unchanged to NvJpegCompressor::compress.
/// @param quality Quality value handed unchanged to NvJpegCompressor::compress.
/// @return The encoder's output, or an empty string when the pool is
///         unavailable, every populated slot is currently claimed, or the
///         encoder itself returned an empty result. An empty return means
///         the caller should use its fallback encoder.
///
/// Never blocks: each slot is probed once with a compare-exchange and the
/// first free one is used.
std::string NvJpegPool::tryCompress(const cv::Mat& image, int quality) {
    if (!_available) return "";

    // Releases the claimed slot on every exit path. Without this, an
    // exception escaping compress() (e.g. std::bad_alloc while building the
    // result string) would leave the slot marked busy forever and silently
    // shrink the pool.
    struct SlotRelease {
        std::atomic<bool>& flag;
        ~SlotRelease() { flag.store(false, std::memory_order_release); }
    };

    for (int i = 0; i < kPoolSize; ++i) {
        if (!_encoders[i]) continue;  // slot whose encoder failed to initialize

        bool expected = false;
        if (_inUse[i].compare_exchange_strong(expected, true, std::memory_order_acquire)) {
            SlotRelease release{_inUse[i]};
            // The return value is fully constructed before `release` runs,
            // so the slot is handed back only after encoding has finished.
            return _encoders[i]->compress(image, quality);  // may be empty on encode failure — caller falls back
        }
    }

    return "";  // All slots busy — caller falls back to TurboJPEG
}
|
||||
|
||||
// ── Unified entry point: nvJPEG pool on NVIDIA, TurboJPEG otherwise ──
|
||||
|
||||
/// Encodes `image` as JPEG and returns the encoded bytes as a string.
///
/// The shared NvJpegPool is tried first; it returns an empty string on
/// non-NVIDIA hardware, when every pooled encoder is busy, or when the GPU
/// encode fails. In any of those cases the per-thread TurboJpegCompressor
/// produces the result instead.
///
/// The earlier per-thread `static thread_local NvJpegCompressor` path is
/// intentionally gone: it created one GPU encoder per calling thread, which
/// defeats the fixed-size pool and its bounded VRAM footprint.
///
/// @param image   Image to encode; forwarded unchanged to the encoders.
/// @param quality Quality value; forwarded unchanged to the encoders.
/// @return Whatever the chosen encoder returned.
std::string CompressJpegToString(const cv::Mat& image, int quality) {
    // Try GPU path first (returns "" if non-NVIDIA, pool full, or encode fails)
    std::string result = NvJpegPool::Instance().tryCompress(image, quality);
    if (!result.empty()) return result;

    // CPU fallback — one TurboJPEG compressor per thread, reused across calls
    static thread_local TurboJpegCompressor compressor;
    return compressor.compress(image, quality);
}
|
||||
@@ -6633,6 +6669,19 @@ extern "C" __declspec(dllexport) int ANSCV_BmpToJpeg(LStrHandle bmpInput, int qu
|
||||
int bmpSize = (*bmpInput)->cnt;
|
||||
unsigned char* raw = reinterpret_cast<unsigned char*>((*bmpInput)->str);
|
||||
|
||||
// ── Passthrough: input is already JPEG (starts with FF D8 FF) ──
|
||||
if (bmpSize >= 3 && raw[0] == 0xFF && raw[1] == 0xD8 && raw[2] == 0xFF) {
|
||||
MgErr error = DSSetHandleSize(jpegOutput, sizeof(int32) + bmpSize * sizeof(uChar));
|
||||
if (error != noErr) {
|
||||
ANS_DBG("ANSCV", "BmpToJpeg: DSSetHandleSize failed (passthrough) - err=%d", error);
|
||||
return -4;
|
||||
}
|
||||
(*jpegOutput)->cnt = bmpSize;
|
||||
memcpy((*jpegOutput)->str, raw, bmpSize);
|
||||
ANS_DBG("ANSCV", "BmpToJpeg: PASSTHROUGH - input is already JPEG (%d bytes)", bmpSize);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// ── Fast path: parse BMP header directly, zero-copy ──
|
||||
// Minimum BMP = file header (14) + info header (40) + some pixels
|
||||
constexpr int kMinBmpSize = sizeof(BmpFileHeader) + sizeof(BmpInfoHeader) + 1;
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
#include "ANSLicense.h"
|
||||
#include "LabVIEWHeader/extcode.h"
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
// Forward declaration for NI Vision IMAQ Image (avoids nivision.h dependency for consumers)
|
||||
@@ -63,6 +66,26 @@ namespace ANSCENTER
|
||||
unsigned char* _gpuBuffer = nullptr; // reusable device memory
|
||||
size_t _gpuBufferSize = 0;
|
||||
};
|
||||
|
||||
// Fixed-size pool of NvJpegCompressors (~40MB VRAM each).
|
||||
// Threads that can't acquire an encoder fall back to TurboJPEG.
|
||||
class NvJpegPool {
|
||||
public:
|
||||
static constexpr int kPoolSize = 4;
|
||||
static NvJpegPool& Instance();
|
||||
// Try to compress with nvJPEG. Returns empty string if no encoder
|
||||
// available or on non-NVIDIA hardware — caller should fall back.
|
||||
[[nodiscard]] std::string tryCompress(const cv::Mat& image, int quality);
|
||||
[[nodiscard]] bool isAvailable() const noexcept { return _available; }
|
||||
private:
|
||||
NvJpegPool();
|
||||
~NvJpegPool() = default;
|
||||
NvJpegPool(const NvJpegPool&) = delete;
|
||||
NvJpegPool& operator=(const NvJpegPool&) = delete;
|
||||
bool _available = false;
|
||||
std::array<std::unique_ptr<NvJpegCompressor>, kPoolSize> _encoders;
|
||||
std::array<std::atomic<bool>, kPoolSize> _inUse; // lock-free slot flags
|
||||
};
|
||||
/// <summary>
|
||||
/// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user