Add nvJPEG GPU-accelerated JPEG encoding with NVIDIA auto-detection

BmpToJpeg was slow (~25-45ms for 4K) due to two bottlenecks:
1. cv::imdecode for BMP parsing (unnecessary for uncompressed BMP)
2. TurboJPEG CPU encoding (~11ms for 4K)

Fix 1: Zero-copy BMP parsing — parse header directly and wrap pixel
data in cv::Mat without allocation or copy. Eliminates ~47MB of heap
allocations per 4K frame.

Fix 2: NvJpegCompressor class using nvJPEG hardware encoder on NVIDIA
GPUs (~1-2ms for 4K). Integrated into CompressJpegToString so all 5
JPEG encoding callsites benefit automatically. Reusable GPU buffer
avoids per-frame cudaMalloc/cudaFree. Silent fallback to TurboJPEG
on Intel/AMD or if nvJPEG fails.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 07:50:13 +10:00
parent 750ccff58b
commit 53a82da74a
2 changed files with 194 additions and 6 deletions

View File

@@ -34,16 +34,35 @@ namespace ANSCENTER
public:
TurboJpegCompressor();
~TurboJpegCompressor() noexcept;
// Delete copy constructor and assignment operator
TurboJpegCompressor(const TurboJpegCompressor&) = delete;
TurboJpegCompressor& operator=(const TurboJpegCompressor&) = delete;
// Your original logic with minimal optimizations
[[nodiscard]] std::string compress(const cv::Mat& image, int quality);
private:
void* _handle = nullptr;
unsigned char* _buffer = nullptr;
unsigned long _bufferSize = 0;
};
// GPU-accelerated JPEG encoder using nvJPEG (NVIDIA only).
// Falls back silently if init fails or on non-NVIDIA hardware.
class NvJpegCompressor {
public:
NvJpegCompressor();
~NvJpegCompressor() noexcept;
NvJpegCompressor(const NvJpegCompressor&) = delete;
NvJpegCompressor& operator=(const NvJpegCompressor&) = delete;
[[nodiscard]] std::string compress(const cv::Mat& image, int quality);
[[nodiscard]] bool isValid() const noexcept { return _valid; }
private:
void cleanup() noexcept;
bool _valid = false;
void* _nvHandle = nullptr; // nvjpegHandle_t
void* _encState = nullptr; // nvjpegEncoderState_t
void* _encParams = nullptr; // nvjpegEncoderParams_t
void* _stream = nullptr; // cudaStream_t
unsigned char* _gpuBuffer = nullptr; // reusable device memory
size_t _gpuBufferSize = 0;
};
/// <summary>
/// // ANSOPENCV class provides various image processing functionalities using OpenCV and ANS Center SDK.
/// </summary>