Add nvJPEG GPU-accelerated JPEG encoding with NVIDIA auto-detection
BmpToJpeg was slow (~25-45 ms for 4K) due to two bottlenecks:
1. cv::imdecode for BMP parsing (unnecessary for uncompressed BMP)
2. TurboJPEG CPU encoding (~11 ms for 4K)

Fix 1: Zero-copy BMP parsing — parse the header directly and wrap the pixel data in a cv::Mat without allocation or copy. This eliminates ~47 MB of heap allocations per 4K frame.

Fix 2: NvJpegCompressor class using the nvJPEG hardware encoder on NVIDIA GPUs (~1-2 ms for 4K). It is integrated into CompressJpegToString, so all 5 JPEG encoding call sites benefit automatically. A reusable GPU buffer avoids per-frame cudaMalloc/cudaFree. Silent fallback to TurboJPEG on Intel/AMD hardware or if nvJPEG fails.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -34,16 +34,35 @@ namespace ANSCENTER
|
||||
public:
|
||||
TurboJpegCompressor();
|
||||
~TurboJpegCompressor() noexcept;
|
||||
// Delete copy constructor and assignment operator
|
||||
TurboJpegCompressor(const TurboJpegCompressor&) = delete;
|
||||
TurboJpegCompressor& operator=(const TurboJpegCompressor&) = delete;
|
||||
// Your original logic with minimal optimizations
|
||||
[[nodiscard]] std::string compress(const cv::Mat& image, int quality);
|
||||
private:
|
||||
void* _handle = nullptr;
|
||||
unsigned char* _buffer = nullptr;
|
||||
unsigned long _bufferSize = 0;
|
||||
};
|
||||
|
||||
// GPU-accelerated JPEG encoder using nvJPEG (NVIDIA only).
|
||||
// Falls back silently if init fails or on non-NVIDIA hardware.
|
||||
class NvJpegCompressor {
|
||||
public:
|
||||
NvJpegCompressor();
|
||||
~NvJpegCompressor() noexcept;
|
||||
NvJpegCompressor(const NvJpegCompressor&) = delete;
|
||||
NvJpegCompressor& operator=(const NvJpegCompressor&) = delete;
|
||||
[[nodiscard]] std::string compress(const cv::Mat& image, int quality);
|
||||
[[nodiscard]] bool isValid() const noexcept { return _valid; }
|
||||
private:
|
||||
void cleanup() noexcept;
|
||||
bool _valid = false;
|
||||
void* _nvHandle = nullptr; // nvjpegHandle_t
|
||||
void* _encState = nullptr; // nvjpegEncoderState_t
|
||||
void* _encParams = nullptr; // nvjpegEncoderParams_t
|
||||
void* _stream = nullptr; // cudaStream_t
|
||||
unsigned char* _gpuBuffer = nullptr; // reusable device memory
|
||||
size_t _gpuBufferSize = 0;
|
||||
};
|
||||
/// <summary>
|
||||
/// The ANSOPENCV class provides various image-processing functionalities using OpenCV and the ANS Center SDK.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user