ANSODEngine/engine.cpp

#include "engine.h"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include "macros.h"
#include "logger.h"
#include "util/Stopwatch.h"
#include "Utility.h"

using namespace nvinfer1;
using namespace Util;

void Logger::log(Severity severity, const char *msg) noexcept {
    switch (severity) {
        case Severity::kVERBOSE:
            spdlog::debug(msg);
            break;
        case Severity::kINFO:
            spdlog::info(msg);
            break;
        case Severity::kWARNING:
            spdlog::warn(msg);
            break;
        case Severity::kERROR:
            spdlog::error(msg);
            break;
        case Severity::kINTERNAL_ERROR:
            spdlog::critical(msg);
            break;
        default:
            spdlog::info("Unexpected severity level");
    }
}

Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath,
                                               const std::string &calibTableName, const std::string &inputBlobName,
                                               const std::array<float, 3> &subVals, const std::array<float, 3> &divVals, bool normalize,
                                               bool readCache)
    : m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName),
      m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) {

    // Allocate GPU memory to hold the entire batch
    m_inputCount = 3 * inputW * inputH * batchSize;
    checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float)));

    // Read the name of all the files in the specified directory.
    if (!doesFileExist(calibDataDirPath)) {
        auto msg = "Error, directory at provided path does not exist: " + calibDataDirPath;
        spdlog::error(msg);
        //throw std::runtime_error(msg);
    }

    m_imgPaths = getFilesInDirectory(calibDataDirPath);
    if (m_imgPaths.size() < static_cast<size_t>(batchSize)) {
        auto msg = "Error, there are fewer calibration images than the specified batch size!";
        spdlog::error(msg);
        //throw std::runtime_error(msg);
    }

    // Randomize the calibration data
    auto rd = std::random_device{};
    auto rng = std::default_random_engine{rd()};
    std::shuffle(std::begin(m_imgPaths), std::end(m_imgPaths), rng);
}

int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept {
    // Return the batch size
    return m_batchSize;
}

bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int32_t nbBindings) noexcept {
    // This method will read a batch of images into GPU memory, and place the
    // pointer to the GPU memory in the bindings variable.

    if (m_imgIdx + m_batchSize > static_cast<int>(m_imgPaths.size())) {
        // There are not enough images left to satisfy an entire batch
        return false;
    }

    // Read the calibration images into memory for the current batch
    std::vector<cv::cuda::GpuMat> inputImgs;
    for (int i = m_imgIdx; i < m_imgIdx + m_batchSize; i++) {
        spdlog::info("Reading image {}: {}", i, m_imgPaths[i]);
        auto cpuImg = cv::imread(m_imgPaths[i]);
        if (cpuImg.empty()) {
            spdlog::error("Fatal error: Unable to read image at path: " + m_imgPaths[i]);
            return false;
        }
        cv::cuda::Stream stream;  // Create a custom stream
        cv::cuda::GpuMat gpuImg;
        gpuImg.upload(cpuImg, stream);
        stream.waitForCompletion();
        //cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB);

        // TODO: Define any preprocessing code here, such as resizing
        auto resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW);

        inputImgs.emplace_back(std::move(resized));
    }

    // Convert the batch from NHWC to NCHW
    // ALso apply normalization, scaling, and mean subtraction
    auto mfloat = Engine<float>::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize, true);
    auto *dataPointer = mfloat.ptr<void>();

    // Copy the GPU buffer to member variable so that it persists
    checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice));

    m_imgIdx += m_batchSize;
    if (std::string(names[0]) != m_inputBlobName) {
        spdlog::error("Error: Incorrect input name provided!");
        return false;
    }
    bindings[0] = m_deviceInput;
    return true;
}

void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept {
    spdlog::info("Searching for calibration cache: {}", m_calibTableName);
    m_calibCache.clear();
    std::ifstream input(m_calibTableName, std::ios::binary);
    input >> std::noskipws;
    if (m_readCache && input.good()) {
        spdlog::info("Reading calibration cache: {}", m_calibTableName);
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(m_calibCache));
    }
    length = m_calibCache.size();
    return length ? m_calibCache.data() : nullptr;
}

void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept {
    spdlog::info("Writing calibration cache: {}", m_calibTableName);
    spdlog::info("Calibration cache size: {} bytes", length);
    std::ofstream output(m_calibTableName, std::ios::binary);
    output.write(reinterpret_cast<const char *>(ptr), length);
}

Int8EntropyCalibrator2::~Int8EntropyCalibrator2() { checkCudaErrorCode(cudaFree(m_deviceInput)); };

// ============================================================================
// Explicit template instantiation definitions — DLL export
//
// Each line here:
//   1. Forces the compiler to generate every method of Engine<T> in THIS
//      translation unit (engine.obj).
//   2. Marks all those generated symbols __declspec(dllexport) so the linker
//      includes them in the DLL's export table.
//
// Consuming projects include engine.h, which contains the matching
//   extern template class ENGINE_API Engine<float>;
// declaration. That declaration suppresses re-instantiation in the consumer's
// TUs and directs the linker to resolve Engine<float> from this DLL.
//
// To export additional specialisations (e.g. Engine<uint8_t> for INT8 output),
// add a line here and the corresponding extern declaration in engine.h.
// ============================================================================
template class __declspec(dllexport) Engine<float>;
Initial setup for CLion 2026-03-28 16:54:11 +11:00			`#include "engine.h"`
			`#include <algorithm>`
			`#include <filesystem>`
			`#include <fstream>`
			`#include <iostream>`
			`#include <iterator>`
			`#include <random>`
			`#include "macros.h"`
			`#include "logger.h"`
			`#include "util/Stopwatch.h"`
			`#include "Utility.h"`

			`using namespace nvinfer1;`
			`using namespace Util;`

			`void Logger::log(Severity severity, const char *msg) noexcept {`
			`switch (severity) {`
			`case Severity::kVERBOSE:`
			`spdlog::debug(msg);`
			`break;`
			`case Severity::kINFO:`
			`spdlog::info(msg);`
			`break;`
			`case Severity::kWARNING:`
			`spdlog::warn(msg);`
			`break;`
			`case Severity::kERROR:`
			`spdlog::error(msg);`
			`break;`
			`case Severity::kINTERNAL_ERROR:`
			`spdlog::critical(msg);`
			`break;`
			`default:`
			`spdlog::info("Unexpected severity level");`
			`}`
			`}`

			`Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath,`
			`const std::string &calibTableName, const std::string &inputBlobName,`
			`const std::array<float, 3> &subVals, const std::array<float, 3> &divVals, bool normalize,`
			`bool readCache)`
			`: m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName),`
			`m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) {`

			`// Allocate GPU memory to hold the entire batch`
			`m_inputCount = 3 * inputW * inputH * batchSize;`
			`checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float)));`

			`// Read the name of all the files in the specified directory.`
			`if (!doesFileExist(calibDataDirPath)) {`
			`auto msg = "Error, directory at provided path does not exist: " + calibDataDirPath;`
			`spdlog::error(msg);`
			`//throw std::runtime_error(msg);`
			`}`

			`m_imgPaths = getFilesInDirectory(calibDataDirPath);`
			`if (m_imgPaths.size() < static_cast<size_t>(batchSize)) {`
			`auto msg = "Error, there are fewer calibration images than the specified batch size!";`
			`spdlog::error(msg);`
			`//throw std::runtime_error(msg);`
			`}`

			`// Randomize the calibration data`
			`auto rd = std::random_device{};`
			`auto rng = std::default_random_engine{rd()};`
			`std::shuffle(std::begin(m_imgPaths), std::end(m_imgPaths), rng);`
			`}`

			`int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept {`
			`// Return the batch size`
			`return m_batchSize;`
			`}`

			`bool Int8EntropyCalibrator2::getBatch(void bindings, const char names, int32_t nbBindings) noexcept {`
			`// This method will read a batch of images into GPU memory, and place the`
			`// pointer to the GPU memory in the bindings variable.`

			`if (m_imgIdx + m_batchSize > static_cast<int>(m_imgPaths.size())) {`
			`// There are not enough images left to satisfy an entire batch`
			`return false;`
			`}`

			`// Read the calibration images into memory for the current batch`
			`std::vector<cv::cuda::GpuMat> inputImgs;`
			`for (int i = m_imgIdx; i < m_imgIdx + m_batchSize; i++) {`
			`spdlog::info("Reading image {}: {}", i, m_imgPaths[i]);`
			`auto cpuImg = cv::imread(m_imgPaths[i]);`
			`if (cpuImg.empty()) {`
			`spdlog::error("Fatal error: Unable to read image at path: " + m_imgPaths[i]);`
			`return false;`
			`}`
			`cv::cuda::Stream stream; // Create a custom stream`
			`cv::cuda::GpuMat gpuImg;`
			`gpuImg.upload(cpuImg, stream);`
			`stream.waitForCompletion();`
			`//cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB);`

			`// TODO: Define any preprocessing code here, such as resizing`
			`auto resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW);`

			`inputImgs.emplace_back(std::move(resized));`
			`}`

			`// Convert the batch from NHWC to NCHW`
			`// ALso apply normalization, scaling, and mean subtraction`
			`auto mfloat = Engine<float>::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize, true);`
			`auto *dataPointer = mfloat.ptr<void>();`

			`// Copy the GPU buffer to member variable so that it persists`
			`checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice));`

			`m_imgIdx += m_batchSize;`
			`if (std::string(names[0]) != m_inputBlobName) {`
			`spdlog::error("Error: Incorrect input name provided!");`
			`return false;`
			`}`
			`bindings[0] = m_deviceInput;`
			`return true;`
			`}`

			`void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept {`
			`spdlog::info("Searching for calibration cache: {}", m_calibTableName);`
			`m_calibCache.clear();`
			`std::ifstream input(m_calibTableName, std::ios::binary);`
			`input >> std::noskipws;`
			`if (m_readCache && input.good()) {`
			`spdlog::info("Reading calibration cache: {}", m_calibTableName);`
			`std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(m_calibCache));`
			`}`
			`length = m_calibCache.size();`
			`return length ? m_calibCache.data() : nullptr;`
			`}`

			`void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept {`
			`spdlog::info("Writing calibration cache: {}", m_calibTableName);`
			`spdlog::info("Calibration cache size: {} bytes", length);`
			`std::ofstream output(m_calibTableName, std::ios::binary);`
			`output.write(reinterpret_cast<const char *>(ptr), length);`
			`}`

			`Int8EntropyCalibrator2::~Int8EntropyCalibrator2() { checkCudaErrorCode(cudaFree(m_deviceInput)); };`

			`// ============================================================================`
			`// Explicit template instantiation definitions — DLL export`
			`//`
			`// Each line here:`
			`// 1. Forces the compiler to generate every method of Engine<T> in THIS`
			`// translation unit (engine.obj).`
			`// 2. Marks all those generated symbols __declspec(dllexport) so the linker`
			`// includes them in the DLL's export table.`
			`//`
			`// Consuming projects include engine.h, which contains the matching`
			`// extern template class ENGINE_API Engine<float>;`
			`// declaration. That declaration suppresses re-instantiation in the consumer's`
			`// TUs and directs the linker to resolve Engine<float> from this DLL.`
			`//`
			`// To export additional specialisations (e.g. Engine<uint8_t> for INT8 output),`
			`// add a line here and the corresponding extern declaration in engine.h.`
			`// ============================================================================`
			`template class __declspec(dllexport) Engine<float>;`