ANSCORE/modules/ANSODEngine/engine.cpp

#include "engine.h"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include "macros.h"
#include "logger.h"
#include "util/Stopwatch.h"
#include "Utility.h"

using namespace nvinfer1;
using namespace Util;

// Routes a log message to the spdlog function that matches its severity.
//
//   severity - severity level attached to the message.
//   msg      - message text; handed to spdlog unchanged.
//
// kVERBOSE -> debug, kINFO -> info, kWARNING -> warn, kERROR -> error,
// kINTERNAL_ERROR -> critical. Any other value is not forwarded: a fixed
// notice is logged at info level instead of the message.
void Logger::log(Severity severity, const char *msg) noexcept {
    if (severity == Severity::kVERBOSE) {
        spdlog::debug(msg);
        return;
    }
    if (severity == Severity::kINFO) {
        spdlog::info(msg);
        return;
    }
    if (severity == Severity::kWARNING) {
        spdlog::warn(msg);
        return;
    }
    if (severity == Severity::kERROR) {
        spdlog::error(msg);
        return;
    }
    if (severity == Severity::kINTERNAL_ERROR) {
        spdlog::critical(msg);
        return;
    }

    // Severity value outside the handled set.
    spdlog::info("Unexpected severity level");
}

// Sets up the calibrator: stores the configuration, allocates a device buffer
// sized for one batch, and gathers the calibration image paths in random order.
//
//   batchSize        - number of images per calibration batch.
//   inputW, inputH   - width and height used to size the device buffer.
//   calibDataDirPath - directory whose files are listed as calibration images.
//   calibTableName   - path used when reading/writing the calibration cache.
//   inputBlobName    - input name that getBatch() compares against.
//   subVals, divVals, normalize - forwarded to the blob conversion in getBatch().
//   readCache        - whether readCalibrationCache() may load an existing cache.
//
// A missing directory or an undersized image set is only logged here; the
// constructor still completes.
// NOTE(review): doesFileExist() is being applied to a directory path - confirm
// the helper reports directories correctly on every target platform.
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath,
                                               const std::string &calibTableName, const std::string &inputBlobName,
                                               const std::array<float, 3> &subVals, const std::array<float, 3> &divVals, bool normalize,
                                               bool readCache)
    : m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName),
      m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) {

    // Device buffer for a whole batch: 3 x width x height x batchSize floats
    // (the factor 3 is presumably the colour-channel count).
    m_inputCount = 3 * inputW * inputH * batchSize;
    checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float)));

    // Report (but do not abort on) a calibration directory that cannot be found.
    const bool dirFound = doesFileExist(calibDataDirPath);
    if (!dirFound) {
        spdlog::error("Error, directory at provided path does not exist: " + calibDataDirPath);
    }

    // Collect every file in the directory as a candidate calibration image.
    m_imgPaths = getFilesInDirectory(calibDataDirPath);
    const auto requiredImages = static_cast<size_t>(batchSize);
    if (requiredImages > m_imgPaths.size()) {
        spdlog::error("Error, there are fewer calibration images than the specified batch size!");
    }

    // Shuffle with a freshly seeded engine so the image order differs between runs.
    std::random_device seedSource;
    std::default_random_engine shuffleEngine(seedSource());
    std::shuffle(m_imgPaths.begin(), m_imgPaths.end(), shuffleEngine);
}

// Returns the calibration batch size held in m_batchSize, i.e. the number of
// images getBatch() consumes per call.
int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept {
    // No computation involved: simply expose the stored value.
    return m_batchSize;
}

bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int32_t nbBindings) noexcept {
    // This method will read a batch of images into GPU memory, and place the
    // pointer to the GPU memory in the bindings variable.

    if (m_imgIdx + m_batchSize > static_cast<int>(m_imgPaths.size())) {
        // There are not enough images left to satisfy an entire batch
        return false;
    }

    // Read the calibration images into memory for the current batch
    std::vector<cv::cuda::GpuMat> inputImgs;
    for (int i = m_imgIdx; i < m_imgIdx + m_batchSize; i++) {
        spdlog::info("Reading image {}: {}", i, m_imgPaths[i]);
        auto cpuImg = cv::imread(m_imgPaths[i]);
        if (cpuImg.empty()) {
            spdlog::error("Fatal error: Unable to read image at path: " + m_imgPaths[i]);
            return false;
        }
        cv::cuda::Stream stream;  // Create a custom stream
        cv::cuda::GpuMat gpuImg;
        gpuImg.upload(cpuImg, stream);
        stream.waitForCompletion();
        //cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB);

        // TODO: Define any preprocessing code here, such as resizing
        auto resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW);

        inputImgs.emplace_back(std::move(resized));
    }

    // Convert the batch from NHWC to NCHW
    // ALso apply normalization, scaling, and mean subtraction
    auto mfloat = Engine<float>::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize, true);
    auto *dataPointer = mfloat.ptr<void>();

    // Copy the GPU buffer to member variable so that it persists
    checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice));

    m_imgIdx += m_batchSize;
    if (std::string(names[0]) != m_inputBlobName) {
        spdlog::error("Error: Incorrect input name provided!");
        return false;
    }
    bindings[0] = m_deviceInput;
    return true;
}

// Loads a previously written calibration cache from m_calibTableName.
//
//   length - set to the number of bytes loaded (0 when nothing was loaded).
//
// Returns a pointer to the loaded bytes, or nullptr when the cache is empty:
// reading is disabled via m_readCache, the file could not be opened, or the
// file has no content. The bytes are kept in m_calibCache.
void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept {
    spdlog::info("Searching for calibration cache: {}", m_calibTableName);

    // Start from an empty buffer so stale data is never returned.
    m_calibCache.clear();

    std::ifstream cacheFile(m_calibTableName, std::ios::binary);
    if (m_readCache && cacheFile.good()) {
        spdlog::info("Reading calibration cache: {}", m_calibTableName);
        // Unformatted, byte-for-byte read of the whole file (whitespace included).
        std::copy(std::istreambuf_iterator<char>(cacheFile), std::istreambuf_iterator<char>(), std::back_inserter(m_calibCache));
    }

    length = m_calibCache.size();
    if (length == 0) {
        return nullptr;
    }
    return m_calibCache.data();
}

void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept {
    spdlog::info("Writing calibration cache: {}", m_calibTableName);
    spdlog::info("Calibration cache size: {} bytes", length);
    std::ofstream output(m_calibTableName, std::ios::binary);
    output.write(reinterpret_cast<const char *>(ptr), length);
}

// Frees the device buffer referenced by m_deviceInput and passes the CUDA
// status to checkCudaErrorCode.
// NOTE(review): if checkCudaErrorCode can throw, a failure here would escape a
// destructor and terminate the program - confirm the helper's behaviour.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    checkCudaErrorCode(cudaFree(m_deviceInput));
}

// ============================================================================
// Explicit template instantiation definitions — DLL export
//
// Each line here:
//   1. Forces the compiler to generate every method of Engine<T> in THIS
//      translation unit (engine.obj).
//   2. Marks all those generated symbols __declspec(dllexport) so the linker
//      includes them in the DLL's export table.
//
// Consuming projects include engine.h, which contains the matching
//   extern template class ENGINE_API Engine<float>;
// declaration. That declaration suppresses re-instantiation in the consumer's
// TUs and directs the linker to resolve Engine<float> from this DLL.
//
// To export additional specialisations (e.g. Engine<uint8_t> for INT8 output),
// add a line here and the corresponding extern declaration in engine.h.
//
// NOTE(review): the export attribute is spelled out literally below instead of
// going through ENGINE_API - presumably so this translation unit exports
// unconditionally; confirm against the macro's definition in engine.h.
// ============================================================================
template class __declspec(dllexport) Engine<float>;