Files
ANSCORE/modules/ANSODEngine/engine.cpp

162 lines
6.5 KiB
C++

#include "engine.h"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include "macros.h"
#include "logger.h"
#include "util/Stopwatch.h"
#include "Utility.h"
using namespace nvinfer1;
using namespace Util;
/// Forwards a TensorRT log message to spdlog at the corresponding level.
///
/// @param severity  Severity reported by TensorRT for this message.
/// @param msg       Message text. A null pointer is logged as "(null)".
///
/// The text is always passed as a format argument ("{}") rather than in the
/// format-string position, so braces inside a message are never treated as
/// placeholders. An unrecognised severity is logged at info level together
/// with the message, so the message is not lost.
void Logger::log(Severity severity, const char *msg) noexcept {
    const char *text = (msg != nullptr) ? msg : "(null)";
    switch (severity) {
    case Severity::kVERBOSE:
        spdlog::debug("{}", text);
        break;
    case Severity::kINFO:
        spdlog::info("{}", text);
        break;
    case Severity::kWARNING:
        spdlog::warn("{}", text);
        break;
    case Severity::kERROR:
        spdlog::error("{}", text);
        break;
    case Severity::kINTERNAL_ERROR:
        spdlog::critical("{}", text);
        break;
    default:
        // Unknown severity value: keep the message instead of dropping it.
        spdlog::info("Unexpected severity level: {}", text);
        break;
    }
}
/// Constructs the calibrator: allocates a device buffer large enough for one
/// batch of 3-channel float images, collects the calibration image paths from
/// a directory and shuffles them.
///
/// @param batchSize         Number of images per calibration batch.
/// @param inputW            Network input width in pixels.
/// @param inputH            Network input height in pixels.
/// @param calibDataDirPath  Directory whose files are used as calibration images.
/// @param calibTableName    Path of the calibration cache file.
/// @param inputBlobName     Expected name of the network input.
/// @param subVals           Per-channel values stored for later preprocessing.
/// @param divVals           Per-channel values stored for later preprocessing.
/// @param normalize         Normalization flag stored for later preprocessing.
/// @param readCache         Whether an existing calibration cache may be read.
///
/// A missing directory or too few images is logged as an error but does not
/// abort construction.
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath,
                                               const std::string &calibTableName, const std::string &inputBlobName,
                                               const std::array<float, 3> &subVals, const std::array<float, 3> &divVals, bool normalize,
                                               bool readCache)
    : m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName),
      m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) {
    // Allocate GPU memory to hold the entire batch (3 channels * W * H * batch).
    // The product is widened to std::size_t before multiplying so that large
    // inputs cannot overflow 32-bit signed arithmetic.
    // NOTE(review): assumes m_inputCount is an unsigned, size_t-like member — confirm in engine.h.
    m_inputCount = static_cast<std::size_t>(3) * static_cast<std::size_t>(inputW) * static_cast<std::size_t>(inputH) *
                   static_cast<std::size_t>(batchSize);
    checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float)));

    // Read the name of all the files in the specified directory.
    // A failed existence check is reported but is not fatal here.
    if (!doesFileExist(calibDataDirPath)) {
        spdlog::error("Error, directory at provided path does not exist: {}", calibDataDirPath);
    }
    m_imgPaths = getFilesInDirectory(calibDataDirPath);
    if (m_imgPaths.size() < static_cast<std::size_t>(batchSize)) {
        // Reported but not fatal; getBatch() returns false when it cannot fill a batch.
        spdlog::error("Error, there are fewer calibration images than the specified batch size!");
    }

    // Randomize the calibration data
    std::random_device seedSource;
    std::default_random_engine rng{seedSource()};
    std::shuffle(m_imgPaths.begin(), m_imgPaths.end(), rng);
}
int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept {
// Return the batch size
return m_batchSize;
}
bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int32_t nbBindings) noexcept {
// This method will read a batch of images into GPU memory, and place the
// pointer to the GPU memory in the bindings variable.
if (m_imgIdx + m_batchSize > static_cast<int>(m_imgPaths.size())) {
// There are not enough images left to satisfy an entire batch
return false;
}
// Read the calibration images into memory for the current batch
std::vector<cv::cuda::GpuMat> inputImgs;
for (int i = m_imgIdx; i < m_imgIdx + m_batchSize; i++) {
spdlog::info("Reading image {}: {}", i, m_imgPaths[i]);
auto cpuImg = cv::imread(m_imgPaths[i]);
if (cpuImg.empty()) {
spdlog::error("Fatal error: Unable to read image at path: " + m_imgPaths[i]);
return false;
}
cv::cuda::Stream stream; // Create a custom stream
cv::cuda::GpuMat gpuImg;
gpuImg.upload(cpuImg, stream);
stream.waitForCompletion();
//cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB);
// TODO: Define any preprocessing code here, such as resizing
auto resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW);
inputImgs.emplace_back(std::move(resized));
}
// Convert the batch from NHWC to NCHW
// ALso apply normalization, scaling, and mean subtraction
auto mfloat = Engine<float>::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize, true);
auto *dataPointer = mfloat.ptr<void>();
// Copy the GPU buffer to member variable so that it persists
checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice));
m_imgIdx += m_batchSize;
if (std::string(names[0]) != m_inputBlobName) {
spdlog::error("Error: Incorrect input name provided!");
return false;
}
bindings[0] = m_deviceInput;
return true;
}
/// Loads the calibration cache file into m_calibCache when cache reading is
/// enabled and the file can be opened.
///
/// @param length  Receives the number of bytes held in the cache buffer
///                (0 when nothing was loaded).
/// @return Pointer to the cached bytes, or nullptr when the buffer is empty.
void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept {
    spdlog::info("Searching for calibration cache: {}", m_calibTableName);

    // Start from an empty buffer so a failed read never returns stale data.
    m_calibCache.clear();

    std::ifstream cacheFile(m_calibTableName, std::ios::binary);
    const bool shouldLoad = m_readCache && cacheFile.good();
    if (shouldLoad) {
        spdlog::info("Reading calibration cache: {}", m_calibTableName);
        // Copy the file contents byte for byte (no whitespace skipping).
        std::copy(std::istreambuf_iterator<char>(cacheFile), std::istreambuf_iterator<char>(), std::back_inserter(m_calibCache));
    }

    length = m_calibCache.size();
    if (length == 0) {
        return nullptr;
    }
    return m_calibCache.data();
}
void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept {
spdlog::info("Writing calibration cache: {}", m_calibTableName);
spdlog::info("Calibration cache size: {} bytes", length);
std::ofstream output(m_calibTableName, std::ios::binary);
output.write(reinterpret_cast<const char *>(ptr), length);
}
/// Frees the device buffer held in m_deviceInput.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    const auto freeStatus = cudaFree(m_deviceInput);
    checkCudaErrorCode(freeStatus);
}
// ============================================================================
// Explicit template instantiation definitions — DLL export
//
// Each line here:
//   1. Forces the compiler to generate every method of Engine<T> in THIS
//      translation unit (engine.obj).
//   2. Marks all those generated symbols __declspec(dllexport) so the linker
//      includes them in the DLL's export table.
//
// Consuming projects include engine.h, which contains the matching
//     extern template class ENGINE_API Engine<float>;
// declaration. That declaration suppresses re-instantiation in the consumer's
// TUs and directs the linker to resolve Engine<float> from this DLL.
//
// To export additional specialisations (e.g. Engine<uint8_t> for INT8 output),
// add a line here and the corresponding extern declaration in engine.h.
//
// NOTE(review): the header is described above as using the ENGINE_API macro,
// while the line below hard-codes __declspec(dllexport). Confirm that the two
// agree when this DLL is built.
// ============================================================================
template class __declspec(dllexport) Engine<float>;