#include "engine.h"

// NOTE(review): the names of the six angle-bracket headers below and the
// template arguments used throughout this file (e.g. std::array<float, 3>,
// Engine<float>) were not legible in the copy under review and have been
// reconstructed from how the code uses them; <exception> is newly added for
// the catch clauses in getBatch(). Confirm against the build before merging.
#include <algorithm>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>

#include "macros.h"
#include "logger.h"
#include "util/Stopwatch.h"
#include "Utility.h"

using namespace nvinfer1;
using namespace Util;

// Routes a log message to the spdlog level that corresponds to `severity`.
// Unknown severities are reported at info level with a fixed message.
void Logger::log(Severity severity, const char *msg) noexcept {
    switch (severity) {
    case Severity::kVERBOSE:
        spdlog::debug(msg);
        break;
    case Severity::kINFO:
        spdlog::info(msg);
        break;
    case Severity::kWARNING:
        spdlog::warn(msg);
        break;
    case Severity::kERROR:
        spdlog::error(msg);
        break;
    case Severity::kINTERNAL_ERROR:
        spdlog::critical(msg);
        break;
    default:
        spdlog::info("Unexpected severity level");
    }
}

// Stores the calibration settings, collects and shuffles the calibration image
// paths found in `calibDataDirPath`, and allocates a device buffer large enough
// for one batch (3 * inputW * inputH * batchSize floats).
//
// A missing directory or too few images is logged but does not throw (the
// throw statements are disabled below).
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath,
                                               const std::string &calibTableName, const std::string &inputBlobName,
                                               const std::array<float, 3> &subVals, const std::array<float, 3> &divVals, bool normalize,
                                               bool readCache)
    : m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName),
      m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) {
    // Read the name of all the files in the specified directory.
    if (!doesFileExist(calibDataDirPath)) {
        auto msg = "Error, directory at provided path does not exist: " + calibDataDirPath;
        spdlog::error(msg);
        //throw std::runtime_error(msg);
    }

    m_imgPaths = getFilesInDirectory(calibDataDirPath);
    if (m_imgPaths.size() < static_cast<size_t>(batchSize)) {
        auto msg = "Error, there are fewer calibration images than the specified batch size!";
        spdlog::error(msg);
        //throw std::runtime_error(msg);
    }

    // Randomize the calibration data
    auto rd = std::random_device{};
    auto rng = std::default_random_engine{rd()};
    std::shuffle(std::begin(m_imgPaths), std::end(m_imgPaths), rng);

    // Allocate GPU memory to hold the entire batch. This is done last: the
    // destructor does not run when a constructor throws, so allocating before
    // the fallible host-side work above would leak the buffer on failure.
    m_inputCount = 3 * inputW * inputH * batchSize;
    checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float)));
}

// Returns the batch size given at construction.
int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept { return m_batchSize; }

// Loads the next `m_batchSize` calibration images into the device buffer and
// publishes that buffer through `bindings[0]`.
//
// Returns false when fewer than a full batch of images remain, when the
// binding arguments are missing or `names[0]` is not the expected input name,
// when an image cannot be read, or when preparing the batch throws. The image
// cursor only advances after a batch was prepared successfully.
bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int32_t nbBindings) noexcept {
    if (m_imgIdx + m_batchSize > static_cast<int>(m_imgPaths.size())) {
        // There are not enough images left to satisfy an entire batch
        return false;
    }

    // Validate the caller's arguments before doing any expensive work.
    if (bindings == nullptr || names == nullptr || nbBindings < 1 || names[0] == nullptr) {
        spdlog::error("Error: No input binding provided!");
        return false;
    }
    if (std::string(names[0]) != m_inputBlobName) {
        spdlog::error("Error: Incorrect input name provided!");
        return false;
    }

    // This function is noexcept, so anything thrown by the helpers below must
    // be caught here; otherwise the process would be terminated.
    try {
        // Read the calibration images into memory for the current batch
        std::vector<cv::cuda::GpuMat> inputImgs;
        cv::cuda::Stream stream; // One upload stream shared by the whole batch
        const int batchEnd = m_imgIdx + m_batchSize;
        for (int i = m_imgIdx; i < batchEnd; i++) {
            spdlog::info("Reading image {}: {}", i, m_imgPaths[i]);
            auto cpuImg = cv::imread(m_imgPaths[i]);
            if (cpuImg.empty()) {
                spdlog::error("Fatal error: Unable to read image at path: " + m_imgPaths[i]);
                return false;
            }

            cv::cuda::GpuMat gpuImg;
            gpuImg.upload(cpuImg, stream);
            stream.waitForCompletion();
            //cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB);

            // TODO: Define any preprocessing code here, such as resizing
            auto resized = Engine<float>::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW);
            inputImgs.emplace_back(std::move(resized));
        }

        // Convert the batch from NHWC to NCHW
        // Also apply normalization, scaling, and mean subtraction
        auto mfloat = Engine<float>::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize, true);
        auto *dataPointer = mfloat.ptr<void>();

        // Copy the GPU buffer to member variable so that it persists
        checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice));
    } catch (const std::exception &e) {
        spdlog::error("Error: Failed to prepare calibration batch: {}", e.what());
        return false;
    } catch (...) {
        spdlog::error("Error: Failed to prepare calibration batch: unknown exception");
        return false;
    }

    m_imgIdx += m_batchSize;
    bindings[0] = m_deviceInput;
    return true;
}

// Loads the calibration table from `m_calibTableName` into `m_calibCache` when
// cache reading is enabled and the file can be opened. `length` receives the
// number of bytes loaded; the return value is the cache data, or nullptr when
// nothing was loaded.
void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept {
    spdlog::info("Searching for calibration cache: {}", m_calibTableName);
    m_calibCache.clear();
    std::ifstream input(m_calibTableName, std::ios::binary);
    input >> std::noskipws; // Keep whitespace bytes; the table is copied verbatim
    if (m_readCache && input.good()) {
        spdlog::info("Reading calibration cache: {}", m_calibTableName);
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(m_calibCache));
    }
    length = m_calibCache.size();
    return length ? m_calibCache.data() : nullptr;
}

// Writes `length` bytes starting at `ptr` to the file `m_calibTableName`.
// Failures to open or write the file are logged; nothing is thrown.
void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept {
    spdlog::info("Writing calibration cache: {}", m_calibTableName);
    spdlog::info("Calibration cache size: {} bytes", length);
    std::ofstream output(m_calibTableName, std::ios::binary);
    if (!output) {
        spdlog::error("Unable to open calibration cache for writing: {}", m_calibTableName);
        return;
    }
    output.write(reinterpret_cast<const char *>(ptr), static_cast<std::streamsize>(length));
    if (!output) {
        spdlog::error("Failed to write calibration cache: {}", m_calibTableName);
    }
}

// Releases the device buffer allocated by the constructor.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    // cudaFree is called directly instead of through checkCudaErrorCode:
    // destructors are implicitly noexcept, so an exception escaping from here
    // would terminate the process.
    // NOTE(review): assumes checkCudaErrorCode throws on failure - confirm
    // against its definition.
    const cudaError_t status = cudaFree(m_deviceInput);
    if (status != cudaSuccess) {
        spdlog::error("Failed to free calibration input buffer: {}", cudaGetErrorString(status));
    }
}

// ============================================================================
// Explicit template instantiation definitions — DLL export
//
// Each line here:
//   1. Forces the compiler to generate every method of Engine<float> in THIS
//      translation unit (engine.obj).
//   2. Marks all those generated symbols __declspec(dllexport) so the linker
//      includes them in the DLL's export table.
//
// Consuming projects include engine.h, which contains the matching
//      extern template class ENGINE_API Engine<float>;
// declaration. That declaration suppresses re-instantiation in the consumer's
// TUs and directs the linker to resolve Engine<float> from this DLL.
//
// To export additional specialisations (e.g. Engine<int8_t> for INT8 output),
// add a line here and the corresponding extern declaration in engine.h.
// ============================================================================
template class __declspec(dllexport) Engine<float>;