Initial setup for CLion
This commit is contained in:
143
ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
Normal file
143
ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "RTOCRClassifier.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/cudaimgproc.hpp>
|
||||
#include <opencv2/cudawarping.hpp>
|
||||
#include <opencv2/cudaarithm.hpp>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
bool RTOCRClassifier::Initialize(const std::string& onnxPath, int gpuId,
|
||||
const std::string& engineCacheDir) {
|
||||
try {
|
||||
ANSCENTER::Options options;
|
||||
options.deviceIndex = gpuId;
|
||||
options.precision = ANSCENTER::Precision::FP16;
|
||||
options.maxBatchSize = 1;
|
||||
options.optBatchSize = 1;
|
||||
|
||||
// Fixed input size for classifier
|
||||
options.minInputHeight = kClsImageH;
|
||||
options.optInputHeight = kClsImageH;
|
||||
options.maxInputHeight = kClsImageH;
|
||||
options.minInputWidth = kClsImageW;
|
||||
options.optInputWidth = kClsImageW;
|
||||
options.maxInputWidth = kClsImageW;
|
||||
|
||||
if (!engineCacheDir.empty()) {
|
||||
options.engineFileDir = engineCacheDir;
|
||||
}
|
||||
else {
|
||||
auto pos = onnxPath.find_last_of("/\\");
|
||||
options.engineFileDir = (pos != std::string::npos) ? onnxPath.substr(0, pos) : ".";
|
||||
}
|
||||
|
||||
m_poolKey = { onnxPath,
|
||||
static_cast<int>(options.precision),
|
||||
options.maxBatchSize };
|
||||
m_engine = EnginePoolManager<float>::instance().acquire(
|
||||
m_poolKey, options, onnxPath,
|
||||
kClsSubVals, kClsDivVals, true, -1);
|
||||
m_usingSharedPool = (m_engine != nullptr);
|
||||
|
||||
if (!m_engine) {
|
||||
std::cerr << "[RTOCRClassifier] Failed to build/load TRT engine: " << onnxPath << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::cout << "[RTOCRClassifier] Initialized TRT engine from: " << onnxPath << std::endl;
|
||||
return true;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "[RTOCRClassifier] Initialize failed: " << e.what() << std::endl;
|
||||
m_engine.reset();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, float>> RTOCRClassifier::Classify(
|
||||
const std::vector<cv::Mat>& images, float clsThresh) {
|
||||
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
std::vector<std::pair<int, float>> results;
|
||||
|
||||
if (!m_engine || images.empty()) return results;
|
||||
results.reserve(images.size());
|
||||
|
||||
for (size_t i = 0; i < images.size(); i++) {
|
||||
try {
|
||||
if (images[i].empty()) {
|
||||
results.push_back({ 0, 0.0f });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Preprocess: direct resize to 80x160 (PP-LCNet_x1_0_textline_ori)
|
||||
// No aspect ratio preservation — matches PaddleOCR official ResizeImage
|
||||
cv::Mat resized;
|
||||
cv::resize(images[i], resized, cv::Size(kClsImageW, kClsImageH));
|
||||
|
||||
// Upload to GPU (keep BGR order - PaddleOCR official does NOT convert BGR→RGB)
|
||||
cv::cuda::GpuMat gpuImg;
|
||||
gpuImg.upload(resized);
|
||||
|
||||
// Run inference
|
||||
std::vector<std::vector<cv::cuda::GpuMat>> inputs = { { gpuImg } };
|
||||
std::vector<std::vector<std::vector<float>>> featureVectors;
|
||||
|
||||
if (!m_engine->runInference(inputs, featureVectors)) {
|
||||
results.push_back({ 0, 0.0f });
|
||||
continue;
|
||||
}
|
||||
|
||||
if (featureVectors.empty() || featureVectors[0].empty() ||
|
||||
featureVectors[0][0].empty()) {
|
||||
results.push_back({ 0, 0.0f });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find argmax and use raw output value as score
|
||||
// PaddleOCR v5 models include softmax, so output values are probabilities
|
||||
// Matches PaddleOCR official: score = preds[i, argmax_idx]
|
||||
const std::vector<float>& output = featureVectors[0][0];
|
||||
int numClasses = static_cast<int>(output.size());
|
||||
|
||||
int bestIdx = 0;
|
||||
float bestScore = output[0];
|
||||
for (int c = 1; c < numClasses; c++) {
|
||||
if (output[c] > bestScore) {
|
||||
bestScore = output[c];
|
||||
bestIdx = c;
|
||||
}
|
||||
}
|
||||
|
||||
results.push_back({ bestIdx, bestScore });
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "[RTOCRClassifier] Classify failed for image " << i
|
||||
<< ": " << e.what() << std::endl;
|
||||
results.push_back({ 0, 0.0f });
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// Destructor: hands a pooled engine back to EnginePoolManager (when one was
// acquired through it) and drops this object's engine handle. Any exception
// is swallowed so that nothing propagates out of the destructor.
RTOCRClassifier::~RTOCRClassifier() {
    try {
        // Release the pool entry first, then our own handle.
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
        }
        // Resetting a null handle and clearing an already-false flag are no-ops,
        // so this covers both the pooled and non-pooled cases.
        m_engine.reset();
        m_usingSharedPool = false;
    }
    catch (...) {
        // Intentionally ignored: best-effort cleanup during destruction.
    }
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
Reference in New Issue
Block a user