#include "RTOCRClassifier.h"

// NOTE(review): the include targets and template arguments in the reviewed
// copy of this file were unreadable; the headers below are the ones the code
// in this file visibly needs. Confirm against the original include list.
#include <opencv2/core/cuda.hpp>
#include <opencv2/imgproc.hpp>

#include <algorithm>
#include <exception>
#include <iostream>
#include <iterator>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

namespace ANSCENTER {
namespace rtocr {

/// @brief Acquire the classifier engine for @p onnxPath from the engine pool.
///
/// Configures an FP16, batch-size-1 engine with a fixed
/// kClsImageH x kClsImageW input and requests it from EnginePoolManager.
///
/// @param onnxPath       Path of the ONNX model; also used in the pool key.
/// @param gpuId          Device index stored in the engine options.
/// @param engineCacheDir Directory for engine files. When empty, the directory
///                       part of @p onnxPath is used, or "." if the path has
///                       no separator.
/// @return true when an engine was obtained; false when the pool returned no
///         engine or a std::exception was caught (both are logged to stderr).
///
/// NOTE(review): every call acquires from the pool, while the destructor
/// releases only once. Calling Initialize twice on one instance looks like it
/// leaves an unreleased pool reference; confirm against EnginePoolManager.
bool RTOCRClassifier::Initialize(const std::string& onnxPath, int gpuId,
                                 const std::string& engineCacheDir) {
    try {
        ANSCENTER::Options options;
        options.deviceIndex = gpuId;
        options.precision = ANSCENTER::Precision::FP16;
        options.maxBatchSize = 1;
        options.optBatchSize = 1;

        // Fixed input size for classifier
        options.minInputHeight = kClsImageH;
        options.optInputHeight = kClsImageH;
        options.maxInputHeight = kClsImageH;
        options.minInputWidth = kClsImageW;
        options.optInputWidth = kClsImageW;
        options.maxInputWidth = kClsImageW;

        if (!engineCacheDir.empty()) {
            options.engineFileDir = engineCacheDir;
        } else {
            // Fall back to the model's own directory (either separator style).
            const auto sepPos = onnxPath.find_last_of("/\\");
            options.engineFileDir =
                (sepPos != std::string::npos) ? onnxPath.substr(0, sepPos) : ".";
        }

        // NOTE(review): the cast target was unreadable in the reviewed copy;
        // `int` is assumed for the precision field of the pool key.
        m_poolKey = { onnxPath, static_cast<int>(options.precision),
                      options.maxBatchSize };

        // The meaning of the trailing `true, -1` arguments is defined by
        // EnginePoolManager::acquire and is not visible from this file.
        m_engine = EnginePoolManager::instance().acquire(
            m_poolKey, options, onnxPath, kClsSubVals, kClsDivVals, true, -1);
        m_usingSharedPool = (m_engine != nullptr);

        if (!m_engine) {
            std::cerr << "[RTOCRClassifier] Failed to build/load TRT engine: "
                      << onnxPath << std::endl;
            return false;
        }

        std::cout << "[RTOCRClassifier] Initialized TRT engine from: "
                  << onnxPath << std::endl;
        return true;
    } catch (const std::exception& e) {
        std::cerr << "[RTOCRClassifier] Initialize failed: " << e.what()
                  << std::endl;
        m_engine.reset();
        return false;
    }
}

/// @brief Classify each image and return (class index, score) per image.
///
/// Images are processed one at a time while holding `_mutex`. Each image is
/// resized directly to kClsImageW x kClsImageH, uploaded to the GPU and run
/// through the engine; the result is the argmax of the first output vector
/// together with its raw value.
///
/// @param images    Input images; an empty cv::Mat yields the fallback result.
/// @param clsThresh Not read by this function. NOTE(review): presumably the
///                  caller applies the threshold to the returned score; confirm.
/// @return One entry per input image, in input order. An entry is {0, 0.0f}
///         when the image is empty, inference fails, the output is empty, or
///         a std::exception is caught for that image. The returned vector is
///         empty when no engine is loaded or @p images is empty.
std::vector<std::pair<int, float>> RTOCRClassifier::Classify(
    const std::vector<cv::Mat>& images, float clsThresh) {
    // Template argument deduced from `_mutex` (C++17 CTAD).
    std::lock_guard lock(_mutex);

    std::vector<std::pair<int, float>> results;
    if (!m_engine || images.empty()) {
        return results;
    }
    results.reserve(images.size());

    for (size_t i = 0; i < images.size(); i++) {
        try {
            const cv::Mat& src = images[i];
            if (src.empty()) {
                results.emplace_back(0, 0.0f);
                continue;
            }

            // Preprocess: direct resize to 80x160 (PP-LCNet_x1_0_textline_ori)
            // No aspect ratio preservation — matches PaddleOCR official ResizeImage
            cv::Mat resized;
            cv::resize(src, resized, cv::Size(kClsImageW, kClsImageH));

            // Upload to GPU (keep BGR order - PaddleOCR official does NOT
            // convert BGR→RGB)
            cv::cuda::GpuMat gpuImg;
            gpuImg.upload(resized);

            // Run inference: one input tensor holding a batch of one image.
            std::vector<std::vector<cv::cuda::GpuMat>> inputs = { { gpuImg } };
            std::vector<std::vector<std::vector<float>>> featureVectors;
            if (!m_engine->runInference(inputs, featureVectors)) {
                results.emplace_back(0, 0.0f);
                continue;
            }

            if (featureVectors.empty() || featureVectors[0].empty() ||
                featureVectors[0][0].empty()) {
                results.emplace_back(0, 0.0f);
                continue;
            }

            // Find argmax and use raw output value as score
            // PaddleOCR v5 models include softmax, so output values are probabilities
            // Matches PaddleOCR official: score = preds[i, argmax_idx]
            const auto& output = featureVectors[0][0];
            // max_element returns the first largest element, i.e. the lowest
            // index wins on ties.
            const auto best = std::max_element(output.begin(), output.end());
            const int bestIdx =
                static_cast<int>(std::distance(output.begin(), best));
            results.emplace_back(bestIdx, *best);
        } catch (const std::exception& e) {
            std::cerr << "[RTOCRClassifier] Classify failed for image " << i
                      << ": " << e.what() << std::endl;
            results.emplace_back(0, 0.0f);
        }
    }
    return results;
}

/// @brief Give the engine back to the pool (if it came from there) and drop
///        this instance's handle to it.
RTOCRClassifier::~RTOCRClassifier() {
    try {
        if (m_usingSharedPool) {
            // Release the pool entry before dropping our own handle.
            EnginePoolManager::instance().release(m_poolKey);
            m_usingSharedPool = false;
        }
        m_engine.reset();
    } catch (...) {
        // Destructors must not throw; cleanup here is best-effort.
    }
}

} // namespace rtocr
} // namespace ANSCENTER