Initial setup for CLion

2026-03-28 16:54:11 +11:00
parent 239cc02591
commit 7b4134133c
1136 changed files with 811916 additions and 0 deletions
--- a/ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
+++ b/ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
@@ -0,0 +1,143 @@
+#include "RTOCRClassifier.h"
+
+#include <opencv2/imgproc.hpp>
+#include <opencv2/cudaimgproc.hpp>
+#include <opencv2/cudawarping.hpp>
+#include <opencv2/cudaarithm.hpp>
+#include <iostream>
+#include <cmath>
+
+namespace ANSCENTER {
+namespace rtocr {
+
+/// Acquire the classifier's TensorRT engine from the shared engine pool.
+///
+/// Configures an FP16, batch-size-1 engine whose input height/width are pinned
+/// to kClsImageH x kClsImageW, then asks EnginePoolManager<float> for an engine
+/// keyed by (onnxPath, precision, maxBatchSize).
+///
+/// Calling Initialize() again on an already-initialized object first returns
+/// the previously acquired engine to the pool, so acquire/release calls stay
+/// paired with the single release performed by the destructor.
+///
+/// NOTE(review): this function does not take _mutex, so it must not run
+/// concurrently with Classify() on the same object — confirm against callers.
+///
+/// @param onnxPath       Path to the ONNX model; also part of the pool key.
+/// @param gpuId          Stored in options.deviceIndex.
+/// @param engineCacheDir Directory for engine files. When empty, the directory
+///                       part of onnxPath is used ("." if it has no separator).
+/// @return true when an engine was obtained; false otherwise (the reason is
+///         written to std::cerr).
+bool RTOCRClassifier::Initialize(const std::string& onnxPath, int gpuId,
+                                  const std::string& engineCacheDir) {
+    try {
+        // Give back whatever an earlier Initialize() acquired before m_poolKey
+        // is overwritten; otherwise that pool reference could never be released.
+        // The flag is cleared first so a throwing release() is not retried by
+        // the destructor with a key that is about to change.
+        if (m_usingSharedPool) {
+            m_usingSharedPool = false;
+            EnginePoolManager<float>::instance().release(m_poolKey);
+        }
+        m_engine.reset();
+
+        ANSCENTER::Options options;
+        options.deviceIndex = gpuId;
+        options.precision = ANSCENTER::Precision::FP16;
+        options.maxBatchSize = 1;
+        options.optBatchSize = 1;
+
+        // Fixed input size for classifier
+        options.minInputHeight = kClsImageH;
+        options.optInputHeight = kClsImageH;
+        options.maxInputHeight = kClsImageH;
+        options.minInputWidth  = kClsImageW;
+        options.optInputWidth  = kClsImageW;
+        options.maxInputWidth  = kClsImageW;
+
+        if (!engineCacheDir.empty()) {
+            options.engineFileDir = engineCacheDir;
+        }
+        else {
+            // No cache directory given: fall back to the model's own directory.
+            auto pos = onnxPath.find_last_of("/\\");
+            options.engineFileDir = (pos != std::string::npos) ? onnxPath.substr(0, pos) : ".";
+        }
+
+        m_poolKey = { onnxPath,
+            static_cast<int>(options.precision),
+            options.maxBatchSize };
+        // kClsSubVals / kClsDivVals are presumably the normalization constants;
+        // the trailing `true, -1` are forwarded unchanged — TODO confirm meaning
+        // against EnginePoolManager::acquire.
+        m_engine = EnginePoolManager<float>::instance().acquire(
+            m_poolKey, options, onnxPath,
+            kClsSubVals, kClsDivVals, true, -1);
+        m_usingSharedPool = (m_engine != nullptr);
+
+        if (!m_engine) {
+            std::cerr << "[RTOCRClassifier] Failed to build/load TRT engine: " << onnxPath << std::endl;
+            return false;
+        }
+
+        std::cout << "[RTOCRClassifier] Initialized TRT engine from: " << onnxPath << std::endl;
+        return true;
+    }
+    catch (const std::exception& e) {
+        std::cerr << "[RTOCRClassifier] Initialize failed: " << e.what() << std::endl;
+        // m_usingSharedPool is left as-is: if acquire() had already succeeded,
+        // the reference is still released by the destructor or the next
+        // Initialize(); if it had not, the flag was cleared above.
+        m_engine.reset();
+        return false;
+    }
+}
+
+/// Classify each image and return one (class index, score) pair per image,
+/// in input order.
+///
+/// The whole call runs under _mutex. An empty result is returned when no
+/// engine is loaded or `images` is empty. Any image that is empty, fails
+/// inference, yields no output, or raises a std::exception contributes the
+/// placeholder pair (0, 0.0f). `clsThresh` is not read in this function.
+std::vector<std::pair<int, float>> RTOCRClassifier::Classify(
+    const std::vector<cv::Mat>& images, float clsThresh) {
+
+    std::lock_guard<std::mutex> lock(_mutex);
+    std::vector<std::pair<int, float>> results;
+
+    if (images.empty() || !m_engine) return results;
+
+    const size_t imageCount = images.size();
+    results.reserve(imageCount);
+
+    for (size_t idx = 0; idx < imageCount; ++idx) {
+        const cv::Mat& source = images[idx];
+        try {
+            if (source.empty()) {
+                results.emplace_back(0, 0.0f);
+                continue;
+            }
+
+            // Plain resize to the classifier's fixed kClsImageW x kClsImageH
+            // input; aspect ratio is intentionally not preserved (original
+            // author's note: matches PaddleOCR's official ResizeImage).
+            cv::Mat scaled;
+            cv::resize(source, scaled, cv::Size(kClsImageW, kClsImageH));
+
+            // Channel order is left untouched (BGR) before the GPU upload —
+            // per the original note, PaddleOCR does not convert BGR→RGB here.
+            cv::cuda::GpuMat deviceImage;
+            deviceImage.upload(scaled);
+
+            // One input tensor holding a single-image batch.
+            std::vector<std::vector<cv::cuda::GpuMat>> batch = { { deviceImage } };
+            std::vector<std::vector<std::vector<float>>> outputs;
+
+            const bool inferenceOk = m_engine->runInference(batch, outputs);
+            if (!inferenceOk || outputs.empty() || outputs[0].empty() ||
+                outputs[0][0].empty()) {
+                results.emplace_back(0, 0.0f);
+                continue;
+            }
+
+            // Argmax over the class scores; the winning raw value is reported
+            // as the score (original note: PaddleOCR v5 models already apply
+            // softmax, so the values are probabilities).
+            const std::vector<float>& scores = outputs[0][0];
+            const int classCount = static_cast<int>(scores.size());
+
+            int winner = 0;
+            float winnerScore = scores[0];
+            for (int k = 1; k < classCount; ++k) {
+                const float candidate = scores[k];
+                if (candidate > winnerScore) {
+                    winnerScore = candidate;
+                    winner = k;
+                }
+            }
+
+            results.emplace_back(winner, winnerScore);
+        }
+        catch (const std::exception& e) {
+            // Keep the output aligned with the input: log and emit a placeholder.
+            std::cerr << "[RTOCRClassifier] Classify failed for image " << idx
+                      << ": " << e.what() << std::endl;
+            results.emplace_back(0, 0.0f);
+        }
+    }
+
+    return results;
+}
+
+/// Releases the engine. When it came from the shared pool, the pool is told
+/// to release m_poolKey before the local handle is dropped; otherwise only
+/// the local handle is reset. Exceptions are swallowed so the destructor
+/// never throws.
+RTOCRClassifier::~RTOCRClassifier() {
+    try {
+        if (!m_usingSharedPool) {
+            // Engine (if any) is not pool-managed: just drop our handle.
+            if (m_engine) {
+                m_engine.reset();
+            }
+        }
+        else {
+            // Pool-managed: hand the reference back first, then clear state.
+            EnginePoolManager<float>::instance().release(m_poolKey);
+            m_engine.reset();
+            m_usingSharedPool = false;
+        }
+    }
+    catch (...) {}
+}
+
+} // namespace rtocr
+} // namespace ANSCENTER