// ANSCORE/modules/ANSOCR/ANSRTOCR/RTOCRDetector.h
// (Listing metadata from the source viewer: 45 lines, 1.5 KiB, C++.)

#pragma once
#include "RTOCRTypes.h"
#include "engine.h"
#include "engine/EnginePoolManager.h"
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
namespace ANSCENTER {
namespace rtocr {
class RTOCRDetector {
public:
RTOCRDetector() = default;
~RTOCRDetector();
RTOCRDetector(const RTOCRDetector&) = delete;
RTOCRDetector& operator=(const RTOCRDetector&) = delete;
bool Initialize(const std::string& onnxPath, int gpuId = 0,
const std::string& engineCacheDir = "",
int maxSideLen = kDetMaxSideLen);
std::vector<TextBox> Detect(const cv::Mat& image,
int maxSideLen = kDetMaxSideLen,
float dbThresh = kDetDbThresh,
float boxThresh = kDetBoxThresh,
float unclipRatio = kDetUnclipRatio,
bool useDilation = false);
private:
// Postprocessing helpers (matches ONNX/PaddleOCR official flow exactly)
std::array<cv::Point2f, 4> GetMiniBoxes(const cv::RotatedRect& rect);
float BoxScoreFast(const cv::Mat& probMap, const std::array<cv::Point2f, 4>& box);
std::vector<cv::Point2f> UnclipPolygon(const std::array<cv::Point2f, 4>& box, float unclipRatio);
std::shared_ptr<Engine<float>> m_engine = nullptr;
EnginePoolManager<float>::PoolKey m_poolKey;
bool m_usingSharedPool = false;
int m_engineMaxSideLen = kDetMaxSideLen; // Actual TRT engine max spatial dim
std::mutex _mutex;
};
} // namespace rtocr
} // namespace ANSCENTER