Refactor project structure
This commit is contained in:
130
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
Normal file
130
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
#include "PaddleOCRV5Engine.h"
|
||||
#include "EPLoader.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
|
||||
const std::string& clsModelPath,
|
||||
const std::string& recModelPath,
|
||||
const std::string& dictPath) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
|
||||
try {
|
||||
// Initialize detector (also triggers EPLoader init in BasicOrtHandler)
|
||||
detector_ = std::make_unique<ONNXOCRDetector>(detModelPath);
|
||||
std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;
|
||||
|
||||
// Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
|
||||
// BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own
|
||||
// inline-static copy. Without this, inference calls from ANSOCR.dll crash.
|
||||
if (Ort::Global<void>::api_ == nullptr) {
|
||||
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
|
||||
}
|
||||
|
||||
// Initialize classifier (optional)
|
||||
if (!clsModelPath.empty()) {
|
||||
classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath);
|
||||
std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
|
||||
}
|
||||
else {
|
||||
classifier_.reset();
|
||||
std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;
|
||||
}
|
||||
|
||||
// Initialize recognizer
|
||||
recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath);
|
||||
if (!recognizer_->LoadDictionary(dictPath)) {
|
||||
std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
|
||||
return false;
|
||||
}
|
||||
std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;
|
||||
|
||||
_initialized = true;
|
||||
std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
|
||||
return true;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;
|
||||
detector_.reset();
|
||||
classifier_.reset();
|
||||
recognizer_.reset();
|
||||
_initialized = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Runs the full OCR pipeline on one image: detect text boxes, crop them,
///        optionally classify/rotate the crops, then recognize the text.
///
/// Holds `_mutex` for the whole call.
///
/// @param img Input image; an empty image yields an empty result.
/// @return One OCRPredictResult per recognized crop, carrying the four corner
///         points of the box the crop was taken from, the recognized text and
///         score, and the classifier label/score (0 / 0.0f when no classifier
///         is configured). Empty when the engine is not initialized, the image
///         is empty, nothing was detected, or no crop could be produced.
std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    std::vector<OCRPredictResult> results;

    if (!_initialized || img.empty()) {
        return results;
    }

    // Step 1: Text Detection
    auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);

    if (boxes.empty()) {
        return results;
    }

    // Step 2: Crop detected text regions.
    // Empty crops are dropped, so remember which box every kept crop came from;
    // otherwise the recognized text would be paired with the wrong box later.
    std::vector<cv::Mat> croppedImages;
    std::vector<size_t> cropToBox;
    croppedImages.reserve(boxes.size());
    cropToBox.reserve(boxes.size());
    for (size_t b = 0; b < boxes.size(); ++b) {
        cv::Mat cropped = GetRotateCropImage(img, boxes[b]);
        if (!cropped.empty()) {
            croppedImages.push_back(cropped);
            cropToBox.push_back(b);
        }
    }

    if (croppedImages.empty()) {
        // Nothing to classify or recognize.
        return results;
    }

    // Step 3: Classification (optional)
    std::vector<int> cls_labels(croppedImages.size(), 0);
    std::vector<float> cls_scores(croppedImages.size(), 0.0f);

    if (classifier_) {
        classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);

        // Rotate crops whose label is odd and whose score exceeds the threshold
        // (presumably odd labels mean "upside-down" — confirm against the classifier).
        // The size checks guard against Classify() returning fewer entries than crops.
        for (size_t i = 0; i < croppedImages.size() && i < cls_labels.size() && i < cls_scores.size(); i++) {
            if (cls_labels[i] % 2 == 1 && cls_scores[i] > _clsThresh) {
                cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
            }
        }
    }

    // Step 4: Text Recognition
    auto textLines = recognizer_->RecognizeBatch(croppedImages);

    // Step 5: Combine results (index i addresses crops, cropToBox[i] the source box)
    results.reserve(cropToBox.size());
    for (size_t i = 0; i < cropToBox.size() && i < textLines.size(); i++) {
        const auto& box = boxes[cropToBox[i]];
        OCRPredictResult result;

        // Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]
        result.box.resize(4);
        for (int j = 0; j < 4; j++) {
            result.box[j] = {
                static_cast<int>(box.points[j].x),
                static_cast<int>(box.points[j].y)
            };
        }

        result.text = textLines[i].text;
        result.score = textLines[i].score;
        result.cls_label = (i < cls_labels.size()) ? cls_labels[i] : 0;
        result.cls_score = (i < cls_scores.size()) ? cls_scores[i] : 0.0f;

        results.push_back(result);
    }

    return results;
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
Reference in New Issue
Block a user