Refactor project structure
This commit is contained in:
575
modules/ANSOCR/ANSCpuOCR.cpp
Normal file
575
modules/ANSOCR/ANSCpuOCR.cpp
Normal file
@@ -0,0 +1,575 @@
|
||||
#include "ANSCpuOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
#include <include/paddleocr_utility.h>
|
||||
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
bool ANSCPUOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
|
||||
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try
|
||||
{
|
||||
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
||||
if (!result) return false;
|
||||
|
||||
//Override the paddleocrv3 for openvino only
|
||||
switch (_modelConfig.ocrLanguage) {
|
||||
case ANSCENTER::OCRLanguage::ENGLISH: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_en.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "EN_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "EN_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ENV4_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "ENV4_REC.pdiparams");
|
||||
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::CHINESE: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ch.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CHV4_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CHV4_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CHV4_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CHV4_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::FRENCH: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_fr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "FR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "FR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::GERMANY: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_gr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "GR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "GR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::JAPANESE: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_jp.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "JP_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "JP_REC.pdiparams");
|
||||
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::KOREAN: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_kr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "KR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "KR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::CUSTOM: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ct.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CT_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CT_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CT_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CT_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ct.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CT_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CT_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CT_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CT_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
}
|
||||
// For now we do have _modelConfig and _modelFolder
|
||||
if (!FileExist(_modelConfig.detectionModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.clsModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
|
||||
_isInitialized = ppocr->Initialize(_modelConfig.detectionModelFile, _modelConfig.clsModelFile, _modelConfig.recognizerModelFile, _modelConfig.recogizerCharDictionaryPath);
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
catch (...) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
// Handle any other exception that occurs during initialization
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience overload: runs full-frame OCR under the default camera id
/// "OCRCPUCAM". Empty frames or frames smaller than 10x10 px short-circuit
/// to an empty result without entering the OCR pipeline.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input) {
    const bool unusable = input.empty() || input.cols < 10 || input.rows < 10;
    if (unusable) {
        return {};
    }
    return RunInference(input, "OCRCPUCAM");
}
|
||||
/// Runs OCR on the whole frame and tags every result with `cameraId`.
///
/// Thread-safety: serializes on _mutex for the entire call. NOTE(review):
/// _mutex is a non-recursive std::mutex, so this must never be invoked while
/// the calling thread already holds it.
///
/// @param input    BGR or single-channel grayscale frame (>= 10x10 px).
/// @param cameraId identifier copied onto each returned OCRObject.
/// @return detected text regions; empty on any validation failure or error.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::mutex> lock(_mutex);

    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }

    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return {};
    }

    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }

    try {
        // Convert grayscale to BGR if necessary using reusable buffer
        // (_frameBuffer avoids a fresh allocation per frame).
        const cv::Mat* imPtr;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            imPtr = &this->_frameBuffer;
        }
        else {
            imPtr = &input;
        }
        const cv::Mat& im = *imPtr;

        // Run OCR
        std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);

        // Build results
        std::vector<ANSCENTER::OCRObject> OCRObjects;
        OCRObjects.reserve(res_ocr.size());

        const int imgWidth = im.cols;
        const int imgHeight = im.rows;

        for (const auto& ocr_result : res_ocr) {
            // Each result box is expected to be a quadrilateral (4 points).
            if (ocr_result.box.size() != 4) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Extract corner points
            // (box[0] = top-left, box[1] = top-right, box[2] = bottom-right —
            //  assumed from the width/height arithmetic below; TODO confirm.)
            const int x0 = static_cast<int>(ocr_result.box[0][0]);
            const int y0 = static_cast<int>(ocr_result.box[0][1]);
            const int x1 = static_cast<int>(ocr_result.box[1][0]);
            const int y2 = static_cast<int>(ocr_result.box[2][1]);

            // Calculate bounding box (axis-aligned, anchored at the clamped
            // top-left corner).
            const int x = std::max(0, x0);
            const int y = std::max(0, y0);
            int width = x1 - x0;
            int height = y2 - static_cast<int>(ocr_result.box[1][1]);

            // Clamp to image bounds (and force a minimum of 1 px so cv::Rect
            // stays well-formed before the degenerate-box check below).
            width = std::max(1, std::min(imgWidth - x, width));
            height = std::max(1, std::min(imgHeight - y, height));

            // Skip invalid boxes
            if (width <= 1 || height <= 1) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = ocr_result.cls_label;
            ocrObject.confidence = ocr_result.score;
            // The recognized text travels in className.
            ocrObject.className = ocr_result.text;
            ocrObject.extraInfo = "cls label: " + std::to_string(ocr_result.cls_label)
                + "; cls score: " + std::to_string(ocr_result.cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(std::move(ocrObject));
        }

        return OCRObjects;

    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    // Reached only on exception: return an empty result set.
    return {};
}
|
||||
|
||||
/// Runs OCR either on each supplied region of interest (when Bbox is
/// non-empty) or on the whole frame. All returned boxes are expressed in the
/// coordinate space of the original frame and clamped to its bounds.
///
/// BUG FIX: the previous implementation called the public
/// RunInference(croppedObject) for each ROI while already holding _mutex.
/// _mutex is a non-recursive std::mutex, so that recursive lock deadlocked
/// the calling thread. The OCR pipeline is now invoked directly on each ROI.
///
/// @param input frame (BGR or grayscale), must be at least 10x10 px.
/// @param Bbox  optional regions of interest; regions smaller than 5x5 px
///              after clamping are skipped.
/// @return detected text regions; empty on validation failure or error.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::mutex> lock(_mutex);

    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }

    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }

    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }

    try {
        // Convert grayscale to BGR if necessary using the reusable buffer.
        const cv::Mat* framePtr;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            framePtr = &this->_frameBuffer;
        }
        else {
            framePtr = &input; // No clone needed - we only read from it
        }
        const cv::Mat& frame = *framePtr;

        const int fWidth = frame.cols;
        const int fHeight = frame.rows;

        std::vector<ANSCENTER::OCRObject> OCRObjects;

        // Runs the OCR pipeline on `region` (whose top-left corner sits at
        // (offX, offY) in the original frame), translates every detection back
        // into frame coordinates, clamps it, and appends it to OCRObjects.
        // `cameraTag` (when non-null) is copied onto each result.
        const auto appendResults = [&](const cv::Mat& region, int offX, int offY, const char* cameraTag) {
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(region);
            OCRObjects.reserve(OCRObjects.size() + res_ocr.size());

            for (const auto& ocr_result : res_ocr) {
                if (ocr_result.box.size() < 4) {
                    continue;
                }

                // Translate the quadrilateral's corners into frame space.
                int x = static_cast<int>(ocr_result.box[0][0]) + offX;
                int y = static_cast<int>(ocr_result.box[0][1]) + offY;
                int width = static_cast<int>(ocr_result.box[1][0]) - static_cast<int>(ocr_result.box[0][0]);
                int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);

                // Clamp to frame bounds and drop degenerate boxes.
                x = std::max(0, x);
                y = std::max(0, y);
                width = std::min(fWidth - x, width);
                height = std::min(fHeight - y, height);
                if (width <= 0 || height <= 0) {
                    continue;
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box = cv::Rect(x, y, width, height);
                ocrObject.classId = ocr_result.cls_label;
                ocrObject.confidence = ocr_result.score;
                ocrObject.className = ocr_result.text;
                ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                    ";cls score:" + std::to_string(ocr_result.cls_score);
                if (cameraTag) {
                    ocrObject.cameraId = cameraTag;
                }

                OCRObjects.push_back(std::move(ocrObject));
            }
        };

        if (!Bbox.empty()) {
            // Process each bounding box region independently.
            for (const auto& bbox : Bbox) {
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);

                // Too small to contain legible text.
                if (width < 5 || height < 5) {
                    continue;
                }

                // ROI view (no copy). "OCRCPUCAM" matches the camera id the
                // old per-crop RunInference() call used to stamp on results.
                appendResults(frame(cv::Rect(x1, y1, width, height)), x1, y1, "OCRCPUCAM");
            }
        }
        else {
            // No bounding boxes - run OCR on the full frame.
            appendResults(frame, 0, 0, nullptr);
        }

        return OCRObjects;

    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        // Previously unhandled: a non-std exception would have escaped here.
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
}
|
||||
|
||||
|
||||
/// Runs OCR on each supplied ROI (or the full frame when Bbox is empty) and
/// tags every result with `cameraId`. Boxes are returned in the coordinate
/// space of the original frame, clamped to its bounds.
///
/// BUG FIX: the previous implementation called the public
/// RunInference(croppedObject) for each ROI while already holding _mutex.
/// _mutex is a non-recursive std::mutex, so that recursive lock deadlocked
/// the calling thread. The OCR pipeline is now invoked directly on each ROI.
///
/// @param input    frame (BGR or grayscale), must be at least 10x10 px.
/// @param Bbox     optional regions of interest; regions smaller than 5x5 px
///                 after clamping are skipped.
/// @param cameraId identifier copied onto each returned OCRObject.
/// @return detected text regions; empty on validation failure or error.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input,
    const std::vector<cv::Rect>& Bbox,
    const std::string& cameraId)
{
    std::lock_guard<std::mutex> lock(_mutex);

    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }

    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }

    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }

    try {
        // Convert grayscale to BGR if necessary using the reusable buffer.
        const cv::Mat* framePtr;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            framePtr = &this->_frameBuffer;
        }
        else {
            framePtr = &input; // No clone needed - we only read from it
        }
        const cv::Mat& frame = *framePtr;

        const int fWidth = frame.cols;
        const int fHeight = frame.rows;

        std::vector<ANSCENTER::OCRObject> OCRObjects;

        // Runs the OCR pipeline on `region` (top-left corner at (offX, offY)
        // in the original frame), translates every detection back into frame
        // coordinates, clamps it, stamps cameraId, and appends it.
        const auto appendResults = [&](const cv::Mat& region, int offX, int offY) {
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(region);
            OCRObjects.reserve(OCRObjects.size() + res_ocr.size());

            for (const auto& ocr_result : res_ocr) {
                if (ocr_result.box.size() < 4) {
                    continue;
                }

                // Translate the quadrilateral's corners into frame space.
                int x = static_cast<int>(ocr_result.box[0][0]) + offX;
                int y = static_cast<int>(ocr_result.box[0][1]) + offY;
                int width = static_cast<int>(ocr_result.box[1][0]) - static_cast<int>(ocr_result.box[0][0]);
                int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);

                // Clamp to frame bounds and drop degenerate boxes.
                x = std::max(0, x);
                y = std::max(0, y);
                width = std::min(fWidth - x, width);
                height = std::min(fHeight - y, height);
                if (width <= 0 || height <= 0) {
                    continue;
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box = cv::Rect(x, y, width, height);
                ocrObject.classId = ocr_result.cls_label;
                ocrObject.confidence = ocr_result.score;
                ocrObject.className = ocr_result.text;
                ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                    ";cls score:" + std::to_string(ocr_result.cls_score);
                ocrObject.cameraId = cameraId;

                OCRObjects.push_back(std::move(ocrObject));
            }
        };

        if (!Bbox.empty()) {
            // Process each bounding box region independently.
            for (const auto& bbox : Bbox) {
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);

                // Too small to contain legible text.
                if (width < 5 || height < 5) {
                    continue;
                }

                // ROI view (no copy).
                appendResults(frame(cv::Rect(x1, y1, width, height)), x1, y1);
            }
        }
        else {
            // No bounding boxes - run OCR on the full frame.
            appendResults(frame, 0, 0);
        }

        return OCRObjects;

    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        // Previously unhandled: a non-std exception would have escaped here.
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
}
|
||||
/// Destructor: releases the PPOCR pipeline via Destroy(), swallowing (but
/// logging) any exception so the destructor never throws.
///
/// BUG FIX: the previous code explicitly invoked
/// `this->ANSOCRBase::~ANSOCRBase();`. The base-class destructor runs
/// automatically once this body finishes, so the explicit call destroyed the
/// base subobject twice — undefined behavior. The call has been removed.
ANSCPUOCR::~ANSCPUOCR() {
    try {
        // Result intentionally discarded: there is nothing useful to do with a
        // failure inside a destructor.
        static_cast<void>(Destroy());
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::~ANSCPUOCR()", e.what(), __FILE__, __LINE__);
    }
}
|
||||
bool ANSCPUOCR::Destroy() {
|
||||
try {
|
||||
if (ppocr) ppocr.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
32
modules/ANSOCR/ANSCpuOCR.h
Normal file
32
modules/ANSOCR/ANSCpuOCR.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef ANSPaddleOCR_H
|
||||
#define ANSPaddleOCR_H
|
||||
#pragma once
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "LabVIEWHeader/extcode.h"
|
||||
#include "ANSLicense.h"
|
||||
#include "ANSOCRBase.h"
|
||||
#include <include/paddleocr.h>
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
// CPU implementation of the OCR engine, built on PaddleOCR's PPOCR pipeline
// (no GPU / FastDeploy dependency). Each RunInference overload serializes on
// _mutex, so concurrent callers are safe but run one at a time.
class ANSOCR_API ANSCPUOCR :public ANSOCRBase {
public:
    // Validates the licence, extracts the model archive, wires the
    // language-specific model paths, and initializes the PPOCR pipeline.
    [[nodiscard]] virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) override;
    // Full-frame OCR with the default camera id ("OCRCPUCAM" in the .cpp).
    [[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input);
    // Full-frame OCR; every result is tagged with cameraId.
    [[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string &cameraId);
    // OCR restricted to the given regions (full frame when Bbox is empty);
    // result boxes are expressed in the original frame's coordinates.
    [[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox);
    // Region-restricted OCR with an explicit camera id on each result.
    [[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect> &Bbox, const std::string& cameraId);
    ~ANSCPUOCR();
    // Releases the PPOCR pipeline; returns false if teardown threw.
    [[nodiscard]] bool Destroy();
private:
    // Owning pointer to the PaddleOCR pipeline; reset by Destroy().
    std::unique_ptr<PaddleOCR::PPOCR> ppocr = std::make_unique<PaddleOCR::PPOCR>();
    // Non-recursive: RunInference overloads must not call each other while
    // holding it.
    std::mutex _mutex;
    cv::Mat _frameBuffer; // Reusable buffer for color conversion

};
|
||||
}
|
||||
#endif
|
||||
23
modules/ANSOCR/ANSGpuFrameRegistry.cpp
Normal file
23
modules/ANSOCR/ANSGpuFrameRegistry.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
// ANSGpuFrameRegistry.cpp — Cross-DLL singleton resolver for ANSOCR.dll.
|
||||
//
|
||||
// Finds the canonical ANSGpuFrameRegistry instance exported by ANSCV.dll
|
||||
// via GetProcAddress. No link dependency on ANSCV.lib needed.
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#include "ANSGpuFrameRegistry.h"
|
||||
|
||||
// Resolves the process-wide ANSGpuFrameRegistry singleton.
//
// The canonical instance lives in ANSCV.dll and is reached via
// GetProcAddress, so this module needs no link-time dependency on ANSCV.lib.
// Returns a non-owning pointer; never null.
ANSGpuFrameRegistry* ANSGpuFrameRegistry::resolveProcessWide() {
    // ANSCV.dll is always loaded before inference starts (it provides RTSP).
    // GetModuleHandleA does NOT bump the module's refcount — assumes ANSCV.dll
    // stays loaded for the lifetime of any returned pointer; TODO confirm.
    HMODULE hMod = GetModuleHandleA("ANSCV.dll");
    if (hMod) {
        typedef ANSGpuFrameRegistry* (*GetInstanceFn)();
        auto fn = reinterpret_cast<GetInstanceFn>(
            GetProcAddress(hMod, "ANSGpuFrameRegistry_GetInstance"));
        if (fn) return fn();
    }
    // Fallback: local instance (unit tests without ANSCV.dll).
    // Function-local static: initialized thread-safely on first use.
    static ANSGpuFrameRegistry local;
    return &local;
}
|
||||
388
modules/ANSOCR/ANSOCR.cpp
Normal file
388
modules/ANSOCR/ANSOCR.cpp
Normal file
@@ -0,0 +1,388 @@
|
||||
#include "ANSOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
namespace ANSCENTER {
|
||||
bool ANSOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
|
||||
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try
|
||||
{
|
||||
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
||||
if (!result) return false;
|
||||
auto option = fastdeploy::RuntimeOption();
|
||||
// Add default values to modelConfig if required.
|
||||
_modelConfig.precisionType = "fp32";
|
||||
_modelConfig.gpuMemory = 4000;
|
||||
_modelConfig.limitType = "max";
|
||||
_modelConfig.cpuThreads = 10;
|
||||
_modelConfig.tableModelMaxLengh = 488;
|
||||
_modelConfig.detectionScoreMode = "slow";
|
||||
_modelConfig.ensureASCII = true;
|
||||
|
||||
if (_modelConfig.limitSideLen <= 0) _modelConfig.limitSideLen = 960;
|
||||
if (_modelConfig.detectionDBThreshold <= 0) _modelConfig.detectionDBThreshold = 0.3;
|
||||
if (_modelConfig.detectionBoxThreshold <= 0) _modelConfig.detectionBoxThreshold = 0.6;
|
||||
if (_modelConfig.detectionDBUnclipRatio <= 0) _modelConfig.detectionDBUnclipRatio = 1.5;
|
||||
|
||||
if (_modelConfig.clsThreshold <= 0) _modelConfig.clsThreshold = 0.9;
|
||||
if (_modelConfig.clsBatchNumber <= 0) _modelConfig.clsBatchNumber = 1;
|
||||
|
||||
if (_modelConfig.recognizerBatchNum <= 0) _modelConfig.recognizerBatchNum = 6;
|
||||
if (_modelConfig.recoginzerImageHeight <= 0) _modelConfig.recoginzerImageHeight = 48;
|
||||
if (_modelConfig.recoginzerImageWidth <= 0) _modelConfig.recoginzerImageWidth = 320;
|
||||
|
||||
if (_modelConfig.layoutScoreThreshold <= 0) _modelConfig.layoutScoreThreshold = 0.5;
|
||||
if (_modelConfig.layoutNMSThreshold <= 0) _modelConfig.layoutNMSThreshold = 0.5;
|
||||
if (_modelConfig.tableBatchNum <= 0) _modelConfig.tableBatchNum = 1;
|
||||
if (_modelConfig.cpuThreads <= 0) _modelConfig.cpuThreads = 10;
|
||||
|
||||
// Handle different engine modes
|
||||
|
||||
// Use CPU
|
||||
_modelConfig.userGPU = false;
|
||||
_modelConfig.useTensorRT = false;
|
||||
option.UseCpu();
|
||||
option.UseOpenVINOBackend();
|
||||
|
||||
auto det_option = option;
|
||||
auto cls_option = option;
|
||||
auto rec_option = option;
|
||||
|
||||
|
||||
if (!FileExist(_modelConfig.detectionModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.clsModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
// Create FastDeploy Model Instances
|
||||
try {
|
||||
classifier_ = fastdeploy::vision::ocr::Classifier(_modelConfig.clsModelFile, _modelConfig.clsModelParam, cls_option);
|
||||
detector_ = fastdeploy::vision::ocr::DBDetector(_modelConfig.detectionModelFile, _modelConfig.detectionModelParam, det_option);
|
||||
recognizer_ = fastdeploy::vision::ocr::Recognizer(_modelConfig.recognizerModelFile, _modelConfig.recognizerModelParam, _modelConfig.recogizerCharDictionaryPath, rec_option);
|
||||
detector_.GetPreprocessor().SetMaxSideLen(_modelConfig.limitSideLen);
|
||||
detector_.GetPostprocessor().SetDetDBThresh(_modelConfig.detectionDBThreshold);
|
||||
detector_.GetPostprocessor().SetDetDBBoxThresh(_modelConfig.detectionBoxThreshold);
|
||||
detector_.GetPostprocessor().SetDetDBUnclipRatio(_modelConfig.detectionDBUnclipRatio);
|
||||
detector_.GetPostprocessor().SetDetDBScoreMode(_modelConfig.detectionScoreMode);
|
||||
if (_modelConfig.useDilation) detector_.GetPostprocessor().SetUseDilation(0);
|
||||
else detector_.GetPostprocessor().SetUseDilation(1);
|
||||
classifier_.GetPostprocessor().SetClsThresh(_modelConfig.clsThreshold);
|
||||
if (detector_.Initialized() &&
|
||||
classifier_.Initialized() &&
|
||||
recognizer_.Initialized())
|
||||
{
|
||||
this->ppOCR = std::make_unique<fastdeploy::pipeline::PPOCRv4>(&detector_, &classifier_, &recognizer_);
|
||||
this->ppOCR->SetClsBatchSize(_modelConfig.clsBatchNumber);
|
||||
this->ppOCR->SetRecBatchSize(_modelConfig.recognizerBatchNum);
|
||||
_isInitialized = this->ppOCR->Initialized();
|
||||
return _isInitialized;
|
||||
}
|
||||
else {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (...) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
// Handle any other exception that occurs during initialization
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input) {
    // Convenience overload: validate the frame, then delegate to the
    // camera-id overload using the default id "OCRCam".
    std::vector<ANSCENTER::OCRObject> results;
    const bool tooSmall = (input.cols < 10) || (input.rows < 10);
    if (input.empty() || tooSmall) {
        return results;
    }
    return RunInference(input, "OCRCam");
}
|
||||
|
||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    // Runs the detector -> classifier -> recognizer pipeline on one full frame.
    // Returns one OCRObject per detected text region (tagged with cameraId);
    // returns an empty vector on any validation failure or exception.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        // Frames smaller than 10x10 px are rejected silently (too small for OCR).
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
        auto im = input.clone();
        fastdeploy::vision::OCRResult res_ocr;
        this->ppOCR->Predict(&im, &res_ocr);
        for (size_t n = 0; n < res_ocr.boxes.size(); n++) { // one entry per detection
            // boxes[n] holds the quadrilateral corners: [x0,y0,x1,y1,x2,y2,x3,y3]
            cv::Point rook_points[4];
            rook_points[0] = cv::Point(static_cast<int>(res_ocr.boxes[n][0]), static_cast<int>(res_ocr.boxes[n][1]));
            rook_points[1] = cv::Point(static_cast<int>(res_ocr.boxes[n][2]), static_cast<int>(res_ocr.boxes[n][3]));
            rook_points[2] = cv::Point(static_cast<int>(res_ocr.boxes[n][4]), static_cast<int>(res_ocr.boxes[n][5]));
            // BUGFIX: the fourth corner's y previously read index 6 twice; it is index 7.
            rook_points[3] = cv::Point(static_cast<int>(res_ocr.boxes[n][6]), static_cast<int>(res_ocr.boxes[n][7]));

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box.x = rook_points[0].x;
            ocrObject.box.y = rook_points[0].y;
            ocrObject.box.width = rook_points[1].x - rook_points[0].x;
            ocrObject.box.height = rook_points[2].y - rook_points[1].y;
            ocrObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(ocrObject.box, input.cols, input.rows);
            ocrObject.classId = res_ocr.cls_labels[n];
            ocrObject.confidence = res_ocr.rec_scores[n];
            ocrObject.className = res_ocr.text[n];
            // Angle-classifier label/score travel in extraInfo, not in the box fields.
            std::string extraInformation = "cls label:" +
                std::to_string(res_ocr.cls_labels[n]) +
                ";" +
                "cls score:" + std::to_string(res_ocr.cls_scores[n]);
            ocrObject.extraInfo = extraInformation;
            ocrObject.cameraId = cameraId;
            OCRObjects.push_back(ocrObject);
        }
        im.release();
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
|
||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    // ROI-restricted OCR: when Bbox is non-empty, OCR each fully in-bounds crop
    // and shift results back to full-frame coordinates; when Bbox is empty, OCR
    // the whole frame. Results are tagged with the default camera id "OCRCAM".
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (Bbox.empty()) {
            // Whole-frame path is identical to the cameraId overload with the
            // default id; delegate instead of duplicating the pipeline code
            // (_mutex is recursive, so the nested lock is safe).
            return RunInference(input, "OCRCAM");
        }
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
        cv::Mat frame = input.clone();
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;
        // BUGFIX: Bbox is a const reference — the previous non-const
        // std::vector<cv::Rect>::iterator over it was ill-formed.
        for (const cv::Rect& roi : Bbox) {
            const int x1 = roi.x;
            const int y1 = roi.y;
            const int x2 = roi.x + roi.width;
            const int y2 = roi.y + roi.height;
            // Skip regions that are not fully inside the frame.
            if ((x1 >= 0) && (y1 >= 0) && (x2 <= fWidth) && (y2 <= fHeight)) {
                cv::Rect objectPos(cv::Point(x1, y1), cv::Point(x2, y2));
                cv::Mat croppedObject = frame(objectPos);
                std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
                for (size_t i = 0; i < OCRTempObjects.size(); i++) {
                    ANSCENTER::OCRObject detectionObject = OCRTempObjects[i];
                    // Shift the crop-local box back to full-frame coordinates
                    // (the crop's origin is (0,0)).
                    detectionObject.box.x += x1;
                    detectionObject.box.y += y1;
                    detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                    detectionObject.cameraId = "OCRCAM";
                    OCRObjects.push_back(detectionObject);
                }
            }
        }
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
|
||||
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
    // ROI-restricted OCR tagged with the caller's cameraId. Non-empty Bbox:
    // OCR each fully in-bounds crop and map results back to full-frame
    // coordinates; empty Bbox: OCR the whole frame.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (Bbox.empty()) {
            // Whole-frame path matches the (input, cameraId) overload; delegate
            // instead of duplicating the pipeline (_mutex is recursive).
            return RunInference(input, cameraId);
        }
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
        cv::Mat frame = input.clone();
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;
        // BUGFIX: Bbox is a const reference — the previous non-const
        // std::vector<cv::Rect>::iterator over it was ill-formed.
        for (const cv::Rect& roi : Bbox) {
            const int x1 = roi.x;
            const int y1 = roi.y;
            const int x2 = roi.x + roi.width;
            const int y2 = roi.y + roi.height;
            // Skip regions that are not fully inside the frame.
            if ((x1 >= 0) && (y1 >= 0) && (x2 <= fWidth) && (y2 <= fHeight)) {
                cv::Rect objectPos(cv::Point(x1, y1), cv::Point(x2, y2));
                cv::Mat croppedObject = frame(objectPos);
                std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
                for (size_t i = 0; i < OCRTempObjects.size(); i++) {
                    ANSCENTER::OCRObject detectionObject = OCRTempObjects[i];
                    // Shift the crop-local box back to full-frame coordinates.
                    detectionObject.box.x += x1;
                    detectionObject.box.y += y1;
                    // CONSISTENCY FIX: recompute the normalized polygon against
                    // the full frame, as the Bbox overload without cameraId does;
                    // this overload previously left the crop-local polygon in place.
                    detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                    detectionObject.cameraId = cameraId;
                    OCRObjects.push_back(detectionObject);
                }
            }
        }
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
ANSOCR::~ANSOCR() {
    // Best-effort teardown; destructors must not throw.
    try {
        Destroy();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSOCR::~ANSOCR()", e.what(), __FILE__, __LINE__);
    }
    // BUGFIX: removed the explicit `this->ANSOCRBase::~ANSOCRBase();` call.
    // The base-class destructor runs automatically after this body; calling it
    // explicitly made it run twice, which is undefined behavior.
}
|
||||
bool ANSOCR::Destroy() {
|
||||
try {
|
||||
classifier_.ReleaseReusedBuffer();
|
||||
detector_.ReleaseReusedBuffer();
|
||||
recognizer_.ReleaseReusedBuffer();
|
||||
if(ppOCR)this->ppOCR.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
20
modules/ANSOCR/ANSOCR.h
Normal file
20
modules/ANSOCR/ANSOCR.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef ANSOCR_H
#define ANSOCR_H
#pragma once
#include "ANSOCRBase.h"
namespace ANSCENTER {
	// Concrete OCR engine implementing the abstract ANSOCRBase interface
	// (detector + angle classifier + recognizer pipeline).
	class ANSOCR_API ANSOCR :public ANSOCRBase {
	public:
		// Verifies the license, unpacks the model archive, and builds the
		// OCR pipeline. Returns false on any failure.
		[[nodiscard]] virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) override;
		// Full-frame OCR using a default camera id.
		[[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input);
		// Full-frame OCR; each result is tagged with cameraId.
		[[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string& cameraId);
		// OCR restricted to the given regions; falls back to the full frame
		// when Bbox is empty.
		[[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox);
		// ROI-restricted OCR; each result is tagged with cameraId.
		[[nodiscard]] std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId);

		~ANSOCR();
		// Releases model buffers and the pipeline; true on success.
		[[nodiscard]] bool Destroy();
	private:
		// Serializes all RunInference overloads (recursive because the
		// overloads delegate to one another).
		std::recursive_mutex _mutex;
	};
}
#endif
|
||||
299
modules/ANSOCR/ANSOCRBase.cpp
Normal file
299
modules/ANSOCR/ANSOCRBase.cpp
Normal file
@@ -0,0 +1,299 @@
|
||||
#include "ANSOCRBase.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
#include <json.hpp>
|
||||
#include "ANSLibsLoader.h"
|
||||
|
||||
static bool ansocrLicenceValid = false;
|
||||
// Global once_flag to protect license checking
|
||||
static std::once_flag ansocrLicenseOnceFlag;
|
||||
template <typename T>
|
||||
T GetData(const boost::property_tree::ptree& pt, const std::string& key)
|
||||
{
|
||||
T ret;
|
||||
if (boost::optional<T> data = pt.get_optional<T>(key))
|
||||
{
|
||||
ret = data.get();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
namespace ANSCENTER {
|
||||
/// <summary>
|
||||
/// Base class
|
||||
/// </summary>
|
||||
///
|
||||
///
|
||||
static void VerifyGlobalANSOCRLicense(const std::string& licenseKey) {
|
||||
try {
|
||||
ansocrLicenceValid = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1005, "ANSOCR");//Default productId=1006
|
||||
if (!ansocrLicenceValid) { // we also support ANSTS license
|
||||
ansocrLicenceValid = ANSCENTER::ANSLicenseHelper::LicenseVerification(licenseKey, 1003, "ANSVIS");//Default productId=1003 (ANSVIS)
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
ansocrLicenceValid = false;
|
||||
}
|
||||
}
|
||||
void ANSOCRBase::CheckLicense() {
|
||||
try {
|
||||
// Check once globally
|
||||
std::call_once(ansocrLicenseOnceFlag, [this]() {
|
||||
VerifyGlobalANSOCRLicense(_licenseKey);
|
||||
});
|
||||
|
||||
// Update this instance's local license flag
|
||||
_licenseValid = ansocrLicenceValid;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
this->_logger.LogFatal("ANSOCRBase::CheckLicense. Error:", e.what(), __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
bool ANSOCRBase::Init(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try {
|
||||
ANSCENTER::ANSLibsLoader::Initialize();
|
||||
|
||||
_licenseKey = licenseKey;
|
||||
_engineMode = engineMode;
|
||||
_licenseValid = false;
|
||||
_modelFolder = "";
|
||||
_modelConfigFile = "";
|
||||
_modelConfig = modelConfig;
|
||||
_modelFolder.clear();
|
||||
_modelConfigFile.clear();
|
||||
|
||||
CheckLicense();
|
||||
if (!_licenseValid) {
|
||||
this->_logger.LogError("ANSOCRBase::Initialize", "Invalid License", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
_licenseValid = true;
|
||||
|
||||
|
||||
// 0. Check if the modelZipFilePath exist?
|
||||
if (!FileExist(modelZipFilePath)) {
|
||||
this->_logger.LogFatal("ANSOCRBase::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
// 1. Unzip model zip file to a special location with folder name as model file (and version)
|
||||
std::string outputFolder;
|
||||
std::vector<std::string> passwordArray;
|
||||
if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
|
||||
passwordArray.push_back("AnsDemoModels20@!");
|
||||
passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
|
||||
passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
|
||||
std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
|
||||
//this->_logger.LogDebug("ANSOCRBase::Initialize. Model name", modelName, __FILE__, __LINE__);
|
||||
size_t vectorSize = passwordArray.size();
|
||||
for (size_t i = 0; i < vectorSize; i++) {
|
||||
if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
|
||||
break; // Break the loop when the condition is met.
|
||||
}
|
||||
// 2. Check if the outputFolder exist
|
||||
if (!std::filesystem::exists(_modelFolder)) {
|
||||
this->_logger.LogError("ANSOCRBase::Initialize. Output model folder is not exist", modelName, __FILE__, __LINE__);
|
||||
return false; // That means the model file is not exist or the password is not correct
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSOCRBase::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool ANSOCRBase::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try {
|
||||
|
||||
_licenseKey = licenseKey;
|
||||
_engineMode = engineMode;
|
||||
_licenseValid = false;
|
||||
_modelFolder = "";
|
||||
_modelConfigFile = "";
|
||||
_modelConfig = modelConfig;
|
||||
_modelFolder.clear();
|
||||
_modelConfigFile.clear();
|
||||
|
||||
CheckLicense();
|
||||
if (!_licenseValid) {
|
||||
this->_logger.LogError("ANSOCRBase::Initialize", "Invalid License", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
_licenseValid = true;
|
||||
|
||||
|
||||
// 0. Check if the modelZipFilePath exist?
|
||||
if (!FileExist(modelZipFilePath)) {
|
||||
this->_logger.LogFatal("ANSOCRBase::Initialize", "Model zip file is not exist", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
// 1. Unzip model zip file to a special location with folder name as model file (and version)
|
||||
std::string outputFolder;
|
||||
std::vector<std::string> passwordArray;
|
||||
if (!modelZipPassword.empty()) passwordArray.push_back(modelZipPassword);
|
||||
passwordArray.push_back("AnsDemoModels20@!");
|
||||
passwordArray.push_back("Sh7O7nUe7vJ/417W0gWX+dSdfcP9hUqtf/fEqJGqxYL3PedvHubJag==");
|
||||
passwordArray.push_back("3LHxGrjQ7kKDJBD9MX86H96mtKLJaZcTYXrYRdQgW8BKGt7enZHYMg==");
|
||||
std::string modelName = GetFileNameWithoutExtension(modelZipFilePath);
|
||||
//this->_logger.LogDebug("ANSOCRBase::Initialize. Model name", modelName, __FILE__, __LINE__);
|
||||
size_t vectorSize = passwordArray.size();
|
||||
for (size_t i = 0; i < vectorSize; i++) {
|
||||
if (ExtractPasswordProtectedZip(modelZipFilePath, passwordArray[i], modelName, _modelFolder, false))
|
||||
break; // Break the loop when the condition is met.
|
||||
}
|
||||
// 2. Check if the outputFolder exist
|
||||
if (!std::filesystem::exists(_modelFolder)) {
|
||||
this->_logger.LogError("ANSOCRBase::Initialize. Output model folder is not exist", modelName, __FILE__, __LINE__);
|
||||
return false; // That means the model file is not exist or the password is not correct
|
||||
}
|
||||
// 3. Check if the model has the configuration file
|
||||
std::string modelConfigName = "model_config.json";
|
||||
_modelConfigFile = CreateFilePath(_modelFolder, modelConfigName);
|
||||
|
||||
//4. For now we do have the model folder so we will assign paths to OCR models
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ch.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "ansocrdec.onnx");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "ansocrdec.onnx");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "ansocrcls.onnx");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "ansocrcls.onnx");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ansocrrec.onnx");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "ansocrrec.onnx");
|
||||
// For now we do have _modelConfig and _modelFolder
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSOCRBase::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::string ANSCENTER::ANSOCRUtility::OCRDetectionToJsonString(const std::vector<OCRObject>& dets)
|
||||
{
|
||||
if (dets.empty()) {
|
||||
return R"({"results":[]})";
|
||||
}
|
||||
|
||||
try {
|
||||
nlohmann::json root;
|
||||
auto& results = root["results"] = nlohmann::json::array();
|
||||
|
||||
for (const auto& det : dets) {
|
||||
results.push_back({
|
||||
{"class_id", std::to_string(det.classId)},
|
||||
{"track_id", std::to_string(det.trackId)},
|
||||
{"class_name", det.className},
|
||||
{"prob", std::to_string(det.confidence)},
|
||||
{"x", std::to_string(det.box.x)},
|
||||
{"y", std::to_string(det.box.y)},
|
||||
{"width", std::to_string(det.box.width)},
|
||||
{"height", std::to_string(det.box.height)},
|
||||
{"mask", ""}, // TODO: convert masks to comma separated string
|
||||
{"extra_info", det.extraInfo},
|
||||
{"camera_id", det.cameraId},
|
||||
{"polygon", PolygonToString(det.polygon)},
|
||||
{"kps", KeypointsToString(det.kps)}
|
||||
});
|
||||
}
|
||||
|
||||
return root.dump();
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
// Add your error logging here if needed
|
||||
return R"({"results":[],"error":"Serialization failed"})";
|
||||
}
|
||||
}
|
||||
|
||||
// Parses a JSON string of the form {"results":[{"x":..,"y":..,"width":..,
// "height":..}, ...]} into pixel-space rectangles. Missing fields read as 0
// (see GetData). NOTE(review): read_json throws on malformed input — callers
// are expected to handle that; confirm against call sites.
std::vector<cv::Rect> ANSCENTER::ANSOCRUtility::GetBoundingBoxes(const std::string& strBBoxes) {
	std::vector<cv::Rect> bBoxes;
	std::stringstream ss;
	ss << strBBoxes;
	boost::property_tree::ptree pt;
	boost::property_tree::read_json(ss, pt);
	BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
	{
		const boost::property_tree::ptree& result = child.second;
		cv::Rect rectTemp;
		// Explicit truncation: the JSON carries floats, cv::Rect stores ints
		// (the previous implicit conversions truncated identically but warned).
		rectTemp.x = static_cast<int>(GetData<float>(result, "x"));
		rectTemp.y = static_cast<int>(GetData<float>(result, "y"));
		rectTemp.width = static_cast<int>(GetData<float>(result, "width"));
		rectTemp.height = static_cast<int>(GetData<float>(result, "height"));
		bBoxes.push_back(rectTemp);
	}
	return bBoxes;
}
|
||||
|
||||
std::string ANSCENTER::ANSOCRUtility::PolygonToString(const std::vector<cv::Point2f>& polygon) {
	// Serializes points as "x1;y1;x2;y2;..." with three decimals per value;
	// empty input yields an empty string.
	std::string result;
	if (polygon.empty()) {
		return result;
	}
	result.reserve(polygon.size() * 20);

	char buffer[64];
	const char* fmt = "%.3f;%.3f"; // first point has no leading separator
	for (const cv::Point2f& pt : polygon) {
		snprintf(buffer, sizeof(buffer), fmt, pt.x, pt.y);
		result += buffer;
		fmt = ";%.3f;%.3f"; // every subsequent point is prefixed with ';'
	}
	return result;
}
|
||||
std::string ANSCENTER::ANSOCRUtility::KeypointsToString(const std::vector<float>& kps) {
	// Serializes keypoints as "v1;v2;..." with three decimals each; empty
	// input yields an empty string.
	if (kps.empty()) {
		return "";
	}

	std::string out;
	out.reserve(kps.size() * 10);

	char scratch[32];
	bool first = true;
	for (float value : kps) {
		if (!first) out += ';';
		first = false;
		snprintf(scratch, sizeof(scratch), "%.3f", value);
		out += scratch;
	}
	return out;
}
|
||||
std::vector<cv::Point2f> ANSCENTER::ANSOCRUtility::RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight) {
	// Converts a pixel-space rect into its four corners normalized by the
	// image size (top-left, top-right, bottom-right, bottom-left order).
	if (imageWidth <= 0 || imageHeight <= 0) {
		return {}; // guard against division by zero
	}

	const float left = rect.x / imageWidth;
	const float top = rect.y / imageHeight;
	const float right = (rect.x + rect.width) / imageWidth;
	const float bottom = (rect.y + rect.height) / imageHeight;

	return {
		{ left, top },
		{ right, top },
		{ right, bottom },
		{ left, bottom }
	};
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
169
modules/ANSOCR/ANSOCRBase.h
Normal file
169
modules/ANSOCR/ANSOCRBase.h
Normal file
@@ -0,0 +1,169 @@
|
||||
#ifndef ANSOCRBASE_H
|
||||
#define ANSOCRBASE_H
|
||||
#define ANSOCR_API __declspec(dllexport)
|
||||
#pragma once
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "LabVIEWHeader/extcode.h"
|
||||
#include "ANSLicense.h"
|
||||
namespace ANSCENTER {
|
||||
	// All knobs for the OCR pipeline: backend selection, model file locations,
	// and detector/classifier/recognizer thresholds and batch sizes.
	struct OCRModelConfig {
		// --- execution backend ---
		bool userGPU = true; // [sic] presumably "useGPU" — name kept for source compatibility
		bool useTensorRT = false;
		int gpuId = 0;
		int gpuMemory = 4000; // presumably MB — TODO confirm against backend option
		int cpuThreads = 10;
		bool enableMKLDNN = false;
		bool ensureASCII = true;
		OCRLanguage ocrLanguage;
		std::string precisionType;
		std::string ocrType;
		std::string limitType;

		// --- pipeline stage selection ---
		bool useDetector = true;
		bool useRecognizer = true;
		bool useCLS = true;    // angle classifier
		bool useTable = false;
		bool useLayout = false;

		// --- text detector model files ---
		std::string detectionModelDir;
		std::string detectionModelFile;
		std::string detectionModelParam;
		std::string detectionScoreMode; // passed to SetDetDBScoreMode

		// --- text recognizer model files ---
		std::string recognizerModelDir;
		std::string recognizerModelFile;
		std::string recognizerModelParam;
		std::string recogizerCharDictionaryPath; // [sic] "recognizer"

		// --- angle classifier model files ---
		std::string clsModelDir;
		std::string clsModelFile;
		std::string clsModelParam;

		// --- layout / table analysis (only relevant with useLayout/useTable) ---
		std::string layoutModelDir;
		std::string layourDictionaryPath; // [sic] "layout"
		std::string tableModelDir;
		std::string tableCharDictionaryPath;

		// --- thresholds and batching ---
		int limitSideLen = 960;             // detector max side length (SetMaxSideLen)
		double detectionDBThreshold = 0.3;  // passed to SetDetDBThresh
		double detectionBoxThreshold = 0.6; // passed to SetDetDBBoxThresh
		double detectionDBUnclipRatio = 1.5; // passed to SetDetDBUnclipRatio
		bool useDilation = false;
		bool useAngleCLS = false;
		double clsThreshold = 0.9;          // passed to SetClsThresh
		int clsBatchNumber = 1;             // passed to SetClsBatchSize
		int recognizerBatchNum = 6;         // passed to SetRecBatchSize
		int recoginzerImageHeight = 48;     // [sic] "recognizer"
		int recoginzerImageWidth = 320;     // [sic] "recognizer"
		double layoutScoreThreshold = 0.5;
		double layoutNMSThreshold = 0.5;
		int tableModelMaxLengh = 488;       // [sic] "MaxLength"
		int tableBatchNum = 1;
		bool mergeNoSpanStructure = true;
	};
|
||||
	// One recognized text region within a frame, as produced by RunInference.
	struct OCRObject
	{
		int classId{ 0 };        // angle-classifier label (from cls_labels)
		int trackId{ 0 };        // tracker id; 0 when tracking is not used
		std::string className{}; // recognized text (from the recognizer output)
		float confidence{ 0.0 }; // recognizer score (from rec_scores)
		cv::Rect box{};          // axis-aligned box in full-frame pixel coordinates
		std::vector<cv::Point2f> polygon; // polygon that contain x1 ,y1,x2,y2,x3,y3,x4,y4 (normalized to [0,1])
		std::vector<float> kps{}; // Containing keypoints
		cv::Mat mask{}; // jpeg string of the mask
		std::string extraInfo; // More information such as facial recognition (OCR fills in "cls label:..;cls score:..")
		std::string cameraId;  // id of the camera/frame the detection came from
		//std::string attributes; // Attributes information in JSON string
	};
|
||||
class ANSOCR_API ANSOCRBase {
|
||||
protected:
|
||||
bool _licenseValid{ false };
|
||||
bool _isInitialized{ false };
|
||||
std::string _licenseKey;
|
||||
std::string _modelFolder;
|
||||
std::string _modelConfigFile;
|
||||
OCRModelConfig _modelConfig;
|
||||
int _engineMode; //0: Auto detect, 1 GPU, 2 CPU
|
||||
SPDLogger& _logger = SPDLogger::GetInstance("OCR", false);
|
||||
void CheckLicense();
|
||||
[[nodiscard]] bool Init(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode);
|
||||
public:
|
||||
[[nodiscard]] virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode);
|
||||
[[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input) = 0;
|
||||
[[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string& cameraId) = 0;
|
||||
[[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) = 0;
|
||||
[[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string &cameraId) = 0;
|
||||
|
||||
~ANSOCRBase() {
|
||||
try {
|
||||
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
std::cout << "ANSOCRBase::Destroy()" << e.what();
|
||||
}
|
||||
|
||||
};
|
||||
[[nodiscard]] virtual bool Destroy() = 0;
|
||||
};
|
||||
	// Stateless helpers for serializing OCR results and parsing ROI JSON.
	class ANSOCRUtility
	{
	public:
		// Detections -> JSON string {"results":[...]} (all values as strings).
		[[nodiscard]] static std::string OCRDetectionToJsonString(const std::vector<OCRObject>& dets);
		// JSON string {"results":[{x,y,width,height},...]} -> pixel rects.
		[[nodiscard]] static std::vector<cv::Rect> GetBoundingBoxes(const std::string& strBBoxes);
		// Points -> "x1;y1;x2;y2;..." with three decimals per value.
		[[nodiscard]] static std::string PolygonToString(const std::vector<cv::Point2f>& polygon);
		// Pixel rect -> four corners normalized to [0,1]; empty on invalid size.
		[[nodiscard]] static std::vector<cv::Point2f> RectToNormalizedPolygon(const cv::Rect& rect, float imageWidth, float imageHeight);
		// Keypoints -> "v1;v2;..." with three decimals per value.
		[[nodiscard]] static std::string KeypointsToString(const std::vector<float>& kps);
	private:
	};
|
||||
}
|
||||
|
||||
// Original signature — backward compatible with third-party apps built against older DLL
|
||||
// ---------------------------------------------------------------------------
// Exported C API.
// NOTE(review): several exports return std::string by value across an
// extern "C" boundary; that only works when the caller links against the
// same C++ runtime as this DLL — confirm for all consumers.
// Default arguments below apply to C++ callers only; C callers must pass
// every argument explicitly.
// ---------------------------------------------------------------------------
// Legacy creator (no limitSideLen control).
extern "C" ANSOCR_API int CreateANSOCRHandle(ANSCENTER::ANSOCRBase** Handle, const char* licenseKey, const char* modelFilePath,
    const char* modelFileZipPassword, int language, int engineMode, int gpuId = 0,
    double detectorDBThreshold = 0.3, double detectorDBBoxThreshold = 0.6, double detectorDBUnclipRatio = 1.5,
    double classifierThreshold = 0.9, int useDilation = 0);
// Extended version with limitSideLen parameter — new callers should use this
extern "C" ANSOCR_API int CreateANSOCRHandleEx(ANSCENTER::ANSOCRBase** Handle, const char* licenseKey, const char* modelFilePath,
    const char* modelFileZipPassword, int language, int engineMode, int gpuId = 0,
    double detectorDBThreshold = 0.3, double detectorDBBoxThreshold = 0.6, double detectorDBUnclipRatio = 1.5,
    double classifierThreshold = 0.9, int useDilation = 0, int limitSideLen = 960);
// Run OCR on an in-memory JPEG buffer; result returned as a string.
extern "C" ANSOCR_API std::string RunInference(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength);
extern "C" ANSOCR_API std::string RunInferenceWithCamID(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* cameraId);
// OpenCV-image entry point; result returned via out-parameter.
extern "C" ANSOCR_API int RunInferenceCV(ANSCENTER::ANSOCRBase** Handle, const cv::Mat &image, std::string &ocrResult);

// NOTE(review): parameter is named jpeg_bytes, but the width/height pair
// suggests a raw decoded pixel buffer — confirm the expected format.
extern "C" ANSOCR_API std::string RunInferenceBinary(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_bytes, unsigned int width, unsigned int height);
extern "C" ANSOCR_API int ReleaseANSOCRHandle(ANSCENTER::ANSOCRBase** Handle);
extern "C" ANSOCR_API std::string RunInferenceImagePath(ANSCENTER::ANSOCRBase** Handle, const char* imageFilePath);
// Recognition restricted to caller-supplied boxes (strBboxes: serialized box list).
extern "C" ANSOCR_API std::string RunInferenceInCroppedImages(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* strBboxes);
extern "C" ANSOCR_API std::string RunInferenceInCroppedImagesWithCamID(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* strBboxes, const char* cameraId);

//// For LabVIEW API
// These mirror the functions above but return results through LabVIEW
// LStrHandle out-parameters instead of std::string.
extern "C" ANSOCR_API int RunInference_LV(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, LStrHandle detectionResult);
extern "C" ANSOCR_API int RunInference_LVWithCamID(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* cameraId,LStrHandle detectionResult);

extern "C" ANSOCR_API int RunInferenceBinary_LV(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_bytes, unsigned int width, unsigned int height, LStrHandle detectionResult);
extern "C" ANSOCR_API int RunInferenceImagePath_LV(ANSCENTER::ANSOCRBase** Handle, const char* imageFilePath, LStrHandle detectionResult);
// Self-contained smoke test: loads a model, runs one image file, reports the result.
extern "C" ANSOCR_API int ANSOCRUnitTest(const char* modelFilePath, const char* imageFilePath, LStrHandle detectionResult);
extern "C" ANSOCR_API int RunInferenceInCroppedImages_LV(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* strBboxes, LStrHandle detectionResult);
extern "C" ANSOCR_API int RunInferenceInCroppedImages_LVWithCamID(ANSCENTER::ANSOCRBase** Handle, unsigned char* jpeg_string, int32 bufferLength, const char* strBboxes, const char* cameraId, LStrHandle detectionResult);
extern "C" ANSOCR_API int RunInferenceComplete_LV(ANSCENTER::ANSOCRBase** Handle, cv::Mat** cvImage, const char* cameraId, int getJpegString, int jpegImageSize, LStrHandle detectionResult, LStrHandle imageStr);
extern "C" ANSOCR_API int RunInferencesComplete_LV(ANSCENTER::ANSOCRBase** Handle, cv::Mat** cvImage, const char* cameraId, int maxImageSize, const char* strBboxes, LStrHandle detectionResult);

// V2 Create / Release — handle as uint64_t by value (no pointer-to-pointer)
extern "C" ANSOCR_API uint64_t CreateANSOCRHandleEx_V2(const char* licenseKey, const char* modelFilePath,
    const char* modelFileZipPassword, int language, int engineMode, int gpuId,
    double detectorDBThreshold, double detectorDBBoxThreshold, double detectorDBUnclipRatio,
    double classifierThreshold, int useDilation, int limitSideLen);
extern "C" ANSOCR_API uint64_t CreateANSOCRHandle_V2(const char* licenseKey, const char* modelFilePath,
    const char* modelFileZipPassword, int language, int engineMode, int gpuId,
    double detectorDBThreshold, double detectorDBBoxThreshold, double detectorDBUnclipRatio,
    double classifierThreshold, int useDilation);
extern "C" ANSOCR_API int ReleaseANSOCRHandle_V2(uint64_t handleVal);

#endif
|
||||
107
modules/ANSOCR/ANSONNXOCR/ONNXOCRClassifier.cpp
Normal file
107
modules/ANSOCR/ANSONNXOCR/ONNXOCRClassifier.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
#include "ONNXOCRClassifier.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Construct the text-line orientation classifier. Model loading and ORT
// session creation are handled by the BasicOrtHandler base.
ONNXOCRClassifier::ONNXOCRClassifier(const std::string& onnx_path, unsigned int num_threads)
    : BasicOrtHandler(onnx_path, num_threads) {
}
|
||||
|
||||
Ort::Value ONNXOCRClassifier::transform(const cv::Mat& mat) {
    // Fixed-size input for PP-LCNet_x1_0_textline_ori: stretch straight to
    // 160x80 with no aspect-ratio preservation, matching PaddleOCR's
    // official ResizeImage behavior.
    cv::Mat canvas;
    cv::resize(mat, canvas, cv::Size(kClsImageW, kClsImageH));
    canvas.convertTo(canvas, CV_32FC3);

    // PP-LCNet uses ImageNet mean/std normalization (same as detection),
    // followed by an HWC->CHW permute.
    const auto blob = NormalizeAndPermute(canvas);
    input_values_handler.assign(blob.begin(), blob.end());

    return Ort::Value::CreateTensor<float>(*memory_info_handler,
                                           input_values_handler.data(),
                                           input_values_handler.size(),
                                           input_node_dims.data(),
                                           input_node_dims.size());
}
|
||||
|
||||
Ort::Value ONNXOCRClassifier::transformBatch(const std::vector<cv::Mat>& images) {
    // Batch path is unused: Classify() loops over single images because the
    // model is exported with dynamic shapes. Delegate to the single-image path.
    if (images.empty()) {
        return Ort::Value(nullptr);
    }
    return transform(images.front());
}
|
||||
|
||||
// Classify text-line orientation (0 = upright, 1 = rotated 180 deg) for each
// crop. Outputs are parallel to img_list: cls_labels[i] / cls_scores[i].
// NOTE(review): cls_thresh is accepted for API parity but is not applied
// here — presumably callers compare cls_scores against it to decide whether
// to rotate; confirm at the call site.
void ONNXOCRClassifier::Classify(std::vector<cv::Mat>& img_list,
                                 std::vector<int>& cls_labels,
                                 std::vector<float>& cls_scores,
                                 float cls_thresh) {
    // The ORT session and staging buffers are shared state; serialize callers.
    std::lock_guard<std::mutex> lock(_mutex);

    cls_labels.clear();
    cls_scores.clear();

    if (!ort_session || img_list.empty()) return;

    // Pre-fill with "upright / zero confidence" so images that fail inference
    // keep safe defaults instead of leaving holes in the output vectors.
    cls_labels.resize(img_list.size(), 0);
    cls_scores.resize(img_list.size(), 0.0f);

    // Process one image at a time (dynamic shapes)
    for (size_t i = 0; i < img_list.size(); i++) {
        if (img_list[i].empty()) continue;

        try {
            // Preprocess: direct resize to 80x160 (PP-LCNet_x1_0_textline_ori)
            // No aspect ratio preservation — matches PaddleOCR official ResizeImage
            cv::Mat resized;
            cv::resize(img_list[i], resized, cv::Size(kClsImageW, kClsImageH));

            resized.convertTo(resized, CV_32FC3);
            // PP-LCNet uses ImageNet normalization (same as detection)
            auto inputData = NormalizeAndPermute(resized);

            // NCHW tensor, batch of one.
            std::array<int64_t, 4> inputShape = { 1, 3, kClsImageH, kClsImageW };
            Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
                *memory_info_handler, inputData.data(), inputData.size(),
                inputShape.data(), inputShape.size());

            auto outputTensors = ort_session->Run(
                Ort::RunOptions{ nullptr },
                input_node_names.data(), &inputTensor, 1,
                output_node_names.data(), num_outputs);

            float* outData = outputTensors[0].GetTensorMutableData<float>();
            auto outShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
            // Class count taken from the output shape; fall back to the
            // expected 2-class case if the model reports an unexpected rank.
            int numClasses = (outShape.size() > 1) ? static_cast<int>(outShape[1]) : 2;

            // Find argmax and use raw output value as score
            // PaddleOCR v5 models include softmax, so output values are probabilities
            // Matches PaddleOCR official: score = preds[i, argmax_idx]
            int maxIdx = 0;
            float maxVal = outData[0];
            for (int c = 1; c < numClasses; c++) {
                if (outData[c] > maxVal) {
                    maxVal = outData[c];
                    maxIdx = c;
                }
            }

            cls_labels[i] = maxIdx;
            cls_scores[i] = maxVal;
        }
        catch (const Ort::Exception& e) {
            // One bad crop must not abort the whole batch; record defaults.
            std::cerr << "[ONNXOCRClassifier] Inference failed for image " << i
                      << ": " << e.what() << std::endl;
            cls_labels[i] = 0;
            cls_scores[i] = 0.0f;
        }
    }
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
32
modules/ANSOCR/ANSONNXOCR/ONNXOCRClassifier.h
Normal file
32
modules/ANSOCR/ANSONNXOCR/ONNXOCRClassifier.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include "ONNXOCRTypes.h"
|
||||
#include "ONNXEngine.h"
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Text-line orientation classifier (PP-LCNet_x1_0_textline_ori, ONNX Runtime).
// Classify() serializes on an internal mutex, so a single instance may be
// shared across threads.
class ONNXOCRClassifier : public BasicOrtHandler {
public:
    // onnx_path: path to the classifier .onnx model.
    // num_threads: ORT intra-op thread count.
    explicit ONNXOCRClassifier(const std::string& onnx_path, unsigned int num_threads = 1);
    ~ONNXOCRClassifier() override = default;

    // Classify text orientation for a list of cropped images
    // Returns vector of (cls_label, cls_score) pairs
    // cls_label: 0 = normal, 1 = rotated 180 degrees
    void Classify(std::vector<cv::Mat>& img_list,
                  std::vector<int>& cls_labels,
                  std::vector<float>& cls_scores,
                  float cls_thresh = kClsThresh);

private:
    // BasicOrtHandler hooks; Classify() builds its own tensors, so these
    // exist mainly to satisfy the interface.
    Ort::Value transform(const cv::Mat& mat) override;
    Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;

    // Guards the shared ORT session/buffers across concurrent Classify() calls.
    std::mutex _mutex;
};
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
312
modules/ANSOCR/ANSONNXOCR/ONNXOCRDetector.cpp
Normal file
312
modules/ANSOCR/ANSONNXOCR/ONNXOCRDetector.cpp
Normal file
@@ -0,0 +1,312 @@
|
||||
#include "ONNXOCRDetector.h"
|
||||
#include "include/clipper.h"
|
||||
#include "ANSGpuFrameRegistry.h"
|
||||
#include "NV12PreprocessHelper.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Construct the DB text detector. Model loading and ORT session creation are
// handled by the BasicOrtHandler base.
ONNXOCRDetector::ONNXOCRDetector(const std::string& onnx_path, unsigned int num_threads)
    : BasicOrtHandler(onnx_path, num_threads) {
}
|
||||
|
||||
Ort::Value ONNXOCRDetector::transform(const cv::Mat& mat) {
    // Interface hook only — Detect() builds its tensors through Preprocess()
    // with dynamic shapes; this exists to satisfy the BasicOrtHandler pure
    // virtual.
    cv::Mat floatImg;
    mat.convertTo(floatImg, CV_32FC3);

    const auto blob = NormalizeAndPermute(floatImg);
    input_values_handler.assign(blob.begin(), blob.end());

    return Ort::Value::CreateTensor<float>(*memory_info_handler,
                                           input_values_handler.data(),
                                           input_values_handler.size(),
                                           input_node_dims.data(),
                                           input_node_dims.size());
}
|
||||
|
||||
Ort::Value ONNXOCRDetector::transformBatch(const std::vector<cv::Mat>& images) {
    // Unused: detection runs per-image with dynamic shapes. Delegate to the
    // single-image path for non-empty input.
    if (images.empty()) {
        return Ort::Value(nullptr);
    }
    return transform(images.front());
}
|
||||
|
||||
// Run DB text detection on srcImage and return boxes in srcImage coordinates.
// If a full-resolution NV12 frame is registered for the current thread, the
// network runs on that frame instead and box coordinates are scaled back to
// srcImage (display) resolution before returning.
std::vector<TextBox> ONNXOCRDetector::Detect(const cv::Mat& srcImage,
                                             int maxSideLen,
                                             float dbThresh,
                                             float boxThresh,
                                             float unclipRatio,
                                             bool useDilation) {
    // ORT session and staging buffers are shared; serialize callers.
    std::lock_guard<std::mutex> lock(_mutex);

    if (!ort_session || srcImage.empty()) {
        return {};
    }

    // Try to get full-resolution image from NV12 frame (same pattern as ANSONNXYOLO)
    cv::Mat inferenceImage = srcImage;
    float bgrScaleX = 1.0f, bgrScaleY = 1.0f;

    GpuFrameData* gpuFrame = tl_currentGpuFrame();
    // pixelFormat 23 is treated as NV12 here — TODO confirm against the
    // frame-registry pixel-format enum.
    if (gpuFrame && gpuFrame->pixelFormat == 23 &&
        gpuFrame->cpuYPlane && gpuFrame->cpuUvPlane &&
        gpuFrame->width > 0 && gpuFrame->height > 0) {
        // Full-res NV12 available — convert to BGR on CPU for ONNX
        cv::Mat yPlane(gpuFrame->height, gpuFrame->width, CV_8UC1,
                       gpuFrame->cpuYPlane, gpuFrame->cpuYLinesize);
        cv::Mat uvPlane(gpuFrame->height / 2, gpuFrame->width, CV_8UC1,
                        gpuFrame->cpuUvPlane, gpuFrame->cpuUvLinesize);
        cv::Mat fullResBGR;
        cv::cvtColorTwoPlane(yPlane, uvPlane, fullResBGR, cv::COLOR_YUV2BGR_NV12);
        if (!fullResBGR.empty()) {
            // Scale factors mapping full-res coords back to display-res.
            bgrScaleX = static_cast<float>(srcImage.cols) / fullResBGR.cols;
            bgrScaleY = static_cast<float>(srcImage.rows) / fullResBGR.rows;
            inferenceImage = fullResBGR;
        }
    }

    int resizeH, resizeW;
    float ratioH, ratioW;

    // Preprocess (using full-res image if NV12 was available)
    auto inputData = Preprocess(inferenceImage, maxSideLen, resizeH, resizeW, ratioH, ratioW);

    // Create input tensor with dynamic shape
    std::array<int64_t, 4> inputShape = { 1, 3, resizeH, resizeW };
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
        *memory_info_handler, inputData.data(), inputData.size(),
        inputShape.data(), inputShape.size());

    // Run inference
    auto outputTensors = ort_session->Run(
        Ort::RunOptions{ nullptr },
        input_node_names.data(), &inputTensor, 1,
        output_node_names.data(), num_outputs);

    // Get output data
    float* outputData = outputTensors[0].GetTensorMutableData<float>();
    auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();

    // DB head emits a probability map; H/W are the last two dims of the output.
    int outH = static_cast<int>(outputShape[2]);
    int outW = static_cast<int>(outputShape[3]);

    // Postprocess — detection coords are relative to inferenceImage (full-res),
    // then scaled back to srcImage (display-res) coordinates
    auto boxes = Postprocess(outputData, outH, outW, ratioH, ratioW,
                             inferenceImage.rows, inferenceImage.cols,
                             dbThresh, boxThresh, unclipRatio, useDilation);

    // Rescale box coordinates from full-res to display-res
    if (bgrScaleX != 1.0f || bgrScaleY != 1.0f) {
        for (auto& box : boxes) {
            for (auto& pt : box.points) {
                pt.x *= bgrScaleX;
                pt.y *= bgrScaleY;
            }
        }
    }

    return boxes;
}
|
||||
|
||||
std::vector<float> ONNXOCRDetector::Preprocess(const cv::Mat& srcImage, int maxSideLen,
                                               int& resizeH, int& resizeW,
                                               float& ratioH, float& ratioW) {
    // Target size is computed by ComputeDetResizeShape (multiples of 32,
    // scaled so the max side respects maxSideLen).
    const cv::Size target = ComputeDetResizeShape(srcImage.rows, srcImage.cols, maxSideLen);
    resizeH = target.height;
    resizeW = target.width;

    // Ratios map network-space coordinates back to source-image space.
    ratioH = static_cast<float>(srcImage.rows) / resizeH;
    ratioW = static_cast<float>(srcImage.cols) / resizeW;

    cv::Mat scaled;
    cv::resize(srcImage, scaled, target);
    scaled.convertTo(scaled, CV_32FC3);

    // ImageNet normalization + HWC->CHW permute.
    return NormalizeAndPermute(scaled);
}
|
||||
|
||||
// Matches PaddleOCR official DBPostProcess.boxes_from_bitmap flow
// Converts the DB probability map into quadrilateral text boxes in
// source-image coordinates, sorted for downstream recognition.
std::vector<TextBox> ONNXOCRDetector::Postprocess(const float* outputData, int outH, int outW,
                                                  float ratioH, float ratioW,
                                                  int srcH, int srcW,
                                                  float dbThresh, float boxThresh,
                                                  float unclipRatio, bool useDilation) {
    // Create probability map from output. No copy: this wraps the ORT output
    // buffer, which the caller keeps alive for the duration of this call.
    cv::Mat probMap(outH, outW, CV_32FC1, const_cast<float*>(outputData));

    // Binary threshold
    cv::Mat binaryMap;
    cv::threshold(probMap, binaryMap, dbThresh, 255, cv::THRESH_BINARY);
    binaryMap.convertTo(binaryMap, CV_8UC1);

    // Optional dilation (PaddleOCR default: use_dilation=False, kernel=[[1,1],[1,1]])
    if (useDilation) {
        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
        cv::dilate(binaryMap, binaryMap, kernel);
    }

    // Find contours
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(binaryMap, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);

    std::vector<TextBox> boxes;
    // Cap candidate count to bound worst-case postprocessing time.
    int numContours = std::min(static_cast<int>(contours.size()), kDetMaxCandidates);

    for (int i = 0; i < numContours; i++) {
        if (contours[i].size() < 4) continue;

        // Step 1: GetMiniBoxes - get ordered 4 corners of min area rect
        cv::RotatedRect minRect = cv::minAreaRect(contours[i]);
        // Drop degenerate boxes whose short side is under 3 px.
        float sside = std::min(minRect.size.width, minRect.size.height);
        if (sside < 3.0f) continue;

        auto ordered = GetMiniBoxes(minRect);

        // Step 2: BoxScoreFast - compute mean prob inside the 4-point box polygon
        float score = BoxScoreFast(probMap, ordered);
        if (score < boxThresh) continue;

        // Step 3: UnclipPolygon - expand the 4-point box
        auto expanded = UnclipPolygon(ordered, unclipRatio);
        if (expanded.size() < 4) continue;

        // Step 4: Re-compute GetMiniBoxes on the expanded polygon
        std::vector<cv::Point> expandedInt;
        expandedInt.reserve(expanded.size());
        for (auto& p : expanded) {
            expandedInt.push_back(cv::Point(static_cast<int>(p.x), static_cast<int>(p.y)));
        }
        cv::RotatedRect expandedRect = cv::minAreaRect(expandedInt);

        // Filter by min_size + 2 = 5 (matches PaddleOCR official)
        float expandedSside = std::min(expandedRect.size.width, expandedRect.size.height);
        if (expandedSside < 5.0f) continue;

        auto expandedOrdered = GetMiniBoxes(expandedRect);

        // Step 5: Scale to original image coordinates
        TextBox box;
        for (int j = 0; j < 4; j++) {
            box.points[j].x = std::clamp(expandedOrdered[j].x * ratioW, 0.0f, static_cast<float>(srcW - 1));
            box.points[j].y = std::clamp(expandedOrdered[j].y * ratioH, 0.0f, static_cast<float>(srcH - 1));
        }
        box.score = score;
        boxes.push_back(box);
    }

    // Order boxes for downstream recognition (ordering defined by SortTextBoxes).
    SortTextBoxes(boxes);
    return boxes;
}
|
||||
|
||||
// Matches PaddleOCR official GetMiniBoxes: sort by X, assign TL/TR/BL/BR by Y
|
||||
std::array<cv::Point2f, 4> ONNXOCRDetector::GetMiniBoxes(const cv::RotatedRect& rect) {
|
||||
cv::Point2f vertices[4];
|
||||
rect.points(vertices);
|
||||
|
||||
// Sort all 4 points by x-coordinate ascending
|
||||
std::sort(vertices, vertices + 4,
|
||||
[](const cv::Point2f& a, const cv::Point2f& b) { return a.x < b.x; });
|
||||
|
||||
// Left two (indices 0,1): smaller y = top-left, larger y = bottom-left
|
||||
cv::Point2f topLeft, bottomLeft;
|
||||
if (vertices[0].y <= vertices[1].y) {
|
||||
topLeft = vertices[0];
|
||||
bottomLeft = vertices[1];
|
||||
} else {
|
||||
topLeft = vertices[1];
|
||||
bottomLeft = vertices[0];
|
||||
}
|
||||
|
||||
// Right two (indices 2,3): smaller y = top-right, larger y = bottom-right
|
||||
cv::Point2f topRight, bottomRight;
|
||||
if (vertices[2].y <= vertices[3].y) {
|
||||
topRight = vertices[2];
|
||||
bottomRight = vertices[3];
|
||||
} else {
|
||||
topRight = vertices[3];
|
||||
bottomRight = vertices[2];
|
||||
}
|
||||
|
||||
// Order: [TL, TR, BR, BL] (clockwise from top-left)
|
||||
return { topLeft, topRight, bottomRight, bottomLeft };
|
||||
}
|
||||
|
||||
// Matches PaddleOCR official box_score_fast: mean prob value inside the 4-point polygon
// probMap: CV_32FC1 probability map from the DB head; box: corners in map coords.
float ONNXOCRDetector::BoxScoreFast(const cv::Mat& probMap,
                                    const std::array<cv::Point2f, 4>& box) {
    int h = probMap.rows;
    int w = probMap.cols;

    // Get bounding rectangle with proper clamping (matches PaddleOCR official)
    float minX = std::min({box[0].x, box[1].x, box[2].x, box[3].x});
    float maxX = std::max({box[0].x, box[1].x, box[2].x, box[3].x});
    float minY = std::min({box[0].y, box[1].y, box[2].y, box[3].y});
    float maxY = std::max({box[0].y, box[1].y, box[2].y, box[3].y});

    // floor/ceil so the box's fractional edges stay inside the ROI.
    int xmin = std::clamp(static_cast<int>(std::floor(minX)), 0, w - 1);
    int xmax = std::clamp(static_cast<int>(std::ceil(maxX)), 0, w - 1);
    int ymin = std::clamp(static_cast<int>(std::floor(minY)), 0, h - 1);
    int ymax = std::clamp(static_cast<int>(std::ceil(maxY)), 0, h - 1);

    // Degenerate (zero-area) boxes score 0 and are filtered by the caller.
    if (xmin >= xmax || ymin >= ymax) return 0.0f;

    // Rasterize the polygon into a mask that covers only its bounding box.
    cv::Mat mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);

    std::vector<cv::Point> pts(4);
    for (int j = 0; j < 4; j++) {
        // Shift polygon into mask-local coordinates.
        pts[j] = cv::Point(static_cast<int>(box[j].x) - xmin,
                           static_cast<int>(box[j].y) - ymin);
    }
    std::vector<std::vector<cv::Point>> polys = { pts };
    cv::fillPoly(mask, polys, cv::Scalar(1));

    // Mean probability over masked pixels only.
    cv::Mat roiMap = probMap(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
    return static_cast<float>(cv::mean(roiMap, mask)[0]);
}
|
||||
|
||||
// Matches PaddleOCR official unclip: expand 4-point box using Clipper with JT_ROUND
// Returns the expanded outline, or an empty vector when expansion fails.
std::vector<cv::Point2f> ONNXOCRDetector::UnclipPolygon(const std::array<cv::Point2f, 4>& box,
                                                        float unclipRatio) {
    // Compute area using Shoelace formula and perimeter
    float area = 0.0f;
    float perimeter = 0.0f;
    for (int i = 0; i < 4; i++) {
        int j = (i + 1) % 4;
        area += box[i].x * box[j].y - box[j].x * box[i].y;
        float dx = box[j].x - box[i].x;
        float dy = box[j].y - box[i].y;
        perimeter += std::sqrt(dx * dx + dy * dy);
    }
    area = std::abs(area) * 0.5f;
    // Guard against degenerate (near-point) boxes before dividing.
    if (perimeter < 1.0f) return {};

    // Offset distance per the DB paper: d = area * ratio / perimeter.
    float distance = area * unclipRatio / perimeter;

    // Clipper works on integer coordinates; sub-pixel precision is dropped,
    // same as the reference implementation.
    ClipperLib::Path clipperPath;
    for (int i = 0; i < 4; i++) {
        clipperPath.push_back({ static_cast<ClipperLib::cInt>(box[i].x),
                                static_cast<ClipperLib::cInt>(box[i].y) });
    }

    ClipperLib::ClipperOffset offset;
    offset.AddPath(clipperPath, ClipperLib::jtRound, ClipperLib::etClosedPolygon);

    ClipperLib::Paths solution;
    offset.Execute(solution, distance);

    // Offsetting can yield nothing for pathological inputs.
    if (solution.empty() || solution[0].empty()) return {};

    // Only the first (outer) polygon of the solution is used.
    std::vector<cv::Point2f> result;
    for (auto& p : solution[0]) {
        result.push_back(cv::Point2f(static_cast<float>(p.X), static_cast<float>(p.Y)));
    }
    return result;
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
52
modules/ANSOCR/ANSONNXOCR/ONNXOCRDetector.h
Normal file
52
modules/ANSOCR/ANSONNXOCR/ONNXOCRDetector.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#include "ONNXOCRTypes.h"
|
||||
#include "ONNXEngine.h"
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// DB-style text detector (ONNX Runtime).
// Detect() serializes on an internal mutex, so a single instance may be
// shared across threads.
class ONNXOCRDetector : public BasicOrtHandler {
public:
    // onnx_path: path to the detection .onnx model.
    // num_threads: ORT intra-op thread count.
    explicit ONNXOCRDetector(const std::string& onnx_path, unsigned int num_threads = 1);
    ~ONNXOCRDetector() override = default;

    // Run text detection on an image
    // Returns quadrilateral boxes in srcImage coordinates, ordered by
    // SortTextBoxes, each with its mean probability score.
    std::vector<TextBox> Detect(const cv::Mat& srcImage,
                                int maxSideLen = kDetMaxSideLen,
                                float dbThresh = kDetDbThresh,
                                float boxThresh = kDetBoxThresh,
                                float unclipRatio = kDetUnclipRatio,
                                bool useDilation = false);

private:
    // BasicOrtHandler hooks; Detect() builds its tensors itself.
    Ort::Value transform(const cv::Mat& mat) override;
    Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;

    // Preprocessing: resize (per ComputeDetResizeShape), normalize, HWC->CHW.
    std::vector<float> Preprocess(const cv::Mat& srcImage, int maxSideLen,
                                  int& resizeH, int& resizeW,
                                  float& ratioH, float& ratioW);

    // Postprocessing: threshold -> contours -> boxes (matches PaddleOCR official flow)
    std::vector<TextBox> Postprocess(const float* outputData, int outH, int outW,
                                     float ratioH, float ratioW, int srcH, int srcW,
                                     float dbThresh, float boxThresh, float unclipRatio,
                                     bool useDilation);

    // Get ordered 4 corners [TL, TR, BR, BL] from rotated rect (matches PaddleOCR GetMiniBoxes)
    std::array<cv::Point2f, 4> GetMiniBoxes(const cv::RotatedRect& rect);

    // Compute mean score inside box polygon on the probability map
    float BoxScoreFast(const cv::Mat& probMap, const std::array<cv::Point2f, 4>& box);

    // Expand 4-point box using Clipper offset
    std::vector<cv::Point2f> UnclipPolygon(const std::array<cv::Point2f, 4>& box, float unclipRatio);

    // Guards the shared ORT session/buffers across concurrent Detect() calls.
    std::mutex _mutex;
};
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
165
modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp
Normal file
165
modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp
Normal file
@@ -0,0 +1,165 @@
|
||||
#include "ONNXOCRRecognizer.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
#include <cfloat>
|
||||
#include <cstring>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Construct the CTC text recognizer; the ORT session is created by the
// BasicOrtHandler base. Call LoadDictionary() before Recognize().
ONNXOCRRecognizer::ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads)
    : BasicOrtHandler(onnx_path, num_threads) {
}
|
||||
|
||||
bool ONNXOCRRecognizer::LoadDictionary(const std::string& dictPath) {
    // Parse the character table. LoadDict prepends the CTC blank token ("#")
    // and appends a trailing space, so a usable table has at least 2 entries.
    keys_ = LoadDict(dictPath);

    if (keys_.size() < 2) {
        std::cerr << "[ONNXOCRRecognizer] Failed to load dictionary: " << dictPath << std::endl;
        return false;
    }

    std::cout << "[ONNXOCRRecognizer] Loaded dictionary with " << keys_.size()
              << " characters from: " << dictPath << std::endl;
    return true;
}
|
||||
|
||||
Ort::Value ONNXOCRRecognizer::transform(const cv::Mat& mat) {
    // Interface hook only — Recognize() does its own preprocessing so it can
    // pad to a per-image dynamic width.
    cv::Mat scaled = ResizeRecImage(mat, imgH_, imgMaxW_);
    scaled.convertTo(scaled, CV_32FC3);

    // Recognition normalization: (pixel/255 - 0.5) / 0.5, then HWC->CHW.
    const auto blob = NormalizeAndPermuteCls(scaled);
    input_values_handler.assign(blob.begin(), blob.end());

    return Ort::Value::CreateTensor<float>(*memory_info_handler,
                                           input_values_handler.data(),
                                           input_values_handler.size(),
                                           input_node_dims.data(),
                                           input_node_dims.size());
}
|
||||
|
||||
Ort::Value ONNXOCRRecognizer::transformBatch(const std::vector<cv::Mat>& images) {
    // Unused: recognition runs per-image because each crop has its own
    // dynamic width. Delegate to the single-image path for non-empty input.
    if (images.empty()) {
        return Ort::Value(nullptr);
    }
    return transform(images.front());
}
|
||||
|
||||
// Recognize the text content of one cropped text-line image.
// Returns a default TextLine (empty text, score 0) when the session or
// dictionary is missing, the image is empty, or inference fails.
TextLine ONNXOCRRecognizer::Recognize(const cv::Mat& croppedImage) {
    // Session and staging buffers are shared; serialize callers.
    std::lock_guard<std::mutex> lock(_mutex);

    if (!ort_session || croppedImage.empty() || keys_.empty()) {
        return {};
    }

    try {
        // Preprocess: resize to fixed height, proportional width
        cv::Mat resized = ResizeRecImage(croppedImage, imgH_, imgMaxW_);
        int resizedW = resized.cols;

        resized.convertTo(resized, CV_32FC3);
        // Recognition uses (pixel/255 - 0.5) / 0.5 normalization (same as classifier)
        auto normalizedData = NormalizeAndPermuteCls(resized);

        // Pad to at least kRecImgW width (matching official PaddleOCR behavior)
        // Official PaddleOCR: padding_im = np.zeros((C, H, W)), then copies normalized
        // image into left portion. Padding value = 0.0 in normalized space.
        int imgW = std::max(resizedW, kRecImgW);

        std::vector<float> inputData;
        if (imgW > resizedW) {
            // Zero-pad on the right (CHW layout)
            inputData.resize(3 * imgH_ * imgW, 0.0f);
            for (int c = 0; c < 3; c++) {
                for (int y = 0; y < imgH_; y++) {
                    // Copy one channel row: destination rows are imgW floats
                    // wide, source rows are resizedW floats wide.
                    std::memcpy(
                        &inputData[c * imgH_ * imgW + y * imgW],
                        &normalizedData[c * imgH_ * resizedW + y * resizedW],
                        resizedW * sizeof(float));
                }
            }
        } else {
            // No padding needed: adopt the buffer without copying.
            inputData = std::move(normalizedData);
        }

        // Create input tensor with (possibly padded) width
        std::array<int64_t, 4> inputShape = { 1, 3, imgH_, imgW };
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
            *memory_info_handler, inputData.data(), inputData.size(),
            inputShape.data(), inputShape.size());

        // Run inference
        auto outputTensors = ort_session->Run(
            Ort::RunOptions{ nullptr },
            input_node_names.data(), &inputTensor, 1,
            output_node_names.data(), num_outputs);

        // Get output
        float* outputData = outputTensors[0].GetTensorMutableData<float>();
        auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();

        // Output assumed [1, seqLen, numClasses] — per-timestep class
        // probabilities for CTC decoding.
        int seqLen = static_cast<int>(outputShape[1]);
        int numClasses = static_cast<int>(outputShape[2]);

        return CTCDecode(outputData, seqLen, numClasses);
    }
    catch (const Ort::Exception& e) {
        std::cerr << "[ONNXOCRRecognizer] Inference failed: " << e.what() << std::endl;
        return {};
    }
}
|
||||
|
||||
std::vector<TextLine> ONNXOCRRecognizer::RecognizeBatch(const std::vector<cv::Mat>& croppedImages) {
    // Each crop has its own dynamic width, so crops are run sequentially
    // through the single-image path rather than batched.
    std::vector<TextLine> lines;
    lines.reserve(croppedImages.size());

    for (const auto& crop : croppedImages) {
        lines.push_back(Recognize(crop));
    }

    return lines;
}
|
||||
|
||||
TextLine ONNXOCRRecognizer::CTCDecode(const float* outputData, int seqLen, int numClasses) {
    // Greedy CTC decode: take the argmax at each timestep, drop blanks
    // (index 0) and collapse consecutive repeats. Confidence is the mean of
    // the kept per-character values (PP-OCRv5 rec heads already apply
    // softmax, so outputs are probabilities).
    std::string decoded;
    float scoreSum = 0.0f;
    int keptCount = 0;
    int prevIndex = 0; // CTC blank is index 0

    for (int t = 0; t < seqLen; t++) {
        const float* probs = outputData + t * numClasses;
        const float* best = std::max_element(probs, probs + numClasses);
        const int bestIndex = static_cast<int>(best - probs);

        // Emit only non-blank, non-repeated indices that map into the table
        // (keys_[0] = "#" blank, keys_[1] = first real character, ...).
        if (bestIndex != 0 && bestIndex != prevIndex &&
            bestIndex < static_cast<int>(keys_.size())) {
            decoded += keys_[bestIndex];
            scoreSum += *best;
            ++keptCount;
        }
        prevIndex = bestIndex;
    }

    TextLine result;
    result.text = decoded;
    if (keptCount > 0) {
        result.score = scoreSum / static_cast<float>(keptCount);
    }
    return result;
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
40
modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.h
Normal file
40
modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include "ONNXOCRTypes.h"
|
||||
#include "ONNXEngine.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// CTC text recognizer (ONNX Runtime).
// Usage: construct, call LoadDictionary(), then Recognize()/RecognizeBatch().
// Recognize() serializes on an internal mutex, so one instance may be shared
// across threads.
class ONNXOCRRecognizer : public BasicOrtHandler {
public:
    // onnx_path: path to the recognition .onnx model.
    // num_threads: ORT intra-op thread count.
    explicit ONNXOCRRecognizer(const std::string& onnx_path, unsigned int num_threads = 1);
    ~ONNXOCRRecognizer() override = default;

    // Load character dictionary (must be called before Recognize)
    bool LoadDictionary(const std::string& dictPath);

    // Recognize text from a single cropped text image
    TextLine Recognize(const cv::Mat& croppedImage);

    // Batch recognition for multiple cropped images
    std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);

private:
    // BasicOrtHandler hooks; Recognize() builds its tensors itself.
    Ort::Value transform(const cv::Mat& mat) override;
    Ort::Value transformBatch(const std::vector<cv::Mat>& images) override;

    // CTC greedy decode
    TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);

    // Character table: keys_[0] = "#" (CTC blank), last entry = " " (space).
    std::vector<std::string> keys_;
    // Recognition input geometry: fixed height, width capped at imgMaxW_.
    int imgH_ = kRecImgH;
    int imgMaxW_ = kRecImgMaxW;
    // Guards the shared ORT session/buffers across concurrent calls.
    std::mutex _mutex;
};
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
212
modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h
Normal file
212
modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h
Normal file
@@ -0,0 +1,212 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Detection normalization constants (BGR channel order, matching PaddleOCR official)
// PaddleOCR config: mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
// Applied directly to BGR channels WITHOUT BGR→RGB conversion:
//   Channel 0 (B) → mean=0.485, std=0.229
//   Channel 1 (G) → mean=0.456, std=0.224
//   Channel 2 (R) → mean=0.406, std=0.225
// NOTE(review): applying the RGB-ordered mean/std to BGR data mirrors the
// reference implementation — intentional, do not "fix" the channel order.
constexpr float kDetMean0 = 0.485f; // B channel
constexpr float kDetMean1 = 0.456f; // G channel
constexpr float kDetMean2 = 0.406f; // R channel
constexpr float kDetStd0 = 0.229f; // B channel
constexpr float kDetStd1 = 0.224f; // G channel
constexpr float kDetStd2 = 0.225f; // R channel
constexpr float kScale = 1.0f / 255.0f; // maps 0..255 pixels to 0..1 before mean/std

// Detection defaults (PP-OCRv5 server: limit_type=max, limit_side_len=960)
constexpr int kDetMaxSideLen = 960;
constexpr int kDetMaxSideLimit = 4000; // Safety cap on max dimension
constexpr float kDetDbThresh = 0.3f; // binarization threshold on the probability map
constexpr float kDetBoxThresh = 0.6f; // minimum mean score for a kept box
constexpr float kDetUnclipRatio = 1.5f; // DB unclip (box expansion) ratio
constexpr int kDetMaxCandidates = 1000; // cap on contours considered per image

// Classifier defaults (PP-LCNet_x1_0_textline_ori model)
// Input: [B, 3, 80, 160], ImageNet normalization, 2-class (0°/180°)
// Direct resize to 80x160 (no aspect ratio preservation)
constexpr int kClsImageH = 80;
constexpr int kClsImageW = 160;
constexpr float kClsThresh = 0.9f; // minimum confidence to act on a "rotated" label

// Recognition defaults
constexpr int kRecImgH = 48;
constexpr int kRecImgW = 320; // Default rec width (PP-OCRv5 rec_image_shape[2]=320, min padded width)
constexpr int kRecImgMaxW = 960; // Allow wide recognition input for long text lines
constexpr int kRecBatchSize = 6;
|
||||
|
||||
// A detected text box: 4 corner points (top-left, top-right, bottom-right, bottom-left)
struct TextBox {
    std::array<cv::Point2f, 4> points; // corners in the order listed above
    float score = 0.0f;                // detection confidence for this box
};
|
||||
|
||||
// A single recognized text line
struct TextLine {
    std::string text;   // decoded character sequence
    float score = 0.0f; // recognizer confidence for the line
};
|
||||
|
||||
// OCR result matching PaddleOCR::OCRPredictResult format
struct OCRPredictResult {
    std::vector<std::vector<int>> box; // 4 corner points [[x,y], ...]
    std::string text;                  // recognized text for this box
    float score = -1.0f;               // recognition score; -1.0 until recognition runs
    float cls_score = 0.0f;            // orientation-classifier score
    int cls_label = -1;                // orientation label; -1 until the classifier runs
};
|
||||
|
||||
// Load a character dictionary (one token per line) from `dictPath`.
// Prepends the CTC blank token "#" at index 0 and appends a trailing
// space token, matching the label layout expected by the CTC decoder.
// Returns an empty vector when the file cannot be opened.
inline std::vector<std::string> LoadDict(const std::string& dictPath) {
    std::vector<std::string> keys;
    std::ifstream file(dictPath);
    if (!file.is_open()) {
        return keys;
    }
    for (std::string line; std::getline(file, line);) {
        // Strip Windows-style trailing carriage return
        if (!line.empty() && line.back() == '\r') line.pop_back();
        keys.push_back(line);
    }
    keys.insert(keys.begin(), "#"); // CTC blank token at index 0
    keys.push_back(" ");            // space token at the end
    return keys;
}
|
||||
|
||||
// Compute resize dimensions for detection model (multiples of 32)
|
||||
// limit_type='max': scale down if max side > maxSideLen (PP-OCRv5 server default)
|
||||
// maxSideLimit: safety cap on final max dimension (default 4000)
|
||||
inline cv::Size ComputeDetResizeShape(int srcH, int srcW, int maxSideLen,
|
||||
int maxSideLimit = kDetMaxSideLimit) {
|
||||
float ratio = 1.0f;
|
||||
int maxSide = std::max(srcH, srcW);
|
||||
if (maxSide > maxSideLen) {
|
||||
ratio = static_cast<float>(maxSideLen) / static_cast<float>(maxSide);
|
||||
}
|
||||
int newH = static_cast<int>(srcH * ratio);
|
||||
int newW = static_cast<int>(srcW * ratio);
|
||||
|
||||
// Safety cap: clamp if either dimension exceeds maxSideLimit
|
||||
if (std::max(newH, newW) > maxSideLimit) {
|
||||
float clampRatio = static_cast<float>(maxSideLimit) / static_cast<float>(std::max(newH, newW));
|
||||
newH = static_cast<int>(newH * clampRatio);
|
||||
newW = static_cast<int>(newW * clampRatio);
|
||||
}
|
||||
|
||||
newH = std::max(32, static_cast<int>(std::round(newH / 32.0) * 32));
|
||||
newW = std::max(32, static_cast<int>(std::round(newW / 32.0) * 32));
|
||||
return cv::Size(newW, newH);
|
||||
}
|
||||
|
||||
// Normalize BGR float image to CHW BGR vector for detection
|
||||
// BGR channel order preserved (matching PaddleOCR official - no BGR→RGB conversion)
|
||||
inline std::vector<float> NormalizeAndPermute(const cv::Mat& img) {
|
||||
int h = img.rows;
|
||||
int w = img.cols;
|
||||
std::vector<float> result(3 * h * w);
|
||||
for (int y = 0; y < h; y++) {
|
||||
const float* row = img.ptr<float>(y);
|
||||
for (int x = 0; x < w; x++) {
|
||||
float b = row[x * 3 + 0];
|
||||
float g = row[x * 3 + 1];
|
||||
float r = row[x * 3 + 2];
|
||||
// BGR order: channel 0=B, 1=G, 2=R (matching PaddleOCR official)
|
||||
result[0 * h * w + y * w + x] = (b * kScale - kDetMean0) / kDetStd0;
|
||||
result[1 * h * w + y * w + x] = (g * kScale - kDetMean1) / kDetStd1;
|
||||
result[2 * h * w + y * w + x] = (r * kScale - kDetMean2) / kDetStd2;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Normalize for classifier and recognizer: (x/255 - 0.5) / 0.5
|
||||
// BGR channel order preserved (matching PaddleOCR official - no BGR→RGB conversion)
|
||||
inline std::vector<float> NormalizeAndPermuteCls(const cv::Mat& img) {
|
||||
int h = img.rows;
|
||||
int w = img.cols;
|
||||
std::vector<float> result(3 * h * w);
|
||||
for (int y = 0; y < h; y++) {
|
||||
const float* row = img.ptr<float>(y);
|
||||
for (int x = 0; x < w; x++) {
|
||||
float b = row[x * 3 + 0];
|
||||
float g = row[x * 3 + 1];
|
||||
float r = row[x * 3 + 2];
|
||||
// BGR order: channel 0=B, 1=G, 2=R (matching PaddleOCR official)
|
||||
result[0 * h * w + y * w + x] = (b * kScale - 0.5f) / 0.5f;
|
||||
result[1 * h * w + y * w + x] = (g * kScale - 0.5f) / 0.5f;
|
||||
result[2 * h * w + y * w + x] = (r * kScale - 0.5f) / 0.5f;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Sort text boxes from top to bottom, left to right
|
||||
inline void SortTextBoxes(std::vector<TextBox>& boxes) {
|
||||
std::sort(boxes.begin(), boxes.end(),
|
||||
[](const TextBox& a, const TextBox& b) {
|
||||
if (std::abs(a.points[0].y - b.points[0].y) < 10.0f) {
|
||||
return a.points[0].x < b.points[0].x;
|
||||
}
|
||||
return a.points[0].y < b.points[0].y;
|
||||
});
|
||||
}
|
||||
|
||||
// Get rotated and cropped image from text box polygon
|
||||
inline cv::Mat GetRotateCropImage(const cv::Mat& srcImage, const TextBox& box) {
|
||||
auto pts = box.points;
|
||||
float width = static_cast<float>(std::max(
|
||||
cv::norm(pts[0] - pts[1]),
|
||||
cv::norm(pts[2] - pts[3])));
|
||||
float height = static_cast<float>(std::max(
|
||||
cv::norm(pts[0] - pts[3]),
|
||||
cv::norm(pts[1] - pts[2])));
|
||||
|
||||
std::vector<cv::Point2f> srcPts = { pts[0], pts[1], pts[2], pts[3] };
|
||||
std::vector<cv::Point2f> dstPts = {
|
||||
{0, 0}, {width, 0}, {width, height}, {0, height}
|
||||
};
|
||||
|
||||
cv::Mat M = cv::getPerspectiveTransform(srcPts, dstPts);
|
||||
cv::Mat cropped;
|
||||
cv::warpPerspective(srcImage, cropped, M,
|
||||
cv::Size(static_cast<int>(width), static_cast<int>(height)),
|
||||
cv::BORDER_REPLICATE);
|
||||
|
||||
if (cropped.rows > cropped.cols * 1.5f) {
|
||||
cv::Mat rotated;
|
||||
cv::transpose(cropped, rotated);
|
||||
cv::flip(rotated, rotated, 0);
|
||||
return rotated;
|
||||
}
|
||||
return cropped;
|
||||
}
|
||||
|
||||
// Resize recognition image to fixed height, proportional width
|
||||
inline cv::Mat ResizeRecImage(const cv::Mat& img, int targetH, int maxW) {
|
||||
float ratio = static_cast<float>(targetH) / img.rows;
|
||||
int targetW = static_cast<int>(img.cols * ratio);
|
||||
targetW = std::min(targetW, maxW);
|
||||
targetW = std::max(targetW, 1);
|
||||
|
||||
cv::Mat resized;
|
||||
cv::resize(img, resized, cv::Size(targetW, targetH));
|
||||
return resized;
|
||||
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
130
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
Normal file
130
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
#include "PaddleOCRV5Engine.h"
|
||||
#include "EPLoader.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// Initialize the detection -> (classification) -> recognition pipeline.
// - detModelPath: ONNX detection model (required)
// - clsModelPath: ONNX text-line orientation model; empty string skips the
//   classification stage entirely
// - recModelPath: ONNX recognition model (required)
// - dictPath:     character dictionary for the recognizer
// Returns true when all stages are ready. On an exception every stage is
// torn down and false is returned. Serialized by _mutex.
bool PaddleOCRV5Engine::Initialize(const std::string& detModelPath,
                                   const std::string& clsModelPath,
                                   const std::string& recModelPath,
                                   const std::string& dictPath) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    try {
        // Initialize detector (also triggers EPLoader init in BasicOrtHandler)
        detector_ = std::make_unique<ONNXOCRDetector>(detModelPath);
        std::cout << "[PaddleOCRV5Engine] Detector initialized: " << detModelPath << std::endl;

        // Ensure this DLL's copy of Ort::Global<void>::api_ is initialized.
        // BasicOrtHandler sets it in ONNXEngine.dll, but each DLL has its own
        // inline-static copy. Without this, inference calls from ANSOCR.dll crash.
        if (Ort::Global<void>::api_ == nullptr) {
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
        }

        // Initialize classifier (optional)
        if (!clsModelPath.empty()) {
            classifier_ = std::make_unique<ONNXOCRClassifier>(clsModelPath);
            std::cout << "[PaddleOCRV5Engine] Classifier initialized: " << clsModelPath << std::endl;
        }
        else {
            classifier_.reset();
            std::cout << "[PaddleOCRV5Engine] Classifier skipped (no model path)" << std::endl;
        }

        // Initialize recognizer
        recognizer_ = std::make_unique<ONNXOCRRecognizer>(recModelPath);
        if (!recognizer_->LoadDictionary(dictPath)) {
            // NOTE(review): on this failure path _initialized keeps its prior
            // value, so a repeated Initialize that fails here leaves a
            // previously-initialized engine still marked ready — confirm intended.
            std::cerr << "[PaddleOCRV5Engine] Failed to load dictionary" << std::endl;
            return false;
        }
        std::cout << "[PaddleOCRV5Engine] Recognizer initialized: " << recModelPath << std::endl;

        _initialized = true;
        std::cout << "[PaddleOCRV5Engine] Pipeline initialized successfully" << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        // Roll back to a fully uninitialized state on any stage failure
        std::cerr << "[PaddleOCRV5Engine] Initialization failed: " << e.what() << std::endl;
        detector_.reset();
        classifier_.reset();
        recognizer_.reset();
        _initialized = false;
        return false;
    }
}
|
||||
|
||||
// Run the full OCR pipeline on one image:
//   1) detect text boxes, 2) perspective-crop each box,
//   3) optionally fix 180° orientation, 4) recognize text,
//   5) pair each recognized line with its source box.
// Returns one OCRPredictResult per successfully recognized box; empty when
// uninitialized, the image is empty, or nothing is detected.
// Serialized by _mutex.
std::vector<OCRPredictResult> PaddleOCRV5Engine::ocr(const cv::Mat& img) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    std::vector<OCRPredictResult> results;

    if (!_initialized || img.empty()) {
        return results;
    }

    // Step 1: Text Detection
    auto boxes = detector_->Detect(img, _maxSideLen, _detDbThresh, _detBoxThresh, _detUnclipRatio, _useDilation);
    if (boxes.empty()) {
        return results;
    }

    // Step 2: Crop detected text regions.
    // FIX: empty crops used to be skipped while their boxes were not, so
    // recognition results drifted out of alignment with their boxes. Record
    // the source-box index of every successful crop instead.
    std::vector<cv::Mat> croppedImages;
    std::vector<size_t> boxIndices;
    croppedImages.reserve(boxes.size());
    boxIndices.reserve(boxes.size());
    for (size_t i = 0; i < boxes.size(); i++) {
        cv::Mat cropped = GetRotateCropImage(img, boxes[i]);
        if (!cropped.empty()) {
            croppedImages.push_back(cropped);
            boxIndices.push_back(i);
        }
    }

    // Step 3: Classification (optional)
    std::vector<int> cls_labels(croppedImages.size(), 0);
    std::vector<float> cls_scores(croppedImages.size(), 0.0f);

    if (classifier_) {
        classifier_->Classify(croppedImages, cls_labels, cls_scores, _clsThresh);

        // Rotate images classified as upside-down (label=1 and score > threshold)
        for (size_t i = 0; i < croppedImages.size(); i++) {
            if (cls_labels[i] % 2 == 1 && cls_scores[i] > _clsThresh) {
                cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
            }
        }
    }

    // Step 4: Text Recognition
    auto textLines = recognizer_->RecognizeBatch(croppedImages);

    // Step 5: Combine results — pair each text line with its original box
    for (size_t i = 0; i < textLines.size() && i < boxIndices.size(); i++) {
        const TextBox& srcBox = boxes[boxIndices[i]];
        OCRPredictResult result;

        // Convert TextBox points to box format [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]
        result.box.resize(4);
        for (int j = 0; j < 4; j++) {
            result.box[j] = {
                static_cast<int>(srcBox.points[j].x),
                static_cast<int>(srcBox.points[j].y)
            };
        }

        result.text = textLines[i].text;
        result.score = textLines[i].score;
        result.cls_label = cls_labels[i];
        result.cls_score = cls_scores[i];

        results.push_back(result);
    }

    return results;
}
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
63
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h
Normal file
63
modules/ANSOCR/ANSONNXOCR/PaddleOCRV5Engine.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
#include "ONNXOCRTypes.h"
|
||||
#include "ONNXOCRDetector.h"
|
||||
#include "ONNXOCRClassifier.h"
|
||||
#include "ONNXOCRRecognizer.h"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace onnxocr {
|
||||
|
||||
// PaddleOCR V5 pipeline engine: Detection -> (Classification) -> Recognition
// Mirrors the PaddleOCR::PPOCR interface for drop-in replacement.
// All public entry points are serialized by an internal recursive mutex.
class PaddleOCRV5Engine {
public:
    PaddleOCRV5Engine() = default;
    ~PaddleOCRV5Engine() = default;

    // Initialize the OCR pipeline
    // clsModelPath can be empty to skip classification
    bool Initialize(const std::string& detModelPath,
        const std::string& clsModelPath,
        const std::string& recModelPath,
        const std::string& dictPath);

    // Run full OCR pipeline on an image
    // Returns results matching PaddleOCR::OCRPredictResult format
    std::vector<OCRPredictResult> ocr(const cv::Mat& img);

    // Configuration setters (matching OCRModelConfig parameters).
    // Each overrides the corresponding k* default; call before ocr().
    void SetDetMaxSideLen(int val) { _maxSideLen = val; }
    void SetDetDbThresh(float val) { _detDbThresh = val; }
    void SetDetBoxThresh(float val) { _detBoxThresh = val; }
    void SetDetUnclipRatio(float val) { _detUnclipRatio = val; }
    void SetClsThresh(float val) { _clsThresh = val; }
    void SetUseDilation(bool val) { _useDilation = val; }

private:
    std::unique_ptr<ONNXOCRDetector> detector_;
    std::unique_ptr<ONNXOCRClassifier> classifier_; // nullptr if not used
    std::unique_ptr<ONNXOCRRecognizer> recognizer_;

    // Guards Initialize()/ocr(); recursive so locked members may re-enter
    std::recursive_mutex _mutex;

    // Detection parameters
    int _maxSideLen = kDetMaxSideLen;
    float _detDbThresh = kDetDbThresh;
    float _detBoxThresh = kDetBoxThresh;
    float _detUnclipRatio = kDetUnclipRatio;
    bool _useDilation = false;

    // Classifier parameters
    float _clsThresh = kClsThresh;

    // Set true only after Initialize() completes all stages
    bool _initialized = false;
};
|
||||
|
||||
} // namespace onnxocr
|
||||
} // namespace ANSCENTER
|
||||
423
modules/ANSOCR/ANSOdOCR.cpp
Normal file
423
modules/ANSOCR/ANSOdOCR.cpp
Normal file
@@ -0,0 +1,423 @@
|
||||
#include "ANSOdOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
#include "ANSYOLOOD.h"
|
||||
#include "ANSTENSORRTOD.h"
|
||||
namespace ANSCENTER {
|
||||
// Set up the licence-gated plate-detection + character-OCR pipeline:
// validate/unpack through ANSOCRBase, then create a licence-plate detector
// and an OCR detector (TensorRT on NVIDIA GPUs, ONNX otherwise) and load
// both models from _modelFolder. Returns false on any failure.
bool ANSODOCR::Initialize(std::string licenseKey, OCRModelConfig modelConfig,
    const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
    try
    {
        // NOTE(review): sibling classes in this module call
        // ANSOCRBase::Initialize here — confirm Init is the intended base entry point.
        bool result = ANSOCRBase::Init(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
        if (!result) return false;

        // Check if detector and ocr model files exist
        _modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "lpd.onnx");
        _modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ocr.onnx");

        if (!FileExist(_modelConfig.detectionModelFile)) {
            this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }

        if (!FileExist(_modelConfig.recognizerModelFile)) {
            this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid OCR recognizer model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
        try {

            // Check the hardware type and pick the matching backend
            engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();// EngineType::CPU;//
            if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
                // Use TensorRT YoloV11
                this->_lpDetector = std::make_unique <ANSCENTER::TENSORRTOD>();
                this->_ocrDetector = std::make_unique<ANSCENTER::TENSORRTOD>();

            }
            else {
                // Use ONNX YoloV11
                this->_lpDetector = std::make_unique <ANSCENTER::YOLOOD>();
                this->_ocrDetector = std::make_unique<ANSCENTER::YOLOOD>();
            }

            // Map OCR config thresholds onto the two detector configs,
            // flooring both score thresholds at 0.25
            _ocrModelConfig.detectionScoreThreshold = modelConfig.clsThreshold;
            _lpdmodelConfig.detectionScoreThreshold = modelConfig.detectionBoxThreshold;
            if (_lpdmodelConfig.detectionScoreThreshold < 0.25)_lpdmodelConfig.detectionScoreThreshold = 0.25;
            if (_ocrModelConfig.detectionScoreThreshold < 0.25)_ocrModelConfig.detectionScoreThreshold = 0.25;
            _lpdmodelConfig.modelConfThreshold = 0.5;
            _lpdmodelConfig.modelMNSThreshold = 0.5;
            _ocrModelConfig.modelConfThreshold = 0.5;
            _ocrModelConfig.modelMNSThreshold = 0.5;

            if (!this->_lpDetector->LoadModelFromFolder("", _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels)) {
                return false;
            }

            if (!this->_ocrDetector->LoadModelFromFolder("", _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels)) {
                return false;
            }
            // NOTE(review): _isInitialized is never set true in this method;
            // presumably a base call or LoadModelFromFolder sets it — verify,
            // otherwise this returns false even after successful loading.
            return _isInitialized;
        }
        catch (...) {
            _licenseValid = false;
            this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
            return false;
        }
    }
    catch (std::exception& e) {
        // Handle any other exception that occurs during initialization
        this->_logger.LogFatal("ANSODOCR::Initialize", e.what(), __FILE__, __LINE__);
        _licenseValid = false;
        return false;
    }
}
|
||||
|
||||
// Convenience overload: run full-image OCR tagged with the default
// camera id ("OCRCPUCAM"). Rejects empty or degenerate (<10 px) images.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input) {
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        return {};
    }
    return RunInference(input, "OCRCPUCAM");
}
|
||||
|
||||
// Run full-image OCR and tag each result with `cameraId`.
// Returns one OCRObject per recognized text line (axis-aligned box clamped
// to the image, class/score/text filled from the OCR result); empty on any
// validation failure (license, initialization, bad input).
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::string cameraId) {
    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSODOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        // Convert grayscale to BGR if necessary
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        // FIX: this guard used to test _ocrDetector while the call below
        // dereferences ppocr — check the pointer that is actually used.
        if (!this->ppocr) {
            this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);

        for (size_t n = 0; n < res_ocr.size(); ++n) {
            // Every valid OCR box has exactly 4 corners
            if (res_ocr[n].box.size() != 4) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            cv::Point rook_points[4];
            for (size_t m = 0; m < 4; ++m) {
                rook_points[m] = cv::Point(
                    static_cast<int>(res_ocr[n].box[m][0]),
                    static_cast<int>(res_ocr[n].box[m][1])
                );
            }

            // Axis-aligned rectangle derived from the corner points
            int x = std::max(0, rook_points[0].x);
            int y = std::max(0, rook_points[0].y);
            int width = rook_points[1].x - rook_points[0].x;
            int height = rook_points[2].y - rook_points[1].y;

            // Clamp width and height to the image bounds
            width = std::max(1, std::min(im.cols - x, width));
            height = std::max(1, std::min(im.rows - y, height));

            // Skip degenerate boxes
            if (width <= 1 || height <= 1) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = res_ocr[n].cls_label;
            ocrObject.confidence = res_ocr[n].score;
            ocrObject.className = res_ocr[n].text;
            ocrObject.extraInfo = "cls label: " + std::to_string(res_ocr[n].cls_label)
                + "; cls score: " + std::to_string(res_ocr[n].cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(ocrObject);
        }

        im.release();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return OCRObjects;
}
|
||||
|
||||
|
||||
// Run OCR restricted to the given regions of interest.
// - Bbox non-empty: each rectangle is clamped to the image, cropped, and
//   OCR'd via RunInference(crop); result boxes are translated back to
//   full-image coordinates.
// - Bbox empty: the whole image is OCR'd in one pass.
// Returns an empty vector on any validation failure.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox) {
    // FIX: unique_lock (not lock_guard) so the lock can be released before
    // the per-ROI path re-enters RunInference(crop), which locks the same
    // non-recursive _mutex itself — holding it across that call self-deadlocks.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
        if (Bbox.size() > 0) {
            // Convert grayscale images to 3-channel BGR if needed
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }
            int fWidth = frame.cols;
            int fHeight = frame.rows;

            // Release before recursing; the inner overload takes _mutex itself
            lock.unlock();

            for (const cv::Rect& roi : Bbox) {
                // Clamp the ROI to the image
                int x1 = std::max(0, roi.x);
                int y1 = std::max(0, roi.y);
                int width = std::min(fWidth - x1, roi.width);
                int height = std::min(fHeight - y1, roi.height);
                if (width >= 5 && height >= 5) {
                    // OCR the cropped region independently
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);
                    std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
                    for (size_t i = 0; i < OCRTempObjects.size(); i++) {
                        ANSCENTER::OCRObject detectionObject = OCRTempObjects[i];
                        // Translate crop-local coordinates back to the full image
                        detectionObject.box.x += x1;
                        detectionObject.box.y += y1;

                        detectionObject.box.x = std::max(0, detectionObject.box.x);
                        detectionObject.box.y = std::max(0, detectionObject.box.y);
                        detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width);
                        detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height);

                        OCRObjects.push_back(detectionObject);
                    }
                }
            }
        }
        else {
            // Convert grayscale images to 3-channel BGR if needed
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
            for (size_t n = 0; n < res_ocr.size(); n++) { // number of detections
                // FIX: boxes with != 4 corners used to overflow rook_points[4]
                // (loop bound was box.size()); skip them like the sibling overload.
                if (res_ocr[n].box.size() != 4) {
                    this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }
                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; m++) {
                    rook_points[m] =
                        cv::Point(int(res_ocr[n].box[m][0]), int(res_ocr[n].box[m][1]));
                }
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = rook_points[0].x;
                ocrObject.box.y = rook_points[0].y;
                ocrObject.box.width = rook_points[1].x - rook_points[0].x;
                ocrObject.box.height = rook_points[2].y - rook_points[1].y;

                // Clamp the box to the image
                ocrObject.box.x = std::max(0, ocrObject.box.x);
                ocrObject.box.y = std::max(0, ocrObject.box.y);
                ocrObject.box.width = std::min(frame.cols - ocrObject.box.x, ocrObject.box.width);
                ocrObject.box.height = std::min(frame.rows - ocrObject.box.y, ocrObject.box.height);

                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                std::string extraInformation = "cls label:" +
                    std::to_string(res_ocr[n].cls_label) +
                    ";" +
                    "cls score:" + std::to_string(res_ocr[n].cls_score);
                ocrObject.extraInfo = extraInformation;
                // Add extra information for cls score cls label
                OCRObjects.push_back(ocrObject);
            }
            frame.release();
            return OCRObjects;
        }
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
// Run OCR restricted to the given regions of interest and tag every result
// with `cameraId`. Behaves like RunInference(input, Bbox) plus the camera
// tag; with an empty Bbox the whole image is OCR'd in one pass.
// Returns an empty vector on any validation failure.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, std::string cameraId) {
    // FIX: unique_lock (not lock_guard) so the lock can be released before
    // the per-ROI path re-enters RunInference(crop), which locks the same
    // non-recursive _mutex itself — holding it across that call self-deadlocks.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
        if (Bbox.size() > 0) {
            // Convert grayscale images to 3-channel BGR if needed
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }
            int fWidth = frame.cols;
            int fHeight = frame.rows;

            // Release before recursing; the inner overload takes _mutex itself
            lock.unlock();

            for (const cv::Rect& roi : Bbox) {
                // Clamp the ROI to the image
                int x1 = std::max(0, roi.x);
                int y1 = std::max(0, roi.y);
                int width = std::min(fWidth - x1, roi.width);
                int height = std::min(fHeight - y1, roi.height);
                if (width >= 5 && height >= 5) {
                    // OCR the cropped region independently
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);
                    std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
                    for (size_t i = 0; i < OCRTempObjects.size(); i++) {
                        ANSCENTER::OCRObject detectionObject = OCRTempObjects[i];
                        // Translate crop-local coordinates back to the full image
                        detectionObject.box.x += x1;
                        detectionObject.box.y += y1;
                        detectionObject.box.x = std::max(0, detectionObject.box.x);
                        detectionObject.box.y = std::max(0, detectionObject.box.y);
                        detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width);
                        detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height);
                        detectionObject.cameraId = cameraId;
                        OCRObjects.push_back(detectionObject);
                    }
                }
            }
        }
        else {
            // FIX: this branch used input.clone() directly; convert grayscale
            // to BGR first, consistent with every sibling overload.
            cv::Mat im;
            if (input.channels() == 1) {
                cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
            }
            else {
                im = input.clone();
            }
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);
            for (size_t n = 0; n < res_ocr.size(); n++) { // number of detections
                // FIX: boxes with != 4 corners used to overflow rook_points[4]
                // (loop bound was box.size()); skip them like the sibling overload.
                if (res_ocr[n].box.size() != 4) {
                    this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }
                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; m++) {
                    rook_points[m] =
                        cv::Point(int(res_ocr[n].box[m][0]), int(res_ocr[n].box[m][1]));
                }
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = rook_points[0].x;
                ocrObject.box.y = rook_points[0].y;
                ocrObject.box.width = rook_points[1].x - rook_points[0].x;
                ocrObject.box.height = rook_points[2].y - rook_points[1].y;

                // Clamp the box to the image
                ocrObject.box.x = std::max(0, ocrObject.box.x);
                ocrObject.box.y = std::max(0, ocrObject.box.y);
                ocrObject.box.width = std::min(im.cols - ocrObject.box.x, ocrObject.box.width);
                ocrObject.box.height = std::min(im.rows - ocrObject.box.y, ocrObject.box.height);

                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                std::string extraInformation = "cls label:" +
                    std::to_string(res_ocr[n].cls_label) +
                    ";" +
                    "cls score:" + std::to_string(res_ocr[n].cls_score);
                ocrObject.extraInfo = extraInformation;
                ocrObject.cameraId = cameraId;
                // Add extra information for cls score cls label
                OCRObjects.push_back(ocrObject);
            }
            im.release();
            return OCRObjects;
        }
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
// Destructor: release both detector instances via Destroy(), swallowing
// (and logging) any exception so the destructor never throws.
ANSODOCR::~ANSODOCR() {
    try {
        Destroy();
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", e.what(), __FILE__, __LINE__);
    }
    // FIX: the explicit this->ANSOCRBase::~ANSOCRBase() call was removed.
    // The base-class destructor runs automatically after this body, so
    // calling it manually destroyed the base twice — undefined behavior.
}
|
||||
bool ANSODOCR::Destroy() {
|
||||
try {
|
||||
if (this->_ocrDetector) this->_ocrDetector.reset();
|
||||
if (this->_lpDetector) this->_lpDetector.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSODOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
387
modules/ANSOCR/ANSOnnxOCR.cpp
Normal file
387
modules/ANSOCR/ANSOnnxOCR.cpp
Normal file
@@ -0,0 +1,387 @@
|
||||
#include "ANSOnnxOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
bool ANSONNXOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
|
||||
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try {
|
||||
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
||||
if (!result) return false;
|
||||
|
||||
// Validate detection model
|
||||
if (!FileExist(_modelConfig.detectionModelFile)) {
|
||||
this->_logger.LogFatal("ANSONNXOCR::Initialize", "Invalid detector model file: " + _modelConfig.detectionModelFile, __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Validate recognizer model
|
||||
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
||||
this->_logger.LogFatal("ANSONNXOCR::Initialize", "Invalid recognizer model file: " + _modelConfig.recognizerModelFile, __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Classifier is optional - controlled by useCLS flag and file existence
|
||||
std::string clsModelPath;
|
||||
if (_modelConfig.useCLS) {
|
||||
clsModelPath = _modelConfig.clsModelFile;
|
||||
if (!clsModelPath.empty() && !FileExist(clsModelPath)) {
|
||||
this->_logger.LogWarn("ANSONNXOCR::Initialize", "Classifier model not found, skipping: " + clsModelPath, __FILE__, __LINE__);
|
||||
clsModelPath = ""; // Clear to skip classifier
|
||||
}
|
||||
}
|
||||
else {
|
||||
this->_logger.LogDebug("ANSONNXOCR::Initialize", "Classifier disabled (useCLS=false)", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
try {
|
||||
// Configure engine parameters from modelConfig
|
||||
_engine->SetDetMaxSideLen(_modelConfig.limitSideLen);
|
||||
_engine->SetDetDbThresh(static_cast<float>(_modelConfig.detectionDBThreshold));
|
||||
_engine->SetDetBoxThresh(static_cast<float>(_modelConfig.detectionBoxThreshold));
|
||||
_engine->SetDetUnclipRatio(static_cast<float>(_modelConfig.detectionDBUnclipRatio));
|
||||
_engine->SetClsThresh(static_cast<float>(_modelConfig.clsThreshold));
|
||||
_engine->SetUseDilation(_modelConfig.useDilation);
|
||||
|
||||
_isInitialized = _engine->Initialize(
|
||||
_modelConfig.detectionModelFile,
|
||||
clsModelPath,
|
||||
_modelConfig.recognizerModelFile,
|
||||
_modelConfig.recogizerCharDictionaryPath);
|
||||
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSONNXOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
catch (...) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSONNXOCR::Initialize", "Failed to create ONNX OCR engine", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSONNXOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience overload: runs OCR on a whole image with the default
/// camera identifier "OCRONNXCAM".
/// Images that are empty or smaller than 10x10 pixels yield an empty result.
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input) {
    // Reject unusable frames up front; the engine needs a minimum of context.
    const bool tooSmall = (input.cols < 10) || (input.rows < 10);
    if (input.empty() || tooSmall) {
        return std::vector<ANSCENTER::OCRObject>{};
    }
    // Delegate to the cameraId-aware overload with a default identifier.
    return RunInference(input, "OCRONNXCAM");
}
|
||||
|
||||
/// Runs the full OCR pipeline (detect -> optional classify -> recognize) on a
/// single image and tags every result with the supplied camera identifier.
///
/// @param input    Source image; 1-channel input is converted to BGR first.
/// @param cameraId Identifier copied into each returned OCRObject.
/// @return One OCRObject per accepted text region; empty on any precondition
///         failure (invalid license, uninitialized model, unusable image) or
///         on an engine exception (logged, never propagated).
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    // Serialize engine access; recursive mutex allows overloads to delegate.
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        // Convert grayscale to BGR if necessary
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        if (!_engine) {
            this->_logger.LogFatal("ANSONNXOCR::RunInference", "Engine instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        std::vector<onnxocr::OCRPredictResult> res_ocr = _engine->ocr(im);

        for (size_t n = 0; n < res_ocr.size(); ++n) {
            // Each detection must be a quadrilateral (4 corner points).
            if (res_ocr[n].box.size() != 4) {
                this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Corner points of the detected quad (order as produced by the engine).
            cv::Point rook_points[4];
            for (size_t m = 0; m < 4; ++m) {
                rook_points[m] = cv::Point(
                    static_cast<int>(res_ocr[n].box[m][0]),
                    static_cast<int>(res_ocr[n].box[m][1])
                );
            }

            // Axis-aligned rect from the quad: top-left corner, width from the
            // top edge, height from the right edge; clamped inside the image.
            int x = std::max(0, rook_points[0].x);
            int y = std::max(0, rook_points[0].y);
            int width = rook_points[1].x - rook_points[0].x;
            int height = rook_points[2].y - rook_points[1].y;

            width = std::max(1, std::min(im.cols - x, width));
            height = std::max(1, std::min(im.rows - y, height));

            // A 1-pixel extent means the quad was degenerate or fully clamped.
            if (width <= 1 || height <= 1) {
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = res_ocr[n].cls_label;
            ocrObject.confidence = res_ocr[n].score;
            ocrObject.className = res_ocr[n].text;
            // Preserve classifier details for downstream consumers.
            ocrObject.extraInfo = "cls label: " + std::to_string(res_ocr[n].cls_label)
                + "; cls score: " + std::to_string(res_ocr[n].cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(ocrObject);
        }

        im.release();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSONNXOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return OCRObjects;
}
|
||||
|
||||
/// Runs OCR either inside each supplied region of interest (when Bbox is
/// non-empty) or on the whole frame. ROI results are translated back into
/// full-frame coordinates.
///
/// @param input Source image; 1-channel input is converted to BGR first.
/// @param Bbox  Optional regions to restrict OCR to; empty means whole frame.
/// @return Accepted OCR results; empty on precondition failure or exception.
///
/// Fixes:
///  - Whole-frame branch could emit cv::Rect with negative width/height when
///    the detected quad is skewed (the extents were clamped by min() but never
///    floored at zero). Degenerate boxes are now skipped, consistent with the
///    single-image overload.
///  - Exceptions are caught by const reference.
///  - The duplicated gray->BGR conversion was hoisted out of the two branches.
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        // Normalize to 3-channel BGR once; both branches need it.
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input.clone();
        }
        int fWidth = frame.cols;
        int fHeight = frame.rows;

        if (!Bbox.empty()) {
            // ROI mode: crop each region, run the whole-image overload on the
            // crop, then shift result boxes back into frame coordinates.
            for (auto it = Bbox.begin(); it != Bbox.end(); it++) {
                int x1 = std::max(0, it->x);
                int y1 = std::max(0, it->y);
                int width = std::min(fWidth - x1, it->width);
                int height = std::min(fHeight - y1, it->height);

                // Skip ROIs too small to carry readable text.
                if (x1 >= 0 && y1 >= 0 && width >= 5 && height >= 5) {
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);

                    std::vector<ANSCENTER::OCRObject> tempObjects = RunInference(croppedObject);

                    for (size_t i = 0; i < tempObjects.size(); i++) {
                        ANSCENTER::OCRObject detObj = tempObjects[i];
                        // Translate ROI-local coordinates to frame space, then clamp.
                        detObj.box.x = std::max(0, tempObjects[i].box.x + x1);
                        detObj.box.y = std::max(0, tempObjects[i].box.y + y1);
                        detObj.box.width = std::min(fWidth - detObj.box.x, detObj.box.width);
                        detObj.box.height = std::min(fHeight - detObj.box.y, detObj.box.height);
                        OCRObjects.push_back(detObj);
                    }
                }
            }
        }
        else {
            // Whole-frame mode.
            std::vector<onnxocr::OCRPredictResult> res_ocr = _engine->ocr(frame);
            for (size_t n = 0; n < res_ocr.size(); n++) {
                if (res_ocr[n].box.size() != 4) continue;

                // Quadrilateral corner points as produced by the engine.
                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; m++) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(res_ocr[n].box[m][0]),
                        static_cast<int>(res_ocr[n].box[m][1]));
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = std::max(0, rook_points[0].x);
                ocrObject.box.y = std::max(0, rook_points[0].y);
                ocrObject.box.width = std::min(fWidth - ocrObject.box.x,
                    rook_points[1].x - rook_points[0].x);
                ocrObject.box.height = std::min(fHeight - ocrObject.box.y,
                    rook_points[2].y - rook_points[1].y);

                // Skewed quads can yield non-positive extents; drop them
                // instead of emitting an invalid cv::Rect.
                if (ocrObject.box.width <= 0 || ocrObject.box.height <= 0) continue;

                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                ocrObject.extraInfo = "cls label:" + std::to_string(res_ocr[n].cls_label)
                    + ";cls score:" + std::to_string(res_ocr[n].cls_score);
                OCRObjects.push_back(ocrObject);
            }
            frame.release();
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
/// Runs OCR per-ROI (when Bbox is non-empty) or on the whole frame, tagging
/// every result with the supplied camera identifier. ROI results are
/// translated back into full-frame coordinates.
///
/// Fixes:
///  - The whole-frame branch used `input.clone()` WITHOUT the grayscale->BGR
///    conversion that every other RunInference overload performs, so a
///    1-channel frame reached the engine unconverted only on this path.
///    The conversion is now applied uniformly.
///  - Degenerate boxes (non-positive width/height from skewed quads) are now
///    skipped instead of being emitted as invalid cv::Rect values.
///  - Exceptions are caught by const reference.
std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSONNXOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        // Normalize to 3-channel BGR once for both branches (consistency fix).
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input.clone();
        }
        int fWidth = frame.cols;
        int fHeight = frame.rows;

        if (!Bbox.empty()) {
            // ROI mode: crop, run the single-image overload, shift boxes back.
            for (auto it = Bbox.begin(); it != Bbox.end(); it++) {
                int x1 = std::max(0, it->x);
                int y1 = std::max(0, it->y);
                int width = std::min(fWidth - x1, it->width);
                int height = std::min(fHeight - y1, it->height);

                // Skip ROIs too small to carry readable text.
                if (x1 >= 0 && y1 >= 0 && width >= 5 && height >= 5) {
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);

                    std::vector<ANSCENTER::OCRObject> tempObjects = RunInference(croppedObject);

                    for (size_t i = 0; i < tempObjects.size(); i++) {
                        ANSCENTER::OCRObject detObj = tempObjects[i];
                        // Translate ROI-local coordinates to frame space, then clamp.
                        detObj.box.x = std::max(0, tempObjects[i].box.x + x1);
                        detObj.box.y = std::max(0, tempObjects[i].box.y + y1);
                        detObj.box.width = std::min(fWidth - detObj.box.x, detObj.box.width);
                        detObj.box.height = std::min(fHeight - detObj.box.y, detObj.box.height);
                        detObj.cameraId = cameraId;
                        OCRObjects.push_back(detObj);
                    }
                }
            }
        }
        else {
            // Whole-frame mode.
            std::vector<onnxocr::OCRPredictResult> res_ocr = _engine->ocr(frame);
            for (size_t n = 0; n < res_ocr.size(); n++) {
                if (res_ocr[n].box.size() != 4) continue;

                // Quadrilateral corner points as produced by the engine.
                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; m++) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(res_ocr[n].box[m][0]),
                        static_cast<int>(res_ocr[n].box[m][1]));
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = std::max(0, rook_points[0].x);
                ocrObject.box.y = std::max(0, rook_points[0].y);
                ocrObject.box.width = std::min(fWidth - ocrObject.box.x,
                    rook_points[1].x - rook_points[0].x);
                ocrObject.box.height = std::min(fHeight - ocrObject.box.y,
                    rook_points[2].y - rook_points[1].y);

                // Skewed quads can yield non-positive extents; drop them
                // instead of emitting an invalid cv::Rect.
                if (ocrObject.box.width <= 0 || ocrObject.box.height <= 0) continue;

                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                ocrObject.extraInfo = "cls label:" + std::to_string(res_ocr[n].cls_label)
                    + ";cls score:" + std::to_string(res_ocr[n].cls_score);
                ocrObject.cameraId = cameraId;
                OCRObjects.push_back(ocrObject);
            }
            frame.release();
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSONNXOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
/// Destructor: best-effort cleanup through Destroy().
/// Destructors must never throw, so cleanup failures are logged and swallowed.
ANSONNXOCR::~ANSONNXOCR() {
    try { Destroy(); }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSONNXOCR::~ANSONNXOCR()", e.what(), __FILE__, __LINE__);
    }
}
|
||||
|
||||
bool ANSONNXOCR::Destroy() {
|
||||
try {
|
||||
if (_engine) _engine.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSONNXOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ANSCENTER
|
||||
34
modules/ANSOCR/ANSOnnxOCR.h
Normal file
34
modules/ANSOCR/ANSOnnxOCR.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef ANSOnnxOCR_H
#define ANSOnnxOCR_H
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <vector>
#include <mutex>
#include "ANSOCRBase.h"
#include "ANSONNXOCR/PaddleOCRV5Engine.h"

namespace ANSCENTER {

// OCR implementation backed by the ONNX PaddleOCR-V5 engine.
// Inherits licensing, model-package handling, and configuration from
// ANSOCRBase; overrides initialization and the RunInference family.
class ANSOCR_API ANSONNXOCR : public ANSOCRBase {
public:
    // Validates model files, configures the engine from modelConfig, and
    // loads the detection / (optional) classification / recognition models.
    virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
        const std::string& modelZipFilePath, const std::string& modelZipPassword,
        int engineMode) override;

    // Whole-image OCR with a default camera identifier.
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input) override;
    // Whole-image OCR tagging results with the given camera identifier.
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string& cameraId) override;
    // Region-restricted OCR; results are mapped back to frame coordinates.
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) override;
    // Region-restricted OCR with camera-identifier tagging.
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) override;

    ~ANSONNXOCR();
    bool Destroy() override;

private:
    // Engine is constructed eagerly; Initialize() loads the models into it.
    std::unique_ptr<onnxocr::PaddleOCRV5Engine> _engine = std::make_unique<onnxocr::PaddleOCRV5Engine>();
    // Recursive so RunInference overloads can delegate to each other under lock.
    std::recursive_mutex _mutex;
};
}
#endif
|
||||
15
modules/ANSOCR/ANSPaddleOCR/include/args.h
Normal file
15
modules/ANSOCR/ANSPaddleOCR/include/args.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once

#include <gflags/gflags.h>

// Command-line flag declarations for the PaddleOCR tooling.
// The matching DEFINE_* statements (with defaults and help text) live in the
// corresponding .cpp; names suggest model/dictionary locations for the
// detection, classification, recognition, layout, and table stages —
// confirm exact semantics against the DEFINE_* site.
DECLARE_string(input);
DECLARE_string(type);
DECLARE_string(output_dir);
DECLARE_string(det_model_dir);
DECLARE_string(cls_model_dir);
DECLARE_string(rec_model_dir);
DECLARE_string(lay_model_dir);
DECLARE_string(tab_model_dir);
DECLARE_string(label_dir);
DECLARE_string(layout_dict_dir);
DECLARE_string(table_dict_dir);
||||
425
modules/ANSOCR/ANSPaddleOCR/include/clipper.h
Normal file
425
modules/ANSOCR/ANSPaddleOCR/include/clipper.h
Normal file
@@ -0,0 +1,425 @@
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* Author : Angus Johnson *
|
||||
* Version : 6.4.2 *
|
||||
* Date : 27 February 2017 *
|
||||
* Website : http://www.angusj.com *
|
||||
* Copyright : Angus Johnson 2010-2017 *
|
||||
* *
|
||||
* License: *
|
||||
* Use, modification & distribution is subject to Boost Software License Ver 1. *
|
||||
* http://www.boost.org/LICENSE_1_0.txt *
|
||||
* *
|
||||
* Attributions: *
|
||||
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
|
||||
* "A generic solution to polygon clipping" *
|
||||
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
|
||||
* http://portal.acm.org/citation.cfm?id=129906 *
|
||||
* *
|
||||
* Computer graphics and geometric modeling: implementation and algorithms *
|
||||
* By Max K. Agoston *
|
||||
* Springer; 1 edition (January 4, 2005) *
|
||||
* http://books.google.com/books?q=vatti+clipping+agoston *
|
||||
* *
|
||||
* See also: *
|
||||
* "Polygon Offsetting by Computing Winding Numbers" *
|
||||
* Paper no. DETC2005-85513 pp. 565-575 *
|
||||
* ASME 2005 International Design Engineering Technical Conferences *
|
||||
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
|
||||
* September 24-28, 2005 , Long Beach, California, USA *
|
||||
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef clipper_hpp
|
||||
#define clipper_hpp
|
||||
|
||||
#define CLIPPER_VERSION "6.4.2"
|
||||
|
||||
// use_int32: When enabled 32bit ints are used instead of 64bit ints. This
|
||||
// improve performance but coordinate values are limited to the range +/- 46340
|
||||
//#define use_int32
|
||||
|
||||
// use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
|
||||
//#define use_xyz
|
||||
|
||||
// use_lines: Enables line clipping. Adds a very minor cost to performance.
|
||||
#define use_lines
|
||||
|
||||
// use_deprecated: Enables temporary support for the obsolete functions
|
||||
//#define use_deprecated
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <ostream>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
namespace ClipperLib {
|
||||
|
||||
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
|
||||
enum PolyType { ptSubject, ptClip };
|
||||
// By far the most widely used winding rules for polygon filling are
|
||||
// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
|
||||
// Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
|
||||
// see http://glprogramming.com/red/chapter11.html
|
||||
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
|
||||
|
||||
#ifdef use_int32
|
||||
typedef int cInt;
|
||||
static cInt const loRange = 0x7FFF;
|
||||
static cInt const hiRange = 0x7FFF;
|
||||
#else
|
||||
typedef signed long long cInt;
|
||||
static cInt const loRange = 0x3FFFFFFF;
|
||||
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
|
||||
typedef signed long long long64; // used by Int128 class
|
||||
typedef unsigned long long ulong64;
|
||||
|
||||
#endif
|
||||
|
||||
struct IntPoint {
|
||||
cInt X;
|
||||
cInt Y;
|
||||
#ifdef use_xyz
|
||||
cInt Z;
|
||||
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){};
|
||||
#else
|
||||
IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){};
|
||||
#endif
|
||||
|
||||
friend inline bool operator==(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X == b.X && a.Y == b.Y;
|
||||
}
|
||||
friend inline bool operator!=(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X != b.X || a.Y != b.Y;
|
||||
}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
typedef std::vector<IntPoint> Path;
|
||||
typedef std::vector<Path> Paths;
|
||||
|
||||
inline Path &operator<<(Path &poly, const IntPoint &p) {
|
||||
poly.push_back(p);
|
||||
return poly;
|
||||
}
|
||||
inline Paths &operator<<(Paths &polys, const Path &p) {
|
||||
polys.push_back(p);
|
||||
return polys;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const IntPoint &p);
|
||||
std::ostream &operator<<(std::ostream &s, const Path &p);
|
||||
std::ostream &operator<<(std::ostream &s, const Paths &p);
|
||||
|
||||
struct DoublePoint {
|
||||
double X;
|
||||
double Y;
|
||||
DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
|
||||
DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef use_xyz
|
||||
typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot,
|
||||
IntPoint &e2top, IntPoint &pt);
|
||||
#endif
|
||||
|
||||
enum InitOptions {
|
||||
ioReverseSolution = 1,
|
||||
ioStrictlySimple = 2,
|
||||
ioPreserveCollinear = 4
|
||||
};
|
||||
enum JoinType { jtSquare, jtRound, jtMiter };
|
||||
enum EndType {
|
||||
etClosedPolygon,
|
||||
etClosedLine,
|
||||
etOpenButt,
|
||||
etOpenSquare,
|
||||
etOpenRound
|
||||
};
|
||||
|
||||
class PolyNode;
|
||||
typedef std::vector<PolyNode *> PolyNodes;
|
||||
|
||||
class PolyNode {
|
||||
public:
|
||||
PolyNode();
|
||||
virtual ~PolyNode(){};
|
||||
Path Contour;
|
||||
PolyNodes Childs;
|
||||
PolyNode *Parent;
|
||||
PolyNode *GetNext() const;
|
||||
bool IsHole() const;
|
||||
bool IsOpen() const;
|
||||
int ChildCount() const;
|
||||
|
||||
private:
|
||||
// PolyNode& operator =(PolyNode& other);
|
||||
unsigned Index; // node index in Parent.Childs
|
||||
bool m_IsOpen;
|
||||
JoinType m_jointype;
|
||||
EndType m_endtype;
|
||||
PolyNode *GetNextSiblingUp() const;
|
||||
void AddChild(PolyNode &child);
|
||||
friend class Clipper; // to access Index
|
||||
friend class ClipperOffset;
|
||||
};
|
||||
|
||||
class PolyTree : public PolyNode {
|
||||
public:
|
||||
~PolyTree() { Clear(); };
|
||||
PolyNode *GetFirst() const;
|
||||
void Clear();
|
||||
int Total() const;
|
||||
|
||||
private:
|
||||
// PolyTree& operator =(PolyTree& other);
|
||||
PolyNodes AllNodes;
|
||||
friend class Clipper; // to access AllNodes
|
||||
};
|
||||
|
||||
bool Orientation(const Path &poly);
|
||||
double Area(const Path &poly);
|
||||
int PointInPolygon(const IntPoint &pt, const Path &path);
|
||||
|
||||
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415);
|
||||
void CleanPolygon(Path &poly, double distance = 1.415);
|
||||
void CleanPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
double distance = 1.415);
|
||||
void CleanPolygons(Paths &polys, double distance = 1.415);
|
||||
|
||||
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution);
|
||||
|
||||
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths);
|
||||
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths);
|
||||
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths);
|
||||
|
||||
void ReversePath(Path &p);
|
||||
void ReversePaths(Paths &p);
|
||||
|
||||
struct IntRect {
|
||||
cInt left;
|
||||
cInt top;
|
||||
cInt right;
|
||||
cInt bottom;
|
||||
};
|
||||
|
||||
// enums that are used internally ...
|
||||
enum EdgeSide { esLeft = 1, esRight = 2 };
|
||||
|
||||
// forward declarations (for stuff used internally) ...
|
||||
struct TEdge;
|
||||
struct IntersectNode;
|
||||
struct LocalMinimum;
|
||||
struct OutPt;
|
||||
struct OutRec;
|
||||
struct Join;
|
||||
|
||||
typedef std::vector<OutRec *> PolyOutList;
|
||||
typedef std::vector<TEdge *> EdgeList;
|
||||
typedef std::vector<Join *> JoinList;
|
||||
typedef std::vector<IntersectNode *> IntersectList;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// ClipperBase is the ancestor to the Clipper class. It should not be
|
||||
// instantiated directly. This class simply abstracts the conversion of sets of
|
||||
// polygon coordinates into edge objects that are stored in a LocalMinima list.
|
||||
class ClipperBase {
|
||||
public:
|
||||
ClipperBase();
|
||||
virtual ~ClipperBase();
|
||||
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
|
||||
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
|
||||
virtual void Clear();
|
||||
IntRect GetBounds();
|
||||
bool PreserveCollinear() { return m_PreserveCollinear; };
|
||||
void PreserveCollinear(bool value) { m_PreserveCollinear = value; };
|
||||
|
||||
protected:
|
||||
void DisposeLocalMinimaList();
|
||||
TEdge *AddBoundsToLML(TEdge *e, bool IsClosed);
|
||||
virtual void Reset();
|
||||
TEdge *ProcessBound(TEdge *E, bool IsClockwise);
|
||||
void InsertScanbeam(const cInt Y);
|
||||
bool PopScanbeam(cInt &Y);
|
||||
bool LocalMinimaPending();
|
||||
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
|
||||
OutRec *CreateOutRec();
|
||||
void DisposeAllOutRecs();
|
||||
void DisposeOutRec(PolyOutList::size_type index);
|
||||
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
|
||||
void DeleteFromAEL(TEdge *e);
|
||||
void UpdateEdgeIntoAEL(TEdge *&e);
|
||||
|
||||
typedef std::vector<LocalMinimum> MinimaList;
|
||||
MinimaList::iterator m_CurrentLM;
|
||||
MinimaList m_MinimaList;
|
||||
|
||||
bool m_UseFullRange;
|
||||
EdgeList m_edges;
|
||||
bool m_PreserveCollinear;
|
||||
bool m_HasOpenPaths;
|
||||
PolyOutList m_PolyOuts;
|
||||
TEdge *m_ActiveEdges;
|
||||
|
||||
typedef std::priority_queue<cInt> ScanbeamList;
|
||||
ScanbeamList m_Scanbeam;
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class Clipper : public virtual ClipperBase {
|
||||
public:
|
||||
Clipper(int initOptions = 0);
|
||||
bool Execute(ClipType clipType, Paths &solution,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
bool Execute(ClipType clipType, PolyTree &polytree,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
bool ReverseSolution() { return m_ReverseOutput; };
|
||||
void ReverseSolution(bool value) { m_ReverseOutput = value; };
|
||||
bool StrictlySimple() { return m_StrictSimple; };
|
||||
void StrictlySimple(bool value) { m_StrictSimple = value; };
|
||||
// set the callback function for z value filling on intersections (otherwise Z
|
||||
// is 0)
|
||||
#ifdef use_xyz
|
||||
void ZFillFunction(ZFillCallback zFillFunc);
|
||||
#endif
|
||||
protected:
|
||||
virtual bool ExecuteInternal();
|
||||
|
||||
private:
|
||||
JoinList m_Joins;
|
||||
JoinList m_GhostJoins;
|
||||
IntersectList m_IntersectList;
|
||||
ClipType m_ClipType;
|
||||
typedef std::list<cInt> MaximaList;
|
||||
MaximaList m_Maxima;
|
||||
TEdge *m_SortedEdges;
|
||||
bool m_ExecuteLocked;
|
||||
PolyFillType m_ClipFillType;
|
||||
PolyFillType m_SubjFillType;
|
||||
bool m_ReverseOutput;
|
||||
bool m_UsingPolyTree;
|
||||
bool m_StrictSimple;
|
||||
#ifdef use_xyz
|
||||
ZFillCallback m_ZFill; // custom callback
|
||||
#endif
|
||||
void SetWindingCount(TEdge &edge);
|
||||
bool IsEvenOddFillType(const TEdge &edge) const;
|
||||
bool IsEvenOddAltFillType(const TEdge &edge) const;
|
||||
void InsertLocalMinimaIntoAEL(const cInt botY);
|
||||
void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge);
|
||||
void AddEdgeToSEL(TEdge *edge);
|
||||
bool PopEdgeFromSEL(TEdge *&edge);
|
||||
void CopyAELToSEL();
|
||||
void DeleteFromSEL(TEdge *e);
|
||||
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
|
||||
bool IsContributing(const TEdge &edge) const;
|
||||
bool IsTopHorz(const cInt XPos);
|
||||
void DoMaxima(TEdge *e);
|
||||
void ProcessHorizontals();
|
||||
void ProcessHorizontal(TEdge *horzEdge);
|
||||
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
OutRec *GetOutRec(int idx);
|
||||
void AppendPolygon(TEdge *e1, TEdge *e2);
|
||||
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
|
||||
OutPt *AddOutPt(TEdge *e, const IntPoint &pt);
|
||||
OutPt *GetLastOutPt(TEdge *e);
|
||||
bool ProcessIntersections(const cInt topY);
|
||||
void BuildIntersectList(const cInt topY);
|
||||
void ProcessIntersectList();
|
||||
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
|
||||
void BuildResult(Paths &polys);
|
||||
void BuildResult2(PolyTree &polytree);
|
||||
void SetHoleState(TEdge *e, OutRec *outrec);
|
||||
void DisposeIntersectNodes();
|
||||
bool FixupIntersectionOrder();
|
||||
void FixupOutPolygon(OutRec &outrec);
|
||||
void FixupOutPolyline(OutRec &outrec);
|
||||
bool IsHole(TEdge *e);
|
||||
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
|
||||
void FixHoleLinkage(OutRec &outrec);
|
||||
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
|
||||
void ClearJoins();
|
||||
void ClearGhostJoins();
|
||||
void AddGhostJoin(OutPt *op, const IntPoint offPt);
|
||||
bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2);
|
||||
void JoinCommonEdges();
|
||||
void DoSimplePolygons();
|
||||
void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec);
|
||||
void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec);
|
||||
void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec);
|
||||
#ifdef use_xyz
|
||||
void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2);
|
||||
#endif
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class ClipperOffset {
|
||||
public:
|
||||
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
|
||||
~ClipperOffset();
|
||||
void AddPath(const Path &path, JoinType joinType, EndType endType);
|
||||
void AddPaths(const Paths &paths, JoinType joinType, EndType endType);
|
||||
void Execute(Paths &solution, double delta);
|
||||
void Execute(PolyTree &solution, double delta);
|
||||
void Clear();
|
||||
double MiterLimit;
|
||||
double ArcTolerance;
|
||||
|
||||
private:
|
||||
Paths m_destPolys;
|
||||
Path m_srcPoly;
|
||||
Path m_destPoly;
|
||||
std::vector<DoublePoint> m_normals;
|
||||
double m_delta, m_sinA, m_sin, m_cos;
|
||||
double m_miterLim, m_StepsPerRad;
|
||||
IntPoint m_lowest;
|
||||
PolyNode m_polyNodes;
|
||||
|
||||
void FixOrientations();
|
||||
void DoOffset(double delta);
|
||||
void OffsetPoint(int j, int &k, JoinType jointype);
|
||||
void DoSquare(int j, int k);
|
||||
void DoMiter(int j, int k, double r);
|
||||
void DoRound(int j, int k);
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class clipperException : public std::exception {
|
||||
public:
|
||||
clipperException(const char *description) : m_descr(description) {}
|
||||
virtual ~clipperException() throw() {}
|
||||
virtual const char *what() const throw() { return m_descr.c_str(); }
|
||||
|
||||
private:
|
||||
std::string m_descr;
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
} // ClipperLib namespace
|
||||
|
||||
#endif // clipper_hpp
|
||||
45
modules/ANSOCR/ANSPaddleOCR/include/ocr_cls.h
Normal file
45
modules/ANSOCR/ANSPaddleOCR/include/ocr_cls.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include <include/preprocess_op.h>
|
||||
#include <include/postprocess_op.h>
|
||||
#include <openvino/openvino.hpp>
|
||||
#include <openvino/core/preprocess/pre_post_process.hpp>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class Classifier
|
||||
{
|
||||
public:
|
||||
explicit Classifier(std::string model_path);
|
||||
void Run(std::vector<cv::Mat> img_list, std::vector<OCRPredictResult>& ocr_results);
|
||||
void SetParameters(int cls_batch_num, double cls_thresh);
|
||||
void GetParameters(int& cls_batch_num, double& cls_thresh);
|
||||
private:
|
||||
ov::InferRequest infer_request;
|
||||
std::string model_path;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
ov::CompiledModel compiled_model;
|
||||
std::recursive_mutex _mutex;
|
||||
|
||||
double e = 1.0 / 255.0;
|
||||
std::vector<float> mean_ = { 0.5f, 0.5f, 0.5f };
|
||||
std::vector<float> scale_ = { 0.5f, 0.5f, 0.5f };
|
||||
|
||||
int cls_batch_num_ = 1;
|
||||
double cls_thresh = 0.9;
|
||||
|
||||
std::vector<size_t> cls_image_shape = { 3, 48, 192 };
|
||||
std::string GetOpenVINODevice();
|
||||
// resize
|
||||
ClsResizeImg resize_op_;
|
||||
};
|
||||
}
|
||||
71
modules/ANSOCR/ANSPaddleOCR/include/ocr_det.h
Normal file
71
modules/ANSOCR/ANSPaddleOCR/include/ocr_det.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <include/paddleocr_utility.h>
|
||||
#include <include/preprocess_op.h>
|
||||
#include <include/postprocess_op.h>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
namespace PaddleOCR {
|
||||
class Detector
|
||||
{
|
||||
public:
|
||||
explicit Detector(std::string model_path);
|
||||
void Run(const cv::Mat& src_img, std::vector<OCRPredictResult>& ocr_results);
|
||||
void SetParameters(std::string limit_type,
|
||||
std::string det_db_score_mode,
|
||||
bool is_scale,
|
||||
double det_db_thresh,
|
||||
double det_db_box_thresh,
|
||||
double det_db_unclip_ratio,
|
||||
bool use_dilation);
|
||||
void GetParameters(std::string& limit_type,
|
||||
std::string& det_db_score_mode,
|
||||
bool& is_scale,
|
||||
double& det_db_thresh,
|
||||
double& det_db_box_thresh,
|
||||
double& det_db_unclip_ratio,
|
||||
bool& use_dilation);
|
||||
|
||||
private:
|
||||
ov::InferRequest infer_request;
|
||||
std::string model_path;
|
||||
cv::Mat src_img;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
ov::CompiledModel compiled_model;
|
||||
std::recursive_mutex _mutex;
|
||||
|
||||
float ratio_h{};
|
||||
float ratio_w{};
|
||||
std::vector<float> mean_ = { 0.485f, 0.456f, 0.406f };
|
||||
std::vector<float> scale_ = { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
|
||||
cv::Mat resize_img;
|
||||
double e = 1.0 / 255.0;
|
||||
|
||||
std::string limit_type_ = "max";
|
||||
std::string det_db_score_mode_ = "slow";
|
||||
int limit_side_len_ = 960;
|
||||
bool is_scale_ = true;
|
||||
double det_db_thresh_ = 0.3;
|
||||
double det_db_box_thresh_ = 0.6;
|
||||
double det_db_unclip_ratio_ = 1.5;
|
||||
bool use_dilation_ = false;
|
||||
|
||||
// pre-process
|
||||
ResizeImgType0 resize_op_;
|
||||
Normalize normalize_op_;
|
||||
Permute permute_op_;
|
||||
// post-process
|
||||
DBPostProcessor post_processor_;
|
||||
};
|
||||
}
|
||||
46
modules/ANSOCR/ANSPaddleOCR/include/ocr_rec.h
Normal file
46
modules/ANSOCR/ANSPaddleOCR/include/ocr_rec.h
Normal file
@@ -0,0 +1,46 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <include/paddleocr_utility.h>
|
||||
#include <include/preprocess_op.h>
|
||||
#include <include/postprocess_op.h>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class Recognizer
|
||||
{
|
||||
public:
|
||||
explicit Recognizer(std::string model_path, const std::string& label_path);
|
||||
void Run(const std::vector<cv::Mat>& img_list, std::vector<OCRPredictResult>& ocr_results);
|
||||
void SetParameters(int rec_batch_num);
|
||||
void GetParameters(int& rec_batch_num);
|
||||
private:
|
||||
ov::InferRequest infer_request;
|
||||
std::string model_path;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
ov::CompiledModel compiled_model;
|
||||
std::recursive_mutex _mutex;
|
||||
std::vector<float> mean_ = { 0.5f, 0.5f, 0.5f };
|
||||
std::vector<float> scale_ = { 1 / 0.5f, 1 / 0.5f, 1 / 0.5f };
|
||||
bool is_scale_ = true;
|
||||
std::vector<std::string> label_list_;
|
||||
int rec_img_h_ = 48;
|
||||
int rec_img_w_ = 320;
|
||||
std::vector<int> rec_image_shape_ = { 3, rec_img_h_, rec_img_w_ };
|
||||
int rec_batch_num_ = 1;
|
||||
CrnnResizeImg resize_op_;
|
||||
Normalize normalize_op_;
|
||||
PermuteBatch permute_op_;
|
||||
};
|
||||
}
|
||||
68
modules/ANSOCR/ANSPaddleOCR/include/paddleocr.h
Normal file
68
modules/ANSOCR/ANSPaddleOCR/include/paddleocr.h
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <include/ocr_cls.h>
|
||||
#include <include/ocr_det.h>
|
||||
#include <include/ocr_rec.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class PPOCR {
|
||||
public:
|
||||
explicit PPOCR();
|
||||
~PPOCR();
|
||||
|
||||
std::vector<OCRPredictResult> ocr(const cv::Mat& img);
|
||||
bool Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir);
|
||||
void SetParameters(std::string limit_type,
|
||||
std::string det_db_score_mode,
|
||||
bool is_scale,
|
||||
double det_db_thresh,
|
||||
double det_db_box_thresh,
|
||||
double det_db_unclip_ratio,
|
||||
bool use_dilation,
|
||||
int cls_batch_num,
|
||||
double cls_thresh,
|
||||
int rec_batch_num);
|
||||
void GetParameters(std::string& limit_type,
|
||||
std::string& det_db_score_mode,
|
||||
bool& is_scale,
|
||||
double& det_db_thresh,
|
||||
double& det_db_box_thresh,
|
||||
double& det_db_unclip_ratio,
|
||||
bool& use_dilation,
|
||||
int& cls_batch_num,
|
||||
double& cls_thresh,
|
||||
int& rec_batch_num);
|
||||
protected:
|
||||
std::unique_ptr<Detector> detector_ = nullptr;
|
||||
std::unique_ptr<Classifier> classifier_ = nullptr;
|
||||
std::unique_ptr<Recognizer> recognizer_ = nullptr;
|
||||
std::recursive_mutex _mutex;
|
||||
|
||||
std::string _limit_type;
|
||||
std::string _det_db_score_mode;
|
||||
bool _is_scale;
|
||||
double _det_db_thresh;
|
||||
double _det_db_box_thresh;
|
||||
double _det_db_unclip_ratio;
|
||||
bool _use_dilation;
|
||||
int _cls_batch_num;
|
||||
double _cls_thresh;
|
||||
int _rec_batch_num;
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
||||
110
modules/ANSOCR/ANSPaddleOCR/include/paddleocr_utility.h
Normal file
110
modules/ANSOCR/ANSPaddleOCR/include/paddleocr_utility.h
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <stdlib.h>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
struct OCRPredictResult {
|
||||
std::vector<std::vector<int>> box;
|
||||
std::string text;
|
||||
float score = -1.0;
|
||||
float cls_score;
|
||||
int cls_label = -1;
|
||||
};
|
||||
|
||||
struct StructurePredictResult {
|
||||
std::vector<float> box;
|
||||
std::vector<std::vector<int>> cell_box;
|
||||
std::string type;
|
||||
std::vector<OCRPredictResult> text_res;
|
||||
std::string html;
|
||||
float html_score = -1;
|
||||
float confidence;
|
||||
};
|
||||
|
||||
class Utility {
|
||||
public:
|
||||
static std::vector<std::string> ReadDict(const std::string &path);
|
||||
|
||||
static void VisualizeBboxes(const cv::Mat &srcimg,
|
||||
const std::vector<OCRPredictResult> &ocr_result,
|
||||
const std::string &save_path);
|
||||
|
||||
static void VisualizeBboxes(const cv::Mat &srcimg,
|
||||
const StructurePredictResult &structure_result,
|
||||
const std::string &save_path);
|
||||
|
||||
template <class ForwardIterator>
|
||||
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
|
||||
return std::distance(first, std::max_element(first, last));
|
||||
}
|
||||
|
||||
static void GetAllFiles(const char *dir_name,
|
||||
std::vector<std::string> &all_inputs);
|
||||
|
||||
static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
|
||||
std::vector<std::vector<int>> box);
|
||||
|
||||
static std::vector<int> argsort(const std::vector<float> &array);
|
||||
|
||||
static std::string basename(const std::string &filename);
|
||||
|
||||
static bool PathExists(const std::string &path);
|
||||
|
||||
static void CreateDir(const std::string &path);
|
||||
|
||||
static void print_result(const std::vector<OCRPredictResult> &ocr_result);
|
||||
|
||||
static cv::Mat crop_image(cv::Mat &img, const std::vector<int> &area);
|
||||
static cv::Mat crop_image(cv::Mat &img, const std::vector<float> &area);
|
||||
|
||||
static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);
|
||||
|
||||
static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
|
||||
static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
|
||||
|
||||
static float fast_exp(float x);
|
||||
static std::vector<float>
|
||||
activation_function_softmax(std::vector<float> &src);
|
||||
static float iou(std::vector<int> &box1, std::vector<int> &box2);
|
||||
static float iou(std::vector<float> &box1, std::vector<float> &box2);
|
||||
|
||||
private:
|
||||
static bool comparison_box(const OCRPredictResult &result1,
|
||||
const OCRPredictResult &result2) {
|
||||
if (result1.box[0][1] < result2.box[0][1]) {
|
||||
return true;
|
||||
} else if (result1.box[0][1] == result2.box[0][1]) {
|
||||
return result1.box[0][0] < result2.box[0][0];
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
||||
53
modules/ANSOCR/ANSPaddleOCR/include/paddlestructure.h
Normal file
53
modules/ANSOCR/ANSPaddleOCR/include/paddlestructure.h
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <include/paddleocr.h>
|
||||
#include <include/structure_layout.h>
|
||||
#include <include/structure_table.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class PaddleStructure : public PPOCR {
|
||||
public:
|
||||
explicit PaddleStructure();
|
||||
~PaddleStructure();
|
||||
bool Initialize(std::string layModelDir, std::string layModelDic, std::string tabModelDir, std::string tabModelDic);
|
||||
std::vector<StructurePredictResult> structure(cv::Mat img);
|
||||
|
||||
|
||||
private:
|
||||
Layout *layout_model_ = nullptr;
|
||||
Table *table_model_ = nullptr;
|
||||
|
||||
std::string rebuild_table(std::vector<std::string> rec_html_tags,
|
||||
std::vector<std::vector<int>> rec_boxes,
|
||||
std::vector<OCRPredictResult> &ocr_result);
|
||||
|
||||
float dis(std::vector<int> &box1, std::vector<int> &box2);
|
||||
|
||||
static bool comparison_dis(const std::vector<float> &dis1,
|
||||
const std::vector<float> &dis2) {
|
||||
if (dis1[1] < dis2[1]) {
|
||||
return true;
|
||||
} else if (dis1[1] == dis2[1]) {
|
||||
return dis1[0] < dis2[0];
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
||||
119
modules/ANSOCR/ANSPaddleOCR/include/postprocess_op.h
Normal file
119
modules/ANSOCR/ANSPaddleOCR/include/postprocess_op.h
Normal file
@@ -0,0 +1,119 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "include/clipper.h"
|
||||
#include "include/paddleocr_utility.h"
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class DBPostProcessor {
|
||||
public:
|
||||
void GetContourArea(const std::vector<std::vector<float>> &box,
|
||||
float unclip_ratio, float &distance);
|
||||
|
||||
cv::RotatedRect UnClip(std::vector<std::vector<float>> box,
|
||||
const float &unclip_ratio);
|
||||
|
||||
float **Mat2Vec(cv::Mat mat);
|
||||
|
||||
std::vector<std::vector<int>>
|
||||
OrderPointsClockwise(std::vector<std::vector<int>> pts);
|
||||
|
||||
std::vector<std::vector<float>> GetMiniBoxes(cv::RotatedRect box,
|
||||
float &ssid);
|
||||
|
||||
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
|
||||
float PolygonScoreAcc(std::vector<cv::Point> contour, cv::Mat pred);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
|
||||
const float &box_thresh, const float &det_db_unclip_ratio,
|
||||
const std::string &det_db_score_mode);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
|
||||
float ratio_h, float ratio_w, cv::Mat srcimg);
|
||||
|
||||
private:
|
||||
static bool XsortInt(std::vector<int> a, std::vector<int> b);
|
||||
|
||||
static bool XsortFp32(std::vector<float> a, std::vector<float> b);
|
||||
|
||||
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
|
||||
|
||||
inline int _max(int a, int b) { return a >= b ? a : b; }
|
||||
|
||||
inline int _min(int a, int b) { return a >= b ? b : a; }
|
||||
|
||||
template <class T> inline T clamp(T x, T min, T max) {
|
||||
if (x > max)
|
||||
return max;
|
||||
if (x < min)
|
||||
return min;
|
||||
return x;
|
||||
}
|
||||
|
||||
inline float clampf(float x, float min, float max) {
|
||||
if (x > max)
|
||||
return max;
|
||||
if (x < min)
|
||||
return min;
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
class TablePostProcessor {
|
||||
public:
|
||||
void init(std::string label_path, bool merge_no_span_structure = true);
|
||||
void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
|
||||
std::vector<float> &rec_scores, ov::Shape &loc_preds_shape,
|
||||
ov::Shape &structure_probs_shape,
|
||||
std::vector<std::vector<std::string>> &rec_html_tag_batch,
|
||||
std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
|
||||
std::vector<int> &width_list, std::vector<int> &height_list);
|
||||
|
||||
private:
|
||||
std::vector<std::string> label_list_;
|
||||
std::string end = "eos";
|
||||
std::string beg = "sos";
|
||||
};
|
||||
|
||||
class PicodetPostProcessor {
|
||||
public:
|
||||
void init(std::string label_path, const double score_threshold = 0.4,
|
||||
const double nms_threshold = 0.5,
|
||||
const std::vector<int> &fpn_stride = {8, 16, 32, 64});
|
||||
void Run(std::vector<StructurePredictResult> &results,
|
||||
std::vector<std::vector<float>> outs, std::vector<int> ori_shape,
|
||||
std::vector<int> resize_shape, int eg_max);
|
||||
std::vector<int> fpn_stride_ = {8, 16, 32, 64};
|
||||
|
||||
private:
|
||||
StructurePredictResult disPred2Bbox(std::vector<float> bbox_pred, int label,
|
||||
float score, int x, int y, int stride,
|
||||
std::vector<int> im_shape, int reg_max);
|
||||
void nms(std::vector<StructurePredictResult> &input_boxes,
|
||||
float nms_threshold);
|
||||
|
||||
std::vector<std::string> label_list_;
|
||||
double score_threshold_ = 0.4;
|
||||
double nms_threshold_ = 0.5;
|
||||
int num_class_ = 5;
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
||||
80
modules/ANSOCR/ANSPaddleOCR/include/preprocess_op.h
Normal file
80
modules/ANSOCR/ANSPaddleOCR/include/preprocess_op.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class Normalize {
|
||||
public:
|
||||
virtual void Run(cv::Mat *im, const std::vector<float> &mean,
|
||||
const std::vector<float> &scale, const bool is_scale = true);
|
||||
};
|
||||
|
||||
// RGB -> CHW
|
||||
class Permute {
|
||||
public:
|
||||
virtual void Run(const cv::Mat *im, float *data);
|
||||
};
|
||||
|
||||
class PermuteBatch {
|
||||
public:
|
||||
virtual void Run(const std::vector<cv::Mat> imgs, float *data);
|
||||
};
|
||||
|
||||
class ResizeImgType0 {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
std::string limit_type, int limit_side_len, float &ratio_h,
|
||||
float &ratio_w);
|
||||
};
|
||||
|
||||
class CrnnResizeImg {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
|
||||
const std::vector<int> &rec_image_shape = {3, 32, 320});
|
||||
};
|
||||
|
||||
class ClsResizeImg {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
const std::vector<size_t> &rec_image_shape = {3, 48, 192});
|
||||
};
|
||||
|
||||
class TableResizeImg {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
const int max_len = 488);
|
||||
};
|
||||
|
||||
class TablePadImg {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
const int max_len = 488);
|
||||
};
|
||||
|
||||
class Resize {
|
||||
public:
|
||||
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, const int h,
|
||||
const int w);
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
||||
50
modules/ANSOCR/ANSPaddleOCR/include/structure_layout.h
Normal file
50
modules/ANSOCR/ANSPaddleOCR/include/structure_layout.h
Normal file
@@ -0,0 +1,50 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <include/paddleocr_utility.h>
|
||||
#include <include/preprocess_op.h>
|
||||
#include <include/postprocess_op.h>
|
||||
#include <openvino/openvino.hpp>
|
||||
#include <openvino/core/preprocess/pre_post_process.hpp>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class Layout
|
||||
{
|
||||
public:
|
||||
explicit Layout(std::string model_path, std::string layout_dict_path);
|
||||
void Run(cv::Mat &src_img, std::vector<StructurePredictResult> &structure_result);
|
||||
|
||||
private:
|
||||
|
||||
ov::InferRequest infer_request;
|
||||
std::string model_path;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
ov::CompiledModel compiled_model;
|
||||
|
||||
cv::Mat src_img;
|
||||
cv::Mat resize_img;
|
||||
double e = 1.0 / 255.0;
|
||||
const int layout_img_h_ = 800;
|
||||
const int layout_img_w_ = 608;
|
||||
double layout_nms_threshold = 0.5;
|
||||
double layout_score_threshold = 0.5;
|
||||
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
|
||||
std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
|
||||
|
||||
// resize
|
||||
Resize resize_op_;
|
||||
// post-process
|
||||
PicodetPostProcessor post_processor_;
|
||||
};
|
||||
}
|
||||
56
modules/ANSOCR/ANSPaddleOCR/include/structure_table.h
Normal file
56
modules/ANSOCR/ANSPaddleOCR/include/structure_table.h
Normal file
@@ -0,0 +1,56 @@
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <include/paddleocr_utility.h>
|
||||
#include <include/preprocess_op.h>
|
||||
#include <include/postprocess_op.h>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
class Table
|
||||
{
|
||||
public:
|
||||
explicit Table(std::string model_path, const std::string table_char_dict_path);
|
||||
void Run(std::vector<cv::Mat> img_list,
|
||||
std::vector<std::vector<std::string>> &structure_html_tags,
|
||||
std::vector<float> &structure_scores,
|
||||
std::vector<std::vector<std::vector<int>>> &structure_boxes);
|
||||
|
||||
private:
|
||||
|
||||
ov::InferRequest infer_request;
|
||||
std::string model_path;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
ov::CompiledModel compiled_model;
|
||||
|
||||
cv::Mat src_img;
|
||||
cv::Mat resize_img;
|
||||
const std::string table_char_dict_path;
|
||||
|
||||
int table_batch_num_ = 1;
|
||||
int table_max_len_ = 488;
|
||||
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
|
||||
std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
|
||||
bool is_scale_ = true;
|
||||
|
||||
// pre-process
|
||||
TableResizeImg resize_op_;
|
||||
Normalize normalize_op_;
|
||||
PermuteBatch permute_op_;
|
||||
TablePadImg pad_op_;
|
||||
|
||||
// post-process
|
||||
TablePostProcessor post_processor_;
|
||||
};
|
||||
}
|
||||
17
modules/ANSOCR/ANSPaddleOCR/src/args.cpp
Normal file
17
modules/ANSOCR/ANSPaddleOCR/src/args.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
DEFINE_string(input, "", "Required. Path to image file");
|
||||
DEFINE_string(type, "", "Required. Task type ('ocr' or 'structure')");
|
||||
DEFINE_string(output_dir, "./", "Path to output results.");
|
||||
DEFINE_string(det_model_dir, "", "Path to detection model file");
|
||||
DEFINE_string(cls_model_dir, "", "Path to classification model file");
|
||||
DEFINE_string(rec_model_dir, "", "Path to recognition model file");
|
||||
DEFINE_string(lay_model_dir, "", "Path to layout model file");
|
||||
DEFINE_string(tab_model_dir, "", "Path to table model file");
|
||||
DEFINE_string(label_dir, "", "Required. Path to label file");
|
||||
DEFINE_string(layout_dict_dir,
|
||||
"/home/ethan/PaddleOCR_OpenVINO_CPP/data/layout_publaynet_dict.txt",
|
||||
"Path of dictionary.");
|
||||
DEFINE_string(table_dict_dir,
|
||||
"/home/ethan/PaddleOCR_OpenVINO_CPP/data/table_structure_dict.txt",
|
||||
"Path of dictionary.");
|
||||
4382
modules/ANSOCR/ANSPaddleOCR/src/clipper.cpp
Normal file
4382
modules/ANSOCR/ANSPaddleOCR/src/clipper.cpp
Normal file
File diff suppressed because it is too large
Load Diff
115
modules/ANSOCR/ANSPaddleOCR/src/ocr_cls.cpp
Normal file
115
modules/ANSOCR/ANSPaddleOCR/src/ocr_cls.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
#include "include/ocr_cls.h"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
std::string Classifier::GetOpenVINODevice() {
|
||||
ov::Core core;
|
||||
std::vector<std::string> available_devices = core.get_available_devices();
|
||||
|
||||
// Prioritize devices: NPU > GPU > CPU
|
||||
std::vector<std::string> priority_devices = { "GPU", "CPU" };
|
||||
for (const auto& device : priority_devices) {
|
||||
if (std::find(available_devices.begin(), available_devices.end(), device) != available_devices.end()) {
|
||||
return device; // Return the first available device based on priority
|
||||
}
|
||||
}
|
||||
return "CPU";
|
||||
}
|
||||
Classifier::Classifier(std::string model_path)
|
||||
{
|
||||
this->model_path = model_path;
|
||||
ov::Core core;
|
||||
this->model = core.read_model(this->model_path);
|
||||
// dimension of batch size is dynamic
|
||||
this->model->reshape({ {ov::Dimension(1, 6), cls_image_shape[0], cls_image_shape[1], cls_image_shape[2]} });
|
||||
// preprocessing API
|
||||
ov::preprocess::PrePostProcessor prep(this->model);
|
||||
// declare section of desired application's input format
|
||||
prep.input().tensor()
|
||||
.set_layout("NHWC")
|
||||
.set_color_format(ov::preprocess::ColorFormat::BGR);
|
||||
// specify actual model layout
|
||||
prep.input().model()
|
||||
.set_layout("NCHW");
|
||||
prep.input().preprocess()
|
||||
.mean(this->mean_)
|
||||
.scale(this->scale_);
|
||||
std::string deviceName = GetOpenVINODevice();
|
||||
this->model = prep.build();
|
||||
//core.set_property(deviceName, ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
|
||||
this->compiled_model = core.compile_model(this->model, deviceName);
|
||||
this->infer_request = compiled_model.create_infer_request();
|
||||
}
|
||||
void Classifier::SetParameters(int cls_batch_num, double cls_thresh) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
this->cls_batch_num_ = cls_batch_num;
|
||||
this->cls_thresh = cls_thresh;
|
||||
}
|
||||
void Classifier::GetParameters(int& cls_batch_num, double& cls_thresh) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
cls_batch_num = this->cls_batch_num_;
|
||||
cls_thresh = this->cls_thresh;
|
||||
}
|
||||
void Classifier::Run(std::vector<cv::Mat> img_list, std::vector<OCRPredictResult>& ocr_results)
|
||||
{
|
||||
try {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
std::vector<int> cls_labels(img_list.size(), 0);
|
||||
std::vector<float> cls_scores(img_list.size(), 0);
|
||||
std::vector<double> cls_times;
|
||||
auto input_port = this->compiled_model.input();
|
||||
int img_num = img_list.size();
|
||||
for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->cls_batch_num_) {
|
||||
int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_);
|
||||
size_t batch_num = end_img_no - beg_img_no;
|
||||
|
||||
std::vector<ov::Tensor> batch_tensors;
|
||||
ov::Shape intput_shape = { batch_num, cls_image_shape[1], cls_image_shape[2],3 };
|
||||
for (int ino = beg_img_no; ino < end_img_no; ino++) {
|
||||
cv::Mat srcimg;
|
||||
img_list[ino].copyTo(srcimg);
|
||||
cv::Mat resize_img;
|
||||
// preprocess
|
||||
this->resize_op_.Run(srcimg, resize_img, this->cls_image_shape);
|
||||
resize_img.convertTo(resize_img, CV_32FC3, e);
|
||||
if (resize_img.cols < cls_image_shape[2]) {
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
|
||||
cls_image_shape[2] - resize_img.cols,
|
||||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
|
||||
}
|
||||
// prepare input tensor
|
||||
ov::Tensor input_tensor(input_port.get_element_type(), intput_shape, (float*)resize_img.data);
|
||||
batch_tensors.push_back(input_tensor);
|
||||
}
|
||||
|
||||
// set batched input tensors
|
||||
this->infer_request.set_input_tensors(batch_tensors);
|
||||
// start inference
|
||||
//this->infer_request.start_async();
|
||||
//this->infer_request.wait();
|
||||
this->infer_request.infer();
|
||||
// get output tensor
|
||||
auto output = this->infer_request.get_output_tensor();
|
||||
const float* out_data = output.data<const float>();
|
||||
for (size_t batch_idx = 0; batch_idx < output.get_size() / 2; batch_idx++) {
|
||||
int label = int(
|
||||
Utility::argmax(&out_data[batch_idx * 2],
|
||||
&out_data[(batch_idx + 1) * 2]));
|
||||
float score = float(*std::max_element(
|
||||
&out_data[batch_idx * 2],
|
||||
&out_data[(batch_idx + 1) * 2]));
|
||||
cls_labels[beg_img_no + batch_idx] = label;
|
||||
cls_scores[beg_img_no + batch_idx] = score;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < cls_labels.size(); i++) {
|
||||
ocr_results[i].cls_label = cls_labels[i];
|
||||
ocr_results[i].cls_score = cls_scores[i];
|
||||
}
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
124
modules/ANSOCR/ANSPaddleOCR/src/ocr_det.cpp
Normal file
124
modules/ANSOCR/ANSPaddleOCR/src/ocr_det.cpp
Normal file
@@ -0,0 +1,124 @@
|
||||
#include "include/ocr_det.h"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Loads the DB text-detection model via OpenVINO and prepares a CPU
// inference request. The model input is reshaped to a dynamic spatial
// size (batch fixed at 1) so images of varying dimensions can be fed
// without recompiling the network.
Detector::Detector(std::string model_path)
{
    ov::Core core;
    this->model_path = model_path;
    this->model = core.read_model(this->model_path);
    // N=1, C=3; H in [32, limit_side_len_], W in [1, limit_side_len_].
    this->model->reshape({ 1, 3, ov::Dimension(32, this->limit_side_len_), ov::Dimension(1, this->limit_side_len_) });
    //core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
    this->compiled_model = core.compile_model(this->model, "CPU");
    //this->compiled_model = core.compile_model(this->model, "CPU");
    this->infer_request = this->compiled_model.create_infer_request();
}
|
||||
void Detector::SetParameters(std::string limit_type,
|
||||
std::string det_db_score_mode,
|
||||
bool is_scale,
|
||||
double det_db_thresh,
|
||||
double det_db_box_thresh,
|
||||
double det_db_unclip_ratio,
|
||||
bool use_dilation)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
this->limit_type_ = limit_type;
|
||||
this->det_db_score_mode_ = det_db_score_mode;
|
||||
this->is_scale_ = is_scale;
|
||||
this->det_db_thresh_ = det_db_thresh;
|
||||
this->det_db_box_thresh_ = det_db_box_thresh;
|
||||
this->det_db_unclip_ratio_ = det_db_unclip_ratio;
|
||||
this->use_dilation_ = use_dilation;
|
||||
}
|
||||
void Detector::GetParameters(std::string& limit_type,
|
||||
std::string& det_db_score_mode,
|
||||
bool& is_scale,
|
||||
double& det_db_thresh,
|
||||
double& det_db_box_thresh,
|
||||
double& det_db_unclip_ratio,
|
||||
bool& use_dilation)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
limit_type = this->limit_type_;
|
||||
det_db_score_mode = this->det_db_score_mode_;
|
||||
is_scale = this->is_scale_;
|
||||
det_db_thresh = this->det_db_thresh_;
|
||||
det_db_box_thresh = this->det_db_box_thresh_;
|
||||
det_db_unclip_ratio = this->det_db_unclip_ratio_;
|
||||
use_dilation = this->use_dilation_;
|
||||
}
|
||||
// Runs text detection on src_img: resize + normalize, one OpenVINO
// inference, then DB post-processing. Detected quadrilateral boxes are
// appended to ocr_results and sorted top-to-bottom / left-to-right.
// Exceptions are logged and swallowed; ocr_results is left partially
// filled in that case.
void Detector::Run(const cv::Mat& src_img, std::vector<OCRPredictResult>& ocr_results)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        this->src_img = src_img;
        // Resize under the limit_side_len_ constraint; ratio_h/ratio_w are
        // recorded so boxes can be mapped back to original coordinates.
        this->resize_op_.Run(this->src_img, this->resize_img, this->limit_type_,
                             this->limit_side_len_, this->ratio_h, this->ratio_w);

        this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                                this->is_scale_);

        // HWC -> CHW float buffer for the network.
        std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
        ov::Shape intput_shape = { 1, 3, (size_t)resize_img.rows, (size_t)resize_img.cols };
        this->permute_op_.Run(&resize_img, input.data());

        std::vector<std::vector<std::vector<int>>> boxes;
        auto input_port = this->compiled_model.input();

        // -------- set input --------
        ov::Tensor input_tensor(input_port.get_element_type(), intput_shape, input.data());
        this->infer_request.set_input_tensor(input_tensor);
        // -------- start inference --------

        /* this->infer_request.start_async();
        this->infer_request.wait();*/

        this->infer_request.infer();

        // Output tensor is a per-pixel text-probability map of shape
        // [1, 1, H, W] (spatial dims read from indices 2 and 3 below).
        auto output = this->infer_request.get_output_tensor(0);
        const float* out_data = output.data<const float>();

        ov::Shape output_shape = output.get_shape();
        const size_t n2 = output_shape[2];
        const size_t n3 = output_shape[3];
        const int n = n2 * n3;

        std::vector<float> pred(n, 0.0);
        std::vector<unsigned char> cbuf(n, ' ');

        // Keep a float copy for box scoring and a 0..255 byte copy for
        // thresholding.
        for (int i = 0; i < n; i++) {
            pred[i] = float(out_data[i]);
            cbuf[i] = (unsigned char)((out_data[i]) * 255);
        }

        cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char*)cbuf.data());
        cv::Mat pred_map(n2, n3, CV_32F, (float*)pred.data());

        const double threshold = this->det_db_thresh_ * 255;
        const double maxvalue = 255;
        cv::Mat bit_map;
        cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
        // Optional 2x2 dilation reconnects characters that binarized into
        // disjoint fragments.
        if (this->use_dilation_) {
            cv::Mat dila_ele =
                cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
            cv::dilate(bit_map, bit_map, dila_ele);
        }

        boxes = post_processor_.BoxesFromBitmap(
            pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_,
            this->det_db_score_mode_);

        // Rescale boxes to original-image coordinates and clip to its bounds.
        boxes = post_processor_.FilterTagDetRes(boxes, this->ratio_h, this->ratio_w, this->src_img);
        for (int i = 0; i < boxes.size(); i++) {
            OCRPredictResult res;
            res.box = boxes[i];
            ocr_results.push_back(res);
        }
        // sort boxes from top to bottom, from left to right
        Utility::sorted_boxes(ocr_results);
    }
    catch (const std::exception& e) {
        std::cerr << e.what() << std::endl;
    }
}
|
||||
}
|
||||
130
modules/ANSOCR/ANSPaddleOCR/src/ocr_rec.cpp
Normal file
130
modules/ANSOCR/ANSPaddleOCR/src/ocr_rec.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
#include "include/ocr_rec.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace PaddleOCR {
|
||||
// Loads the text-recognition model via OpenVINO with a dynamic batch
// (1..6) and dynamic sentence width, compiles it for CPU, and loads the
// character dictionary. Index 0 of the dictionary is reserved for the CTC
// blank ("#"); a trailing space entry is appended as the last class.
Recognizer::Recognizer(string model_path, const string& label_path) {
    ov::Core core;
    this->model_path = model_path;
    this->model = core.read_model(this->model_path);
    // reshape the model for dynamic batch size and sentence width
    this->model->reshape({ {ov::Dimension(1, 6), this->rec_image_shape_[0], this->rec_image_shape_[1], -1} });
    //core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
    this->compiled_model = core.compile_model(this->model, "CPU");
    //this->compiled_model = core.compile_model(this->model, "CPU");
    this->infer_request = this->compiled_model.create_infer_request();
    this->label_list_ = Utility::ReadDict(label_path);
    this->label_list_.insert(this->label_list_.begin(),
                             "#"); // blank char for ctc
    this->label_list_.push_back(" ");
}
|
||||
void Recognizer::SetParameters(int rec_batch_num) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
this->rec_batch_num_ = rec_batch_num;
|
||||
|
||||
}
|
||||
void Recognizer::GetParameters(int& rec_batch_num) {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
rec_batch_num = this->rec_batch_num_;
|
||||
|
||||
}
|
||||
// Recognizes the text in each cropped region. Crops are processed in
// batches of rec_batch_num_, sorted by aspect ratio so crops sharing a
// batch need similar padding. The network output is CTC-decoded: the
// blank class (index 0) and repeated indices are dropped, and the mean
// probability of the kept characters becomes the confidence written into
// ocr_results[i].text / .score. Exceptions are logged and swallowed.
void Recognizer::Run(const std::vector<cv::Mat> &img_list, std::vector<OCRPredictResult>& ocr_results) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    try {
        std::vector<std::string> rec_texts(img_list.size(), "");
        std::vector<float> rec_text_scores(img_list.size(), 0);
        int img_num = img_list.size();
        // Sort indices by width/height ratio so similarly shaped crops are
        // batched together (minimizes padding waste).
        std::vector<float> width_list;
        for (int i = 0; i < img_num; i++) {
            width_list.push_back(float(img_list[i].cols) / img_list[i].rows);
        }
        std::vector<int> indices = Utility::argsort(width_list);

        for (int beg_img_no = 0; beg_img_no < img_num;
             beg_img_no += this->rec_batch_num_) {
            int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_);
            size_t batch_num = end_img_no - beg_img_no;
            size_t imgH = this->rec_image_shape_[1];
            size_t imgW = this->rec_image_shape_[2];
            // The widest aspect ratio in the batch determines the common
            // resize width for all crops in it.
            float max_wh_ratio = imgW * 1.0 / imgH;
            for (int ino = beg_img_no; ino < end_img_no; ino++) {
                int h = img_list[indices[ino]].rows;
                int w = img_list[indices[ino]].cols;
                float wh_ratio = w * 1.0 / h;
                max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
            }

            int batch_width = imgW;
            std::vector<cv::Mat> norm_img_batch;
            for (int ino = beg_img_no; ino < end_img_no; ino++) {
                cv::Mat srcimg;
                img_list[indices[ino]].copyTo(srcimg);
                cv::Mat resize_img;
                // preprocess: fixed height, ratio-preserving width, normalize
                this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, this->rec_image_shape_);
                this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                                        this->is_scale_);
                norm_img_batch.push_back(resize_img);
                batch_width = std::max(resize_img.cols, batch_width);
            }
            // prepare input tensor (NCHW, zero-padded up to batch_width)
            std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f);
            ov::Shape intput_shape = { batch_num, 3, imgH, (size_t)batch_width };
            this->permute_op_.Run(norm_img_batch, input.data());
            auto input_port = this->compiled_model.input();
            ov::Tensor input_tensor(input_port.get_element_type(), intput_shape, input.data());
            this->infer_request.set_input_tensor(input_tensor);
            // start inference
            /* this->infer_request.start_async();
            this->infer_request.wait();*/
            this->infer_request.infer();

            // Output shape [batch, time-steps, num-classes] (per the
            // indexing below).
            auto output = this->infer_request.get_output_tensor();
            const float* out_data = output.data<const float>();
            auto predict_shape = output.get_shape();

            // predict_batch is the result of Last FC with softmax
            for (int m = 0; m < predict_shape[0]; m++) {
                std::string str_res;
                int argmax_idx;
                int last_index = 0;
                float score = 0.f;
                int count = 0;
                float max_value = 0.0f;

                for (int n = 0; n < predict_shape[1]; n++) {
                    // get idx: best-scoring class at time-step n
                    argmax_idx = int(Utility::argmax(
                        &out_data[(m * predict_shape[1] + n) * predict_shape[2]],
                        &out_data[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
                    // get score
                    max_value = float(*std::max_element(
                        &out_data[(m * predict_shape[1] + n) * predict_shape[2]],
                        &out_data[(m * predict_shape[1] + n + 1) * predict_shape[2]]));

                    // CTC collapse: keep only non-blank, non-repeated labels.
                    if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
                        score += max_value;
                        count += 1;
                        str_res += this->label_list_[argmax_idx];
                    }
                    last_index = argmax_idx;
                }
                score /= count;
                if (std::isnan(score)) {
                    continue; // no characters decoded for this crop
                }
                // Map batch position back to the caller's original ordering.
                rec_texts[indices[beg_img_no + m]] = str_res;
                rec_text_scores[indices[beg_img_no + m]] = score;
            }
        }
        // copy the recognized text/scores back into the detection results
        for (int i = 0; i < rec_texts.size(); i++) {
            ocr_results[i].text = rec_texts[i];
            ocr_results[i].score = rec_text_scores[i];
        }
    }
    catch (const std::exception& e) {
        std::cerr << e.what() << std::endl;
    }
}
|
||||
}
|
||||
192
modules/ANSOCR/ANSPaddleOCR/src/paddleocr.cpp
Normal file
192
modules/ANSOCR/ANSPaddleOCR/src/paddleocr.cpp
Normal file
@@ -0,0 +1,192 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <include/paddleocr.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Sets the default pipeline parameters. These mirror the PaddleOCR
// reference defaults and can be overridden later via SetParameters().
// (Also removes the stray ';' that followed the function body.)
PPOCR::PPOCR() {
    this->_limit_type = "max";          // resize limit applies to the longer side
    this->_det_db_score_mode = "slow";  // polygon-accurate box scoring
    this->_is_scale = true;             // scale pixels before mean/std normalize
    this->_det_db_thresh = 0.3;         // binarization threshold for the DB map
    this->_det_db_box_thresh = 0.6;     // minimum score for a box to survive
    this->_det_db_unclip_ratio = 1.5;   // detected-box expansion ratio
    this->_use_dilation = false;
    this->_cls_batch_num = 1;
    this->_cls_thresh = 0.9;            // min classifier confidence to rotate a crop
    this->_rec_batch_num = 1;
}
|
||||
|
||||
bool PPOCR::Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir) {
|
||||
this->detector_ = std::make_unique<Detector>(detectionModelDir);
|
||||
if (!classifierModelDir.empty()) {
|
||||
this->classifier_ = std::make_unique<Classifier>(classifierModelDir);
|
||||
}
|
||||
this->recognizer_ = std::make_unique<Recognizer>(recognizerModelDir, labelDir);
|
||||
if (detector_) detector_->SetParameters(_limit_type, _det_db_score_mode, _is_scale, _det_db_thresh, _det_db_box_thresh, _det_db_unclip_ratio, _use_dilation);
|
||||
if (classifier_) classifier_->SetParameters(_cls_batch_num, _cls_thresh);
|
||||
if (recognizer_) recognizer_->SetParameters(_rec_batch_num);
|
||||
return true;
|
||||
}
|
||||
void PPOCR::SetParameters(std::string limit_type,
|
||||
std::string det_db_score_mode,
|
||||
bool is_scale,
|
||||
double det_db_thresh,
|
||||
double det_db_box_thresh,
|
||||
double det_db_unclip_ratio,
|
||||
bool use_dilation,
|
||||
int cls_batch_num,
|
||||
double cls_thresh,
|
||||
int rec_batch_num)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
this->_limit_type = limit_type;
|
||||
this->_det_db_score_mode = det_db_score_mode;
|
||||
this->_is_scale = is_scale;
|
||||
this->_det_db_thresh = det_db_thresh;
|
||||
this->_det_db_box_thresh = det_db_box_thresh;
|
||||
this->_det_db_unclip_ratio = det_db_unclip_ratio;
|
||||
this->_use_dilation = use_dilation;
|
||||
this->_cls_batch_num = cls_batch_num;
|
||||
this->_cls_thresh = cls_thresh;
|
||||
this->_rec_batch_num = rec_batch_num;
|
||||
if (detector_) detector_->SetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
|
||||
if (classifier_) classifier_->SetParameters(cls_batch_num, cls_thresh);
|
||||
if (recognizer_) recognizer_->SetParameters(rec_batch_num);
|
||||
}
|
||||
// Reads the current parameters back from the engines themselves (not the
// cached members), so the values reflect what each engine actually uses.
// Out-arguments belonging to an engine that was never created are left
// untouched.
void PPOCR::GetParameters(std::string& limit_type,
                          std::string& det_db_score_mode,
                          bool& is_scale,
                          double& det_db_thresh,
                          double& det_db_box_thresh,
                          double& det_db_unclip_ratio,
                          bool& use_dilation,
                          int& cls_batch_num,
                          double& cls_thresh,
                          int& rec_batch_num)
{
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    if (detector_) detector_->GetParameters(limit_type, det_db_score_mode, is_scale, det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation);
    if (classifier_) classifier_->GetParameters(cls_batch_num, cls_thresh);
    if (recognizer_) recognizer_->GetParameters(rec_batch_num);
}
|
||||
//std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img)
|
||||
//{
|
||||
// std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
// try {
|
||||
// std::vector<OCRPredictResult> ocr_result;
|
||||
// // detect the sentence in input image
|
||||
// this->detector_->Run(img, ocr_result);
|
||||
// // crop image
|
||||
// std::vector<cv::Mat> img_list;
|
||||
// for (int j = 0; j < ocr_result.size(); j++) {
|
||||
// cv::Mat crop_img;
|
||||
// crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box);
|
||||
// img_list.push_back(crop_img);
|
||||
// }
|
||||
|
||||
// if (this->classifier_ != nullptr) {
|
||||
// // find the reversed sentence and flip it
|
||||
// this->classifier_->Run(img_list, ocr_result);
|
||||
// for (int i = 0; i < img_list.size(); i++) {
|
||||
// if (ocr_result[i].cls_label % 2 == 1 &&
|
||||
// ocr_result[i].cls_score > _cls_thresh) {
|
||||
// cv::rotate(img_list[i], img_list[i], 1);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// // recognize the words in sentence and print them
|
||||
// this->recognizer_->Run(img_list, ocr_result);
|
||||
|
||||
// return ocr_result;
|
||||
// }
|
||||
// catch (const std::exception& e) {
|
||||
// std::cerr << e.what() << std::endl;
|
||||
// return std::vector<OCRPredictResult>();
|
||||
// }
|
||||
//}
|
||||
// Full OCR pipeline: detect text regions, optionally un-rotate flipped
// crops via the angle classifier, then recognize each crop. Always returns
// a (possibly empty) result vector; errors are logged, never thrown.
std::vector<OCRPredictResult> PPOCR::ocr(const cv::Mat& img) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<OCRPredictResult> ocr_result;

    try {
        if (img.empty()) {
            std::cerr << "[PPOCR] Input image is empty!" << std::endl;
            return ocr_result;
        }

        if (!this->detector_ || !this->recognizer_) {
            std::cerr << "[PPOCR] Detector or recognizer not initialized!" << std::endl;
            return ocr_result;
        }

        // Run detector
        this->detector_->Run(img, ocr_result);

        // Crop each detected region
        std::vector<cv::Mat> img_list;
        for (const auto& result : ocr_result) {
            try {
                cv::Mat crop_img = Utility::GetRotateCropImage(img, result.box);
                img_list.push_back(crop_img);
            }
            catch (const std::exception& e) {
                std::cerr << "[PPOCR] Error cropping region: " << e.what() << std::endl;
                img_list.push_back(cv::Mat()); // Push empty mat to preserve indexing
            }
        }

        // Run classifier if available
        if (this->classifier_) {
            try {
                this->classifier_->Run(img_list, ocr_result);
                // Odd cls_label means the crop is upside down; flip when the
                // classifier is confident enough.
                for (size_t i = 0; i < img_list.size() && i < ocr_result.size(); ++i) {
                    if (!img_list[i].empty() &&
                        (ocr_result[i].cls_label % 2 == 1) &&
                        (ocr_result[i].cls_score > _cls_thresh)) {
                        cv::rotate(img_list[i], img_list[i], cv::ROTATE_180); // same as rotate(img, img, 1)
                    }
                }
            }
            catch (const std::exception& e) {
                std::cerr << "[PPOCR] Classifier error: " << e.what() << std::endl;
            }
        }

        // Run recognizer
        try {
            this->recognizer_->Run(img_list, ocr_result);
        }
        catch (const std::exception& e) {
            std::cerr << "[PPOCR] Recognizer error: " << e.what() << std::endl;
        }

    }
    catch (const std::exception& e) {
        std::cerr << "[PPOCR] General exception: " << e.what() << std::endl;
    }
    catch (...) {
        std::cerr << "[PPOCR] Unknown exception occurred!" << std::endl;
    }

    return ocr_result;
}
|
||||
// The engine members are std::unique_ptr and release themselves; the
// previous explicit reset() calls were redundant.
PPOCR::~PPOCR() {
}
|
||||
} // namespace PaddleOCR
|
||||
431
modules/ANSOCR/ANSPaddleOCR/src/paddleocr_utility.cpp
Normal file
431
modules/ANSOCR/ANSPaddleOCR/src/paddleocr_utility.cpp
Normal file
@@ -0,0 +1,431 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <dirent.h>
|
||||
#include <include/paddleocr_utility.h>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#else
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Reads a character dictionary file, one label per line, in file order.
// NOTE(review): a missing file calls exit(1) and terminates the entire
// process — hostile behavior for library code; consider propagating an
// error to the caller instead. Kept as-is since callers currently assume
// a non-empty dictionary on return.
std::vector<std::string> Utility::ReadDict(const std::string& path) {
    std::ifstream in(path);
    std::string line;
    std::vector<std::string> m_vec;
    if (in) {
        while (getline(in, line)) {
            m_vec.push_back(line);
        }
    }
    else {
        std::cout << "no such label file: " << path << ", exit the program..."
                  << std::endl;
        exit(1);
    }
    return m_vec;
}
|
||||
|
||||
// Draws each detected text box as a green quadrilateral and writes the
// visualization to save_path.
//
// Fix: rook_points is a fixed array of 4 corners, but the loop previously
// iterated box.size() times — a malformed box with more than 4 points
// overflowed the stack buffer, and one with fewer drew uninitialized
// points. Both cases are now clamped/skipped.
void Utility::VisualizeBboxes(const cv::Mat& srcimg,
                              const std::vector<OCRPredictResult>& ocr_result,
                              const std::string& save_path) {
    cv::Mat img_vis;
    srcimg.copyTo(img_vis);
    for (int n = 0; n < ocr_result.size(); n++) {
        cv::Point rook_points[4];
        size_t num_pts = ocr_result[n].box.size();
        if (num_pts > 4) num_pts = 4; // never write past the 4-corner buffer
        for (size_t m = 0; m < num_pts; m++) {
            rook_points[m] =
                cv::Point(int(ocr_result[n].box[m][0]), int(ocr_result[n].box[m][1]));
        }
        if (num_pts < 4) continue; // skip degenerate boxes

        const cv::Point* ppt[1] = { rook_points };
        int npt[] = { 4 };
        cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
    }

    cv::imwrite(save_path, img_vis);
    std::cout << "The detection visualized image saved in " + save_path
              << std::endl;
}
|
||||
|
||||
// Draws the cell boxes of a table-structure prediction onto a crop of the
// source image and writes the visualization to save_path. Cells may be
// stored as 8 values (4 corners -> polygon) or 4 values (x1,y1,x2,y2 ->
// rectangle); other sizes are ignored.
void Utility::VisualizeBboxes(const cv::Mat& srcimg,
                              const StructurePredictResult& structure_result,
                              const std::string& save_path) {
    cv::Mat img_vis;
    srcimg.copyTo(img_vis);
    // Visualize only the region covered by the structure result.
    img_vis = crop_image(img_vis, structure_result.box);
    for (int n = 0; n < structure_result.cell_box.size(); n++) {
        if (structure_result.cell_box[n].size() == 8) {
            // 8 values = 4 (x, y) corner pairs -> closed polygon.
            cv::Point rook_points[4];
            for (int m = 0; m < structure_result.cell_box[n].size(); m += 2) {
                rook_points[m / 2] =
                    cv::Point(int(structure_result.cell_box[n][m]),
                              int(structure_result.cell_box[n][m + 1]));
            }
            const cv::Point* ppt[1] = { rook_points };
            int npt[] = { 4 };
            cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
        }
        else if (structure_result.cell_box[n].size() == 4) {
            // 4 values = axis-aligned rectangle (x1, y1, x2, y2).
            cv::Point rook_points[2];
            rook_points[0] = cv::Point(int(structure_result.cell_box[n][0]),
                                       int(structure_result.cell_box[n][1]));
            rook_points[1] = cv::Point(int(structure_result.cell_box[n][2]),
                                       int(structure_result.cell_box[n][3]));
            cv::rectangle(img_vis, rook_points[0], rook_points[1], CV_RGB(0, 255, 0),
                          2, 8, 0);
        }
    }

    cv::imwrite(save_path, img_vis);
    std::cout << "The table visualized image saved in " + save_path << std::endl;
}
|
||||
|
||||
// list all files under a directory
|
||||
void Utility::GetAllFiles(const char* dir_name,
|
||||
std::vector<std::string>& all_inputs) {
|
||||
if (NULL == dir_name) {
|
||||
std::cout << " dir_name is null ! " << std::endl;
|
||||
return;
|
||||
}
|
||||
struct stat s;
|
||||
stat(dir_name, &s);
|
||||
if (!S_ISDIR(s.st_mode)) {
|
||||
std::cout << "dir_name is not a valid directory !" << std::endl;
|
||||
all_inputs.push_back(dir_name);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
struct dirent* filename; // return value for readdir()
|
||||
DIR* dir; // return value for opendir()
|
||||
dir = opendir(dir_name);
|
||||
if (NULL == dir) {
|
||||
std::cout << "Can not open dir " << dir_name << std::endl;
|
||||
return;
|
||||
}
|
||||
std::cout << "Successfully opened the dir !" << std::endl;
|
||||
while ((filename = readdir(dir)) != NULL) {
|
||||
if (strcmp(filename->d_name, ".") == 0 ||
|
||||
strcmp(filename->d_name, "..") == 0)
|
||||
continue;
|
||||
// img_dir + std::string("/") + all_inputs[0];
|
||||
all_inputs.push_back(dir_name + std::string("/") +
|
||||
std::string(filename->d_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts the quadrilateral region described by `box` (4 corner points,
// clockwise from top-left) and warps it into an upright rectangle. Crops
// that come out much taller than wide (ratio >= 1.5) are assumed to be
// vertical text and are rotated 90° to horizontal.
//
// Fix: cv::warpPerspective's 5th parameter is `flags` (interpolation),
// not `borderMode`; the old call passed cv::BORDER_REPLICATE there and
// only worked because BORDER_REPLICATE and INTER_LINEAR share the value 1.
// Both parameters are now passed explicitly in their correct slots.
cv::Mat Utility::GetRotateCropImage(const cv::Mat& srcimage,
                                    std::vector<std::vector<int>> box) {
    cv::Mat image;
    srcimage.copyTo(image);
    std::vector<std::vector<int>> points = box;

    // Axis-aligned bounding box of the quad; crop it first so the
    // perspective warp works on a small image.
    int x_collect[4] = { box[0][0], box[1][0], box[2][0], box[3][0] };
    int y_collect[4] = { box[0][1], box[1][1], box[2][1], box[3][1] };
    int left = int(*std::min_element(x_collect, x_collect + 4));
    int right = int(*std::max_element(x_collect, x_collect + 4));
    int top = int(*std::min_element(y_collect, y_collect + 4));
    int bottom = int(*std::max_element(y_collect, y_collect + 4));

    cv::Mat img_crop;
    image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);

    // Shift the quad corners into crop-local coordinates.
    for (int i = 0; i < points.size(); i++) {
        points[i][0] -= left;
        points[i][1] -= top;
    }

    // Output size from the quad's edge lengths.
    int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
                                  pow(points[0][1] - points[1][1], 2)));
    int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
                                   pow(points[0][1] - points[3][1], 2)));

    cv::Point2f pts_std[4];
    pts_std[0] = cv::Point2f(0., 0.);
    pts_std[1] = cv::Point2f(img_crop_width, 0.);
    pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
    pts_std[3] = cv::Point2f(0.f, img_crop_height);

    cv::Point2f pointsf[4];
    pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
    pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
    pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
    pointsf[3] = cv::Point2f(points[3][0], points[3][1]);

    cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);

    cv::Mat dst_img;
    cv::warpPerspective(img_crop, dst_img, M,
                        cv::Size(img_crop_width, img_crop_height),
                        cv::INTER_LINEAR, cv::BORDER_REPLICATE);

    // Likely vertical text: rotate 90° (transpose + vertical flip).
    if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
        cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
        cv::transpose(dst_img, srcCopy);
        cv::flip(srcCopy, srcCopy, 0);
        return srcCopy;
    }
    else {
        return dst_img;
    }
}
|
||||
|
||||
// Returns the permutation of indices that orders `array` ascending.
std::vector<int> Utility::argsort(const std::vector<float>& array) {
    std::vector<int> order(array.size(), 0);
    for (size_t i = 0; i < order.size(); ++i) {
        order[i] = static_cast<int>(i);
    }

    auto by_value = [&array](int lhs, int rhs) { return array[lhs] < array[rhs]; };
    std::sort(order.begin(), order.end(), by_value);

    return order;
}
|
||||
|
||||
// Returns the last path component of `filename`, accepting both '/' and
// '\\' as separators. A single trailing separator is stripped before the
// component is extracted (e.g. "a/b/" -> "b").
std::string Utility::basename(const std::string& filename) {
    if (filename.empty()) {
        return "";
    }

    auto len = filename.length();
    auto index = filename.find_last_of("/\\");

    // No separator at all: the whole string is the basename.
    if (index == std::string::npos) {
        return filename;
    }

    // Separator is the last character: drop it and search again.
    if (index + 1 >= len) {

        len--;
        index = filename.substr(0, len).find_last_of("/\\");

        // Input was just "/" (or "\\"): return it unchanged.
        if (len == 0) {
            return filename;
        }

        // Separator only at position 0, e.g. "/name/": strip both ends.
        if (index == 0) {
            return filename.substr(1, len - 1);
        }

        // No further separator, e.g. "name/": return without the trailing one.
        if (index == std::string::npos) {
            return filename.substr(0, len);
        }

        // General case with trailing separator, e.g. "a/b/": component
        // between the two separators.
        return filename.substr(index + 1, len - index - 1);
    }

    // General case: everything after the last separator.
    return filename.substr(index + 1, len - index);
}
|
||||
|
||||
// True when `path` names an existing file or directory.
bool Utility::PathExists(const std::string& path) {
#ifdef _WIN32
    struct _stat buffer;
    return _stat(path.c_str(), &buffer) == 0;
#else
    struct stat buffer;
    return stat(path.c_str(), &buffer) == 0;
#endif // !_WIN32
}
|
||||
|
||||
// Creates the directory `path` (single level, not recursive).
// NOTE(review): the mkdir/_mkdir return value is ignored, so failure
// (including "already exists") is silent — confirm callers do not rely
// on an error signal.
void Utility::CreateDir(const std::string& path) {
#ifdef _WIN32
    _mkdir(path.c_str());
#else
    mkdir(path.c_str(), 0777);
#endif // !_WIN32
}
|
||||
|
||||
// Prints each OCR result to stdout, one line per result: the detection
// box (if present), the recognized text and score (if the recognizer ran,
// signalled by score != -1.0), and the classifier label/score (if the
// classifier ran, signalled by cls_label != -1).
void Utility::print_result(const std::vector<OCRPredictResult>& ocr_result) {
    for (int i = 0; i < ocr_result.size(); i++) {
        std::cout << i << "\t";
        // det
        std::vector<std::vector<int>> boxes = ocr_result[i].box;
        if (boxes.size() > 0) {
            std::cout << "det boxes: [";
            for (int n = 0; n < boxes.size(); n++) {
                std::cout << '[' << boxes[n][0] << ',' << boxes[n][1] << "]";
                if (n != boxes.size() - 1) {
                    std::cout << ',';
                }
            }
            std::cout << "] ";
        }
        // rec
        if (ocr_result[i].score != -1.0) {
            std::cout << "rec text: " << ocr_result[i].text
                      << " rec score: " << ocr_result[i].score << " ";
        }

        // cls
        if (ocr_result[i].cls_label != -1) {
            std::cout << "cls label: " << ocr_result[i].cls_label
                      << " cls score: " << ocr_result[i].cls_score;
        }
        std::cout << std::endl;
    }
}
|
||||
|
||||
// Crops `box` = {x1, y1, x2, y2} from img into a new 8-bit 3-channel
// image sized to the full box. Portions of the box outside the image stay
// zero (black padding); the in-bounds window is copied at the matching
// offset, so out-of-range boxes produce a padded crop rather than a crash.
//
// Fix: the output type was the magic constant 16, which is CV_8UC3 —
// now spelled with the named constant.
cv::Mat Utility::crop_image(cv::Mat& img, const std::vector<int>& box) {
    cv::Mat crop_im;
    // Clamp the requested box to the image bounds.
    int crop_x1 = std::max(0, box[0]);
    int crop_y1 = std::max(0, box[1]);
    int crop_x2 = std::min(img.cols - 1, box[2] - 1);
    int crop_y2 = std::min(img.rows - 1, box[3] - 1);

    crop_im = cv::Mat::zeros(box[3] - box[1], box[2] - box[0], CV_8UC3);
    cv::Mat crop_im_window =
        crop_im(cv::Range(crop_y1 - box[1], crop_y2 + 1 - box[1]),
                cv::Range(crop_x1 - box[0], crop_x2 + 1 - box[0]));
    cv::Mat roi_img =
        img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
    crop_im_window += roi_img;
    return crop_im;
}
|
||||
|
||||
// Float overload: truncates the coordinates to int and delegates to the
// integer version.
cv::Mat Utility::crop_image(cv::Mat& img, const std::vector<float>& box) {
    std::vector<int> truncated = { static_cast<int>(box[0]), static_cast<int>(box[1]),
                                   static_cast<int>(box[2]), static_cast<int>(box[3]) };
    return crop_image(img, truncated);
}
|
||||
|
||||
void Utility::sorted_boxes(std::vector<OCRPredictResult>& ocr_result) {
|
||||
std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
|
||||
if (ocr_result.size() > 0) {
|
||||
for (int i = 0; i < ocr_result.size() - 1; i++) {
|
||||
for (int j = i; j > 0; j--) {
|
||||
if (abs(ocr_result[j + 1].box[0][1] - ocr_result[j].box[0][1]) < 10 &&
|
||||
(ocr_result[j + 1].box[0][0] < ocr_result[j].box[0][0])) {
|
||||
std::swap(ocr_result[i], ocr_result[i + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a 4-corner quadrilateral ({{x,y} * 4}) into its axis-aligned
// bounding box {left, top, right, bottom}.
std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<std::vector<int>>& box) {
    int xs[4] = { box[0][0], box[1][0], box[2][0], box[3][0] };
    int ys[4] = { box[0][1], box[1][1], box[2][1], box[3][1] };

    std::vector<int> aabb(4, 0);
    aabb[0] = *std::min_element(xs, xs + 4); // left
    aabb[1] = *std::min_element(ys, ys + 4); // top
    aabb[2] = *std::max_element(xs, xs + 4); // right
    aabb[3] = *std::max_element(ys, ys + 4); // bottom
    return aabb;
}
|
||||
|
||||
// Converts a flat 8-value quadrilateral {x0,y0,...,x3,y3} into its
// axis-aligned bounding box {left, top, right, bottom}.
std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<int>& box) {
    int xs[4] = { box[0], box[2], box[4], box[6] };
    int ys[4] = { box[1], box[3], box[5], box[7] };

    std::vector<int> aabb(4, 0);
    aabb[0] = *std::min_element(xs, xs + 4); // left
    aabb[1] = *std::min_element(ys, ys + 4); // top
    aabb[2] = *std::max_element(xs, xs + 4); // right
    aabb[3] = *std::max_element(ys, ys + 4); // bottom
    return aabb;
}
|
||||
|
||||
// Fast approximate exp(x) using the Schraudolph bit trick: the IEEE-754
// single is synthesized directly through its integer representation.
// Accuracy is only a few percent — suitable for softmax normalization,
// not for general math.
float Utility::fast_exp(float x) {
    union {
        uint32_t i;
        float f;
    } v{};
    // 1.4426950409 = 1/ln(2); 126.93490512 offsets the exponent bias.
    v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
    return v.f;
}
|
||||
|
||||
std::vector<float>
|
||||
Utility::activation_function_softmax(std::vector<float>& src) {
|
||||
int length = src.size();
|
||||
std::vector<float> dst;
|
||||
dst.resize(length);
|
||||
const float alpha = float(*std::max_element(&src[0], &src[0 + length]));
|
||||
float denominator{ 0 };
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
dst[i] = fast_exp(src[i] - alpha);
|
||||
denominator += dst[i];
|
||||
}
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
dst[i] /= denominator;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
// Intersection-over-union of two axis-aligned integer boxes given as
// {x1, y1, x2, y2}. Returns 0 when the boxes do not overlap.
float Utility::iou(std::vector<int>& box1, std::vector<int>& box2) {
    const int area1 = std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]);
    const int area2 = std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]);

    // computing the sum_area
    const int sum_area = area1 + area2;

    // corners of the intersection rectangle
    const int ix1 = std::max(box1[0], box2[0]);
    const int iy1 = std::max(box1[1], box2[1]);
    const int ix2 = std::min(box1[2], box2[2]);
    const int iy2 = std::min(box1[3], box2[3]);

    // empty intersection -> IoU is zero
    if (iy1 >= iy2 || ix1 >= ix2) {
        return 0.0;
    }

    const int intersect = (ix2 - ix1) * (iy2 - iy1);
    // epsilon guards against division by zero on degenerate boxes
    return intersect / (sum_area - intersect + 0.00000001);
}
|
||||
|
||||
// Intersection-over-union of two axis-aligned float boxes in
// [x1, y1, x2, y2] form. Returns 0 when the boxes do not overlap.
float Utility::iou(std::vector<float>& box1, std::vector<float>& box2) {
    const float area1 = std::max((float)0.0, box1[2] - box1[0]) *
        std::max((float)0.0, box1[3] - box1[1]);
    const float area2 = std::max((float)0.0, box2[2] - box2[0]) *
        std::max((float)0.0, box2[3] - box2[1]);
    const float sum_area = area1 + area2;

    // Corners of the overlap rectangle.
    const float ix1 = std::max(box1[0], box2[0]);
    const float iy1 = std::max(box1[1], box2[1]);
    const float ix2 = std::min(box1[2], box2[2]);
    const float iy2 = std::min(box1[3], box2[3]);

    // Empty intersection.
    if (iy1 >= iy2 || ix1 >= ix2) {
        return 0.0;
    }
    const float intersect = (ix2 - ix1) * (iy2 - iy1);
    // Tiny epsilon keeps the division defined for degenerate boxes.
    return intersect / (sum_area - intersect + 0.00000001);
}
|
||||
|
||||
} // namespace PaddleOCR
|
||||
200
modules/ANSOCR/ANSPaddleOCR/src/paddlestructure.cpp
Normal file
200
modules/ANSOCR/ANSPaddleOCR/src/paddlestructure.cpp
Normal file
@@ -0,0 +1,200 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <include/paddlestructure.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Default constructor; the layout and table models are created later in
// Initialize().
PaddleStructure::PaddleStructure() {

};
|
||||
// Creates the layout-analysis and table-recognition models from the given
// model directories and dictionary files. Always returns true.
// NOTE(review): calling Initialize() twice would leak the previously
// allocated models -- confirm callers initialize exactly once.
bool PaddleStructure::Initialize(std::string layModelDir, std::string layModelDic, std::string tabModelDir, std::string tabModelDic) {
    this->layout_model_ = new Layout(layModelDir, layModelDic);
    this->table_model_ = new Table(tabModelDir, tabModelDic);
    return true;
}
|
||||
|
||||
// Runs full document-structure analysis on an image: layout detection first,
// then, per detected region, table recognition (for "table" regions) or the
// plain OCR pipeline. Results are returned with HTML/cell info filled in for
// table regions and text results for everything else.
std::vector<StructurePredictResult> PaddleStructure::structure(cv::Mat src_img) {
    std::vector<StructurePredictResult> structure_results;

    // Layout analysis: detect regions (tables, text blocks, ...) in the image.
    this->layout_model_->Run(src_img, structure_results);
    cv::Mat roi_img;
    for (int i = 0; i < structure_results.size(); i++) {
        // crop image
        roi_img = Utility::crop_image(src_img, structure_results[i].box);
        if (structure_results[i].type == "table") {
            std::vector<std::vector<std::string>> structure_html_tags;
            std::vector<float> structure_scores(1, 0);
            std::vector<std::vector<std::vector<int>>> structure_boxes;
            std::vector<cv::Mat> img_list;

            img_list.push_back(roi_img);
            // Table-structure model: predicts HTML tags, a confidence score,
            // and one cell box per cell tag for every image in img_list.
            this->table_model_->Run(img_list, structure_html_tags, structure_scores, structure_boxes);
            std::vector<OCRPredictResult> ocr_result;
            std::string html;
            // Pad each detected text box slightly before recognition.
            int expand_pixel = 3;

            for (int j = 0; j < img_list.size(); j++) {
                // Text detection inside the table crop.
                this->detector_->Run(img_list[j], ocr_result);
                // crop image
                std::vector<cv::Mat> rec_img_list;
                std::vector<int> ocr_box;
                for (int k = 0; k < ocr_result.size(); k++) {
                    // Convert the 4-point polygon to an axis-aligned box and
                    // pad it by expand_pixel, clamped to the crop bounds.
                    ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[k].box);
                    ocr_box[0] = std::max(0, ocr_box[0] - expand_pixel);
                    // NOTE(review): the trailing ',' chains the next statement
                    // via the comma operator; it works but ';' was likely meant.
                    ocr_box[1] = std::max(0, ocr_box[1] - expand_pixel),
                    ocr_box[2] = std::min(img_list[j].cols, ocr_box[2] + expand_pixel);
                    ocr_box[3] = std::min(img_list[j].rows, ocr_box[3] + expand_pixel);

                    cv::Mat crop_img = Utility::crop_image(img_list[j], ocr_box);
                    rec_img_list.push_back(crop_img);
                }
                // rec: recognize text inside each padded box.
                this->recognizer_->Run(rec_img_list, ocr_result);
                // rebuild table: merge structure tags + recognized text to HTML.
                html = this->rebuild_table(structure_html_tags[j], structure_boxes[j],
                    ocr_result);
                structure_results[i].html = html;
                structure_results[i].cell_box = structure_boxes[j];
                structure_results[i].html_score = structure_scores[j];
            }
        }
        else {
            // Non-table region: run the standard OCR pipeline on the crop.
            structure_results[i].text_res = ocr(roi_img);
        }
    }
    return structure_results;
};
|
||||
|
||||
std::string
|
||||
PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
|
||||
std::vector<std::vector<int>> structure_boxes,
|
||||
std::vector<OCRPredictResult>& ocr_result) {
|
||||
// match text in same cell
|
||||
std::vector<std::vector<std::string>> matched(structure_boxes.size(),
|
||||
std::vector<std::string>());
|
||||
|
||||
std::vector<int> ocr_box;
|
||||
std::vector<int> structure_box;
|
||||
for (int i = 0; i < ocr_result.size(); i++) {
|
||||
ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[i].box);
|
||||
ocr_box[0] -= 1;
|
||||
ocr_box[1] -= 1;
|
||||
ocr_box[2] += 1;
|
||||
ocr_box[3] += 1;
|
||||
std::vector<std::vector<float>> dis_list(structure_boxes.size(),
|
||||
std::vector<float>(3, 100000.0));
|
||||
for (int j = 0; j < structure_boxes.size(); j++) {
|
||||
if (structure_boxes[i].size() == 8) {
|
||||
structure_box = Utility::xyxyxyxy2xyxy(structure_boxes[j]);
|
||||
}
|
||||
else {
|
||||
structure_box = structure_boxes[j];
|
||||
}
|
||||
dis_list[j][0] = this->dis(ocr_box, structure_box);
|
||||
dis_list[j][1] = 1 - Utility::iou(ocr_box, structure_box);
|
||||
dis_list[j][2] = j;
|
||||
}
|
||||
// find min dis idx
|
||||
std::sort(dis_list.begin(), dis_list.end(),
|
||||
PaddleStructure::comparison_dis);
|
||||
matched[dis_list[0][2]].push_back(ocr_result[i].text);
|
||||
}
|
||||
|
||||
// get pred html
|
||||
std::string html_str = "";
|
||||
int td_tag_idx = 0;
|
||||
for (int i = 0; i < structure_html_tags.size(); i++) {
|
||||
if (structure_html_tags[i].find("</td>") != std::string::npos) {
|
||||
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
|
||||
html_str += "<td>";
|
||||
}
|
||||
if (matched[td_tag_idx].size() > 0) {
|
||||
bool b_with = false;
|
||||
if (matched[td_tag_idx][0].find("<b>") != std::string::npos &&
|
||||
matched[td_tag_idx].size() > 1) {
|
||||
b_with = true;
|
||||
html_str += "<b>";
|
||||
}
|
||||
for (int j = 0; j < matched[td_tag_idx].size(); j++) {
|
||||
std::string content = matched[td_tag_idx][j];
|
||||
if (matched[td_tag_idx].size() > 1) {
|
||||
// remove blank, <b> and </b>
|
||||
if (content.length() > 0 && content.at(0) == ' ') {
|
||||
content = content.substr(0);
|
||||
}
|
||||
if (content.length() > 2 && content.substr(0, 3) == "<b>") {
|
||||
content = content.substr(3);
|
||||
}
|
||||
if (content.length() > 4 &&
|
||||
content.substr(content.length() - 4) == "</b>") {
|
||||
content = content.substr(0, content.length() - 4);
|
||||
}
|
||||
if (content.empty()) {
|
||||
continue;
|
||||
}
|
||||
// add blank
|
||||
if (j != matched[td_tag_idx].size() - 1 &&
|
||||
content.at(content.length() - 1) != ' ') {
|
||||
content += ' ';
|
||||
}
|
||||
}
|
||||
html_str += content;
|
||||
}
|
||||
if (b_with) {
|
||||
html_str += "</b>";
|
||||
}
|
||||
}
|
||||
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
|
||||
html_str += "</td>";
|
||||
}
|
||||
else {
|
||||
html_str += structure_html_tags[i];
|
||||
}
|
||||
td_tag_idx += 1;
|
||||
}
|
||||
else {
|
||||
html_str += structure_html_tags[i];
|
||||
}
|
||||
}
|
||||
return html_str;
|
||||
}
|
||||
|
||||
// L1-style distance between two [x1, y1, x2, y2] boxes: the total corner
// distance plus the smaller of the two individual corner distances, so boxes
// sharing one corner rank closer.
float PaddleStructure::dis(std::vector<int>& box1, std::vector<int>& box2) {
    const int dx1 = abs(box2[0] - box1[0]);
    const int dy1 = abs(box2[1] - box1[1]);
    const int dx2 = abs(box2[2] - box1[2]);
    const int dy2 = abs(box2[3] - box1[3]);

    const float total = dx1 + dy1 + dx2 + dy2;
    const float top_left = dx1 + dy1;
    const float bottom_right = dx2 + dy2;
    return total + std::min(top_left, bottom_right);
}
|
||||
|
||||
// Releases the layout and table models created in Initialize().
// (delete on a null pointer is a no-op, so no explicit checks are needed.)
PaddleStructure::~PaddleStructure() {
    delete this->layout_model_;
    delete this->table_model_;
};
|
||||
} // namespace PaddleOCR
|
||||
580
modules/ANSOCR/ANSPaddleOCR/src/postprocess_op.cpp
Normal file
580
modules/ANSOCR/ANSPaddleOCR/src/postprocess_op.cpp
Normal file
@@ -0,0 +1,580 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <include/postprocess_op.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Computes the polygon offset distance used by UnClip:
// distance = area * unclip_ratio / perimeter, with the area from the
// shoelace formula over the quad's four vertices.
// NOTE(review): a degenerate box with zero perimeter would divide by zero --
// assumed not to occur for cv::minAreaRect outputs; confirm upstream.
void DBPostProcessor::GetContourArea(const std::vector<std::vector<float>>& box,
    float unclip_ratio, float& distance) {
    int pts_num = 4;
    float area = 0.0f;
    float dist = 0.0f;
    for (int i = 0; i < pts_num; i++) {
        // Shoelace term for the signed area plus this edge's length.
        area += box[i][0] * box[(i + 1) % pts_num][1] -
            box[i][1] * box[(i + 1) % pts_num][0];
        dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) *
            (box[i][0] - box[(i + 1) % pts_num][0]) +
            (box[i][1] - box[(i + 1) % pts_num][1]) *
            (box[i][1] - box[(i + 1) % pts_num][1]));
    }
    area = fabs(float(area / 2.0));

    distance = area * unclip_ratio / dist;
}
|
||||
|
||||
// Expands ("unclips") a quadrilateral outward by a distance derived from the
// unclip ratio and returns the minimum-area rotated rectangle of the expanded
// polygon. A degenerate result is signalled with a 1x1 rect at the origin.
cv::RotatedRect DBPostProcessor::UnClip(std::vector<std::vector<float>> box,
    const float& unclip_ratio) {
    float distance = 1.0;

    // Offset distance = area * ratio / perimeter.
    GetContourArea(box, unclip_ratio, distance);

    ClipperLib::ClipperOffset offset;
    ClipperLib::Path p;
    p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1]))
        << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1]))
        << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1]))
        << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1]));
    offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);

    ClipperLib::Paths soln;
    offset.Execute(soln, distance);
    std::vector<cv::Point2f> points;

    for (int j = 0; j < soln.size(); j++) {
        // BUG FIX: iterate each path with its own size; the old code used
        // soln.back()'s size for every path and could index out of range when
        // the offset produced paths of different lengths.
        for (int i = 0; i < soln[j].size(); i++) {
            points.emplace_back(soln[j][i].X, soln[j][i].Y);
        }
    }
    cv::RotatedRect res;
    if (points.size() <= 0) {
        // Degenerate polygon: return a minimal rect the caller filters out.
        res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
    }
    else {
        res = cv::minAreaRect(points);
    }
    return res;
}
|
||||
|
||||
// Copies a CV_32F matrix into a freshly heap-allocated 2-D C array.
// The caller owns the returned row pointers and the row array itself.
float** DBPostProcessor::Mat2Vec(cv::Mat mat) {
    auto** rows = new float* [mat.rows];
    for (int r = 0; r < mat.rows; ++r) {
        rows[r] = new float[mat.cols];
        for (int c = 0; c < mat.cols; ++c) {
            rows[r][c] = mat.at<float>(r, c);
        }
    }
    return rows;
}
|
||||
|
||||
std::vector<std::vector<int>>
|
||||
DBPostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
|
||||
std::vector<std::vector<int>> box = pts;
|
||||
std::sort(box.begin(), box.end(), XsortInt);
|
||||
|
||||
std::vector<std::vector<int>> leftmost = { box[0], box[1] };
|
||||
std::vector<std::vector<int>> rightmost = { box[2], box[3] };
|
||||
|
||||
if (leftmost[0][1] > leftmost[1][1])
|
||||
std::swap(leftmost[0], leftmost[1]);
|
||||
|
||||
if (rightmost[0][1] > rightmost[1][1])
|
||||
std::swap(rightmost[0], rightmost[1]);
|
||||
|
||||
std::vector<std::vector<int>> rect = { leftmost[0], rightmost[0], rightmost[1],
|
||||
leftmost[1] };
|
||||
return rect;
|
||||
}
|
||||
|
||||
// Converts a CV_32F matrix into a row-major vector-of-vectors copy.
std::vector<std::vector<float>> DBPostProcessor::Mat2Vector(cv::Mat mat) {
    std::vector<std::vector<float>> rows;
    rows.reserve(mat.rows);
    for (int r = 0; r < mat.rows; ++r) {
        std::vector<float> row(mat.cols);
        for (int c = 0; c < mat.cols; ++c) {
            row[c] = mat.at<float>(r, c);
        }
        rows.push_back(std::move(row));
    }
    return rows;
}
|
||||
|
||||
// Strict-weak ordering on the x coordinate (element 0) only; equal x values
// compare as equivalent. Used to sort box corners left-to-right.
bool DBPostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
    return a[0] < b[0];
}
|
||||
|
||||
// Strict-weak ordering on the x coordinate (element 0) only; equal x values
// compare as equivalent. Integer counterpart of XsortFp32.
bool DBPostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
    return a[0] < b[0];
}
|
||||
|
||||
// Extracts the four corners of a rotated rect ordered as
// {top-left, top-right, bottom-right, bottom-left}; ssid receives the rect's
// longer side (used by callers as a minimum-size filter).
std::vector<std::vector<float>>
DBPostProcessor::GetMiniBoxes(cv::RotatedRect box, float& ssid) {
    ssid = std::max(box.size.width, box.size.height);

    cv::Mat points;
    cv::boxPoints(box, points);

    // Sort the four corners left-to-right by x.
    auto array = Mat2Vector(points);
    std::sort(array.begin(), array.end(), XsortFp32);

    std::vector<float> idx1 = array[0], idx2 = array[1], idx3 = array[2],
        idx4 = array[3];
    // Order the right pair (array[2], array[3]) by y: smaller y is top-right.
    if (array[3][1] <= array[2][1]) {
        idx2 = array[3];
        idx3 = array[2];
    }
    else {
        idx2 = array[2];
        idx3 = array[3];
    }
    // Order the left pair (array[0], array[1]) by y: smaller y is top-left.
    if (array[1][1] <= array[0][1]) {
        idx1 = array[1];
        idx4 = array[0];
    }
    else {
        idx1 = array[0];
        idx4 = array[1];
    }

    array[0] = idx1;
    array[1] = idx2;
    array[2] = idx3;
    array[3] = idx4;

    return array;
}
|
||||
|
||||
// Mean of `pred` inside the polygon `contour` -- the "slow"/accurate DB box
// score: rasterizes the polygon into a mask over its bounding box and
// averages pred there.
float DBPostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
    cv::Mat pred) {
    int width = pred.cols;
    int height = pred.rows;
    std::vector<float> box_x;
    std::vector<float> box_y;
    for (int i = 0; i < contour.size(); ++i) {
        box_x.push_back(contour[i].x);
        box_y.push_back(contour[i].y);
    }

    // Axis-aligned bounding box of the polygon, clamped into the image.
    int xmin =
        clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0,
            width - 1);
    int xmax =
        clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0,
            width - 1);
    int ymin =
        clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0,
            height - 1);
    int ymax =
        clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0,
            height - 1);

    cv::Mat mask;
    mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);

    // Polygon vertices shifted to the bounding-box origin.
    // RAII FIX: std::vector replaces raw new[]/delete[], so the buffer is
    // released even if an OpenCV call below throws.
    std::vector<cv::Point> rook_point(contour.size());
    for (int i = 0; i < contour.size(); ++i) {
        rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
    }
    const cv::Point* ppt[1] = { rook_point.data() };
    int npt[] = { int(contour.size()) };

    cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));

    cv::Mat croppedImg;
    pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
        .copyTo(croppedImg);
    float score = cv::mean(croppedImg, mask)[0];

    return score;
}
|
||||
|
||||
float DBPostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
|
||||
cv::Mat pred) {
|
||||
auto array = box_array;
|
||||
int width = pred.cols;
|
||||
int height = pred.rows;
|
||||
|
||||
float box_x[4] = { array[0][0], array[1][0], array[2][0], array[3][0] };
|
||||
float box_y[4] = { array[0][1], array[1][1], array[2][1], array[3][1] };
|
||||
|
||||
int xmin = clamp(int(std::floor(*(std::min_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int xmax = clamp(int(std::ceil(*(std::max_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int ymin = clamp(int(std::floor(*(std::min_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
int ymax = clamp(int(std::ceil(*(std::max_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
|
||||
cv::Mat mask;
|
||||
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
|
||||
|
||||
cv::Point root_point[4];
|
||||
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
|
||||
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
|
||||
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
|
||||
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
|
||||
const cv::Point* ppt[1] = { root_point };
|
||||
int npt[] = { 4 };
|
||||
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
|
||||
|
||||
cv::Mat croppedImg;
|
||||
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
|
||||
.copyTo(croppedImg);
|
||||
|
||||
auto score = cv::mean(croppedImg, mask)[0];
|
||||
return score;
|
||||
}
|
||||
|
||||
// Extracts candidate text boxes from a binarized DB probability map:
// finds contours in `bitmap`, scores each against `pred` (fast or slow mode),
// expands survivors with UnClip, and maps the resulting quads into pred's
// coordinate space, clamped and rounded to integers.
std::vector<std::vector<std::vector<int>>> DBPostProcessor::BoxesFromBitmap(
    const cv::Mat pred, const cv::Mat bitmap, const float& box_thresh,
    const float& det_db_unclip_ratio, const std::string& det_db_score_mode) {
    const int min_size = 3;          // minimum short side of a candidate box
    const int max_candidates = 1000; // cap on contours processed

    int width = bitmap.cols;
    int height = bitmap.rows;

    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;

    cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
        cv::CHAIN_APPROX_SIMPLE);

    int num_contours =
        contours.size() >= max_candidates ? max_candidates : contours.size();

    std::vector<std::vector<std::vector<int>>> boxes;

    for (int _i = 0; _i < num_contours; _i++) {
        // A polygon needs at least three points.
        if (contours[_i].size() <= 2) {
            continue;
        }
        float ssid;
        cv::RotatedRect box = cv::minAreaRect(contours[_i]);
        auto array = GetMiniBoxes(box, ssid);

        auto box_for_unclip = array;
        // end get_mini_box

        // Reject boxes whose longer side is already below the minimum.
        if (ssid < min_size) {
            continue;
        }

        float score;
        if (det_db_score_mode == "slow")
            /* compute using polygon*/
            score = PolygonScoreAcc(contours[_i], pred);
        else
            score = BoxScoreFast(array, pred);

        // Drop low-confidence candidates.
        if (score < box_thresh)
            continue;

        // start for unclip: expand the box outward by the unclip ratio.
        cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio);
        // UnClip signals a degenerate polygon with a ~1x1 rect.
        if (points.size.height < 1.001 && points.size.width < 1.001) {
            continue;
        }
        // end for unclip

        cv::RotatedRect clipbox = points;
        auto cliparray = GetMiniBoxes(clipbox, ssid);

        if (ssid < min_size + 2)
            continue;

        // Rescale from bitmap coordinates to pred coordinates, clamped.
        int dest_width = pred.cols;
        int dest_height = pred.rows;
        std::vector<std::vector<int>> intcliparray;

        for (int num_pt = 0; num_pt < 4; num_pt++) {
            std::vector<int> a{ int(clampf(roundf(cliparray[num_pt][0] / float(width) *
                                  float(dest_width)),
                                  0, float(dest_width))),
                                int(clampf(roundf(cliparray[num_pt][1] /
                                  float(height) * float(dest_height)),
                                  0, float(dest_height))) };
            intcliparray.push_back(a);
        }
        boxes.push_back(intcliparray);

    } // end for
    return boxes;
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
|
||||
std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
|
||||
float ratio_w, cv::Mat srcimg) {
|
||||
int oriimg_h = srcimg.rows;
|
||||
int oriimg_w = srcimg.cols;
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> root_points;
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
boxes[n] = OrderPointsClockwise(boxes[n]);
|
||||
for (int m = 0; m < boxes[0].size(); m++) {
|
||||
boxes[n][m][0] /= ratio_w;
|
||||
boxes[n][m][1] /= ratio_h;
|
||||
|
||||
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
|
||||
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
int rect_width, rect_height;
|
||||
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
|
||||
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
|
||||
if (rect_width <= 4 || rect_height <= 4)
|
||||
continue;
|
||||
root_points.push_back(boxes[n]);
|
||||
}
|
||||
return root_points;
|
||||
}
|
||||
|
||||
// Loads the structure-token dictionary. When merge_no_span_structure is set,
// the standalone "<td>" token is replaced by the merged "<td></td>" token.
// Begin/end sentinel tokens are then added around the dictionary.
void TablePostProcessor::init(std::string label_path,
    bool merge_no_span_structure) {
    this->label_list_ = Utility::ReadDict(label_path);
    if (merge_no_span_structure) {
        this->label_list_.push_back("<td></td>");
        // Erase-remove: drop every plain "<td>" entry.
        this->label_list_.erase(
            std::remove(this->label_list_.begin(), this->label_list_.end(),
                std::string("<td>")),
            this->label_list_.end());
    }
    // add_special_char: begin sentinel first, end sentinel last.
    this->label_list_.insert(this->label_list_.begin(), this->beg);
    this->label_list_.push_back(this->end);
}
|
||||
|
||||
// Decodes one batch of table-structure model outputs.
//
// structure_probs: per-step token probabilities (batch x steps x vocab).
// loc_preds:       per-step normalized cell-box coordinates.
// Produces one tag sequence, one averaged confidence, and one box list per
// batch image; width_list/height_list give each source image's size for
// denormalizing the box coordinates.
void TablePostProcessor::Run(
    std::vector<float>& loc_preds, std::vector<float>& structure_probs,
    std::vector<float>& rec_scores, ov::Shape& loc_preds_shape,
    ov::Shape& structure_probs_shape,
    std::vector<std::vector<std::string>>& rec_html_tag_batch,
    std::vector<std::vector<std::vector<int>>>& rec_boxes_batch,
    std::vector<int>& width_list, std::vector<int>& height_list) {
    for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
        // image tags and boxs
        std::vector<std::string> rec_html_tags;
        std::vector<std::vector<int>> rec_boxes;

        float score = 0.f;   // sum of per-step confidences (averaged below)
        int count = 0;       // number of emitted tags
        float char_score = 0.f;
        int char_idx = 0;

        // step: greedy argmax decode until the end sentinel.
        for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
            std::string html_tag;
            std::vector<int> rec_box;
            // html tag: argmax over the vocabulary at this step.
            int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                structure_probs_shape[2];
            char_idx = int(Utility::argmax(
                &structure_probs[step_start_idx],
                &structure_probs[step_start_idx + structure_probs_shape[2]]));
            char_score = float(*std::max_element(
                &structure_probs[step_start_idx],
                &structure_probs[step_start_idx + structure_probs_shape[2]]));
            html_tag = this->label_list_[char_idx];

            // Stop at the end sentinel (after the first step); skip the begin
            // sentinel entirely.
            if (step_idx > 0 && html_tag == this->end) {
                break;
            }
            if (html_tag == this->beg) {
                continue;
            }
            count += 1;
            score += char_score;
            rec_html_tags.push_back(html_tag);

            // box: cell-producing tags also carry box coordinates.
            if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") {
                for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
                    step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                        loc_preds_shape[2] +
                        point_idx;
                    float point = loc_preds[step_start_idx];
                    // Even indices are x (scaled by image width), odd are y
                    // (scaled by image height).
                    if (point_idx % 2 == 0) {
                        point = int(point * width_list[batch_idx]);
                    }
                    else {
                        point = int(point * height_list[batch_idx]);
                    }
                    rec_box.push_back(point);
                }
                rec_boxes.push_back(rec_box);
            }
        }
        // Average confidence; -1 flags an empty or invalid decode.
        score /= count;
        if (std::isnan(score) || rec_boxes.size() == 0) {
            score = -1;
        }
        rec_scores.push_back(score);
        rec_boxes_batch.push_back(rec_boxes);
        rec_html_tag_batch.push_back(rec_html_tags);
    }
}
|
||||
|
||||
void PicodetPostProcessor::init(std::string label_path,
|
||||
const double score_threshold,
|
||||
const double nms_threshold,
|
||||
const std::vector<int>& fpn_stride) {
|
||||
this->label_list_ = Utility::ReadDict(label_path);
|
||||
this->score_threshold_ = score_threshold;
|
||||
this->nms_threshold_ = nms_threshold;
|
||||
this->num_class_ = label_list_.size();
|
||||
this->fpn_stride_ = fpn_stride;
|
||||
}
|
||||
|
||||
void PicodetPostProcessor::Run(std::vector<StructurePredictResult>& results,
|
||||
std::vector<std::vector<float>> outs,
|
||||
std::vector<int> ori_shape,
|
||||
std::vector<int> resize_shape, int reg_max) {
|
||||
int in_h = resize_shape[0];
|
||||
int in_w = resize_shape[1];
|
||||
float scale_factor_h = resize_shape[0] / float(ori_shape[0]);
|
||||
float scale_factor_w = resize_shape[1] / float(ori_shape[1]);
|
||||
|
||||
std::vector<std::vector<StructurePredictResult>> bbox_results;
|
||||
bbox_results.resize(this->num_class_);
|
||||
for (int i = 0; i < this->fpn_stride_.size(); ++i) {
|
||||
int feature_h = std::ceil((float)in_h / this->fpn_stride_[i]);
|
||||
int feature_w = std::ceil((float)in_w / this->fpn_stride_[i]);
|
||||
for (int idx = 0; idx < feature_h * feature_w; idx++) {
|
||||
// score and label
|
||||
float score = 0;
|
||||
int cur_label = 0;
|
||||
for (int label = 0; label < this->num_class_; label++) {
|
||||
if (outs[i][idx * this->num_class_ + label] > score) {
|
||||
score = outs[i][idx * this->num_class_ + label];
|
||||
cur_label = label;
|
||||
}
|
||||
}
|
||||
// bbox
|
||||
if (score > this->score_threshold_) {
|
||||
int row = idx / feature_w;
|
||||
int col = idx % feature_w;
|
||||
std::vector<float> bbox_pred(
|
||||
outs[i + this->fpn_stride_.size()].begin() + idx * 4 * reg_max,
|
||||
outs[i + this->fpn_stride_.size()].begin() +
|
||||
(idx + 1) * 4 * reg_max);
|
||||
bbox_results[cur_label].push_back(
|
||||
this->disPred2Bbox(bbox_pred, cur_label, score, col, row,
|
||||
this->fpn_stride_[i], resize_shape, reg_max));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < bbox_results.size(); i++) {
|
||||
bool flag = bbox_results[i].size() <= 0;
|
||||
}
|
||||
for (int i = 0; i < bbox_results.size(); i++) {
|
||||
bool flag = bbox_results[i].size() <= 0;
|
||||
if (bbox_results[i].size() <= 0) {
|
||||
continue;
|
||||
}
|
||||
this->nms(bbox_results[i], this->nms_threshold_);
|
||||
for (auto box : bbox_results[i]) {
|
||||
box.box[0] = box.box[0] / scale_factor_w;
|
||||
box.box[2] = box.box[2] / scale_factor_w;
|
||||
box.box[1] = box.box[1] / scale_factor_h;
|
||||
box.box[3] = box.box[3] / scale_factor_h;
|
||||
results.push_back(box);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Converts per-side distance distributions (distribution focal loss form,
// reg_max bins per side) into an absolute [xmin, ymin, xmax, ymax] box for
// the feature cell at (x, y) on the given stride.
StructurePredictResult
PicodetPostProcessor::disPred2Bbox(std::vector<float> bbox_pred, int label,
    float score, int x, int y, int stride,
    std::vector<int> im_shape, int reg_max) {
    // Cell center in (resized) input-image pixels.
    const float ct_x = (x + 0.5) * stride;
    const float ct_y = (y + 0.5) * stride;

    std::vector<float> dis_pred(4);
    for (int side = 0; side < 4; side++) {
        // Expected value of the softmax-normalized bin distribution.
        std::vector<float> bins(bbox_pred.begin() + side * reg_max,
            bbox_pred.begin() + (side + 1) * reg_max);
        std::vector<float> probs =
            Utility::activation_function_softmax(bins);
        float expected = 0;
        for (int b = 0; b < reg_max; b++) {
            expected += b * probs[b];
        }
        expected *= stride;
        dis_pred[side] = expected;
    }

    // Clamp the decoded box into the (resized) image.
    const float xmin = (std::max)(ct_x - dis_pred[0], .0f);
    const float ymin = (std::max)(ct_y - dis_pred[1], .0f);
    const float xmax = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]);
    const float ymax = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]);

    StructurePredictResult result_item;
    result_item.box = { xmin, ymin, xmax, ymax };
    result_item.type = this->label_list_[label];
    result_item.confidence = score;

    return result_item;
}
|
||||
|
||||
void PicodetPostProcessor::nms(std::vector<StructurePredictResult>& input_boxes,
|
||||
float nms_threshold) {
|
||||
std::sort(input_boxes.begin(), input_boxes.end(),
|
||||
[](StructurePredictResult a, StructurePredictResult b) {
|
||||
return a.confidence > b.confidence;
|
||||
});
|
||||
std::vector<int> picked(input_boxes.size(), 1);
|
||||
|
||||
for (int i = 0; i < input_boxes.size(); ++i) {
|
||||
if (picked[i] == 0) {
|
||||
continue;
|
||||
}
|
||||
for (int j = i + 1; j < input_boxes.size(); ++j) {
|
||||
if (picked[j] == 0) {
|
||||
continue;
|
||||
}
|
||||
float iou = Utility::iou(input_boxes[i].box, input_boxes[j].box);
|
||||
if (iou > nms_threshold) {
|
||||
picked[j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<StructurePredictResult> input_boxes_nms;
|
||||
for (int i = 0; i < input_boxes.size(); ++i) {
|
||||
if (picked[i] == 1) {
|
||||
input_boxes_nms.push_back(input_boxes[i]);
|
||||
}
|
||||
}
|
||||
input_boxes = input_boxes_nms;
|
||||
}
|
||||
|
||||
} // namespace PaddleOCR
|
||||
165
modules/ANSOCR/ANSPaddleOCR/src/preprocess_op.cpp
Normal file
165
modules/ANSOCR/ANSPaddleOCR/src/preprocess_op.cpp
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <include/preprocess_op.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// HWC -> CHW: writes each channel of `im` as one contiguous plane into
// `data`, which must hold rows * cols * channels floats.
void Permute::Run(const cv::Mat* im, float* data) {
    const int rows = im->rows;
    const int cols = im->cols;
    const int plane = rows * cols;
    for (int c = 0; c < im->channels(); ++c) {
        cv::extractChannel(*im, cv::Mat(rows, cols, CV_32FC1, data + c * plane), c);
    }
}
|
||||
|
||||
// Batched HWC -> NCHW: copies every image's channels into one contiguous
// float buffer, image after image, channel plane after channel plane.
void PermuteBatch::Run(const std::vector<cv::Mat> imgs, float* data) {
    for (int n = 0; n < imgs.size(); n++) {
        const int rows = imgs[n].rows;
        const int cols = imgs[n].cols;
        const int chans = imgs[n].channels();
        for (int c = 0; c < chans; ++c) {
            float* dst = data + (n * chans + c) * rows * cols;
            cv::extractChannel(imgs[n], cv::Mat(rows, cols, CV_32FC1, dst), c);
        }
    }
}
|
||||
|
||||
// Per-channel normalization in place: out = (in * e - mean) * scale, where
// e = 1/255 when is_scale is set. `im` is converted to CV_32FC3.
void Normalize::Run(cv::Mat* im, const std::vector<float>& mean,
    const std::vector<float>& scale, const bool is_scale) {
    double e = 1.0;
    if (is_scale) {
        e /= 255.0;
    }
    im->convertTo(*im, CV_32FC3, e);

    std::vector<cv::Mat> channels(3);
    cv::split(*im, channels);
    for (size_t c = 0; c < channels.size(); ++c) {
        // convertTo computes x * alpha + beta, i.e. (x - mean[c]) * scale[c].
        channels[c].convertTo(channels[c], CV_32FC1, 1.0 * scale[c],
            (0.0 - mean[c]) * scale[c]);
    }
    cv::merge(channels, *im);
}
|
||||
|
||||
// Resizes `img` for text detection so that its min (limit_type == "min") or
// max side respects limit_side_len, then snaps both sides to multiples of 32
// (minimum 32). Outputs the actual h/w ratios for mapping boxes back.
void ResizeImgType0::Run(const cv::Mat& img, cv::Mat& resize_img,
    std::string limit_type, int limit_side_len,
    float& ratio_h, float& ratio_w) {
    const int w = img.cols;
    const int h = img.rows;

    float ratio = 1.f;
    if (limit_type == "min") {
        // Upscale so the shorter side reaches limit_side_len.
        if (std::min(h, w) < limit_side_len) {
            ratio = (h < w) ? float(limit_side_len) / float(h)
                            : float(limit_side_len) / float(w);
        }
    }
    else {
        // Downscale so the longer side fits within limit_side_len.
        if (std::max(h, w) > limit_side_len) {
            ratio = (h > w) ? float(limit_side_len) / float(h)
                            : float(limit_side_len) / float(w);
        }
    }

    int resize_h = int(float(h) * ratio);
    int resize_w = int(float(w) * ratio);

    // Round each side to the nearest multiple of 32 (network stride), >= 32.
    resize_h = std::max(int(round(float(resize_h) / 32) * 32), 32);
    resize_w = std::max(int(round(float(resize_w) / 32) * 32), 32);

    cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
    ratio_h = float(resize_h) / float(h);
    ratio_w = float(resize_w) / float(w);
}
|
||||
|
||||
// Resizes a text crop for the CRNN recognizer: scale to the model input
// height rec_image_shape[1] preserving aspect ratio, cap the width at the
// batch width implied by wh_ratio (the widest aspect ratio in the batch),
// then right-pad with mid-gray (127) to exactly that width.
//
// Fix: removed dead locals from the original — `imgC` was read but never
// used, `resize_h` was declared but never used, and the initial
// `imgW = rec_image_shape[2]` was immediately overwritten by
// int(imgH * wh_ratio). Behavior is unchanged.
void CrnnResizeImg::Run(const cv::Mat& img, cv::Mat& resize_img, float wh_ratio,
    const std::vector<int>& rec_image_shape) {
    const int imgH = rec_image_shape[1];
    const int imgW = int(imgH * wh_ratio);  // padded batch width

    const float ratio = float(img.cols) / float(img.rows);
    int resize_w;
    if (ceilf(imgH * ratio) > imgW)
        resize_w = imgW;  // too wide: clamp to batch width
    else
        resize_w = int(ceilf(imgH * ratio));

    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
        cv::INTER_LINEAR);
    // Pad on the right so every image in the batch has the same width.
    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
        int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
        { 127, 127, 127 });
}
|
||||
|
||||
// Resizes a text crop for the orientation classifier: scale to the input
// height rec_image_shape[1] preserving aspect ratio, capping the width at
// rec_image_shape[2]. No padding is applied here.
//
// Fix: removed dead locals from the original — `imgC` (rec_image_shape[0])
// was read but never used and `resize_h` was declared but never used.
// Behavior is unchanged.
void ClsResizeImg::Run(const cv::Mat& img, cv::Mat& resize_img,
    const std::vector<size_t>& rec_image_shape) {
    const int imgH = static_cast<int>(rec_image_shape[1]);
    const int imgW = static_cast<int>(rec_image_shape[2]);

    const float ratio = float(img.cols) / float(img.rows);
    int resize_w;
    if (ceilf(imgH * ratio) > imgW)
        resize_w = imgW;  // clamp to the classifier input width
    else
        resize_w = int(ceilf(imgH * ratio));

    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
        cv::INTER_LINEAR);
}
|
||||
|
||||
// Scales the image so its LONGER side equals max_len, preserving aspect
// ratio (the shorter side is padded later by TablePadImg).
//
// Fix: removed the unused local `max_wh` from the original; it was
// computed but never read. Behavior is unchanged.
void TableResizeImg::Run(const cv::Mat& img, cv::Mat& resize_img,
    const int max_len) {
    const int w = img.cols;
    const int h = img.rows;

    const float ratio = w >= h ? float(max_len) / float(w)
                               : float(max_len) / float(h);

    const int resize_h = int(float(h) * ratio);
    const int resize_w = int(float(w) * ratio);

    cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
}
|
||||
|
||||
// Pads the image with black on the bottom and right edges so the result is
// exactly max_len x max_len (input sides are assumed <= max_len after
// TableResizeImg).
void TablePadImg::Run(const cv::Mat& img, cv::Mat& resize_img,
    const int max_len) {
    const int padBottom = max_len - img.rows;
    const int padRight = max_len - img.cols;
    cv::copyMakeBorder(img, resize_img, 0, padBottom, 0, padRight,
        cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
}
|
||||
|
||||
// Plain fixed-size resize of `img` into `resize_img` at w x h.
void Resize::Run(const cv::Mat& img, cv::Mat& resize_img, const int h,
    const int w) {
    const cv::Size target(w, h);
    cv::resize(img, resize_img, target);
}
|
||||
|
||||
} // namespace PaddleOCR
|
||||
172
modules/ANSOCR/ANSPaddleOCR/src/rec.bak
Normal file
172
modules/ANSOCR/ANSPaddleOCR/src/rec.bak
Normal file
@@ -0,0 +1,172 @@
|
||||
#include "include/rec.h"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Default constructor; all real setup (model load, dictionary) is in init().
Rec::Rec() {}
|
||||
// Members (core, model, requests) release themselves via their destructors.
Rec::~Rec() {}
|
||||
// Loads the recognition model and the character dictionary.
// Input preprocessing (NHWC/BGR layout plus mean/scale of 0.5) is baked
// into the model graph via OpenVINO's PrePostProcessor; compilation is
// deferred to run() because the input width varies per batch.
// Always returns true; read_model failures surface as exceptions.
bool Rec::init(string model_path, const string &label_path)
{
    this->model_path = model_path;
    this->model = this->core.read_model(this->model_path);
    // -------- Step 3. Preprocessing API--------
    ov::preprocess::PrePostProcessor prep(this->model);
    // Declare section of desired application's input format
    prep.input().tensor()
        .set_layout("NHWC")
        .set_color_format(ov::preprocess::ColorFormat::BGR);
    // Specify actual model layout
    prep.input().model()
        .set_layout("NCHW");
    // Normalization folded into the graph: (x - 0.5) / 0.5 per channel.
    prep.input().preprocess()
        .mean({0.5f, 0.5f, 0.5f})
        .scale({0.5f, 0.5f, 0.5f});
    // Dump preprocessor
    std::cout << "Preprocessor: " << prep << std::endl;
    this->model = prep.build();
    // Dictionary: index 0 is the CTC blank ("#"), a trailing space is
    // appended so the last class maps to ' '.
    this->label_list_ = Utility::ReadDict(label_path);
    this->label_list_.insert(this->label_list_.begin(),
                             "#"); // blank char for ctc
    this->label_list_.push_back(" ");

    return true;
}
|
||||
|
||||
// Runs CTC text recognition over a list of cropped line images.
// Crops are processed in width-sorted batches (similar aspect ratios pad
// to similar widths); decoded text/score are written back into
// ocr_results at the ORIGINAL positions via the argsort index map.
// NOTE(review): the model is re-reshaped and re-compiled for every batch
// (dynamic width) — presumably intentional for OpenVINO static shapes,
// but expensive; confirm whether a dynamic-shape compile would do.
bool Rec::run(std::vector<cv::Mat> img_list, std::vector<OCRPredictResult> &ocr_results)
{
    std::vector<std::string> rec_texts(img_list.size(), "");
    std::vector<float> rec_text_scores(img_list.size(), 0);

    int img_num = img_list.size();
    // Sort indices by aspect ratio so each batch pads to a similar width.
    std::vector<float> width_list;
    for (int i = 0; i < img_num; i++) {
        width_list.push_back(float(img_list[i].cols) / img_list[i].rows);
    }
    std::vector<int> indices = Utility::argsort(width_list);

    for (int beg_img_no = 0; beg_img_no < img_num;
         beg_img_no += this->rec_batch_num_) {
        int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_);
        int batch_num = end_img_no - beg_img_no;
        int imgH = this->rec_image_shape_[1];
        int imgW = this->rec_image_shape_[2];
        // Widest aspect ratio in the batch determines the padded width.
        float max_wh_ratio = imgW * 1.0 / imgH;
        for (int ino = beg_img_no; ino < end_img_no; ino++) {
            int h = img_list[indices[ino]].rows;
            int w = img_list[indices[ino]].cols;
            float wh_ratio = w * 1.0 / h;
            max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
        }

        std::vector<cv::Mat> img_batch;
        std::vector<ov::Tensor> batch_tensors;

        int batch_width = imgW;
        std::vector<cv::Mat> norm_img_batch;
        for (int ino = beg_img_no; ino < end_img_no; ino++) {
            cv::Mat srcimg;
            img_list[indices[ino]].copyTo(srcimg);
            cv::Mat resize_img;
            // Resize to model height and pad to the batch width.
            this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, this->rec_image_shape_);
            // Scale pixels into [0, 1]; mean/scale normalization is baked
            // into the model graph by init().
            double e = 1.0;
            e /= 255.0;
            resize_img.convertTo(resize_img, CV_32FC3, e);

            norm_img_batch.push_back(resize_img);

            // auto input_tensor = ov::Tensor(this->model->input().get_element_type(), {1, imgH, resize_img.cols, 3});
            // auto input_data = input_tensor.data<float>();
            // input_data = (float*)resize_img.data;
            // batch_tensors.push_back(input_tensor);
            batch_width = max(resize_img.cols, batch_width);
        }

        // for (int batch = 0; batch < batch_num; batch++)
        // {
        //     for (int h = 0; h < imgH; h++)
        //     {
        //         for (int w = 0; w < batch_width; w++)
        //         {
        //             for (int c = 0; c < 3; c++)
        //             {
        //                 int index = c + 3*w + 3*batch_width*h + 3*batch_width*imgH*batch;
        //                 data[index] = float(norm_img_batch[batch].at<Vec3b>(h, w)[c]);
        //             }
        //         }
        //     }
        // }
        // Static NHWC shape for this batch, then compile + infer.
        this->model->reshape({batch_num, imgH, batch_width,3});
        // float data[batch_num * 3 * imgH * batch_width];

        this->rec_model = this->core.compile_model(this->model, "CPU");
        this->infer_request = this->rec_model.create_infer_request();
        auto input_port = this->rec_model.input();
        ov::Tensor input_tensor = this->infer_request.get_input_tensor();

        const size_t batch_size = norm_img_batch.size();

        // Copy each normalized image into its slice of the input tensor.
        // NOTE(review): assumes every resize_img is exactly
        // imgH x batch_width (image_size floats) — confirm the resize op
        // always pads to the full batch width.
        for (size_t image_id = 0; image_id < norm_img_batch.size(); ++image_id) {
            const size_t image_size = ov::shape_size(this->model->input().get_shape()) / batch_size;
            std::memcpy(input_tensor.data<float>() + image_id * image_size, (float*)norm_img_batch[image_id].data, image_size*sizeof(float));
        }
        // ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), data);
        // this->infer_request.set_input_tensor(input_tensor);
        // -------- Step 7. Start inference --------
        this->infer_request.infer();

        auto output = this->infer_request.get_output_tensor();
        const float *out_data = output.data<const float>();

        // Output shape: [batch, time steps, num classes].
        auto predict_shape = output.get_shape();

        // predict_batch is the result of Last FC with softmax
        // Greedy CTC decode: per time step take the argmax class, skip the
        // blank (index 0) and collapse repeats; score is the mean
        // probability of the kept characters.
        for (int m = 0; m < predict_shape[0]; m++) {
            std::string str_res;
            int argmax_idx;
            int last_index = 0;
            float score = 0.f;
            int count = 0;
            float max_value = 0.0f;

            for (int n = 0; n < predict_shape[1]; n++) {
                // get idx
                argmax_idx = int(Utility::argmax(
                    &out_data[(m * predict_shape[1] + n) * predict_shape[2]],
                    &out_data[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
                // get score
                max_value = float(*std::max_element(
                    &out_data[(m * predict_shape[1] + n) * predict_shape[2]],
                    &out_data[(m * predict_shape[1] + n + 1) * predict_shape[2]]));

                if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
                    score += max_value;
                    count += 1;
                    str_res += this->label_list_[argmax_idx];
                }
                last_index = argmax_idx;
            }
            // NaN when no character was emitted (count == 0): skip entry.
            score /= count;
            if (std::isnan(score)) {
                continue;
            }
            rec_texts[indices[beg_img_no + m]] = str_res;
            rec_text_scores[indices[beg_img_no + m]] = score;
        }
    }
    // sort boex from top to bottom, from left to right
    // Copy decoded text/score into the caller's results (same ordering as
    // img_list; ocr_results must be at least img_list.size() long).
    for (int i = 0; i < rec_texts.size(); i++) {
        ocr_results[i].text = rec_texts[i];
        ocr_results[i].score = rec_text_scores[i];
    }
    return true;
}
|
||||
|
||||
}
|
||||
69
modules/ANSOCR/ANSPaddleOCR/src/structure_layout.cpp
Normal file
69
modules/ANSOCR/ANSPaddleOCR/src/structure_layout.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
#include "include/structure_layout.h"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Loads and compiles the layout-analysis model at a fixed input size
// (layout_img_h_ x layout_img_w_), folding BGR/NHWC input handling and
// mean/scale normalization into the graph, and initializes the
// post-processor with the layout label dictionary.
Layout::Layout(std::string model_path, std::string layout_dict_path) {
    ov::Core core;
    this->model_path = model_path;
    this->model = core.read_model(this->model_path);
    this->model->reshape({ 1, 3, this->layout_img_h_, this->layout_img_w_ });

    // preprocessing API
    ov::preprocess::PrePostProcessor prep(this->model);
    // declare section of desired application's input format
    prep.input().tensor().set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
    // specify actual model layout
    prep.input().model().set_layout("NCHW");
    prep.input().preprocess().mean(this->mean_).scale(this->scale_);
    // dump preprocessor
    std::cout << "Preprocessor: " << prep << std::endl;
    this->model = prep.build();
    this->compiled_model = core.compile_model(this->model, "CPU");
    this->infer_request = this->compiled_model.create_infer_request();

    this->post_processor_.init(layout_dict_path, this->layout_score_threshold,
                               this->layout_nms_threshold);
}
|
||||
|
||||
// Runs layout analysis on one image: resize to the fixed model input,
// infer, collect all output tensors, then post-process (PicoDet-style
// decode + NMS) into structure_result.
void Layout::Run(cv::Mat& src_img, std::vector<StructurePredictResult>& structure_result) {
    this->src_img = src_img;
    this->resize_op_.Run(this->src_img, this->resize_img, this->layout_img_h_, this->layout_img_w_);
    std::vector<std::vector<std::vector<int>>> boxes;
    auto input_port = this->compiled_model.input();

    // -------- set input --------
    // NOTE(review): `e` is not declared in this function — presumably a
    // class member holding the pixel scale factor (e.g. 1/255); confirm
    // in the header.
    this->resize_img.convertTo(this->resize_img, CV_32FC3, e);
    // Zero-copy tensor wrapping the resized image's float data.
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), (float*)this->resize_img.data);
    this->infer_request.set_input_tensor(input_tensor);
    // -------- start inference --------
    this->infer_request.infer();

    // Flatten every model output into a float vector, keeping shapes.
    std::vector<std::vector<float>> out_tensor_list;
    std::vector<ov::Shape> output_shape_list;
    for (int j = 0; j < (this->model->outputs()).size(); j++) {
        auto output = this->infer_request.get_output_tensor(j);
        auto output_shape = output.get_shape();
        int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                      std::multiplies<int>());
        output_shape_list.push_back(output_shape);

        const float* out_data = output.data<const float>();
        std::vector<float> out_tensor(out_data, out_data + out_num);
        out_tensor_list.push_back(out_tensor);
    }

    // Derive reg_max from the first box-branch output (outputs are ordered
    // score branches first, one per FPN stride, then box branches).
    // NOTE(review): the loop condition `i == fpn_stride_.size()` only
    // matches one index and then breaks — verify this picks the intended
    // tensor when output counts change.
    std::vector<int> bbox_num;
    int reg_max = 0;
    for (int i = 0; i < out_tensor_list.size(); i++) {
        if (i == this->post_processor_.fpn_stride_.size()) {
            reg_max = output_shape_list[i][2] / 4;
            break;
        }
    }
    std::vector<int> ori_shape = { this->src_img.rows, this->src_img.cols };
    std::vector<int> resize_shape = { this->resize_img.rows, this->resize_img.cols };
    this->post_processor_.Run(structure_result, out_tensor_list, ori_shape, resize_shape,
                              reg_max);
    bbox_num.push_back(structure_result.size());
}
|
||||
}
|
||||
96
modules/ANSOCR/ANSPaddleOCR/src/structure_table.cpp
Normal file
96
modules/ANSOCR/ANSPaddleOCR/src/structure_table.cpp
Normal file
@@ -0,0 +1,96 @@
|
||||
#include "include/structure_table.h"
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
// Loads and compiles the table-structure model with a dynamic batch
// dimension (1..table_batch_num_) at a fixed table_max_len_ square input,
// and initializes the post-processor with the table tag dictionary.
Table::Table(std::string model_path, const std::string table_char_dict_path) {
    ov::Core core;
    this->model_path = model_path;
    this->model = core.read_model(this->model_path);
    // reshape the model for dynamic batch size and sentence width
    this->model->reshape({ {ov::Dimension(1, this->table_batch_num_), 3, this->table_max_len_, this->table_max_len_} });
    this->compiled_model = core.compile_model(this->model, "CPU");
    this->infer_request = this->compiled_model.create_infer_request();
    this->post_processor_.init(table_char_dict_path, false);
}
|
||||
|
||||
// Recognizes table structure for a list of table images in batches:
// resize/normalize/pad to table_max_len_ squares, infer, then decode the
// two outputs (cell box regressions and structure-token probabilities)
// into HTML tags, scores, and cell boxes. Each table's tag list is
// wrapped in <html><body><table> ... </table></body></html>.
void Table::Run(std::vector<cv::Mat> img_list,
                std::vector<std::vector<std::string>>& structure_html_tags,
                std::vector<float>& structure_scores,
                std::vector<std::vector<std::vector<int>>>& structure_boxes) {
    int img_num = img_list.size();
    for (int beg_img_no = 0; beg_img_no < img_num;
         beg_img_no += this->table_batch_num_) {
        // preprocess
        // NOTE(review): preprocess_start is never read — leftover timing
        // instrumentation.
        auto preprocess_start = std::chrono::steady_clock::now();
        int end_img_no = std::min(img_num, beg_img_no + this->table_batch_num_);
        size_t batch_num = end_img_no - beg_img_no;
        std::vector<cv::Mat> norm_img_batch;
        // Original sizes, needed to map boxes back to source coordinates.
        std::vector<int> width_list;
        std::vector<int> height_list;
        for (int ino = beg_img_no; ino < end_img_no; ino++) {
            cv::Mat srcimg;
            img_list[ino].copyTo(srcimg);
            cv::Mat resize_img;
            cv::Mat pad_img;
            this->resize_op_.Run(srcimg, resize_img, this->table_max_len_);
            this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                                    this->is_scale_);
            this->pad_op_.Run(resize_img, pad_img, this->table_max_len_);
            norm_img_batch.push_back(pad_img);
            width_list.push_back(srcimg.cols);
            height_list.push_back(srcimg.rows);
        }

        size_t tableMaxLen = this->table_max_len_;

        // Pack the batch into a planar NCHW float buffer.
        std::vector<float> input(batch_num * 3 * this->table_max_len_ * this->table_max_len_, 0.0f);
        ov::Shape intput_shape = { batch_num, 3, tableMaxLen, tableMaxLen };
        this->permute_op_.Run(norm_img_batch, input.data());
        // inference.
        auto input_port = this->compiled_model.input();
        ov::Tensor input_tensor(input_port.get_element_type(), intput_shape, input.data());
        this->infer_request.set_input_tensor(input_tensor);
        // start inference
        this->infer_request.infer();

        // Output 0: cell location regressions; output 1: structure-token
        // probabilities.
        auto output0 = this->infer_request.get_output_tensor(0);
        const float* out_data0 = output0.data<const float>();
        auto predict_shape0 = output0.get_shape();
        auto output1 = this->infer_request.get_output_tensor(1);
        const float* out_data1 = output1.data<const float>();
        auto predict_shape1 = output1.get_shape();

        int out_num0 = std::accumulate(predict_shape0.begin(), predict_shape0.end(),
                                       1, std::multiplies<int>());
        int out_num1 = std::accumulate(predict_shape1.begin(), predict_shape1.end(),
                                       1, std::multiplies<int>());

        std::vector<float> loc_preds(out_data0, out_data0 + out_num0);
        std::vector<float> structure_probs(out_data1, out_data1 + out_num1);

        // postprocess
        std::vector<std::vector<std::string>> structure_html_tag_batch;
        std::vector<float> structure_score_batch;
        std::vector<std::vector<std::vector<int>>> structure_boxes_batch;
        this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch,
                                  predict_shape0, predict_shape1,
                                  structure_html_tag_batch, structure_boxes_batch,
                                  width_list, height_list);
        // Wrap each decoded tag sequence in the fixed HTML scaffold and
        // append to the caller's aggregate outputs.
        for (int m = 0; m < predict_shape0[0]; m++) {

            structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
                                               "<table>");
            structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
                                               "<body>");
            structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
                                               "<html>");
            structure_html_tag_batch[m].push_back("</table>");
            structure_html_tag_batch[m].push_back("</body>");
            structure_html_tag_batch[m].push_back("</html>");
            structure_html_tags.push_back(structure_html_tag_batch[m]);
            structure_scores.push_back(structure_score_batch[m]);
            structure_boxes.push_back(structure_boxes_batch[m]);
        }
    }
}
|
||||
}
|
||||
151
modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp
Normal file
151
modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.cpp
Normal file
@@ -0,0 +1,151 @@
|
||||
#include "PaddleOCRV5RTEngine.h"
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <iostream>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// Builds the detector, (optional) classifier, and recognizer engines.
// Thread-safe via _mutex. Returns false when the detector or recognizer
// fails; a classifier failure is non-fatal — the classifier is dropped
// and orientation correction is simply skipped in ocr().
// clsModelPath may be empty to skip the classifier entirely.
bool PaddleOCRV5RTEngine::Initialize(const std::string& detModelPath,
                                     const std::string& clsModelPath,
                                     const std::string& recModelPath,
                                     const std::string& dictPath,
                                     int gpuId,
                                     const std::string& engineCacheDir) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);

    gpuId_ = gpuId;
    // Empty cache dir keeps whatever engineCacheDir_ already holds.
    if (!engineCacheDir.empty()) {
        engineCacheDir_ = engineCacheDir;
    }

    try {
        // 1. Initialize detector
        detector_ = std::make_unique<RTOCRDetector>();
        if (!detector_->Initialize(detModelPath, gpuId_, engineCacheDir_, detMaxSideLen_)) {
            std::cerr << "[PaddleOCRV5RTEngine] Failed to initialize detector" << std::endl;
            return false;
        }

        // 2. Initialize classifier (optional - only if path provided)
        if (!clsModelPath.empty()) {
            classifier_ = std::make_unique<RTOCRClassifier>();
            if (!classifier_->Initialize(clsModelPath, gpuId_, engineCacheDir_)) {
                std::cerr << "[PaddleOCRV5RTEngine] Warning: Failed to initialize classifier, skipping"
                          << std::endl;
                classifier_.reset();
            }
        }

        // 3. Initialize recognizer
        recognizer_ = std::make_unique<RTOCRRecognizer>();
        // Configure geometry before Initialize so the engine profile
        // is built for the requested input shape.
        recognizer_->SetRecImageHeight(recImgH_);
        recognizer_->SetRecImageMaxWidth(recImgMaxW_);
        if (!recognizer_->Initialize(recModelPath, dictPath, gpuId_, engineCacheDir_)) {
            std::cerr << "[PaddleOCRV5RTEngine] Failed to initialize recognizer" << std::endl;
            return false;
        }

        std::cout << "[PaddleOCRV5RTEngine] Initialized successfully"
                  << " (detector: yes, classifier: " << (classifier_ ? "yes" : "no")
                  << ", recognizer: yes)" << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5RTEngine] Initialize failed: " << e.what() << std::endl;
        return false;
    }
}
|
||||
|
||||
// Full OCR pipeline: detect text boxes, crop each region, optionally
// correct 180-degree rotation via the classifier, recognize text, and
// return per-box results (quad points, text, score, cls label/score).
// Returns an empty vector on missing components, empty input, or error.
//
// Fix: the original cropped every box, and then — if ANY crop failed —
// re-ran GetRotateCropImage over ALL boxes a second time to rebuild
// aligned arrays. Boxes and crops are now built aligned in a single
// pass, halving the cropping work in the failure case and removing the
// duplicated logic. Behavior (outputs) is unchanged.
std::vector<OCRPredictResult> PaddleOCRV5RTEngine::ocr(const cv::Mat& image) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<OCRPredictResult> results;

    if (!detector_ || !recognizer_ || image.empty()) return results;

    try {
        // 1. Detection: find text boxes
        std::vector<TextBox> textBoxes = detector_->Detect(
            image, detMaxSideLen_, detDbThresh_, detBoxThresh_,
            detUnclipRatio_, useDilation_);

        if (textBoxes.empty()) return results;

        // 2. Crop text regions; drop boxes whose crop fails so boxes and
        //    crops stay index-aligned.
        std::vector<TextBox> validBoxes;
        std::vector<cv::Mat> croppedImages;
        validBoxes.reserve(textBoxes.size());
        croppedImages.reserve(textBoxes.size());

        for (size_t i = 0; i < textBoxes.size(); i++) {
            cv::Mat cropped = GetRotateCropImage(image, textBoxes[i]);
            if (cropped.empty()) continue;
            validBoxes.push_back(textBoxes[i]);
            croppedImages.push_back(cropped);
        }
        textBoxes = std::move(validBoxes);

        // 3. Classification (optional): check orientation and rotate if needed
        std::vector<int> clsLabels(croppedImages.size(), 0);
        std::vector<float> clsScores(croppedImages.size(), 0.0f);

        if (classifier_) {
            auto clsResults = classifier_->Classify(croppedImages, clsThresh_);
            for (size_t i = 0; i < clsResults.size() && i < croppedImages.size(); i++) {
                clsLabels[i] = clsResults[i].first;
                clsScores[i] = clsResults[i].second;

                // Rotate 180 degrees if label is odd and confidence is high enough
                if (clsLabels[i] % 2 == 1 && clsScores[i] > clsThresh_) {
                    cv::rotate(croppedImages[i], croppedImages[i], cv::ROTATE_180);
                }
            }
        }

        // 4. Recognition: extract text from cropped images
        std::vector<TextLine> textLines = recognizer_->RecognizeBatch(croppedImages);

        // 5. Combine results
        results.reserve(textBoxes.size());
        for (size_t i = 0; i < textBoxes.size(); i++) {
            OCRPredictResult res;

            // Convert box to [[x,y], ...] format
            for (int j = 0; j < 4; j++) {
                res.box.push_back({
                    static_cast<int>(textBoxes[i].points[j].x),
                    static_cast<int>(textBoxes[i].points[j].y)
                });
            }

            // Recognizer may return fewer lines than boxes; leave the
            // default text/score in that case.
            if (i < textLines.size()) {
                res.text = textLines[i].text;
                res.score = textLines[i].score;
            }

            res.cls_label = clsLabels[i];
            res.cls_score = clsScores[i];

            results.push_back(res);
        }

        return results;
    }
    catch (const std::exception& e) {
        std::cerr << "[PaddleOCRV5RTEngine] OCR failed: " << e.what() << std::endl;
        return results;
    }
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
67
modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h
Normal file
67
modules/ANSOCR/ANSRTOCR/PaddleOCRV5RTEngine.h
Normal file
@@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include "RTOCRTypes.h"
|
||||
#include "RTOCRDetector.h"
|
||||
#include "RTOCRClassifier.h"
|
||||
#include "RTOCRRecognizer.h"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ANSLicense.h"
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// TensorRT-backed PaddleOCR v5 pipeline: DB detector + optional
// orientation classifier + CTC recognizer. Non-copyable; all public
// entry points serialize on _mutex (recursive, so Initialize/ocr may be
// called from code already holding it).
class PaddleOCRV5RTEngine {
public:
    PaddleOCRV5RTEngine() = default;
    ~PaddleOCRV5RTEngine() = default;
    PaddleOCRV5RTEngine(const PaddleOCRV5RTEngine&) = delete;
    PaddleOCRV5RTEngine& operator=(const PaddleOCRV5RTEngine&) = delete;

    // Initialize all components
    // clsModelPath can be empty to skip classifier
    bool Initialize(const std::string& detModelPath,
                    const std::string& clsModelPath,
                    const std::string& recModelPath,
                    const std::string& dictPath,
                    int gpuId = 0,
                    const std::string& engineCacheDir = "");

    // Run full OCR pipeline: detect → crop → [classify →] recognize
    std::vector<OCRPredictResult> ocr(const cv::Mat& image);

    // Configuration setters
    // NOTE: call before Initialize(); values are consumed when engines
    // are built.
    void SetDetMaxSideLen(int v) { detMaxSideLen_ = v; }
    void SetDetDbThresh(float v) { detDbThresh_ = v; }
    void SetDetBoxThresh(float v) { detBoxThresh_ = v; }
    void SetDetUnclipRatio(float v) { detUnclipRatio_ = v; }
    void SetClsThresh(float v) { clsThresh_ = v; }
    void SetUseDilation(bool v) { useDilation_ = v; }
    void SetRecImageHeight(int v) { recImgH_ = v; }
    void SetRecImageMaxWidth(int v) { recImgMaxW_ = v; }
    void SetGpuId(int v) { gpuId_ = v; }
    void SetEngineCacheDir(const std::string& v) { engineCacheDir_ = v; }

private:
    std::unique_ptr<RTOCRDetector> detector_;
    std::unique_ptr<RTOCRClassifier> classifier_; // optional
    std::unique_ptr<RTOCRRecognizer> recognizer_;

    // Configuration
    int detMaxSideLen_ = kDetMaxSideLen;
    float detDbThresh_ = kDetDbThresh;
    float detBoxThresh_ = kDetBoxThresh;
    float detUnclipRatio_ = kDetUnclipRatio;
    float clsThresh_ = kClsThresh;
    bool useDilation_ = false;
    int recImgH_ = kRecImgH;
    int recImgMaxW_ = kRecImgMaxW;
    int gpuId_ = 0;
    std::string engineCacheDir_;

    std::recursive_mutex _mutex;
};
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
143
modules/ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
Normal file
143
modules/ANSOCR/ANSRTOCR/RTOCRClassifier.cpp
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "RTOCRClassifier.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/cudaimgproc.hpp>
|
||||
#include <opencv2/cudawarping.hpp>
|
||||
#include <opencv2/cudaarithm.hpp>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// Builds or loads the orientation-classifier TensorRT engine via the
// shared engine pool (FP16, batch 1, fixed kClsImageH x kClsImageW input).
// When engineCacheDir is empty, the serialized engine is cached next to
// the ONNX model. Returns false on any failure; never throws.
bool RTOCRClassifier::Initialize(const std::string& onnxPath, int gpuId,
                                 const std::string& engineCacheDir) {
    try {
        ANSCENTER::Options options;
        options.deviceIndex = gpuId;
        options.precision = ANSCENTER::Precision::FP16;
        options.maxBatchSize = 1;
        options.optBatchSize = 1;

        // Fixed input size for classifier
        options.minInputHeight = kClsImageH;
        options.optInputHeight = kClsImageH;
        options.maxInputHeight = kClsImageH;
        options.minInputWidth = kClsImageW;
        options.optInputWidth = kClsImageW;
        options.maxInputWidth = kClsImageW;

        if (!engineCacheDir.empty()) {
            options.engineFileDir = engineCacheDir;
        }
        else {
            // Default the cache to the model's own directory.
            auto pos = onnxPath.find_last_of("/\\");
            options.engineFileDir = (pos != std::string::npos) ? onnxPath.substr(0, pos) : ".";
        }

        // Pool key de-duplicates engines shared across classifier
        // instances with the same model/precision/batch.
        m_poolKey = { onnxPath,
                      static_cast<int>(options.precision),
                      options.maxBatchSize };
        m_engine = EnginePoolManager<float>::instance().acquire(
            m_poolKey, options, onnxPath,
            kClsSubVals, kClsDivVals, true, -1);
        m_usingSharedPool = (m_engine != nullptr);

        if (!m_engine) {
            std::cerr << "[RTOCRClassifier] Failed to build/load TRT engine: " << onnxPath << std::endl;
            return false;
        }

        std::cout << "[RTOCRClassifier] Initialized TRT engine from: " << onnxPath << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[RTOCRClassifier] Initialize failed: " << e.what() << std::endl;
        m_engine.reset();
        return false;
    }
}
|
||||
|
||||
// Classifies the orientation of each text-line crop, one engine call per
// image (engine batch is 1). Returns one (label, score) pair per input;
// a failed or empty image yields {0, 0.0f} so output stays aligned with
// input. Serialized on _mutex.
// NOTE(review): `clsThresh` is not used inside this function —
// thresholding happens in the caller; confirm whether the parameter
// should be dropped from the interface.
std::vector<std::pair<int, float>> RTOCRClassifier::Classify(
    const std::vector<cv::Mat>& images, float clsThresh) {

    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<std::pair<int, float>> results;

    if (!m_engine || images.empty()) return results;
    results.reserve(images.size());

    for (size_t i = 0; i < images.size(); i++) {
        try {
            if (images[i].empty()) {
                results.push_back({ 0, 0.0f });
                continue;
            }

            // Preprocess: direct resize to 80x160 (PP-LCNet_x1_0_textline_ori)
            // No aspect ratio preservation — matches PaddleOCR official ResizeImage
            cv::Mat resized;
            cv::resize(images[i], resized, cv::Size(kClsImageW, kClsImageH));

            // Upload to GPU (keep BGR order - PaddleOCR official does NOT convert BGR→RGB)
            cv::cuda::GpuMat gpuImg;
            gpuImg.upload(resized);

            // Run inference
            std::vector<std::vector<cv::cuda::GpuMat>> inputs = { { gpuImg } };
            std::vector<std::vector<std::vector<float>>> featureVectors;

            if (!m_engine->runInference(inputs, featureVectors)) {
                results.push_back({ 0, 0.0f });
                continue;
            }

            // Guard against an engine that returned no output.
            if (featureVectors.empty() || featureVectors[0].empty() ||
                featureVectors[0][0].empty()) {
                results.push_back({ 0, 0.0f });
                continue;
            }

            // Find argmax and use raw output value as score
            // PaddleOCR v5 models include softmax, so output values are probabilities
            // Matches PaddleOCR official: score = preds[i, argmax_idx]
            const std::vector<float>& output = featureVectors[0][0];
            int numClasses = static_cast<int>(output.size());

            int bestIdx = 0;
            float bestScore = output[0];
            for (int c = 1; c < numClasses; c++) {
                if (output[c] > bestScore) {
                    bestScore = output[c];
                    bestIdx = c;
                }
            }

            results.push_back({ bestIdx, bestScore });
        }
        catch (const std::exception& e) {
            // Per-image failure keeps the pipeline running; emit a
            // neutral "upright, zero confidence" entry.
            std::cerr << "[RTOCRClassifier] Classify failed for image " << i
                      << ": " << e.what() << std::endl;
            results.push_back({ 0, 0.0f });
        }
    }

    return results;
}
|
||||
|
||||
// Releases the pooled engine reference (if this instance acquired one)
// and drops our shared_ptr. Destructors must not throw, so any failure
// from the pool manager is swallowed.
RTOCRClassifier::~RTOCRClassifier() {
    try {
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_usingSharedPool = false;
        }
        // Resetting a null shared_ptr is a no-op, so this covers both the
        // pooled and the standalone case.
        m_engine.reset();
    }
    catch (...) {}
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
36
modules/ANSOCR/ANSRTOCR/RTOCRClassifier.h
Normal file
36
modules/ANSOCR/ANSRTOCR/RTOCRClassifier.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "RTOCRTypes.h"
|
||||
#include "engine.h"
|
||||
#include "engine/EnginePoolManager.h"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// Text-line orientation classifier backed by a pooled TensorRT engine.
// Non-copyable. Classify() is serialized on _mutex; the engine itself is
// shared across instances via EnginePoolManager and released in the dtor.
class RTOCRClassifier {
public:
    RTOCRClassifier() = default;
    ~RTOCRClassifier();
    RTOCRClassifier(const RTOCRClassifier&) = delete;
    RTOCRClassifier& operator=(const RTOCRClassifier&) = delete;

    // Builds/loads the TRT engine; empty engineCacheDir caches the
    // serialized engine next to the ONNX file.
    bool Initialize(const std::string& onnxPath, int gpuId = 0,
                    const std::string& engineCacheDir = "");

    // Classify a batch of text images
    // Returns vector of (cls_label, cls_score) per image
    // cls_label: 0 = normal, 1 = rotated 180 degrees
    std::vector<std::pair<int, float>> Classify(
        const std::vector<cv::Mat>& images, float clsThresh = kClsThresh);

private:
    std::shared_ptr<Engine<float>> m_engine = nullptr;   // pooled TRT engine
    EnginePoolManager<float>::PoolKey m_poolKey;         // key used on release
    bool m_usingSharedPool = false;                      // true after acquire()
    std::mutex _mutex;
};
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
403
modules/ANSOCR/ANSRTOCR/RTOCRDetector.cpp
Normal file
403
modules/ANSOCR/ANSRTOCR/RTOCRDetector.cpp
Normal file
@@ -0,0 +1,403 @@
|
||||
#include "RTOCRDetector.h"
|
||||
#include "include/clipper.h"
|
||||
#include "NV12PreprocessHelper.h"
|
||||
#include "ANSGpuFrameRegistry.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// NV12→BGR fused resize via NV12PreprocessHelper (linked from ANSODEngine.dll)
|
||||
#include <opencv2/cudaimgproc.hpp>
|
||||
#include <opencv2/cudawarping.hpp>
|
||||
#include <opencv2/cudaarithm.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// Build or load the TensorRT detection engine (DBNet-style text detector).
// @param onnxPath        Path to the detector ONNX model.
// @param gpuId           CUDA device index.
// @param engineCacheDir  Cache dir for serialized engines; empty = ONNX file's dir.
// @param maxSideLen      Requested max spatial dimension for the dynamic profile.
// @return true on success. On success m_engineMaxSideLen holds the engine's
//         actual profile limit (may be smaller than requested).
bool RTOCRDetector::Initialize(const std::string& onnxPath, int gpuId,
                               const std::string& engineCacheDir,
                               int maxSideLen) {
    // Engine cache directory: explicit arg wins, otherwise the ONNX file's folder.
    std::string cacheDir;
    if (!engineCacheDir.empty()) {
        cacheDir = engineCacheDir;
    } else {
        auto pos = onnxPath.find_last_of("/\\");
        cacheDir = (pos != std::string::npos) ? onnxPath.substr(0, pos) : ".";
    }

    try {
        ANSCENTER::Options options;
        options.deviceIndex = gpuId;
        // FP32 required for detection: this CNN (DBNet) produces NaN in FP16.
        // The model has 142 Convolution + 87 Scale (fused BatchNorm) layers whose
        // intermediate values overflow FP16 range (65504). Mixed precision
        // (forcing only Sigmoid/Softmax to FP32) is insufficient because the NaN
        // originates deep in the conv->scale->relu backbone before reaching those layers.
        // Classifier and recognizer remain FP16 with mixed precision -- only the
        // detector needs full FP32.
        options.precision = ANSCENTER::Precision::FP32;
        options.maxBatchSize = 1;
        options.optBatchSize = 1;

        // Dynamic spatial dimensions for detection (multiples of 32).
        // opt = 640x640 (or smaller if maxSideLen is below that).
        options.minInputHeight = 32;
        options.minInputWidth = 32;
        options.optInputHeight = std::min(640, maxSideLen);
        options.optInputWidth = std::min(640, maxSideLen);
        options.maxInputHeight = maxSideLen;
        options.maxInputWidth = maxSideLen;
        options.engineFileDir = cacheDir;

        // Acquire via the shared pool so multiple detector instances on the same
        // model/precision/batch reuse one engine.
        m_poolKey = { onnxPath,
                      static_cast<int>(options.precision),
                      options.maxBatchSize };
        m_engine = EnginePoolManager<float>::instance().acquire(
            m_poolKey, options, onnxPath,
            kDetSubVals, kDetDivVals, true, -1);
        m_usingSharedPool = (m_engine != nullptr);

        if (!m_engine) {
            std::cerr << "[RTOCRDetector] Failed to build/load TRT engine for: "
                      << onnxPath << std::endl;
            return false;
        }

        // Query actual profile max from the loaded engine — the pool may hand
        // back an engine built with a smaller profile than we requested.
        int profMaxH = m_engine->getProfileMaxHeight();
        int profMaxW = m_engine->getProfileMaxWidth();
        if (profMaxH > 0 && profMaxW > 0) {
            m_engineMaxSideLen = std::min(profMaxH, profMaxW);
        } else {
            m_engineMaxSideLen = maxSideLen;
        }

        if (m_engineMaxSideLen < maxSideLen) {
            std::cout << "[RTOCRDetector] Engine built with max " << m_engineMaxSideLen
                      << "x" << m_engineMaxSideLen << " (requested " << maxSideLen
                      << " exceeded GPU capacity)" << std::endl;
        }
        std::cout << "[RTOCRDetector] Initialized TRT engine from: " << onnxPath << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[RTOCRDetector] Initialize failed: " << e.what() << std::endl;
        return false;
    }
}
|
||||
|
||||
// Run text detection on one frame and return sorted text boxes in original
// image coordinates. Pipeline: resize (mult. of 32) -> TRT inference ->
// threshold prob map -> contours -> per-contour score/unclip/min-box
// (mirrors PaddleOCR DBPostProcess). Serialized on _mutex.
// @param image       BGR frame (used directly unless the NV12 fast path fires).
// @param maxSideLen  Max side for the resize, clamped to the engine profile.
// @param dbThresh    Binarization threshold on the probability map.
// @param boxThresh   Minimum mean-probability score for a candidate box.
// @param unclipRatio Expansion ratio for Clipper offsetting.
// @param useDilation Optional 2x2 dilation of the binary map.
std::vector<TextBox> RTOCRDetector::Detect(const cv::Mat& image,
                                           int maxSideLen, float dbThresh,
                                           float boxThresh, float unclipRatio,
                                           bool useDilation) {
    std::lock_guard<std::mutex> lock(_mutex);

    if (!m_engine || image.empty()) return {};

    try {
        // Single-pass detection: resize the full image to fit within
        // the engine's max spatial dimension (same approach as ONNX version).
        int effectiveMaxSide = std::min(maxSideLen, m_engineMaxSideLen);

        // 1. Compute resize dimensions (multiples of 32)
        cv::Size resizeShape = ComputeDetResizeShape(image.rows, image.cols, effectiveMaxSide);
        int newH = resizeShape.height;
        int newW = resizeShape.width;

        // Ratios map detection-space coordinates back to original image space.
        float ratioH = static_cast<float>(image.rows) / newH;
        float ratioW = static_cast<float>(image.cols) / newW;

        // 2. Upload to GPU and resize — try NV12 fast path first
        cv::cuda::GpuMat gpuResized;
        bool usedNV12 = false;

        // Thread-local frame handle set by the caller's decode pipeline
        // (assumed — tl_currentGpuFrame() is defined elsewhere; confirm).
        GpuFrameData* gpuFrame = tl_currentGpuFrame();
        if (gpuFrame && gpuFrame->pixelFormat == 23 &&
            gpuFrame->cpuYPlane && gpuFrame->cpuUvPlane &&
            gpuFrame->width > 0 && gpuFrame->height > 0) {
            // NV12 fast path: fused NV12→BGR+resize kernel (1 kernel launch)
            // instead of CPU BGR upload (24MB) + separate resize
            // NOTE(review): pixelFormat == 23 presumably corresponds to NV12
            // (AV_PIX_FMT_NV12 in FFmpeg) — confirm against the producer.
            int fW = gpuFrame->width;
            int fH = gpuFrame->height;
            int gpuIdx = m_engine ? m_engine->getOptions().deviceIndex : 0;

            // Get NV12 Y/UV pointers on GPU (from cache or fresh upload)
            const uint8_t* devY = nullptr;
            const uint8_t* devUV = nullptr;
            int yPitch = 0, uvPitch = 0;
            {
                auto regLock = ANSGpuFrameRegistry::instance().acquire_lock();
                if (gpuFrame->gpuCacheValid && gpuFrame->gpuCacheDeviceIdx == gpuIdx) {
                    // Cache hit
                    devY = static_cast<const uint8_t*>(gpuFrame->gpuCacheY);
                    devUV = static_cast<const uint8_t*>(gpuFrame->gpuCacheUV);
                    yPitch = static_cast<int>(gpuFrame->gpuCacheYPitch);
                    uvPitch = static_cast<int>(gpuFrame->gpuCacheUVPitch);
                } else if (!gpuFrame->gpuCacheValid) {
                    // Cache miss — upload CPU NV12 to GPU
                    // (If the cache is valid but on another device, fall through
                    // to the BGR upload path below.)
                    size_t yBytes = static_cast<size_t>(fH) * gpuFrame->cpuYLinesize;
                    size_t uvBytes = static_cast<size_t>(fH / 2) * gpuFrame->cpuUvLinesize;

                    auto& reg = ANSGpuFrameRegistry::instance();
                    if (reg.canAllocateGpuCache(yBytes + uvBytes)) {
                        // NOTE(review): cudaMalloc/cudaMemcpy return codes are not
                        // checked; a failed alloc leaves null/garbage cache pointers
                        // marked valid. Presumably budgeted by canAllocateGpuCache —
                        // confirm, or add error handling.
                        cudaMalloc(&gpuFrame->gpuCacheY, yBytes);
                        cudaMalloc(&gpuFrame->gpuCacheUV, uvBytes);
                        cudaMemcpy(gpuFrame->gpuCacheY, gpuFrame->cpuYPlane, yBytes, cudaMemcpyHostToDevice);
                        cudaMemcpy(gpuFrame->gpuCacheUV, gpuFrame->cpuUvPlane, uvBytes, cudaMemcpyHostToDevice);
                        gpuFrame->gpuCacheValid = true;
                        gpuFrame->gpuCacheDeviceIdx = gpuIdx;
                        gpuFrame->gpuCacheYPitch = static_cast<size_t>(gpuFrame->cpuYLinesize);
                        gpuFrame->gpuCacheUVPitch = static_cast<size_t>(gpuFrame->cpuUvLinesize);
                        gpuFrame->gpuCacheBytes = yBytes + uvBytes;
                        reg.onGpuCacheCreated(yBytes + uvBytes);

                        devY = static_cast<const uint8_t*>(gpuFrame->gpuCacheY);
                        devUV = static_cast<const uint8_t*>(gpuFrame->gpuCacheUV);
                        yPitch = gpuFrame->cpuYLinesize;
                        uvPitch = gpuFrame->cpuUvLinesize;
                    }
                }
            } // release registry lock before GPU kernel

            if (devY && devUV) {
                // Single fused kernel: NV12→BGR + bilinear resize (1 launch, 1 output alloc)
                gpuResized.create(newH, newW, CV_8UC3);
                NV12PreprocessHelper::nv12ToBGRResize(
                    devY, yPitch, devUV, uvPitch,
                    gpuResized.ptr<uint8_t>(), static_cast<int>(gpuResized.step),
                    newW, newH, fW, fH);
                usedNV12 = true;

                // Update ratios to map from full-res NV12 to detection output
                // (the NV12 frame may differ in size from `image`).
                ratioH = static_cast<float>(fH) / newH;
                ratioW = static_cast<float>(fW) / newW;
            }
        }

        if (!usedNV12) {
            // Fallback: standard BGR upload
            cv::cuda::GpuMat gpuImg;
            gpuImg.upload(image);
            cv::cuda::resize(gpuImg, gpuResized, resizeShape);
        }

        // Keep BGR order (PaddleOCR official does NOT convert BGR->RGB)

        // 3. Run inference
        std::vector<std::vector<cv::cuda::GpuMat>> inputs = { { gpuResized } };
        std::vector<std::vector<std::vector<float>>> featureVectors;

        if (!m_engine->runInference(inputs, featureVectors)) {
            std::cerr << "[RTOCRDetector] Inference failed" << std::endl;
            return {};
        }

        if (featureVectors.empty() || featureVectors[0].empty()) return {};

        // 4. Reshape output to probability map [H, W]
        std::vector<float>& output = featureVectors[0][0];
        int outputSize = static_cast<int>(output.size());

        if (outputSize < newH * newW) {
            std::cerr << "[RTOCRDetector] Output too small: expected at least "
                      << newH * newW << " got " << outputSize << std::endl;
            return {};
        }

        // `bitmap` aliases the output buffer — valid only while `output` lives.
        cv::Mat bitmap(newH, newW, CV_32FC1, output.data());

        // 5. Threshold to binary (matches ONNX/PaddleOCR official order)
        cv::Mat binaryMap;
        cv::threshold(bitmap, binaryMap, dbThresh, 255, cv::THRESH_BINARY);
        binaryMap.convertTo(binaryMap, CV_8UC1);

        // 6. Apply dilation if requested (on binaryMap, matching ONNX version)
        if (useDilation) {
            cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
            cv::dilate(binaryMap, binaryMap, kernel);
        }

        // 7. Find contours and build text boxes
        // (matches ONNX/PaddleOCR official DBPostProcess.boxes_from_bitmap flow exactly)
        std::vector<std::vector<cv::Point>> contours;
        cv::findContours(binaryMap, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);

        int numCandidates = std::min(static_cast<int>(contours.size()), kDetMaxCandidates);
        std::vector<TextBox> boxes;

        for (int i = 0; i < numCandidates; i++) {
            if (contours[i].size() < 4) continue;

            // Step 1: GetMiniBoxes - get ordered 4 corners of min area rect
            cv::RotatedRect minRect = cv::minAreaRect(contours[i]);
            float sside = std::min(minRect.size.width, minRect.size.height);
            if (sside < 3.0f) continue;  // drop tiny fragments

            auto ordered = GetMiniBoxes(minRect);

            // Step 2: BoxScoreFast - compute mean prob inside the 4-point box polygon
            float score = BoxScoreFast(bitmap, ordered);
            if (score < boxThresh) continue;

            // Step 3: UnclipPolygon - expand the 4-point box
            auto expanded = UnclipPolygon(ordered, unclipRatio);
            if (expanded.size() < 4) continue;

            // Step 4: Re-compute GetMiniBoxes on the expanded polygon
            std::vector<cv::Point> expandedInt;
            expandedInt.reserve(expanded.size());
            for (auto& p : expanded) {
                expandedInt.push_back(cv::Point(static_cast<int>(p.x), static_cast<int>(p.y)));
            }
            cv::RotatedRect expandedRect = cv::minAreaRect(expandedInt);

            // Filter by min_size + 2 = 5 (matches PaddleOCR official)
            float expandedSside = std::min(expandedRect.size.width, expandedRect.size.height);
            if (expandedSside < 5.0f) continue;

            auto expandedOrdered = GetMiniBoxes(expandedRect);

            // Step 5: Scale to original image coordinates (clamped to bounds)
            TextBox box;
            for (int j = 0; j < 4; j++) {
                box.points[j].x = std::clamp(expandedOrdered[j].x * ratioW, 0.0f, static_cast<float>(image.cols - 1));
                box.points[j].y = std::clamp(expandedOrdered[j].y * ratioH, 0.0f, static_cast<float>(image.rows - 1));
            }
            box.score = score;
            boxes.push_back(box);
        }

        SortTextBoxes(boxes);
        return boxes;
    }
    catch (const std::exception& e) {
        std::cerr << "[RTOCRDetector] Detect failed: " << e.what() << std::endl;
        return {};
    }
}
|
||||
|
||||
// Matches PaddleOCR official GetMiniBoxes: sort by X, assign TL/TR/BL/BR by Y
|
||||
// Order the 4 corners of a rotated rect as [TL, TR, BR, BL] (clockwise from
// top-left), matching PaddleOCR's GetMiniBoxes: sort by x, then split each
// x-pair into top/bottom by y.
std::array<cv::Point2f, 4> RTOCRDetector::GetMiniBoxes(const cv::RotatedRect& rect) {
    std::array<cv::Point2f, 4> corners;
    rect.points(corners.data());

    // Sort by x ascending: corners[0..1] are the left pair, [2..3] the right pair.
    std::sort(corners.begin(), corners.end(),
              [](const cv::Point2f& lhs, const cv::Point2f& rhs) { return lhs.x < rhs.x; });

    // Within each pair, the smaller y is the "top" corner.
    const bool leftFlipped  = corners[0].y > corners[1].y;
    const bool rightFlipped = corners[2].y > corners[3].y;

    const cv::Point2f topLeft     = leftFlipped  ? corners[1] : corners[0];
    const cv::Point2f bottomLeft  = leftFlipped  ? corners[0] : corners[1];
    const cv::Point2f topRight    = rightFlipped ? corners[3] : corners[2];
    const cv::Point2f bottomRight = rightFlipped ? corners[2] : corners[3];

    return { topLeft, topRight, bottomRight, bottomLeft };
}
|
||||
|
||||
// Matches PaddleOCR official box_score_fast: mean prob value inside the 4-point polygon
|
||||
// Mean probability under the 4-point polygon (PaddleOCR box_score_fast):
// rasterize the quad into a mask over its clamped bounding rect and average
// the probability map under that mask. Returns 0 for degenerate boxes.
float RTOCRDetector::BoxScoreFast(const cv::Mat& probMap,
                                  const std::array<cv::Point2f, 4>& box) {
    const int mapH = probMap.rows;
    const int mapW = probMap.cols;

    // Axis-aligned bounds of the quad.
    float loX = box[0].x, hiX = box[0].x;
    float loY = box[0].y, hiY = box[0].y;
    for (int i = 1; i < 4; ++i) {
        loX = std::min(loX, box[i].x);
        hiX = std::max(hiX, box[i].x);
        loY = std::min(loY, box[i].y);
        hiY = std::max(hiY, box[i].y);
    }

    // Clamp into the map (floor on mins, ceil on maxes, PaddleOCR-style).
    const int xmin = std::clamp(static_cast<int>(std::floor(loX)), 0, mapW - 1);
    const int xmax = std::clamp(static_cast<int>(std::ceil(hiX)), 0, mapW - 1);
    const int ymin = std::clamp(static_cast<int>(std::floor(loY)), 0, mapH - 1);
    const int ymax = std::clamp(static_cast<int>(std::ceil(hiY)), 0, mapH - 1);
    if (xmin >= xmax || ymin >= ymax) return 0.0f;

    // Polygon mask in bounding-rect-local coordinates.
    cv::Mat mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
    std::vector<std::vector<cv::Point>> polygon(1);
    polygon[0].reserve(4);
    for (const auto& corner : box) {
        polygon[0].emplace_back(static_cast<int>(corner.x) - xmin,
                                static_cast<int>(corner.y) - ymin);
    }
    cv::fillPoly(mask, polygon, cv::Scalar(1));

    const cv::Rect roi(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
    return static_cast<float>(cv::mean(probMap(roi), mask)[0]);
}
|
||||
|
||||
// Matches PaddleOCR official unclip: expand 4-point box using Clipper with JT_ROUND
|
||||
// Uses integer coordinates for Clipper (matching PaddleOCR/ONNX version exactly)
|
||||
// Expand a 4-point box outward by distance = area * unclipRatio / perimeter,
// using Clipper polygon offsetting with round joins (PaddleOCR unclip).
// Integer coordinates are fed to Clipper, matching the ONNX version exactly.
// Returns the expanded outline, or empty on degenerate input / empty solution.
std::vector<cv::Point2f> RTOCRDetector::UnclipPolygon(const std::array<cv::Point2f, 4>& box,
                                                      float unclipRatio) {
    // Shoelace area (x2) and perimeter in one pass over the edges.
    float twiceArea = 0.0f;
    float perimeter = 0.0f;
    for (int cur = 0; cur < 4; ++cur) {
        const int nxt = (cur + 1) % 4;
        twiceArea += box[cur].x * box[nxt].y - box[nxt].x * box[cur].y;
        const float dx = box[nxt].x - box[cur].x;
        const float dy = box[nxt].y - box[cur].y;
        perimeter += std::sqrt(dx * dx + dy * dy);
    }
    const float quadArea = std::abs(twiceArea) * 0.5f;
    if (perimeter < 1.0f) return {};  // degenerate box

    const float offsetDistance = quadArea * unclipRatio / perimeter;

    // Build the subject path with truncated integer coordinates.
    ClipperLib::Path subject;
    for (const auto& corner : box) {
        subject.push_back({ static_cast<ClipperLib::cInt>(corner.x),
                            static_cast<ClipperLib::cInt>(corner.y) });
    }

    ClipperLib::ClipperOffset offsetter;
    offsetter.AddPath(subject, ClipperLib::jtRound, ClipperLib::etClosedPolygon);

    ClipperLib::Paths expanded;
    offsetter.Execute(expanded, offsetDistance);
    if (expanded.empty() || expanded[0].empty()) return {};

    std::vector<cv::Point2f> outline;
    outline.reserve(expanded[0].size());
    for (const auto& vertex : expanded[0]) {
        outline.emplace_back(static_cast<float>(vertex.X), static_cast<float>(vertex.Y));
    }
    return outline;
}
|
||||
|
||||
// Return a pooled engine to the shared pool; otherwise just drop our reference.
// Destructors must not throw, so everything is swallowed.
RTOCRDetector::~RTOCRDetector() {
    try {
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_usingSharedPool = false;
        }
        m_engine.reset();  // no-op when already null
    }
    catch (...) {
        // Never propagate from a destructor.
    }
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
44
modules/ANSOCR/ANSRTOCR/RTOCRDetector.h
Normal file
44
modules/ANSOCR/ANSRTOCR/RTOCRDetector.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include "RTOCRTypes.h"
|
||||
#include "engine.h"
|
||||
#include "engine/EnginePoolManager.h"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// DBNet-style text detector backed by a pooled TensorRT engine.
// Not copyable; the engine is released to the shared pool on destruction.
// Thread-safety: Detect serializes on the internal mutex.
class RTOCRDetector {
public:
    RTOCRDetector() = default;
    ~RTOCRDetector();
    RTOCRDetector(const RTOCRDetector&) = delete;
    RTOCRDetector& operator=(const RTOCRDetector&) = delete;

    // Build or load the TRT engine. maxSideLen sets the dynamic profile's max
    // spatial dimension; the engine may be built smaller if GPU memory is tight.
    bool Initialize(const std::string& onnxPath, int gpuId = 0,
                    const std::string& engineCacheDir = "",
                    int maxSideLen = kDetMaxSideLen);

    // Detect text boxes in `image`; returns boxes in original image
    // coordinates, sorted top-to-bottom / left-to-right.
    std::vector<TextBox> Detect(const cv::Mat& image,
                                int maxSideLen = kDetMaxSideLen,
                                float dbThresh = kDetDbThresh,
                                float boxThresh = kDetBoxThresh,
                                float unclipRatio = kDetUnclipRatio,
                                bool useDilation = false);

private:
    // Postprocessing helpers (matches ONNX/PaddleOCR official flow exactly)
    std::array<cv::Point2f, 4> GetMiniBoxes(const cv::RotatedRect& rect);
    float BoxScoreFast(const cv::Mat& probMap, const std::array<cv::Point2f, 4>& box);
    std::vector<cv::Point2f> UnclipPolygon(const std::array<cv::Point2f, 4>& box, float unclipRatio);

    std::shared_ptr<Engine<float>> m_engine = nullptr;     // pooled TRT engine
    EnginePoolManager<float>::PoolKey m_poolKey;           // key used to release the engine
    bool m_usingSharedPool = false;                        // true when m_engine came from the pool
    int m_engineMaxSideLen = kDetMaxSideLen; // Actual TRT engine max spatial dim
    std::mutex _mutex;                                     // guards Detect
};
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
206
modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp
Normal file
206
modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "RTOCRRecognizer.h"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/cudaimgproc.hpp>
|
||||
#include <opencv2/cudawarping.hpp>
|
||||
#include <opencv2/cudaarithm.hpp>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
#include <cfloat>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// Load the CTC dictionary and build/load the TensorRT recognition engine.
// @param onnxPath        Path to the recognizer ONNX model.
// @param dictPath        Character dictionary (one entry per line).
// @param gpuId           CUDA device index.
// @param engineCacheDir  Cache dir for serialized engines; empty = ONNX file's dir.
// @return true on success; false if the dictionary or engine cannot be loaded.
bool RTOCRRecognizer::Initialize(const std::string& onnxPath, const std::string& dictPath,
                                 int gpuId, const std::string& engineCacheDir) {
    try {
        // Load dictionary first — decoding is impossible without it.
        keys_ = LoadDict(dictPath);
        if (keys_.size() < 2) {
            std::cerr << "[RTOCRRecognizer] Failed to load dictionary: " << dictPath << std::endl;
            return false;
        }
        std::cout << "[RTOCRRecognizer] Loaded dictionary with " << keys_.size()
                  << " characters from: " << dictPath << std::endl;

        ANSCENTER::Options options;
        options.deviceIndex = gpuId;
        options.precision = ANSCENTER::Precision::FP16;
        options.maxBatchSize = 1;
        options.optBatchSize = 1;

        // Fixed height, dynamic width for recognition
        // NOTE(review): maxInputWidth is the literal 960 rather than imgMaxW_ —
        // if SetRecImageMaxWidth raises imgMaxW_ above 960 the profile will not
        // cover it; confirm this is intentional.
        options.minInputHeight = imgH_;
        options.optInputHeight = imgH_;
        options.maxInputHeight = imgH_;
        options.minInputWidth = 32;
        options.optInputWidth = imgMaxW_;
        options.maxInputWidth = 960;

        // Engine cache directory: explicit arg wins, else the ONNX file's folder.
        if (!engineCacheDir.empty()) {
            options.engineFileDir = engineCacheDir;
        }
        else {
            auto pos = onnxPath.find_last_of("/\\");
            options.engineFileDir = (pos != std::string::npos) ? onnxPath.substr(0, pos) : ".";
        }

        // Acquire via the shared pool so instances on the same model reuse one engine.
        m_poolKey = { onnxPath,
                      static_cast<int>(options.precision),
                      options.maxBatchSize };
        m_engine = EnginePoolManager<float>::instance().acquire(
            m_poolKey, options, onnxPath,
            kRecSubVals, kRecDivVals, true, -1);
        m_usingSharedPool = (m_engine != nullptr);

        if (!m_engine) {
            std::cerr << "[RTOCRRecognizer] Failed to build/load TRT engine: " << onnxPath << std::endl;
            return false;
        }

        std::cout << "[RTOCRRecognizer] Initialized TRT engine from: " << onnxPath << std::endl;
        return true;
    }
    catch (const std::exception& e) {
        std::cerr << "[RTOCRRecognizer] Initialize failed: " << e.what() << std::endl;
        m_engine.reset();
        return false;
    }
}
|
||||
|
||||
// Recognize the text content of one cropped text-line image.
// Pipeline: resize to fixed height -> pad to min width -> TRT inference ->
// greedy CTC decode. Serialized on _mutex. Returns an empty TextLine when the
// engine is not initialized, the crop is empty, or inference fails.
TextLine RTOCRRecognizer::Recognize(const cv::Mat& croppedImage) {
    std::lock_guard<std::mutex> lock(_mutex);

    if (!m_engine || croppedImage.empty() || keys_.empty()) {
        return {};
    }

    try {
        // Preprocess: resize to fixed height, proportional width
        cv::Mat resized = ResizeRecImage(croppedImage, imgH_, imgMaxW_);
        int resizedW = resized.cols;

        // Pad to at least kRecImgW width (matching official PaddleOCR behavior)
        // Official PaddleOCR pads with 0.0 in normalized space ≈ pixel value 128 (gray)
        int imgW = std::max(resizedW, kRecImgW);
        if (imgW > resizedW) {
            cv::Mat padded(imgH_, imgW, resized.type(), cv::Scalar(128, 128, 128));
            resized.copyTo(padded(cv::Rect(0, 0, resizedW, imgH_)));
            resized = padded;
        }

        // Upload to GPU (keep BGR order - PaddleOCR official does NOT convert BGR→RGB)
        cv::cuda::GpuMat gpuImg;
        gpuImg.upload(resized);

        // Run inference
        std::vector<std::vector<cv::cuda::GpuMat>> inputs = { { gpuImg } };
        std::vector<std::vector<std::vector<float>>> featureVectors;

        if (!m_engine->runInference(inputs, featureVectors)) {
            std::cerr << "[RTOCRRecognizer] Inference failed" << std::endl;
            return {};
        }

        if (featureVectors.empty() || featureVectors[0].empty() ||
            featureVectors[0][0].empty()) {
            return {};
        }

        // Output shape: [1, seqLen, numClasses] flattened to [seqLen * numClasses]
        // IMPORTANT: The TRT engine output buffer is pre-allocated to MAX dimensions
        // (e.g. 120 timesteps for max width 960), but the actual inference produces
        // fewer timesteps for narrower images. We must use the ACTUAL seqLen
        // derived from the input width, not getOutputDims() which returns max dims.
        const std::vector<float>& output = featureVectors[0][0];

        // numClasses from dictionary size (keys_ includes blank at index 0)
        int numClasses = static_cast<int>(keys_.size());

        // Actual seqLen from input width: recognition model stride = 8
        // (confirmed: 960px input → 120 timesteps, 960/120 = 8)
        // NOTE(review): stride 8 is model-specific — re-verify if the rec model changes.
        int seqLen = imgW / 8;

        // Sanity check: seqLen * numClasses must not exceed buffer size
        if (seqLen * numClasses > static_cast<int>(output.size())) {
            // Fallback: infer from buffer size
            seqLen = static_cast<int>(output.size()) / numClasses;
        }

        return CTCDecode(output.data(), seqLen, numClasses);
    }
    catch (const std::exception& e) {
        std::cerr << "[RTOCRRecognizer] Recognize failed: " << e.what() << std::endl;
        return {};
    }
}
|
||||
|
||||
// Recognize a batch of cropped text-line images. Crop widths differ, so each
// line is run through the engine individually; results preserve input order.
std::vector<TextLine> RTOCRRecognizer::RecognizeBatch(const std::vector<cv::Mat>& croppedImages) {
    std::vector<TextLine> lines;
    lines.reserve(croppedImages.size());
    for (const cv::Mat& crop : croppedImages) {
        lines.push_back(Recognize(crop));
    }
    return lines;
}
|
||||
|
||||
// Greedy CTC decode: per timestep take the argmax class, then drop blanks
// (index 0) and immediate repeats. Confidence is the mean of the kept argmax
// values (PaddleOCR v5 models emit softmax probabilities).
TextLine RTOCRRecognizer::CTCDecode(const float* outputData, int seqLen, int numClasses) {
    TextLine decoded;
    float scoreSum = 0.0f;
    int keptCount = 0;
    int prevClass = 0;  // CTC blank is index 0

    for (int t = 0; t < seqLen; ++t) {
        const float* logits = outputData + t * numClasses;

        // Argmax over this timestep (first maximum wins on ties).
        const float* bestIt = std::max_element(logits, logits + numClasses);
        const int bestClass = static_cast<int>(bestIt - logits);
        const float bestValue = *bestIt;

        // Emit only non-blank classes that differ from the previous timestep
        // and are covered by the dictionary.
        if (bestClass != 0 && bestClass != prevClass &&
            bestClass < static_cast<int>(keys_.size())) {
            decoded.text += keys_[bestClass];  // keys_[0]="#"(blank), keys_[1]=first_char, ...
            scoreSum += bestValue;
            ++keptCount;
        }
        prevClass = bestClass;
    }

    if (keptCount > 0) {
        decoded.score = scoreSum / static_cast<float>(keptCount);
    }
    return decoded;
}
|
||||
|
||||
// Return a pooled engine to the shared pool; otherwise just drop our reference.
// Destructors must not throw, so everything is swallowed.
RTOCRRecognizer::~RTOCRRecognizer() {
    try {
        if (m_usingSharedPool) {
            EnginePoolManager<float>::instance().release(m_poolKey);
            m_usingSharedPool = false;
        }
        m_engine.reset();  // no-op when already null
    }
    catch (...) {
        // Never propagate from a destructor.
    }
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
41
modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.h
Normal file
41
modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include "RTOCRTypes.h"
|
||||
#include "engine.h"
|
||||
#include "engine/EnginePoolManager.h"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// CTC-based text-line recognizer backed by a pooled TensorRT engine.
// Not copyable; the engine is released to the shared pool on destruction.
// Thread-safety: Recognize serializes on the internal mutex.
class RTOCRRecognizer {
public:
    RTOCRRecognizer() = default;
    ~RTOCRRecognizer();
    RTOCRRecognizer(const RTOCRRecognizer&) = delete;
    RTOCRRecognizer& operator=(const RTOCRRecognizer&) = delete;

    // Load the character dictionary and build/load the TRT engine.
    bool Initialize(const std::string& onnxPath, const std::string& dictPath,
                    int gpuId = 0, const std::string& engineCacheDir = "");

    // Recognize one cropped text-line image / a batch of crops (run one-by-one).
    TextLine Recognize(const cv::Mat& croppedImage);
    std::vector<TextLine> RecognizeBatch(const std::vector<cv::Mat>& croppedImages);

    // Tune preprocessing geometry; call before Initialize so the engine
    // profile matches (Initialize reads imgH_/imgMaxW_).
    void SetRecImageHeight(int h) { imgH_ = h; }
    void SetRecImageMaxWidth(int w) { imgMaxW_ = w; }

private:
    // Greedy CTC decode of one [seqLen x numClasses] output buffer.
    TextLine CTCDecode(const float* outputData, int seqLen, int numClasses);

    std::shared_ptr<Engine<float>> m_engine = nullptr;     // pooled TRT engine
    EnginePoolManager<float>::PoolKey m_poolKey;           // key used to release the engine
    bool m_usingSharedPool = false;                        // true when m_engine came from the pool
    std::vector<std::string> keys_;                        // CTC dictionary, blank at index 0
    int imgH_ = kRecImgH;                                  // fixed input height
    int imgMaxW_ = kRecImgMaxW;                            // max width before truncation
    std::mutex _mutex;                                     // guards Recognize
};
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
196
modules/ANSOCR/ANSRTOCR/RTOCRTypes.h
Normal file
196
modules/ANSOCR/ANSRTOCR/RTOCRTypes.h
Normal file
@@ -0,0 +1,196 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <cmath>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
namespace ANSCENTER {
|
||||
namespace rtocr {
|
||||
|
||||
// ============================================================================
|
||||
// Engine normalization constants (BGR channel order, matching PaddleOCR official)
|
||||
// ============================================================================
|
||||
// PaddleOCR processes images in BGR order (no BGR→RGB conversion).
|
||||
// Engine applies: (pixel/255.0 - subVals[c]) / divVals[c] per channel.
|
||||
// When feeding BGR input (no cvtColor), subVals/divVals indices map to:
|
||||
// [0]=B channel, [1]=G channel, [2]=R channel
|
||||
//
|
||||
// PaddleOCR config: mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
||||
// These are applied to BGR channels: B=0.485/0.229, G=0.456/0.224, R=0.406/0.225
|
||||
|
||||
// Detection normalization (BGR order)
|
||||
constexpr std::array<float, 3> kDetSubVals = { 0.485f, 0.456f, 0.406f };
|
||||
constexpr std::array<float, 3> kDetDivVals = { 0.229f, 0.224f, 0.225f };
|
||||
|
||||
// Classifier normalization: PP-LCNet_x1_0_textline_ori uses ImageNet normalization (BGR order)
|
||||
// Config: mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], scale=1/255
|
||||
constexpr std::array<float, 3> kClsSubVals = { 0.485f, 0.456f, 0.406f };
|
||||
constexpr std::array<float, 3> kClsDivVals = { 0.229f, 0.224f, 0.225f };
|
||||
|
||||
// Recognition normalization: (pixel/255 - 0.5) / 0.5 — note this differs from
// the classifier above, which uses ImageNet mean/std.
|
||||
constexpr std::array<float, 3> kRecSubVals = { 0.5f, 0.5f, 0.5f };
|
||||
constexpr std::array<float, 3> kRecDivVals = { 0.5f, 0.5f, 0.5f };
|
||||
|
||||
// ============================================================================
|
||||
// Detection defaults (PP-OCRv5 server)
|
||||
// kDetMaxSideLen is the default max side length for detection preprocessing.
|
||||
// TRT engine auto-fallbacks to smaller max if GPU memory is insufficient during build.
|
||||
// ============================================================================
|
||||
constexpr int kDetMaxSideLen = 2560;
|
||||
constexpr int kDetMaxSideLimit = 4000; // Safety cap on max dimension
|
||||
constexpr float kDetDbThresh = 0.3f;
|
||||
constexpr float kDetBoxThresh = 0.6f;
|
||||
constexpr float kDetUnclipRatio = 1.5f;
|
||||
constexpr int kDetMaxCandidates = 1000;
|
||||
|
||||
// ============================================================================
|
||||
// Classifier defaults (PP-LCNet_x1_0_textline_ori model)
|
||||
// Input: [B, 3, 80, 160], ImageNet normalization, 2-class (0°/180°)
|
||||
// Direct resize to 80x160 (no aspect ratio preservation)
|
||||
// ============================================================================
|
||||
constexpr int kClsImageH = 80;
|
||||
constexpr int kClsImageW = 160;
|
||||
constexpr float kClsThresh = 0.9f;
|
||||
|
||||
// ============================================================================
|
||||
// Recognition defaults
|
||||
// ============================================================================
|
||||
constexpr int kRecImgH = 48;
|
||||
constexpr int kRecImgW = 320; // Default rec width (PP-OCRv5 rec_image_shape[2]=320, min padded width)
|
||||
constexpr int kRecImgMaxW = 960; // Allow wide recognition input for long text lines
|
||||
constexpr int kRecBatchSize = 6;
|
||||
|
||||
// ============================================================================
|
||||
// Data structures
|
||||
// ============================================================================
|
||||
|
||||
// A detected text box: 4 corner points (top-left, top-right, bottom-right, bottom-left)
|
||||
struct TextBox {
|
||||
std::array<cv::Point2f, 4> points;
|
||||
float score = 0.0f;
|
||||
};
|
||||
|
||||
// A single recognized text line
|
||||
struct TextLine {
|
||||
std::string text;
|
||||
float score = 0.0f;
|
||||
};
|
||||
|
||||
// OCR result matching PaddleOCR::OCRPredictResult format
|
||||
struct OCRPredictResult {
|
||||
std::vector<std::vector<int>> box; // 4 corner points [[x,y], ...]
|
||||
std::string text;
|
||||
float score = -1.0f;
|
||||
float cls_score = 0.0f;
|
||||
int cls_label = -1;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Utility functions
|
||||
// ============================================================================
|
||||
|
||||
// Load character dictionary from file
|
||||
// Load the character dictionary used for CTC decoding.
// Result layout: index 0 is the CTC blank token ("#"), then one entry per
// file line (CRLF-tolerant), then a trailing space token. Returns an empty
// vector when the file cannot be opened.
inline std::vector<std::string> LoadDict(const std::string& dictPath) {
    std::vector<std::string> entries;
    std::ifstream in(dictPath);
    if (!in.is_open()) return entries;

    entries.emplace_back("#");  // CTC blank token at index 0
    for (std::string row; std::getline(in, row); ) {
        if (!row.empty() && row.back() == '\r') {
            row.pop_back();  // strip carriage return from CRLF files
        }
        entries.push_back(std::move(row));
    }
    entries.emplace_back(" ");  // space character at the end
    return entries;
}
|
||||
|
||||
// Compute resize dimensions for detection model (multiples of 32)
|
||||
// limit_type='max': scale down if max side > maxSideLen (PP-OCRv5 server default)
|
||||
// maxSideLimit: safety cap on final max dimension (default 4000)
|
||||
inline cv::Size ComputeDetResizeShape(int srcH, int srcW, int maxSideLen,
|
||||
int maxSideLimit = kDetMaxSideLimit) {
|
||||
float ratio = 1.0f;
|
||||
int maxSide = std::max(srcH, srcW);
|
||||
if (maxSide > maxSideLen) {
|
||||
ratio = static_cast<float>(maxSideLen) / static_cast<float>(maxSide);
|
||||
}
|
||||
int newH = static_cast<int>(srcH * ratio);
|
||||
int newW = static_cast<int>(srcW * ratio);
|
||||
|
||||
// Safety cap: clamp if either dimension exceeds maxSideLimit
|
||||
if (std::max(newH, newW) > maxSideLimit) {
|
||||
float clampRatio = static_cast<float>(maxSideLimit) / static_cast<float>(std::max(newH, newW));
|
||||
newH = static_cast<int>(newH * clampRatio);
|
||||
newW = static_cast<int>(newW * clampRatio);
|
||||
}
|
||||
|
||||
newH = std::max(32, static_cast<int>(std::round(newH / 32.0) * 32));
|
||||
newW = std::max(32, static_cast<int>(std::round(newW / 32.0) * 32));
|
||||
return cv::Size(newW, newH);
|
||||
}
|
||||
|
||||
// Sort text boxes from top to bottom, left to right
|
||||
inline void SortTextBoxes(std::vector<TextBox>& boxes) {
|
||||
std::sort(boxes.begin(), boxes.end(),
|
||||
[](const TextBox& a, const TextBox& b) {
|
||||
if (std::abs(a.points[0].y - b.points[0].y) < 10.0f) {
|
||||
return a.points[0].x < b.points[0].x;
|
||||
}
|
||||
return a.points[0].y < b.points[0].y;
|
||||
});
|
||||
}
|
||||
|
||||
// Get rotated and cropped image from text box polygon
|
||||
inline cv::Mat GetRotateCropImage(const cv::Mat& srcImage, const TextBox& box) {
|
||||
auto pts = box.points;
|
||||
float width = static_cast<float>(std::max(
|
||||
cv::norm(pts[0] - pts[1]),
|
||||
cv::norm(pts[2] - pts[3])));
|
||||
float height = static_cast<float>(std::max(
|
||||
cv::norm(pts[0] - pts[3]),
|
||||
cv::norm(pts[1] - pts[2])));
|
||||
|
||||
std::vector<cv::Point2f> srcPts = { pts[0], pts[1], pts[2], pts[3] };
|
||||
std::vector<cv::Point2f> dstPts = {
|
||||
{0, 0}, {width, 0}, {width, height}, {0, height}
|
||||
};
|
||||
|
||||
cv::Mat M = cv::getPerspectiveTransform(srcPts, dstPts);
|
||||
cv::Mat cropped;
|
||||
cv::warpPerspective(srcImage, cropped, M,
|
||||
cv::Size(static_cast<int>(width), static_cast<int>(height)),
|
||||
cv::BORDER_REPLICATE);
|
||||
|
||||
if (cropped.rows > cropped.cols * 1.5f) {
|
||||
cv::Mat rotated;
|
||||
cv::transpose(cropped, rotated);
|
||||
cv::flip(rotated, rotated, 0);
|
||||
return rotated;
|
||||
}
|
||||
return cropped;
|
||||
}
|
||||
|
||||
// Resize recognition image to fixed height, proportional width
|
||||
inline cv::Mat ResizeRecImage(const cv::Mat& img, int targetH, int maxW) {
|
||||
float ratio = static_cast<float>(targetH) / img.rows;
|
||||
int targetW = static_cast<int>(img.cols * ratio);
|
||||
targetW = std::min(targetW, maxW);
|
||||
targetW = std::max(targetW, 1);
|
||||
|
||||
cv::Mat resized;
|
||||
cv::resize(img, resized, cv::Size(targetW, targetH));
|
||||
return resized;
|
||||
}
|
||||
|
||||
} // namespace rtocr
|
||||
} // namespace ANSCENTER
|
||||
401
modules/ANSOCR/ANSRtOCR.cpp
Normal file
401
modules/ANSOCR/ANSRtOCR.cpp
Normal file
@@ -0,0 +1,401 @@
|
||||
#include "ANSRtOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
// Initialize the TensorRT OCR backend.
// Runs the shared ANSOCRBase setup first (license validation, model-zip
// extraction, _modelConfig population), verifies the mandatory detection
// and recognizer model files exist, then configures and builds/loads the
// TensorRT engine. On any failure _licenseValid is cleared and false is
// returned; true means the engine is ready for RunInference.
bool ANSRTOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
    const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
    try {
        // Base class performs license + model-package setup and fills _modelConfig.
        bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
        if (!result) return false;

        // Validate detection model (mandatory).
        if (!FileExist(_modelConfig.detectionModelFile)) {
            this->_logger.LogFatal("ANSRTOCR::Initialize", "Invalid detector model file: " + _modelConfig.detectionModelFile, __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }

        // Validate recognizer model (mandatory).
        if (!FileExist(_modelConfig.recognizerModelFile)) {
            this->_logger.LogFatal("ANSRTOCR::Initialize", "Invalid recognizer model file: " + _modelConfig.recognizerModelFile, __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }

        // Classifier is optional - controlled by useCLS flag and file existence.
        // An empty clsModelPath later tells the engine to skip the classifier stage.
        std::string clsModelPath;
        if (_modelConfig.useCLS) {
            clsModelPath = _modelConfig.clsModelFile;
            if (!clsModelPath.empty() && !FileExist(clsModelPath)) {
                this->_logger.LogWarn("ANSRTOCR::Initialize", "Classifier model not found, skipping: " + clsModelPath, __FILE__, __LINE__);
                clsModelPath = ""; // Clear to skip classifier
            }
        }
        else {
            this->_logger.LogDebug("ANSRTOCR::Initialize", "Classifier disabled (useCLS=false)", __FILE__, __LINE__);
        }

        try {
            // Configure engine parameters from modelConfig before building the engine.
            _engine->SetDetMaxSideLen(_modelConfig.limitSideLen);
            _engine->SetDetDbThresh(static_cast<float>(_modelConfig.detectionDBThreshold));
            _engine->SetDetBoxThresh(static_cast<float>(_modelConfig.detectionBoxThreshold));
            _engine->SetDetUnclipRatio(static_cast<float>(_modelConfig.detectionDBUnclipRatio));
            _engine->SetClsThresh(static_cast<float>(_modelConfig.clsThreshold));
            _engine->SetUseDilation(_modelConfig.useDilation);
            _engine->SetGpuId(_modelConfig.gpuId);

            // Determine engine cache directory (same folder as detection model).
            std::string engineCacheDir;
            auto pos = _modelConfig.detectionModelFile.find_last_of("/\\");
            if (pos != std::string::npos) {
                engineCacheDir = _modelConfig.detectionModelFile.substr(0, pos);
            }

            _isInitialized = _engine->Initialize(
                _modelConfig.detectionModelFile,
                clsModelPath,
                _modelConfig.recognizerModelFile,
                _modelConfig.recogizerCharDictionaryPath,
                _modelConfig.gpuId,
                engineCacheDir);

            return _isInitialized;
        }
        catch (const std::exception& e) {
            _licenseValid = false;
            this->_logger.LogFatal("ANSRTOCR::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
        catch (...) {
            _licenseValid = false;
            this->_logger.LogFatal("ANSRTOCR::Initialize", "Failed to create TensorRT OCR engine", __FILE__, __LINE__);
            return false;
        }
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSRTOCR::Initialize", e.what(), __FILE__, __LINE__);
        _licenseValid = false;
        return false;
    }
}
|
||||
|
||||
// Full-frame OCR convenience overload: rejects unusable frames up front,
// then delegates to the cameraId overload with the default RT camera tag.
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input) {
    const bool tooSmall = (input.cols < 10) || (input.rows < 10);
    if (input.empty() || tooSmall) {
        return {};
    }
    return RunInference(input, "OCRRTCAM");
}
|
||||
|
||||
// Full-frame OCR tagged with a camera identifier.
// Serializes on _mutex (recursive — other overloads delegate here under
// the same lock), validates license/initialization/input, runs the full
// detect→(classify)→recognize pipeline on the frame, and converts each
// engine result quad into an axis-aligned ANSCENTER::OCRObject.
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        // Convert grayscale to BGR if necessary (engine expects 3 channels).
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        if (!_engine) {
            this->_logger.LogFatal("ANSRTOCR::RunInference", "Engine instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        // The engine handles large images correctly in two stages:
        // 1. Detection: internally scales to limitSideLen → bounded GPU memory
        // 2. Recognition: crops each text box from the ORIGINAL full-res image
        // This preserves text detail without tiling (which fragments text at boundaries).
        std::vector<rtocr::OCRPredictResult> res_ocr = _engine->ocr(im);

        for (size_t n = 0; n < res_ocr.size(); ++n) {
            if (res_ocr[n].box.size() != 4) {
                this->_logger.LogError("ANSRTOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Quad corners in order: top-left, top-right, bottom-right, bottom-left.
            cv::Point rook_points[4];
            for (size_t m = 0; m < 4; ++m) {
                rook_points[m] = cv::Point(
                    static_cast<int>(res_ocr[n].box[m][0]),
                    static_cast<int>(res_ocr[n].box[m][1])
                );
            }

            // Axis-aligned approximation of the quad: origin at the (clamped)
            // top-left corner, width/height from the opposing edges.
            int x = std::max(0, rook_points[0].x);
            int y = std::max(0, rook_points[0].y);
            int width = rook_points[1].x - rook_points[0].x;
            int height = rook_points[2].y - rook_points[1].y;

            // Clamp the rect inside the image; degenerate boxes are dropped below.
            width = std::max(1, std::min(im.cols - x, width));
            height = std::max(1, std::min(im.rows - y, height));

            if (width <= 1 || height <= 1) {
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = res_ocr[n].cls_label;
            ocrObject.confidence = res_ocr[n].score;
            ocrObject.className = res_ocr[n].text;
            ocrObject.extraInfo = "cls label: " + std::to_string(res_ocr[n].cls_label)
                + "; cls score: " + std::to_string(res_ocr[n].cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(ocrObject);
        }

        im.release();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSRTOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSRTOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return OCRObjects;
}
|
||||
|
||||
// Region-restricted OCR: when Bbox is non-empty, each rectangle is
// clamped to the frame, cropped, OCR'd via the single-image overload
// (recursive _mutex allows re-entry under the lock), and the resulting
// boxes are translated back into full-frame coordinates. When Bbox is
// empty, the whole frame is processed directly.
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSRTOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        if (Bbox.size() > 0) {
            // Convert grayscale to BGR if necessary (engine expects 3 channels).
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }
            int fWidth = frame.cols;
            int fHeight = frame.rows;

            for (auto it = Bbox.begin(); it != Bbox.end(); it++) {
                // Clamp the requested region inside the frame.
                int x1 = std::max(0, it->x);
                int y1 = std::max(0, it->y);
                int width = std::min(fWidth - x1, it->width);
                int height = std::min(fHeight - y1, it->height);

                // Regions smaller than 5x5 are skipped as unusable for OCR.
                if (x1 >= 0 && y1 >= 0 && width >= 5 && height >= 5) {
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);

                    std::vector<ANSCENTER::OCRObject> tempObjects = RunInference(croppedObject);

                    // Translate crop-local boxes back to full-frame coordinates
                    // and re-clamp against the frame bounds.
                    for (size_t i = 0; i < tempObjects.size(); i++) {
                        ANSCENTER::OCRObject detObj = tempObjects[i];
                        detObj.box.x = tempObjects[i].box.x + x1;
                        detObj.box.y = tempObjects[i].box.y + y1;
                        detObj.box.x = std::max(0, detObj.box.x);
                        detObj.box.y = std::max(0, detObj.box.y);
                        detObj.box.width = std::min(fWidth - detObj.box.x, detObj.box.width);
                        detObj.box.height = std::min(fHeight - detObj.box.y, detObj.box.height);
                        OCRObjects.push_back(detObj);
                    }
                }
            }
        }
        else {
            // No regions given: OCR the whole frame.
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }

            std::vector<rtocr::OCRPredictResult> res_ocr = _engine->ocr(frame);
            for (size_t n = 0; n < res_ocr.size(); n++) {
                if (res_ocr[n].box.size() != 4) continue;

                cv::Point rook_points[4];
                for (size_t m = 0; m < res_ocr[n].box.size(); m++) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(res_ocr[n].box[m][0]),
                        static_cast<int>(res_ocr[n].box[m][1]));
                }

                // Axis-aligned rect from the quad corners, clamped to the frame.
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = rook_points[0].x;
                ocrObject.box.y = rook_points[0].y;
                ocrObject.box.width = rook_points[1].x - rook_points[0].x;
                ocrObject.box.height = rook_points[2].y - rook_points[1].y;

                ocrObject.box.x = std::max(0, ocrObject.box.x);
                ocrObject.box.y = std::max(0, ocrObject.box.y);
                ocrObject.box.width = std::min(frame.cols - ocrObject.box.x, ocrObject.box.width);
                ocrObject.box.height = std::min(frame.rows - ocrObject.box.y, ocrObject.box.height);

                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                ocrObject.extraInfo = "cls label:" + std::to_string(res_ocr[n].cls_label)
                    + ";cls score:" + std::to_string(res_ocr[n].cls_score);
                OCRObjects.push_back(ocrObject);
            }
            frame.release();
        }
        return OCRObjects;
    }
    catch (std::exception& e) {
        this->_logger.LogFatal("ANSRTOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
// Region-restricted OCR tagged with a camera identifier. Behaves like the
// Bbox-only overload but stamps cameraId on every returned object.
// FIX(consistency): the whole-frame path previously passed input.clone()
// straight to the engine, skipping the grayscale→BGR conversion that
// every other inference path performs; 1-channel frames now get the same
// conversion here.
std::vector<ANSCENTER::OCRObject> ANSRTOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSRTOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSRTOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        if (Bbox.size() > 0) {
            // Convert grayscale to BGR if necessary (engine expects 3 channels).
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input.clone();
            }
            int fWidth = frame.cols;
            int fHeight = frame.rows;

            for (auto it = Bbox.begin(); it != Bbox.end(); it++) {
                // Clamp the requested region inside the frame; skip regions
                // smaller than 5x5 as unusable for OCR.
                int x1 = std::max(0, it->x);
                int y1 = std::max(0, it->y);
                int width = std::min(fWidth - x1, it->width);
                int height = std::min(fHeight - y1, it->height);

                if (x1 >= 0 && y1 >= 0 && width >= 5 && height >= 5) {
                    cv::Rect objectPos(x1, y1, width, height);
                    cv::Mat croppedObject = frame(objectPos);

                    std::vector<ANSCENTER::OCRObject> tempObjects = RunInference(croppedObject);

                    // Translate crop-local boxes back into full-frame
                    // coordinates, re-clamp, and stamp the camera id.
                    for (size_t i = 0; i < tempObjects.size(); i++) {
                        ANSCENTER::OCRObject detObj = tempObjects[i];
                        detObj.box.x = tempObjects[i].box.x + x1;
                        detObj.box.y = tempObjects[i].box.y + y1;
                        detObj.box.x = std::max(0, detObj.box.x);
                        detObj.box.y = std::max(0, detObj.box.y);
                        detObj.box.width = std::min(fWidth - detObj.box.x, detObj.box.width);
                        detObj.box.height = std::min(fHeight - detObj.box.y, detObj.box.height);
                        detObj.cameraId = cameraId;
                        OCRObjects.push_back(detObj);
                    }
                }
            }
        }
        else {
            // Whole-frame path: apply the same grayscale→BGR normalization
            // as the other overloads before invoking the engine.
            cv::Mat im;
            if (input.channels() == 1) {
                cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
            }
            else {
                im = input.clone();
            }

            std::vector<rtocr::OCRPredictResult> res_ocr = _engine->ocr(im);
            for (size_t n = 0; n < res_ocr.size(); n++) {
                if (res_ocr[n].box.size() != 4) continue;

                cv::Point rook_points[4];
                for (size_t m = 0; m < res_ocr[n].box.size(); m++) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(res_ocr[n].box[m][0]),
                        static_cast<int>(res_ocr[n].box[m][1]));
                }

                // Axis-aligned rect from the quad corners, clamped to the frame.
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = rook_points[0].x;
                ocrObject.box.y = rook_points[0].y;
                ocrObject.box.width = rook_points[1].x - rook_points[0].x;
                ocrObject.box.height = rook_points[2].y - rook_points[1].y;
                ocrObject.box.x = std::max(0, ocrObject.box.x);
                ocrObject.box.y = std::max(0, ocrObject.box.y);
                ocrObject.box.width = std::min(im.cols - ocrObject.box.x, ocrObject.box.width);
                ocrObject.box.height = std::min(im.rows - ocrObject.box.y, ocrObject.box.height);
                ocrObject.classId = res_ocr[n].cls_label;
                ocrObject.confidence = res_ocr[n].score;
                ocrObject.className = res_ocr[n].text;
                ocrObject.extraInfo = "cls label:" + std::to_string(res_ocr[n].cls_label)
                    + ";cls score:" + std::to_string(res_ocr[n].cls_score);
                ocrObject.cameraId = cameraId;
                OCRObjects.push_back(ocrObject);
            }
            im.release();
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {  // catch by const reference
        this->_logger.LogFatal("ANSRTOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
// Destructor: releases the TensorRT engine via Destroy().
// FIX: a destructor must never throw — the original only caught
// std::exception, so any other exception type escaping Destroy() would
// propagate out of the destructor and terminate the process. A catch-all
// handler now swallows everything.
ANSRTOCR::~ANSRTOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {  // catch by const reference (idiom)
        this->_logger.LogFatal("ANSRTOCR::~ANSRTOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Intentionally swallowed: destructors must not throw.
    }
}
|
||||
|
||||
bool ANSRTOCR::Destroy() {
|
||||
try {
|
||||
if (_engine) _engine.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSRTOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ANSCENTER
|
||||
31
modules/ANSOCR/ANSRtOCR.h
Normal file
31
modules/ANSOCR/ANSRtOCR.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include "ANSOCRBase.h"
|
||||
#include "ANSRTOCR/PaddleOCRV5RTEngine.h"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
// TensorRT-accelerated OCR backend. Adapts the shared ANSOCRBase
// lifecycle (license check, model unpacking, configuration) to the
// rtocr::PaddleOCRV5RTEngine detection/classification/recognition engine.
// Thread-safety: the RunInference overloads serialize on _mutex; it is
// recursive because overloads delegate to each other under the lock.
class ANSOCR_API ANSRTOCR : public ANSOCRBase {
public:
    // Creates the engine wrapper eagerly; heavy setup happens in Initialize().
    ANSRTOCR() : _engine(std::make_unique<rtocr::PaddleOCRV5RTEngine>()) {}
    ~ANSRTOCR();

    // Validates the unpacked model files and builds/loads the TensorRT
    // engine. Returns false (and invalidates the license state) on failure.
    virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
        const std::string& modelZipFilePath, const std::string& modelZipPassword,
        int engineMode) override;

    // Full-frame OCR; further overloads restrict processing to regions
    // (Bbox) and/or tag results with a camera identifier.
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input) override;
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string& cameraId) override;
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) override;
    std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) override;

    // Releases the engine; called by the destructor as well.
    bool Destroy() override;

private:
    std::unique_ptr<rtocr::PaddleOCRV5RTEngine> _engine;  // owned inference engine
    std::recursive_mutex _mutex;                          // guards all RunInference paths
};
|
||||
|
||||
} // namespace ANSCENTER
|
||||
63
modules/ANSOCR/CMakeLists.txt
Normal file
63
modules/ANSOCR/CMakeLists.txt
Normal file
@@ -0,0 +1,63 @@
|
||||
# ANSOCR — Optical Character Recognition DLL (PaddleOCR, ONNX, TensorRT)

# Gather sources: top-level DLL glue plus the three backend source trees.
file(GLOB ANSOCR_HEADERS "*.h")
file(GLOB ANSOCR_SOURCES "*.cpp")
# Remove orphan files not in original vcxproj
list(FILTER ANSOCR_SOURCES EXCLUDE REGEX "ANSOdOCR\\.cpp$")
list(FILTER ANSOCR_SOURCES EXCLUDE REGEX "ANSOCR\\.cpp$")
file(GLOB_RECURSE PADDLE_HEADERS "ANSPaddleOCR/*.h")
file(GLOB_RECURSE PADDLE_SOURCES "ANSPaddleOCR/*.cpp")
# args.cpp is excluded from the library build (presumably PaddleOCR's CLI
# flag definitions, unwanted in a DLL — confirm against ANSPaddleOCR tree).
list(FILTER PADDLE_SOURCES EXCLUDE REGEX "args\\.cpp$")
file(GLOB_RECURSE ONNXOCR_SOURCES "ANSONNXOCR/*.cpp" "ANSONNXOCR/*.h")
file(GLOB_RECURSE RTOCR_SOURCES "ANSRTOCR/*.cpp" "ANSRTOCR/*.h")

add_library(ANSOCR SHARED
    ${ANSOCR_HEADERS}
    ${ANSOCR_SOURCES}
    ${PADDLE_HEADERS}
    ${PADDLE_SOURCES}
    ${ONNXOCR_SOURCES}
    ${RTOCR_SOURCES}
)

# Public include: consumers see the module root for its exported headers.
target_include_directories(ANSOCR PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# Private includes: backend-internal headers plus sibling engine/licensing modules.
target_include_directories(ANSOCR PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/ANSPaddleOCR
    ${CMAKE_CURRENT_SOURCE_DIR}/ANSPaddleOCR/include
    ${CMAKE_CURRENT_SOURCE_DIR}/ANSONNXOCR
    ${CMAKE_CURRENT_SOURCE_DIR}/ANSRTOCR
    ${CMAKE_SOURCE_DIR}/engines/ONNXEngine
    ${CMAKE_SOURCE_DIR}/engines/OpenVINOEngine/include
    ${CMAKE_SOURCE_DIR}/engines/TensorRTAPI/include
    ${CMAKE_SOURCE_DIR}/core/ANSLicensingSystem
    ${SHARED_INCLUDE_DIR}
)

target_link_libraries(ANSOCR
    PUBLIC ANSLibsLoader
    PRIVATE ANSODEngine
    PRIVATE ANSLicensingSystem
    PRIVATE labview
    PRIVATE spdlog_dep
    PRIVATE opencv
    PRIVATE onnxruntime
    PRIVATE tensorrt
    PRIVATE openvino
    PRIVATE CUDA::cudart
)

# Build-time feature switches expected by the bundled PaddleOCR/engine code.
target_compile_definitions(ANSOCR PRIVATE UNICODE _UNICODE
    ANSOCR_EXPORTS
    _USRDLL
    ENABLE_ORT_BACKEND
    ENABLE_OPENVINO_BACKEND
    WITH_GPU
    ENABLE_NVJPEG
    ENABLE_TRT_BACKEND
    ENABLE_VISION
    ENABLE_PADDLE2ONNX
)

target_precompile_headers(ANSOCR PRIVATE pch.h)
|
||||
1044
modules/ANSOCR/dllmain.cpp
Normal file
1044
modules/ANSOCR/dllmain.cpp
Normal file
File diff suppressed because it is too large
Load Diff
7
modules/ANSOCR/framework.h
Normal file
7
modules/ANSOCR/framework.h
Normal file
@@ -0,0 +1,7 @@
|
||||
#pragma once

// Minimal Windows platform header for this DLL.
#define WIN32_LEAN_AND_MEAN  // Exclude rarely-used stuff from Windows headers
#define NOMINMAX             // Prevent windows.h from defining min/max macros
                             // which break std::min / std::max (C2589)
// Windows Header Files
#include <windows.h>
|
||||
5
modules/ANSOCR/pch.cpp
Normal file
5
modules/ANSOCR/pch.cpp
Normal file
@@ -0,0 +1,5 @@
|
||||
// pch.cpp: source file corresponding to the pre-compiled header
|
||||
|
||||
#include "pch.h"
|
||||
|
||||
// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.
|
||||
13
modules/ANSOCR/pch.h
Normal file
13
modules/ANSOCR/pch.h
Normal file
@@ -0,0 +1,13 @@
|
||||
// pch.h: This is a precompiled header file.
|
||||
// Files listed below are compiled only once, improving build performance for future builds.
|
||||
// This also affects IntelliSense performance, including code completion and many code browsing features.
|
||||
// However, files listed here are ALL re-compiled if any one of them is updated between builds.
|
||||
// Do not add files here that you will be updating frequently as this negates the performance advantage.
|
||||
|
||||
#ifndef PCH_H
|
||||
#define PCH_H
|
||||
|
||||
// add headers that you want to pre-compile here
|
||||
#include "framework.h"
|
||||
|
||||
#endif //PCH_H
|
||||
Reference in New Issue
Block a user