// File: ANSCORE/modules/ANSOCR/ANSCpuOCR.cpp
// (File-listing residue from the original paste — "575 lines / 21 KiB / C++" —
//  converted into a comment so the translation unit compiles.)
#include "ANSCpuOCR.h"
#include "Utility.h"
#include <opencv2/highgui.hpp>
#include <omp.h>
#include <include/paddleocr_utility.h>
namespace ANSCENTER {
/// Initializes the CPU OCR engine.
/// Runs the base-class initialization (license check, model extraction), then
/// overrides the PaddleOCR v3 model paths for the OpenVINO/CPU build and
/// creates the PaddleOCR pipeline objects.
/// @param licenseKey        license string forwarded to ANSOCRBase::Initialize
/// @param modelConfig       initial model configuration (language selects the model set)
/// @param modelZipFilePath  path of the encrypted model archive
/// @param modelZipPassword  password of the model archive
/// @param engineMode        engine mode forwarded to the base class
/// @return true when the base init, the model files, and the PaddleOCR objects
///         are all valid; false otherwise (with _licenseValid cleared on fatal errors).
bool ANSCPUOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
    try
    {
        bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
        if (!result) return false;
        // Override the paddleocrv3 paths for OpenVINO/CPU only.
        // Every language shares the same directory layout and the same text
        // classifier model (CH_CLS); only the character dictionary and the
        // detection/recognition model prefixes differ, so configure via one helper.
        const auto applyLanguageConfig = [this](const std::string& dictFile,
                                                const std::string& detPrefix,
                                                const std::string& recPrefix) {
            _modelConfig.detectionModelDir = _modelFolder;
            _modelConfig.recognizerModelDir = _modelFolder;
            _modelConfig.clsModelDir = _modelFolder;
            _modelConfig.layoutModelDir = _modelFolder;
            _modelConfig.layourDictionaryPath = _modelFolder;
            _modelConfig.tableModelDir = _modelFolder;
            _modelConfig.tableCharDictionaryPath = _modelFolder;
            _modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, dictFile);
            _modelConfig.detectionModelFile = CreateFilePath(_modelFolder, detPrefix + ".pdmodel");
            _modelConfig.detectionModelParam = CreateFilePath(_modelFolder, detPrefix + ".pdiparams");
            _modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
            _modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
            _modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, recPrefix + ".pdmodel");
            _modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, recPrefix + ".pdiparams");
        };
        switch (_modelConfig.ocrLanguage) {
        case ANSCENTER::OCRLanguage::ENGLISH:
            applyLanguageConfig("dict_en.txt", "EN_DET", "ENV4_REC");
            break;
        case ANSCENTER::OCRLanguage::CHINESE:
            applyLanguageConfig("dict_ch.txt", "CHV4_DET", "CHV4_REC");
            break;
        case ANSCENTER::OCRLanguage::FRENCH:
            applyLanguageConfig("dict_fr.txt", "MPP_DET", "FR_REC");
            break;
        case ANSCENTER::OCRLanguage::GERMANY:
            applyLanguageConfig("dict_gr.txt", "MPP_DET", "GR_REC");
            break;
        case ANSCENTER::OCRLanguage::JAPANESE:
            applyLanguageConfig("dict_jp.txt", "MPP_DET", "JP_REC");
            break;
        case ANSCENTER::OCRLanguage::KOREAN:
            applyLanguageConfig("dict_kr.txt", "MPP_DET", "KR_REC");
            break;
        case ANSCENTER::OCRLanguage::CUSTOM:
        default:
            // CUSTOM and any unknown language both use the custom model set.
            applyLanguageConfig("dict_ct.txt", "CT_DET", "CT_REC");
            break;
        }
        // Validate that the three mandatory model files exist before creating the engine.
        if (!FileExist(_modelConfig.detectionModelFile)) {
            this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
        if (!FileExist(_modelConfig.clsModelFile)) {
            this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
        if (!FileExist(_modelConfig.recognizerModelFile)) {
            this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
        try {
            // Create the PaddleOCR pipeline; may throw on malformed model files.
            _isInitialized = ppocr->Initialize(_modelConfig.detectionModelFile, _modelConfig.clsModelFile, _modelConfig.recognizerModelFile, _modelConfig.recogizerCharDictionaryPath);
            return _isInitialized;
        }
        catch (const std::exception& e) {
            _licenseValid = false;
            this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
            return false;
        }
        catch (...) {
            _licenseValid = false;
            this->_logger.LogFatal("ANSCPUOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
            return false;
        }
    }
    catch (const std::exception& e) {
        // Handle any other exception that occurs during initialization.
        this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
        _licenseValid = false;
        return false;
    }
}
/// Convenience overload: runs OCR on a full image with the default CPU camera id.
/// Images that are empty or smaller than 10x10 pixels are rejected up front
/// (too small to contain readable text) and yield an empty result.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input) {
    constexpr int kMinDimension = 10;
    if (input.empty() || input.cols < kMinDimension || input.rows < kMinDimension) {
        return {};
    }
    return RunInference(input, "OCRCPUCAM");
}
/// Runs OCR on a full image and tags every result with the given camera id.
/// Thread-safe: the whole inference is serialized on _mutex.
/// @param input    BGR or grayscale image (grayscale is expanded to BGR).
/// @param cameraId identifier copied into each returned OCRObject.
/// @return detected text regions; empty on invalid state, invalid input, or error.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::mutex> guard(_mutex);
    // Bail out early when the engine or the input is unusable.
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return {};
    }
    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // PaddleOCR expects 3-channel input; expand grayscale into the member
        // scratch buffer so no fresh allocation happens on repeated calls.
        const cv::Mat* src = &input;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            src = &this->_frameBuffer;
        }
        const cv::Mat& image = *src;
        const int maxW = image.cols;
        const int maxH = image.rows;
        // Run OCR on the whole image.
        std::vector<PaddleOCR::OCRPredictResult> detections = ppocr->ocr(image);
        std::vector<ANSCENTER::OCRObject> results;
        results.reserve(detections.size());
        for (const auto& det : detections) {
            // A valid detection is a 4-point quadrilateral.
            if (det.box.size() != 4) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }
            // Treat the quad as axis-aligned: top-left from point 0, right edge
            // from point 1, bottom edge from point 2.
            const int left = std::max(0, static_cast<int>(det.box[0][0]));
            const int top = std::max(0, static_cast<int>(det.box[0][1]));
            int boxW = static_cast<int>(det.box[1][0]) - static_cast<int>(det.box[0][0]);
            int boxH = static_cast<int>(det.box[2][1]) - static_cast<int>(det.box[1][1]);
            // Clamp to image bounds with a 1-pixel floor, then drop degenerate boxes.
            boxW = std::max(1, std::min(maxW - left, boxW));
            boxH = std::max(1, std::min(maxH - top, boxH));
            if (boxW <= 1 || boxH <= 1) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }
            ANSCENTER::OCRObject item;
            item.box = cv::Rect(left, top, boxW, boxH);
            item.classId = det.cls_label;
            item.confidence = det.score;
            item.className = det.text;
            item.extraInfo = "cls label: " + std::to_string(det.cls_label)
                + "; cls score: " + std::to_string(det.cls_score);
            item.cameraId = cameraId;
            results.push_back(std::move(item));
        }
        return results;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return {};
}
/// Runs OCR on selected regions of an image (or the full image when Bbox is empty).
/// Thread-safe: serialized on _mutex.
/// @param input BGR or grayscale image (grayscale is expanded to BGR).
/// @param Bbox  regions of interest in image coordinates; regions smaller than
///              5x5 after clamping are skipped. Empty => OCR the whole image.
/// @return detected text regions in ORIGINAL image coordinates; empty on error.
///
/// BUGFIX: the previous implementation called the public single-image overload
/// for each ROI while already holding _mutex; std::mutex is non-recursive, so
/// re-locking it is undefined behavior (deadlock in practice). The per-region
/// OCR is now performed inline without re-entering a locking overload.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::mutex> lock(_mutex);
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }
    // Checked up front so BOTH the ROI path and the full-image path are covered
    // (previously only the full-image branch verified the engine pointer).
    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary using the reusable buffer.
        const cv::Mat* framePtr;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            framePtr = &this->_frameBuffer;
        }
        else {
            framePtr = &input; // No clone needed - we only read from it
        }
        const cv::Mat& frame = *framePtr;
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;
        std::vector<ANSCENTER::OCRObject> OCRObjects;
        // Runs OCR on `roi`, translating each detection by (offsetX, offsetY)
        // into original-image coordinates, clamping to the image bounds and
        // skipping degenerate boxes (previously width/height <= 0 could reach
        // cv::Rect unchecked).
        const auto ocrRegion = [&](const cv::Mat& roi, int offsetX, int offsetY) {
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(roi);
            OCRObjects.reserve(OCRObjects.size() + res_ocr.size());
            for (const auto& ocr_result : res_ocr) {
                if (ocr_result.box.size() < 4) {
                    continue;
                }
                // Axis-aligned interpretation of the 4-point quad.
                const int x = static_cast<int>(ocr_result.box[0][0]) + offsetX;
                const int y = static_cast<int>(ocr_result.box[0][1]) + offsetY;
                int width = static_cast<int>(ocr_result.box[1][0]) - static_cast<int>(ocr_result.box[0][0]);
                int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);
                // Clamp to image bounds.
                const int clampedX = std::max(0, std::min(fWidth - 1, x));
                const int clampedY = std::max(0, std::min(fHeight - 1, y));
                width = std::min(fWidth - clampedX, width);
                height = std::min(fHeight - clampedY, height);
                if (width <= 0 || height <= 0) {
                    continue;
                }
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box = cv::Rect(clampedX, clampedY, width, height);
                ocrObject.classId = ocr_result.cls_label;
                ocrObject.confidence = ocr_result.score;
                ocrObject.className = ocr_result.text;
                ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                    ";cls score:" + std::to_string(ocr_result.cls_score);
                OCRObjects.push_back(std::move(ocrObject));
            }
        };
        if (!Bbox.empty()) {
            // Process each bounding box region.
            for (const auto& bbox : Bbox) {
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);
                if (width < 5 || height < 5) {
                    continue;
                }
                // ROI view — no pixel copy.
                ocrRegion(frame(cv::Rect(x1, y1, width, height)), x1, y1);
            }
        }
        else {
            // No bounding boxes - run OCR on the full image.
            ocrRegion(frame, 0, 0);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        // Consistent with the single-image overload, which also traps non-std exceptions.
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
}
/// Runs OCR on selected regions of an image (or the full image when Bbox is
/// empty) and tags every result with the given camera id.
/// Thread-safe: serialized on _mutex.
/// @param input    BGR or grayscale image (grayscale is expanded to BGR).
/// @param Bbox     regions of interest; regions smaller than 5x5 after clamping
///                 are skipped. Empty => OCR the whole image.
/// @param cameraId identifier copied into each returned OCRObject.
/// @return detected text regions in ORIGINAL image coordinates; empty on error.
///
/// BUGFIX: the previous implementation called the public single-image overload
/// for each ROI while already holding _mutex; std::mutex is non-recursive, so
/// re-locking it is undefined behavior (deadlock in practice). The per-region
/// OCR is now performed inline without re-entering a locking overload.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input,
const std::vector<cv::Rect>& Bbox,
const std::string& cameraId)
{
    std::lock_guard<std::mutex> lock(_mutex);
    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }
    if (input.cols < 10 || input.rows < 10) {
        return {};
    }
    // Checked up front so BOTH the ROI path and the full-image path are covered.
    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }
    try {
        // Convert grayscale to BGR if necessary using the reusable buffer.
        const cv::Mat* framePtr;
        if (input.channels() == 1) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
            framePtr = &this->_frameBuffer;
        }
        else {
            framePtr = &input; // No clone needed - we only read from it
        }
        const cv::Mat& frame = *framePtr;
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;
        std::vector<ANSCENTER::OCRObject> OCRObjects;
        // Runs OCR on `roi`, translating each detection by (offsetX, offsetY)
        // into original-image coordinates, clamping to the image bounds and
        // skipping degenerate boxes (previously width/height <= 0 could reach
        // cv::Rect unchecked).
        const auto ocrRegion = [&](const cv::Mat& roi, int offsetX, int offsetY) {
            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(roi);
            OCRObjects.reserve(OCRObjects.size() + res_ocr.size());
            for (const auto& ocr_result : res_ocr) {
                if (ocr_result.box.size() < 4) {
                    continue;
                }
                // Axis-aligned interpretation of the 4-point quad.
                const int x = static_cast<int>(ocr_result.box[0][0]) + offsetX;
                const int y = static_cast<int>(ocr_result.box[0][1]) + offsetY;
                int width = static_cast<int>(ocr_result.box[1][0]) - static_cast<int>(ocr_result.box[0][0]);
                int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);
                // Clamp to image bounds.
                const int clampedX = std::max(0, std::min(fWidth - 1, x));
                const int clampedY = std::max(0, std::min(fHeight - 1, y));
                width = std::min(fWidth - clampedX, width);
                height = std::min(fHeight - clampedY, height);
                if (width <= 0 || height <= 0) {
                    continue;
                }
                ANSCENTER::OCRObject ocrObject;
                ocrObject.box = cv::Rect(clampedX, clampedY, width, height);
                ocrObject.classId = ocr_result.cls_label;
                ocrObject.confidence = ocr_result.score;
                ocrObject.className = ocr_result.text;
                ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                    ";cls score:" + std::to_string(ocr_result.cls_score);
                ocrObject.cameraId = cameraId;
                OCRObjects.push_back(std::move(ocrObject));
            }
        };
        if (!Bbox.empty()) {
            // Process each bounding box region.
            for (const auto& bbox : Bbox) {
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);
                if (width < 5 || height < 5) {
                    continue;
                }
                // ROI view — no pixel copy.
                ocrRegion(frame(cv::Rect(x1, y1, width, height)), x1, y1);
            }
        }
        else {
            // No bounding boxes - run OCR on the full image.
            ocrRegion(frame, 0, 0);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
    catch (...) {
        // Consistent with the single-image overload, which also traps non-std exceptions.
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return {};
    }
}
/// Destructor: releases the OCR engine; never throws.
/// BUGFIX: the previous version explicitly invoked
/// `this->ANSOCRBase::~ANSOCRBase();`. The compiler already runs the base-class
/// destructor automatically after this body, so the explicit call destroyed the
/// base subobject twice — undefined behavior. The call has been removed.
ANSCPUOCR::~ANSCPUOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::~ANSCPUOCR()", e.what(), __FILE__, __LINE__);
    }
}
/// Releases the PaddleOCR engine.
/// @return true when the engine was released (or was already null), false if
///         releasing it threw.
bool ANSCPUOCR::Destroy() {
    try {
        // Smart-pointer reset() is a no-op on an already-empty pointer,
        // so the previous `if (ppocr)` guard was redundant.
        ppocr.reset();
        return true;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::Destroy", e.what(), __FILE__, __LINE__);
        return false;
    }
}
}