Refactor project structure
This commit is contained in:
575
modules/ANSOCR/ANSCpuOCR.cpp
Normal file
575
modules/ANSOCR/ANSCpuOCR.cpp
Normal file
@@ -0,0 +1,575 @@
|
||||
#include "ANSCpuOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
#include <include/paddleocr_utility.h>
|
||||
|
||||
|
||||
namespace ANSCENTER {
|
||||
|
||||
bool ANSCPUOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
|
||||
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try
|
||||
{
|
||||
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
||||
if (!result) return false;
|
||||
|
||||
//Override the paddleocrv3 for openvino only
|
||||
switch (_modelConfig.ocrLanguage) {
|
||||
case ANSCENTER::OCRLanguage::ENGLISH: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_en.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "EN_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "EN_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ENV4_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "ENV4_REC.pdiparams");
|
||||
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::CHINESE: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ch.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CHV4_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CHV4_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CHV4_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CHV4_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::FRENCH: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_fr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "FR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "FR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::GERMANY: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_gr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "GR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "GR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::JAPANESE: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_jp.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "JP_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "JP_REC.pdiparams");
|
||||
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::KOREAN: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_kr.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "MPP_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "MPP_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "KR_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "KR_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
case ANSCENTER::OCRLanguage::CUSTOM: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ct.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CT_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CT_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CT_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CT_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
_modelConfig.detectionModelDir = _modelFolder;
|
||||
_modelConfig.recognizerModelDir = _modelFolder;
|
||||
_modelConfig.clsModelDir = _modelFolder;
|
||||
_modelConfig.layoutModelDir = _modelFolder;
|
||||
_modelConfig.layourDictionaryPath = _modelFolder;
|
||||
_modelConfig.tableModelDir = _modelFolder;
|
||||
_modelConfig.tableCharDictionaryPath = _modelFolder;
|
||||
_modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, "dict_ct.txt");
|
||||
|
||||
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "CT_DET.pdmodel");
|
||||
_modelConfig.detectionModelParam = CreateFilePath(_modelFolder, "CT_DET.pdiparams");
|
||||
_modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
|
||||
_modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
|
||||
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "CT_REC.pdmodel");
|
||||
_modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, "CT_REC.pdiparams");
|
||||
break;
|
||||
}
|
||||
}
|
||||
// For now we do have _modelConfig and _modelFolder
|
||||
if (!FileExist(_modelConfig.detectionModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.clsModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
|
||||
_isInitialized = ppocr->Initialize(_modelConfig.detectionModelFile, _modelConfig.clsModelFile, _modelConfig.recognizerModelFile, _modelConfig.recogizerCharDictionaryPath);
|
||||
return _isInitialized;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
catch (...) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
// Handle any other exception that occurs during initialization
|
||||
this->_logger.LogFatal("ANSCPUOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Runs OCR on a whole image using the default camera id "OCRCPUCAM".
///
/// Images that are empty or smaller than 10 pixels in either dimension are
/// rejected silently (no log entry) with an empty result; everything else is
/// forwarded to the (image, cameraId) overload.
///
/// @param input Image to analyse.
/// @return The detections reported by the (image, cameraId) overload, or an
///         empty vector when the image is rejected here.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input) {
    const bool usable = !input.empty() && (input.cols >= 10) && (input.rows >= 10);
    if (!usable) {
        return std::vector<ANSCENTER::OCRObject>();
    }
    return RunInference(input, "OCRCPUCAM");
}
|
||||
/// @brief Runs OCR on a whole image and tags every detection with a camera id.
///
/// Holds _mutex for the entire call. Single-channel input is converted to BGR
/// into the reusable member buffer _frameBuffer before being passed to the
/// PaddleOCR wrapper. Each OCR result is turned into an OCRObject whose box is
/// derived from the result's corner points and clamped to the image.
///
/// @param input    Image to analyse; must be non-empty and at least 10x10.
/// @param cameraId Value copied into OCRObject::cameraId of every detection.
/// @return The converted detections, or an empty vector when a precondition
///         fails (invalid license, not initialized, bad image, null ppocr) or
///         an exception is caught.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::mutex> guard(_mutex);

    // Preconditions, checked (and logged) in a fixed order.
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return {};
    }
    if (!ppocr) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
        return {};
    }

    try {
        // Grayscale frames are expanded to BGR in the member buffer; colour
        // frames are used in place without copying.
        const bool isGray = (input.channels() == 1);
        if (isGray) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& bgr = isGray ? this->_frameBuffer : input;

        const std::vector<PaddleOCR::OCRPredictResult> detections = ppocr->ocr(bgr);

        std::vector<ANSCENTER::OCRObject> results;
        results.reserve(detections.size());

        const int maxWidth = bgr.cols;
        const int maxHeight = bgr.rows;

        for (const auto& detection : detections) {
            const auto& quad = detection.box;
            if (quad.size() != 4) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // NOTE(review): the corner order looks like top-left, top-right,
            // bottom-right, bottom-left - confirm against PaddleOCR's output.
            const int left = static_cast<int>(quad[0][0]);
            const int top = static_cast<int>(quad[0][1]);

            // Origin is clamped to the image; the raw extent is measured from
            // the unclamped corners.
            const int originX = std::max(0, left);
            const int originY = std::max(0, top);
            const int rawWidth = static_cast<int>(quad[1][0]) - left;
            const int rawHeight = static_cast<int>(quad[2][1]) - static_cast<int>(quad[1][1]);

            // Limit the extent to what fits in the image, never below 1.
            const int boxWidth = std::max(1, std::min(maxWidth - originX, rawWidth));
            const int boxHeight = std::max(1, std::min(maxHeight - originY, rawHeight));

            // A side of 1 means the box was degenerate or fell outside the image.
            if (boxWidth <= 1 || boxHeight <= 1) {
                this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }

            std::string info = "cls label: ";
            info += std::to_string(detection.cls_label);
            info += "; cls score: ";
            info += std::to_string(detection.cls_score);

            ANSCENTER::OCRObject item;
            item.box = cv::Rect(originX, originY, boxWidth, boxHeight);
            item.classId = detection.cls_label;
            item.confidence = detection.score;
            item.className = detection.text;
            item.extraInfo = std::move(info);
            item.cameraId = cameraId;

            results.push_back(std::move(item));
        }

        return results;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    // Reached only after an exception was logged above.
    return {};
}
|
||||
|
||||
/// @brief Runs OCR inside the given regions, or on the whole image when no
///        regions are supplied.
///
/// With a non-empty Bbox each region is clipped to the image, cropped as a
/// view of the input and passed to RunInference(const cv::Mat&); the returned
/// boxes are shifted back into full-image coordinates. With an empty Bbox the
/// PaddleOCR wrapper is run once on the whole (BGR-converted) image.
///
/// Locking: _mutex is a non-recursive std::mutex and the per-region call locks
/// it again, so this function must not hold it while making that call. The
/// lock is therefore released before the region loop and kept only for the
/// whole-image path.
///
/// @param input Image to analyse; must be non-empty and at least 10x10.
/// @param Bbox  Regions of interest in image coordinates; may be empty.
/// @return Detections in full-image coordinates, or an empty vector when a
///         precondition fails or a std::exception is caught.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    // unique_lock rather than lock_guard so it can be released early below.
    std::unique_lock<std::mutex> lock(_mutex);

    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }

    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }

    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        const int fWidth = input.cols;
        const int fHeight = input.rows;

        std::vector<ANSCENTER::OCRObject> OCRObjects;

        if (!Bbox.empty()) {
            // RunInference(croppedObject) acquires _mutex itself. Locking a
            // std::mutex twice from the same thread is undefined behaviour
            // (in practice a deadlock), so release our lock first.
            lock.unlock();

            OCRObjects.reserve(Bbox.size());

            for (const auto& bbox : Bbox) {
                // Clip the region to the image.
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);

                // Note: the per-region call additionally rejects crops smaller
                // than 10 pixels in either dimension.
                if (width < 5 || height < 5) {
                    continue;
                }

                // Crop straight from the caller's image (a view, no copy). The
                // per-region call performs any grayscale conversion under its
                // own lock, so the shared _frameBuffer is never touched while
                // this function is unlocked.
                const cv::Mat croppedObject = input(cv::Rect(x1, y1, width, height));

                std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);

                for (auto& obj : OCRTempObjects) {
                    // Shift from crop coordinates to full-image coordinates,
                    // keeping the box inside the image.
                    obj.box.x = std::max(0, std::min(fWidth - obj.box.width, obj.box.x + x1));
                    obj.box.y = std::max(0, std::min(fHeight - obj.box.height, obj.box.y + y1));
                    obj.box.width = std::min(fWidth - obj.box.x, obj.box.width);
                    obj.box.height = std::min(fHeight - obj.box.y, obj.box.height);

                    OCRObjects.push_back(std::move(obj));
                }
            }

            return OCRObjects;
        }

        // No bounding boxes - run OCR on the full image (lock still held).
        if (!ppocr) {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return {};
        }

        // Convert grayscale to BGR if necessary using the reusable buffer.
        const bool isGray = (input.channels() == 1);
        if (isGray) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = isGray ? this->_frameBuffer : input;

        std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
        OCRObjects.reserve(res_ocr.size());

        for (const auto& ocr_result : res_ocr) {
            if (ocr_result.box.size() < 4) {
                continue;
            }

            // Extract bounding box from corner points.
            const int x = static_cast<int>(ocr_result.box[0][0]);
            const int y = static_cast<int>(ocr_result.box[0][1]);
            int width = static_cast<int>(ocr_result.box[1][0]) - x;
            int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);

            // Clamp to image bounds.
            // NOTE(review): unlike the (image, cameraId) overload, boxes whose
            // clamped width/height end up <= 0 are still emitted here - confirm
            // whether they should be filtered.
            const int clampedX = std::max(0, x);
            const int clampedY = std::max(0, y);
            width = std::min(fWidth - clampedX, width);
            height = std::min(fHeight - clampedY, height);

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(clampedX, clampedY, width, height);
            ocrObject.classId = ocr_result.cls_label;
            ocrObject.confidence = ocr_result.score;
            ocrObject.className = ocr_result.text;
            ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                ";cls score:" + std::to_string(ocr_result.cls_score);

            OCRObjects.push_back(std::move(ocrObject));
        }

        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
||||
|
||||
|
||||
/// @brief Runs OCR inside the given regions (or on the whole image when no
///        regions are supplied) and tags every detection with a camera id.
///
/// With a non-empty Bbox each region is clipped to the image, cropped as a
/// view of the input and passed to RunInference(const cv::Mat&); the returned
/// boxes are shifted back into full-image coordinates and their cameraId is
/// overwritten with the caller's value. With an empty Bbox the PaddleOCR
/// wrapper is run once on the whole (BGR-converted) image.
///
/// Locking: _mutex is a non-recursive std::mutex and the per-region call locks
/// it again, so this function must not hold it while making that call. The
/// lock is therefore released before the region loop and kept only for the
/// whole-image path.
///
/// @param input    Image to analyse; must be non-empty and at least 10x10.
/// @param Bbox     Regions of interest in image coordinates; may be empty.
/// @param cameraId Value copied into OCRObject::cameraId of every detection.
/// @return Detections in full-image coordinates, or an empty vector when a
///         precondition fails or a std::exception is caught.
std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input,
    const std::vector<cv::Rect>& Bbox,
    const std::string& cameraId)
{
    // unique_lock rather than lock_guard so it can be released early below.
    std::unique_lock<std::mutex> lock(_mutex);

    // Early validation
    if (!_licenseValid) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return {};
    }

    if (!_isInitialized) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return {};
    }

    if (input.empty()) {
        this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
        return {};
    }

    if (input.cols < 10 || input.rows < 10) {
        return {};
    }

    try {
        const int fWidth = input.cols;
        const int fHeight = input.rows;

        std::vector<ANSCENTER::OCRObject> OCRObjects;

        if (!Bbox.empty()) {
            // RunInference(croppedObject) acquires _mutex itself. Locking a
            // std::mutex twice from the same thread is undefined behaviour
            // (in practice a deadlock), so release our lock first.
            lock.unlock();

            OCRObjects.reserve(Bbox.size());

            for (const auto& bbox : Bbox) {
                // Clip the region to the image.
                const int x1 = std::max(0, bbox.x);
                const int y1 = std::max(0, bbox.y);
                const int width = std::min(fWidth - x1, bbox.width);
                const int height = std::min(fHeight - y1, bbox.height);

                // Note: the per-region call additionally rejects crops smaller
                // than 10 pixels in either dimension.
                if (width < 5 || height < 5) {
                    continue;
                }

                // Crop straight from the caller's image (a view, no copy). The
                // per-region call performs any grayscale conversion under its
                // own lock, so the shared _frameBuffer is never touched while
                // this function is unlocked.
                const cv::Mat croppedObject = input(cv::Rect(x1, y1, width, height));

                std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);

                for (auto& obj : OCRTempObjects) {
                    // Shift from crop coordinates to full-image coordinates,
                    // keeping the box inside the image.
                    obj.box.x = std::max(0, std::min(fWidth - obj.box.width, obj.box.x + x1));
                    obj.box.y = std::max(0, std::min(fHeight - obj.box.height, obj.box.y + y1));
                    obj.box.width = std::min(fWidth - obj.box.x, obj.box.width);
                    obj.box.height = std::min(fHeight - obj.box.y, obj.box.height);
                    // Replace the default id set by the per-region call.
                    obj.cameraId = cameraId;

                    OCRObjects.push_back(std::move(obj));
                }
            }

            return OCRObjects;
        }

        // No bounding boxes - run OCR on the full image (lock still held).
        if (!ppocr) {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return {};
        }

        // Convert grayscale to BGR if necessary using the reusable buffer.
        const bool isGray = (input.channels() == 1);
        if (isGray) {
            cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
        }
        const cv::Mat& frame = isGray ? this->_frameBuffer : input;

        std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
        OCRObjects.reserve(res_ocr.size());

        for (const auto& ocr_result : res_ocr) {
            if (ocr_result.box.size() < 4) {
                continue;
            }

            // Extract bounding box from corner points.
            const int x = static_cast<int>(ocr_result.box[0][0]);
            const int y = static_cast<int>(ocr_result.box[0][1]);
            int width = static_cast<int>(ocr_result.box[1][0]) - x;
            int height = static_cast<int>(ocr_result.box[2][1]) - static_cast<int>(ocr_result.box[1][1]);

            // Clamp to image bounds.
            // NOTE(review): unlike the (image, cameraId) overload, boxes whose
            // clamped width/height end up <= 0 are still emitted here - confirm
            // whether they should be filtered.
            const int clampedX = std::max(0, x);
            const int clampedY = std::max(0, y);
            width = std::min(fWidth - clampedX, width);
            height = std::min(fHeight - clampedY, height);

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(clampedX, clampedY, width, height);
            ocrObject.classId = ocr_result.cls_label;
            ocrObject.confidence = ocr_result.score;
            ocrObject.className = ocr_result.text;
            ocrObject.extraInfo = "cls label:" + std::to_string(ocr_result.cls_label) +
                ";cls score:" + std::to_string(ocr_result.cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(std::move(ocrObject));
        }

        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        return {};
    }
}
|
||||
/// @brief Releases the OCR engine via Destroy().
///
/// The base-class destructor is NOT called explicitly: the compiler invokes
/// ~ANSOCRBase() automatically once this body finishes, and calling it by hand
/// as well would destroy the base sub-object twice (undefined behaviour).
ANSCPUOCR::~ANSCPUOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSCPUOCR::~ANSCPUOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Swallow everything else: an exception must never leave a destructor.
    }
}
|
||||
bool ANSCPUOCR::Destroy() {
|
||||
try {
|
||||
if (ppocr) ppocr.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSCPUOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user