#include "ANSCpuOCR.h"
#include "Utility.h"

// NOTE(review): the original file listed three further system #includes whose
// names are not visible in the reviewed source. The headers below cover every
// standard / OpenCV name this file uses directly; restore the originals if they
// provided anything else.
#include <algorithm>
#include <exception>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/imgproc.hpp>

namespace ANSCENTER
{
    namespace
    {
        // Camera id attached to results when the caller does not supply one.
        constexpr const char* kDefaultCameraId = "OCRCPUCAM";

        // Names of the per-language model files that live inside the model folder.
        struct ModelFileNames
        {
            const char* dictionary;
            const char* detectionModel;
            const char* detectionParams;
            const char* recognizerModel;
            const char* recognizerParams;
        };

        constexpr ModelFileNames kEnglishModels{ "dict_en.txt", "EN_DET.pdmodel", "EN_DET.pdiparams", "ENV4_REC.pdmodel", "ENV4_REC.pdiparams" };
        constexpr ModelFileNames kChineseModels{ "dict_ch.txt", "CHV4_DET.pdmodel", "CHV4_DET.pdiparams", "CHV4_REC.pdmodel", "CHV4_REC.pdiparams" };
        constexpr ModelFileNames kFrenchModels{ "dict_fr.txt", "MPP_DET.pdmodel", "MPP_DET.pdiparams", "FR_REC.pdmodel", "FR_REC.pdiparams" };
        constexpr ModelFileNames kGermanModels{ "dict_gr.txt", "MPP_DET.pdmodel", "MPP_DET.pdiparams", "GR_REC.pdmodel", "GR_REC.pdiparams" };
        constexpr ModelFileNames kJapaneseModels{ "dict_jp.txt", "MPP_DET.pdmodel", "MPP_DET.pdiparams", "JP_REC.pdmodel", "JP_REC.pdiparams" };
        constexpr ModelFileNames kKoreanModels{ "dict_kr.txt", "MPP_DET.pdmodel", "MPP_DET.pdiparams", "KR_REC.pdmodel", "KR_REC.pdiparams" };
        // Used for OCRLanguage::CUSTOM and for any language value not listed above.
        constexpr ModelFileNames kCustomModels{ "dict_ct.txt", "CT_DET.pdmodel", "CT_DET.pdiparams", "CT_REC.pdmodel", "CT_REC.pdiparams" };

        // Turns one detected text quad into an upright rectangle clipped to the image.
        //
        // Corner 0 supplies the origin, corner 1's x the right edge, and the height is
        // corner 2's y minus corner 1's y (presumably the quad is ordered top-left,
        // top-right, bottom-right, bottom-left - confirm against the OCR engine).
        // Intersecting with the image rectangle shrinks the box by whatever part lies
        // outside the image; a box with no overlap comes back empty (0 x 0).
        template <typename Quad>
        cv::Rect QuadToClippedRect(const Quad& quad, int imageWidth, int imageHeight)
        {
            const int left = static_cast<int>(quad[0][0]);
            const int top = static_cast<int>(quad[0][1]);
            const int right = static_cast<int>(quad[1][0]);
            const int rightEdgeTop = static_cast<int>(quad[1][1]);
            const int rightEdgeBottom = static_cast<int>(quad[2][1]);

            const cv::Rect raw(left, top, right - left, rightEdgeBottom - rightEdgeTop);
            return raw & cv::Rect(0, 0, imageWidth, imageHeight);
        }

        // Converts raw OCR engine results into OCRObject records.
        //
        // Results whose box does not have exactly four corners, or whose clipped
        // rectangle is at most one pixel wide or high, are logged and skipped.
        // labelPrefix / scorePrefix are the literal pieces of the extraInfo string, so
        // each caller keeps its historical text format.
        template <typename Results, typename Logger>
        std::vector<OCRObject> ConvertOcrResults(const Results& results,
            int imageWidth, int imageHeight,
            const std::string& cameraId,
            const char* labelPrefix, const char* scorePrefix,
            Logger& logger)
        {
            std::vector<OCRObject> objects;
            objects.reserve(results.size());

            for (const auto& result : results)
            {
                if (result.box.size() != 4)
                {
                    logger.LogError("ANSCPUOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }

                const cv::Rect box = QuadToClippedRect(result.box, imageWidth, imageHeight);
                if (box.width <= 1 || box.height <= 1)
                {
                    logger.LogError("ANSCPUOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                    continue;
                }

                OCRObject object;
                object.box = box;
                object.classId = result.cls_label;
                object.confidence = result.score;
                object.className = result.text;
                object.extraInfo = std::string(labelPrefix) + std::to_string(result.cls_label)
                    + scorePrefix + std::to_string(result.cls_score);
                object.cameraId = cameraId;
                objects.push_back(std::move(object));
            }
            return objects;
        }
    } // namespace

    // Initializes the base OCR object, resolves the model files for the configured
    // language inside _modelFolder, verifies that they exist and initializes the
    // PaddleOCR engine.
    //
    // Returns true only when the engine reports successful initialization. Every
    // failure detected here (missing model file, null engine, exception) is logged
    // and also clears _licenseValid, as the original implementation did.
    bool ANSCPUOCR::Initialize(const std::string& licenseKey,
        OCRModelConfig modelConfig,
        const std::string& modelZipFilePath,
        const std::string& modelZipPassword,
        int engineMode)
    {
        // Shared failure path: log, invalidate, report failure.
        const auto fail = [this](const char* reason) -> bool {
            this->_logger.LogFatal("ANSCPUOCR::Initialize", reason, __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        };

        try
        {
            if (!ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode))
            {
                return false;
            }

            // Original note: override the paddleocrv3 for openvino only.
            const ModelFileNames* files = &kCustomModels;
            switch (_modelConfig.ocrLanguage)
            {
            case ANSCENTER::OCRLanguage::ENGLISH:  files = &kEnglishModels;  break;
            case ANSCENTER::OCRLanguage::CHINESE:  files = &kChineseModels;  break;
            case ANSCENTER::OCRLanguage::FRENCH:   files = &kFrenchModels;   break;
            case ANSCENTER::OCRLanguage::GERMANY:  files = &kGermanModels;   break;
            case ANSCENTER::OCRLanguage::JAPANESE: files = &kJapaneseModels; break;
            case ANSCENTER::OCRLanguage::KOREAN:   files = &kKoreanModels;   break;
            case ANSCENTER::OCRLanguage::CUSTOM:
            default:
                break; // keep the custom model set
            }

            // Every directory-style entry points at the extracted model folder.
            _modelConfig.detectionModelDir = _modelFolder;
            _modelConfig.recognizerModelDir = _modelFolder;
            _modelConfig.clsModelDir = _modelFolder;
            _modelConfig.layoutModelDir = _modelFolder;
            _modelConfig.layourDictionaryPath = _modelFolder;
            _modelConfig.tableModelDir = _modelFolder;
            _modelConfig.tableCharDictionaryPath = _modelFolder;

            _modelConfig.recogizerCharDictionaryPath = CreateFilePath(_modelFolder, files->dictionary);
            _modelConfig.detectionModelFile = CreateFilePath(_modelFolder, files->detectionModel);
            _modelConfig.detectionModelParam = CreateFilePath(_modelFolder, files->detectionParams);
            // The same classifier model is used for every language.
            _modelConfig.clsModelFile = CreateFilePath(_modelFolder, "CH_CLS.pdmodel");
            _modelConfig.clsModelParam = CreateFilePath(_modelFolder, "CH_CLS.pdiparams");
            _modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, files->recognizerModel);
            _modelConfig.recognizerModelParam = CreateFilePath(_modelFolder, files->recognizerParams);

            if (!FileExist(_modelConfig.detectionModelFile))
            {
                return fail("Invalid detector model file");
            }
            if (!FileExist(_modelConfig.clsModelFile))
            {
                return fail("Invalid classifier model file");
            }
            if (!FileExist(_modelConfig.recognizerModelFile))
            {
                return fail("Invalid recognizer model file");
            }

            // Dereferencing a null engine would crash instead of throwing, so check first.
            if (!ppocr)
            {
                return fail("PPOCR instance is null");
            }

            _isInitialized = ppocr->Initialize(_modelConfig.detectionModelFile,
                _modelConfig.clsModelFile,
                _modelConfig.recognizerModelFile,
                _modelConfig.recogizerCharDictionaryPath);
            return _isInitialized;
        }
        catch (const std::exception& e)
        {
            return fail(e.what());
        }
        catch (...)
        {
            return fail("Failed to create OCR objects");
        }
    }

    // Runs OCR on the whole image and tags the results with the default camera id.
    // Empty images and images smaller than 10x10 pixels yield an empty result
    // without logging.
    std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input)
    {
        const bool tooSmall = (input.cols < 10) || (input.rows < 10);
        if (input.empty() || tooSmall)
        {
            return {};
        }
        return RunInference(input, kDefaultCameraId);
    }

    // Runs OCR on the whole image and tags every result with cameraId.
    //
    // Returns an empty vector (after logging) when the license is invalid, the model
    // is not initialized, the image is empty or smaller than 10x10, the engine is
    // null, or the engine throws.
    //
    // NOTE(review): the Bbox overload calls back into this function while it already
    // holds _mutex, so _mutex has to be a recursive mutex - confirm in the header.
    std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::string& cameraId)
    {
        std::lock_guard<decltype(_mutex)> lock(_mutex);

        if (!_licenseValid)
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
            return {};
        }
        if (!_isInitialized)
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
            return {};
        }
        if (input.empty() || input.cols < 10 || input.rows < 10)
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
            return {};
        }
        if (!ppocr)
        {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return {};
        }

        try
        {
            // Single-channel input is expanded to BGR in the reusable member buffer;
            // anything else is used in place without copying.
            const cv::Mat* framePtr = &input;
            if (input.channels() == 1)
            {
                cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
                framePtr = &this->_frameBuffer;
            }
            const cv::Mat& frame = *framePtr;

            const auto results = ppocr->ocr(frame);
            return ConvertOcrResults(results, frame.cols, frame.rows, cameraId,
                "cls label: ", "; cls score: ", this->_logger);
        }
        catch (const std::exception& e)
        {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        }
        catch (...)
        {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        }
        return {};
    }

    // Runs OCR inside each rectangle of Bbox (or on the whole image when Bbox is
    // empty) and tags the results with the default camera id.
    //
    // Delegates to the cameraId overload so both paths report the same camera id;
    // previously the full-image path left cameraId unset while the per-box path
    // reported the default id.
    std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox)
    {
        return RunInference(input, Bbox, kDefaultCameraId);
    }

    // Runs OCR inside each rectangle of Bbox and returns the detections in the
    // coordinate space of the full image, tagged with cameraId. When Bbox is empty
    // the whole image is processed instead.
    //
    // Requested rectangles are clipped to the image; regions that end up narrower
    // or lower than 5 pixels are skipped. Returns an empty vector when the license
    // is invalid, the model is not initialized, the image is empty or smaller than
    // 10x10, or an exception is raised.
    std::vector<ANSCENTER::OCRObject> ANSCPUOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId)
    {
        std::lock_guard<decltype(_mutex)> lock(_mutex);

        if (!_licenseValid)
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Invalid License", __FILE__, __LINE__);
            return {};
        }
        if (!_isInitialized)
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
            return {};
        }
        if (input.empty())
        {
            this->_logger.LogError("ANSCPUOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return {};
        }
        if (input.cols < 10 || input.rows < 10)
        {
            return {};
        }

        try
        {
            // Single-channel input is expanded to BGR in the reusable member buffer;
            // anything else is only read, so no copy is made.
            const cv::Mat* framePtr = &input;
            if (input.channels() == 1)
            {
                cv::cvtColor(input, this->_frameBuffer, cv::COLOR_GRAY2BGR);
                framePtr = &this->_frameBuffer;
            }
            const cv::Mat& frame = *framePtr;
            const int fWidth = frame.cols;
            const int fHeight = frame.rows;

            if (Bbox.empty())
            {
                // No regions requested: run OCR on the full image.
                if (!ppocr)
                {
                    this->_logger.LogFatal("ANSCPUOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
                    return {};
                }
                const auto results = ppocr->ocr(frame);
                return ConvertOcrResults(results, fWidth, fHeight, cameraId,
                    "cls label:", ";cls score:", this->_logger);
            }

            const cv::Rect frameRect(0, 0, fWidth, fHeight);
            std::vector<ANSCENTER::OCRObject> objects;
            objects.reserve(Bbox.size());

            for (const cv::Rect& requested : Bbox)
            {
                // Intersection clips all four sides, so a box starting left of / above
                // the image is shrunk rather than merely shifted to the origin.
                const cv::Rect roi = requested & frameRect;
                if (roi.width < 5 || roi.height < 5)
                {
                    continue;
                }

                // ROI view into the frame (no pixel copy).
                const cv::Mat crop = frame(roi);
                std::vector<ANSCENTER::OCRObject> found = RunInference(crop);

                for (auto& obj : found)
                {
                    // Translate from crop coordinates back to full-image coordinates,
                    // keeping the box inside the image.
                    obj.box.x = std::max(0, std::min(fWidth - obj.box.width, obj.box.x + roi.x));
                    obj.box.y = std::max(0, std::min(fHeight - obj.box.height, obj.box.y + roi.y));
                    obj.box.width = std::min(fWidth - obj.box.x, obj.box.width);
                    obj.box.height = std::min(fHeight - obj.box.y, obj.box.height);
                    obj.cameraId = cameraId;
                    objects.push_back(std::move(obj));
                }
            }
            return objects;
        }
        catch (const std::exception& e)
        {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", e.what(), __FILE__, __LINE__);
        }
        catch (...)
        {
            this->_logger.LogFatal("ANSCPUOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        }
        return {};
    }

    // Releases the OCR engine. The base-class destructor runs automatically after
    // this body, so it must not be invoked explicitly here (doing so would destroy
    // the base sub-object twice).
    ANSCPUOCR::~ANSCPUOCR()
    {
        try
        {
            Destroy();
        }
        catch (const std::exception& e)
        {
            this->_logger.LogFatal("ANSCPUOCR::~ANSCPUOCR()", e.what(), __FILE__, __LINE__);
        }
        catch (...)
        {
            this->_logger.LogFatal("ANSCPUOCR::~ANSCPUOCR()", "Unknown exception occurred", __FILE__, __LINE__);
        }
    }

    // Drops the OCR engine instance. Returns true on success (including when there
    // was nothing to release) and false if releasing it raised an exception.
    bool ANSCPUOCR::Destroy()
    {
        try
        {
            if (ppocr)
            {
                ppocr.reset();
            }
            return true;
        }
        catch (const std::exception& e)
        {
            this->_logger.LogFatal("ANSCPUOCR::Destroy", e.what(), __FILE__, __LINE__);
            return false;
        }
        catch (...)
        {
            this->_logger.LogFatal("ANSCPUOCR::Destroy", "Unknown exception occurred", __FILE__, __LINE__);
            return false;
        }
    }
}