#include "ANSOdOCR.h" #include "Utility.h" #include #include #include "ANSYOLOOD.h" #include "ANSTENSORRTOD.h" namespace ANSCENTER { bool ANSODOCR::Initialize(std::string licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) { try { bool result = ANSOCRBase::Init(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode); if (!result) return false; // Check if detector and ocr model files exist _modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "lpd.onnx"); _modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ocr.onnx"); if (!FileExist(_modelConfig.detectionModelFile)) { this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__); _licenseValid = false; return false; } if (!FileExist(_modelConfig.recognizerModelFile)) { this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid OCR recognizer model file", __FILE__, __LINE__); _licenseValid = false; return false; } try { // Check the hardware type engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();// EngineType::CPU;// if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) { // Use TensorRT YoloV11 this->_lpDetector = std::make_unique (); this->_ocrDetector = std::make_unique(); } else { // Use ONNX YoloV11 this->_lpDetector = std::make_unique (); this->_ocrDetector = std::make_unique(); } // Run initialization _ocrModelConfig.detectionScoreThreshold = modelConfig.clsThreshold; _lpdmodelConfig.detectionScoreThreshold = modelConfig.detectionBoxThreshold; if (_lpdmodelConfig.detectionScoreThreshold < 0.25)_lpdmodelConfig.detectionScoreThreshold = 0.25; if (_ocrModelConfig.detectionScoreThreshold < 0.25)_ocrModelConfig.detectionScoreThreshold = 0.25; _lpdmodelConfig.modelConfThreshold = 0.5; _lpdmodelConfig.modelMNSThreshold = 0.5; _ocrModelConfig.modelConfThreshold = 0.5; _ocrModelConfig.modelMNSThreshold = 0.5; if (!this->_lpDetector->LoadModelFromFolder("", _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels)) { return false; } if (!this->_ocrDetector->LoadModelFromFolder("", _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels)) { return false; } return _isInitialized; } catch (...) { _licenseValid = false; this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__); return false; } } catch (std::exception& e) { // Handle any other exception that occurs during initialization this->_logger.LogFatal("ANSODOCR::Initialize", e.what(), __FILE__, __LINE__); _licenseValid = false; return false; } } std::vector ANSODOCR::RunInference(const cv::Mat& input) { std::vector output; if (input.empty()) return output; if ((input.cols < 10) || (input.rows < 10)) return output; return RunInference(input, "OCRCPUCAM"); } std::vector ANSODOCR::RunInference(const cv::Mat& input, std::string cameraId) { std::lock_guard lock(_mutex); std::vector OCRObjects; OCRObjects.clear(); if (!_licenseValid) { this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__); return OCRObjects; } if (!_isInitialized) { this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__); return OCRObjects; } if (input.empty() || input.cols < 10 || input.rows < 10) { this->_logger.LogError("ANSODOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__); return OCRObjects; } try { // Convert grayscale to BGR if necessary cv::Mat im; if (input.channels() == 1) { cv::cvtColor(input, im, cv::COLOR_GRAY2BGR); } else { im = input.clone(); } // Check ppocr instance if (!this->_ocrDetector) { this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__); return OCRObjects; } std::vector res_ocr = ppocr->ocr(im); for (size_t n = 0; n < res_ocr.size(); ++n) { if (res_ocr[n].box.size() != 4) { this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__); continue; } cv::Point rook_points[4]; for (size_t m = 0; m < 4; ++m) { rook_points[m] = cv::Point( static_cast(res_ocr[n].box[m][0]), static_cast(res_ocr[n].box[m][1]) ); } int x = std::max(0, rook_points[0].x); int y = std::max(0, rook_points[0].y); int width = rook_points[1].x - rook_points[0].x; int height = rook_points[2].y - rook_points[1].y; // Clamp width and height width = std::max(1, std::min(im.cols - x, width)); height = std::max(1, std::min(im.rows - y, height)); // Skip invalid boxes if (width <= 1 || height <= 1) { this->_logger.LogError("ANSODOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__); continue; } ANSCENTER::OCRObject ocrObject; ocrObject.box = cv::Rect(x, y, width, height); ocrObject.classId = res_ocr[n].cls_label; ocrObject.confidence = res_ocr[n].score; ocrObject.className = res_ocr[n].text; ocrObject.extraInfo = "cls label: " + std::to_string(res_ocr[n].cls_label) + "; cls score: " + std::to_string(res_ocr[n].cls_score); ocrObject.cameraId = cameraId; OCRObjects.push_back(ocrObject); } im.release(); } catch (const std::exception& e) { this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__); } catch (...) { this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__); } return OCRObjects; } std::vector ANSODOCR::RunInference(const cv::Mat& input, std::vector Bbox) { std::lock_guard lock(_mutex); std::vector OCRObjects; OCRObjects.clear(); if (!_licenseValid) { this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__); return OCRObjects; } if (!_isInitialized) { this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__); return OCRObjects; } try { if (input.empty()) { this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__); return OCRObjects; } if ((input.cols < 10) || (input.rows < 10)) return OCRObjects; if (Bbox.size() > 0) { // Convert grayscale images to 3-channel BGR if needed cv::Mat frame; if (input.channels() == 1) { cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR); } else { frame = input.clone(); } int fWidth = frame.cols; int fHeight = frame.rows; for (std::vector::iterator it = Bbox.begin(); it != Bbox.end(); it++) { int x1, y1, x2, y2, width, height; x1 = (*it).x; y1 = (*it).y; x2 = (*it).x + (*it).width; y2 = (*it).y + (*it).height; x1 = std::max(0, x1); y1 = std::max(0, y1); width = std::min(fWidth - x1, (*it).width); height = std::min(fHeight - y1, (*it).height); if ((x1 >= 0) && (y1 >= 0) && (width >= 5) && (height >= 5)) { // Get cropped objects cv::Rect objectPos(x1, y1, width, height); cv::Mat croppedObject = frame(objectPos); std::vector OCRTempObjects; OCRTempObjects.clear(); OCRTempObjects = RunInference(croppedObject); if (OCRTempObjects.size() > 0) { for (int i = 0; i < OCRTempObjects.size(); i++) { ANSCENTER::OCRObject detectionObject; detectionObject = OCRTempObjects[i]; // Correct bounding box position as the croppedObject x,y will be orignial (0,0) detectionObject.box.x = OCRTempObjects[i].box.x + x1; detectionObject.box.y = OCRTempObjects[i].box.y + y1; detectionObject.box.width = OCRTempObjects[i].box.width; detectionObject.box.height = OCRTempObjects[i].box.height; detectionObject.box.x = std::max(0, detectionObject.box.x); detectionObject.box.y = std::max(0, detectionObject.box.y); detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width); detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height); OCRObjects.push_back(detectionObject); } } } } } else { // Convert grayscale images to 3-channel BGR if needed cv::Mat frame; if (input.channels() == 1) { cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR); } else { frame = input.clone(); } std::vector res_ocr = ppocr->ocr(frame); if (res_ocr.size() > 0) { for (int n = 0; n < res_ocr.size(); n++) { // number of detections cv::Point rook_points[4]; for (int m = 0; m < res_ocr[n].box.size(); m++) { rook_points[m] = cv::Point(int(res_ocr[n].box[m][0]), int(res_ocr[n].box[m][1])); } ANSCENTER::OCRObject ocrObject; ocrObject.box.x = rook_points[0].x; ocrObject.box.y = rook_points[0].y; ocrObject.box.width = rook_points[1].x - rook_points[0].x; ocrObject.box.height = rook_points[2].y - rook_points[1].y; ocrObject.box.x = std::max(0, ocrObject.box.x); ocrObject.box.y = std::max(0, ocrObject.box.y); ocrObject.box.width = std::min(frame.cols - ocrObject.box.x, ocrObject.box.width); ocrObject.box.height = std::min(frame.rows - ocrObject.box.y, ocrObject.box.height); ocrObject.classId = res_ocr[n].cls_label; ocrObject.confidence = res_ocr[n].score; ocrObject.className = res_ocr[n].text; std::string extraInformation = "cls label:" + std::to_string(res_ocr[n].cls_label) + ";" + "cls score:" + std::to_string(res_ocr[n].cls_score); ocrObject.extraInfo = extraInformation; // Add extra information for cls score cls label OCRObjects.push_back(ocrObject); } } frame.release(); return OCRObjects; } return OCRObjects; } catch (std::exception& e) { this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__); return OCRObjects; } } std::vector ANSODOCR::RunInference(const cv::Mat& input, std::vector Bbox, std::string cameraId) { std::lock_guard lock(_mutex); std::vector OCRObjects; OCRObjects.clear(); if (!_licenseValid) { this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__); return OCRObjects; } if (!_isInitialized) { this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__); return OCRObjects; } try { if (input.empty()) { this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__); return OCRObjects; } if ((input.cols < 10) || (input.rows < 10)) return OCRObjects; if (Bbox.size() > 0) { // Convert grayscale images to 3-channel BGR if needed cv::Mat frame; if (input.channels() == 1) { cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR); } else { frame = input.clone(); } int fWidth = frame.cols; int fHeight = frame.rows; for (std::vector::iterator it = Bbox.begin(); it != Bbox.end(); it++) { int x1, y1, x2, y2, width, height; x1 = (*it).x; y1 = (*it).y; x2 = (*it).x + (*it).width; y2 = (*it).y + (*it).height; x1 = std::max(0, x1); y1 = std::max(0, y1); width = std::min(fWidth - x1, (*it).width); height = std::min(fHeight - y1, (*it).height); if ((x1 >= 0) && (y1 >= 0) && (width >= 5) && (height >= 5)) { // Get cropped objects cv::Rect objectPos(x1, y1, width, height); cv::Mat croppedObject = frame(objectPos); std::vector OCRTempObjects; OCRTempObjects.clear(); OCRTempObjects = RunInference(croppedObject); if (OCRTempObjects.size() > 0) { for (int i = 0; i < OCRTempObjects.size(); i++) { ANSCENTER::OCRObject detectionObject; detectionObject = OCRTempObjects[i]; // Correct bounding box position as the croppedObject x,y will be orignial (0,0) detectionObject.box.x = OCRTempObjects[i].box.x + x1; detectionObject.box.y = OCRTempObjects[i].box.y + y1; detectionObject.box.width = OCRTempObjects[i].box.width; detectionObject.box.height = OCRTempObjects[i].box.height; detectionObject.box.x = std::max(0, detectionObject.box.x); detectionObject.box.y = std::max(0, detectionObject.box.y); detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width); detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height); detectionObject.cameraId = cameraId; OCRObjects.push_back(detectionObject); } } } } } else { auto im = input.clone(); std::vector res_ocr = ppocr->ocr(im); if (res_ocr.size() > 0) { for (int n = 0; n < res_ocr.size(); n++) { // number of detections cv::Point rook_points[4]; for (int m = 0; m < res_ocr[n].box.size(); m++) { rook_points[m] = cv::Point(int(res_ocr[n].box[m][0]), int(res_ocr[n].box[m][1])); } ANSCENTER::OCRObject ocrObject; ocrObject.box.x = rook_points[0].x; ocrObject.box.y = rook_points[0].y; ocrObject.box.width = rook_points[1].x - rook_points[0].x; ocrObject.box.height = rook_points[2].y - rook_points[1].y; ocrObject.box.x = std::max(0, ocrObject.box.x); ocrObject.box.y = std::max(0, ocrObject.box.y); ocrObject.box.width = std::min(im.cols - ocrObject.box.x, ocrObject.box.width); ocrObject.box.height = std::min(im.rows - ocrObject.box.y, ocrObject.box.height); ocrObject.classId = res_ocr[n].cls_label; ocrObject.confidence = res_ocr[n].score; ocrObject.className = res_ocr[n].text; std::string extraInformation = "cls label:" + std::to_string(res_ocr[n].cls_label) + ";" + "cls score:" + std::to_string(res_ocr[n].cls_score); ocrObject.extraInfo = extraInformation; ocrObject.cameraId = cameraId; // Add extra information for cls score cls label OCRObjects.push_back(ocrObject); } } im.release(); return OCRObjects; } return OCRObjects; } catch (std::exception& e) { this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__); return OCRObjects; } } ANSODOCR::~ANSODOCR() { try { Destroy(); } catch (std::exception& e) { this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", e.what(), __FILE__, __LINE__); } this->ANSOCRBase::~ANSOCRBase(); } bool ANSODOCR::Destroy() { try { if (this->_ocrDetector) this->_ocrDetector.reset(); if (this->_lpDetector) this->_lpDetector.reset(); return true; } catch (std::exception& e) { this->_logger.LogFatal("ANSODOCR::Destroy", e.what(), __FILE__, __LINE__); return false; } } }