#include "ANSOCR.h"
#include "Utility.h"

// NOTE(review): two further #include directives in this file had lost their
// header names. The standard headers below cover what this translation unit
// uses directly; restore the original two from version control if they
// provided anything else.
#include <cstddef>
#include <exception>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace ANSCENTER {

namespace {

/// Images with either side shorter than this many pixels are not sent to the OCR pipeline.
constexpr int kMinImageSide = 10;

/// @brief Returns true when @p image has a side shorter than kMinImageSide.
bool IsBelowMinimumSize(const cv::Mat& image) {
	return (image.cols < kMinImageSide) || (image.rows < kMinImageSide);
}

/// @brief Converts a FastDeploy OCR result into a list of OCRObject entries.
///
/// For every detected box the axis-aligned rectangle is derived from the first
/// three corner points of the quadrilateral (x/y from corner 0, width from
/// corner 1, height from corner 2), exactly as the previous inline code did.
///
/// @param result      Result filled in by the pipeline's Predict() call.
/// @param imageWidth  Width used to normalise the polygon.
/// @param imageHeight Height used to normalise the polygon.
/// @param cameraId    Camera identifier copied into every object.
/// @return One OCRObject per entry of result.boxes.
/// @throws std::out_of_range if text / rec_scores / cls_labels / cls_scores
///         hold fewer entries than boxes (callers catch std::exception).
std::vector<OCRObject> ToOCRObjects(const fastdeploy::vision::OCRResult& result,
                                    int imageWidth, int imageHeight,
                                    const std::string& cameraId) {
	std::vector<OCRObject> objects;
	const std::size_t boxCount = result.boxes.size();
	objects.reserve(boxCount);

	for (std::size_t n = 0; n < boxCount; ++n) {
		const auto& quad = result.boxes[n];
		const int x0 = static_cast<int>(quad[0]);
		const int y0 = static_cast<int>(quad[1]);
		const int x1 = static_cast<int>(quad[2]);
		const int y1 = static_cast<int>(quad[3]);
		const int y2 = static_cast<int>(quad[5]);

		// .at() turns a size mismatch between the parallel result arrays into
		// an exception instead of an out-of-bounds read.
		const auto clsLabel = result.cls_labels.at(n);
		const auto clsScore = result.cls_scores.at(n);

		OCRObject ocrObject;
		ocrObject.box.x = x0;
		ocrObject.box.y = y0;
		ocrObject.box.width = x1 - x0;
		ocrObject.box.height = y2 - y1;
		ocrObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(ocrObject.box, imageWidth, imageHeight);
		ocrObject.classId = clsLabel;
		ocrObject.confidence = result.rec_scores.at(n);
		ocrObject.className = result.text.at(n);
		// Classifier label and score are exposed through the free-form extra info field.
		ocrObject.extraInfo = "cls label:" + std::to_string(clsLabel) + ";" + "cls score:" + std::to_string(clsScore);
		ocrObject.cameraId = cameraId;
		objects.push_back(std::move(ocrObject));
	}
	return objects;
}

} // namespace

/// @brief Initializes the base class, fills in model configuration defaults and
///        builds the FastDeploy detector / classifier / recognizer pipeline.
///
/// The runtime is always configured for CPU with the OpenVINO backend in this
/// implementation; @p engineMode is only forwarded to ANSOCRBase::Initialize.
///
/// @param licenseKey       Forwarded to ANSOCRBase::Initialize.
/// @param modelConfig      Forwarded to ANSOCRBase::Initialize.
/// @param modelZipFilePath Forwarded to ANSOCRBase::Initialize.
/// @param modelZipPassword Forwarded to ANSOCRBase::Initialize.
/// @param engineMode       Forwarded to ANSOCRBase::Initialize.
/// @return true when all three models and the pipeline report Initialized(),
///         false on any failure (failures are logged).
bool ANSOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
	try {
		if (!ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode)) {
			return false;
		}

		auto option = fastdeploy::RuntimeOption();

		// Fixed settings: these are assigned unconditionally and therefore
		// replace whatever the configuration held for these fields.
		_modelConfig.precisionType = "fp32";
		_modelConfig.gpuMemory = 4000;
		_modelConfig.limitType = "max";
		_modelConfig.cpuThreads = 10;
		_modelConfig.tableModelMaxLengh = 488;
		_modelConfig.detectionScoreMode = "slow";
		_modelConfig.ensureASCII = true;

		// Defaults applied only when the configured value is missing / non-positive.
		if (_modelConfig.limitSideLen <= 0) _modelConfig.limitSideLen = 960;
		if (_modelConfig.detectionDBThreshold <= 0) _modelConfig.detectionDBThreshold = 0.3;
		if (_modelConfig.detectionBoxThreshold <= 0) _modelConfig.detectionBoxThreshold = 0.6;
		if (_modelConfig.detectionDBUnclipRatio <= 0) _modelConfig.detectionDBUnclipRatio = 1.5;
		if (_modelConfig.clsThreshold <= 0) _modelConfig.clsThreshold = 0.9;
		if (_modelConfig.clsBatchNumber <= 0) _modelConfig.clsBatchNumber = 1;
		if (_modelConfig.recognizerBatchNum <= 0) _modelConfig.recognizerBatchNum = 6;
		if (_modelConfig.recoginzerImageHeight <= 0) _modelConfig.recoginzerImageHeight = 48;
		if (_modelConfig.recoginzerImageWidth <= 0) _modelConfig.recoginzerImageWidth = 320;
		if (_modelConfig.layoutScoreThreshold <= 0) _modelConfig.layoutScoreThreshold = 0.5;
		if (_modelConfig.layoutNMSThreshold <= 0) _modelConfig.layoutNMSThreshold = 0.5;
		if (_modelConfig.tableBatchNum <= 0) _modelConfig.tableBatchNum = 1;

		// CPU-only execution through the OpenVINO backend.
		_modelConfig.userGPU = false;
		_modelConfig.useTensorRT = false;
		option.UseCpu();
		option.UseOpenVINOBackend();
		auto det_option = option;
		auto cls_option = option;
		auto rec_option = option;

		// All three model files must exist before any model is constructed.
		if (!FileExist(_modelConfig.detectionModelFile)) {
			this->_logger.LogFatal("ANSOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
			_licenseValid = false;
			return false;
		}
		if (!FileExist(_modelConfig.clsModelFile)) {
			this->_logger.LogFatal("ANSOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
			_licenseValid = false;
			return false;
		}
		if (!FileExist(_modelConfig.recognizerModelFile)) {
			this->_logger.LogFatal("ANSOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
			_licenseValid = false;
			return false;
		}

		// Create FastDeploy model instances.
		try {
			classifier_ = fastdeploy::vision::ocr::Classifier(_modelConfig.clsModelFile, _modelConfig.clsModelParam, cls_option);
			detector_ = fastdeploy::vision::ocr::DBDetector(_modelConfig.detectionModelFile, _modelConfig.detectionModelParam, det_option);
			recognizer_ = fastdeploy::vision::ocr::Recognizer(_modelConfig.recognizerModelFile, _modelConfig.recognizerModelParam, _modelConfig.recogizerCharDictionaryPath, rec_option);

			detector_.GetPreprocessor().SetMaxSideLen(_modelConfig.limitSideLen);
			auto& detPostprocessor = detector_.GetPostprocessor();
			detPostprocessor.SetDetDBThresh(_modelConfig.detectionDBThreshold);
			detPostprocessor.SetDetDBBoxThresh(_modelConfig.detectionBoxThreshold);
			detPostprocessor.SetDetDBUnclipRatio(_modelConfig.detectionDBUnclipRatio);
			detPostprocessor.SetDetDBScoreMode(_modelConfig.detectionScoreMode);
			// Previously inverted (useDilation == true passed 0). Assumes the
			// setter stores the flag as given — TODO confirm against FastDeploy.
			detPostprocessor.SetUseDilation(_modelConfig.useDilation ? 1 : 0);
			classifier_.GetPostprocessor().SetClsThresh(_modelConfig.clsThreshold);

			if (!(detector_.Initialized() && classifier_.Initialized() && recognizer_.Initialized())) {
				this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
				return false;
			}

			// The pipeline type is taken from the ppOCR member declaration so
			// this file stays in sync with the header.
			using PipelineType = decltype(ppOCR)::element_type;
			this->ppOCR = std::make_unique<PipelineType>(&detector_, &classifier_, &recognizer_);
			this->ppOCR->SetClsBatchSize(_modelConfig.clsBatchNumber);
			this->ppOCR->SetRecBatchSize(_modelConfig.recognizerBatchNum);
			_isInitialized = this->ppOCR->Initialized();
			return _isInitialized;
		}
		catch (...) {
			_licenseValid = false;
			this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
			return false;
		}
	}
	catch (const std::exception& e) {
		// Any other failure raised during initialization.
		this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
		_licenseValid = false;
		return false;
	}
}

/// @brief Runs OCR on the whole image using the camera id "OCRCam".
/// @param input Image to analyse.
/// @return Recognised text objects; empty when the image is empty or has a
///         side shorter than 10 pixels (no error is logged in that case).
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input) {
	if (input.empty() || IsBelowMinimumSize(input)) {
		return {};
	}
	return RunInference(input, "OCRCam");
}

/// @brief Runs OCR on the whole image and tags every result with @p cameraId.
/// @param input    Image to analyse; it is cloned before being handed to the pipeline.
/// @param cameraId Identifier stored in each returned object.
/// @return Recognised text objects; empty when the license is invalid, the
///         model is not initialized, the image is empty / too small, or an
///         exception is raised (each failure except "too small" is logged).
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
	// No coarse _mutex — ppOCR->Predict() / engine has its own internal lock
	std::vector<ANSCENTER::OCRObject> OCRObjects;
	if (!_licenseValid) {
		this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
		return OCRObjects;
	}
	if (!_isInitialized) {
		this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
		return OCRObjects;
	}
	try {
		if (input.empty()) {
			this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
			return OCRObjects;
		}
		if (IsBelowMinimumSize(input)) {
			return OCRObjects;
		}

		// Predict() takes a mutable image pointer, so work on a private copy.
		cv::Mat im = input.clone();
		fastdeploy::vision::OCRResult res_ocr;
		this->ppOCR->Predict(&im, &res_ocr);
		return ToOCRObjects(res_ocr, input.cols, input.rows, cameraId);
	}
	catch (const std::exception& e) {
		this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
		return OCRObjects;
	}
}

/// @brief Runs OCR inside each bounding box (or on the whole image when
///        @p Bbox is empty) using the camera id "OCRCAM".
/// @param input Image to analyse.
/// @param Bbox  Regions of @p input to analyse.
/// @return Same as the three-argument overload called with "OCRCAM".
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
	const std::string defaultCameraId = "OCRCAM";
	return RunInference(input, Bbox, defaultCameraId);
}

/// @brief Runs OCR inside each bounding box and tags results with @p cameraId.
///
/// Boxes that are not fully inside the image, or that have no area, are
/// skipped. Results from a crop are translated back into the coordinates of
/// @p input and their polygon is recomputed against the full image size. When
/// @p Bbox is empty the whole image is analysed instead.
///
/// @param input    Image to analyse.
/// @param Bbox     Regions of @p input to analyse.
/// @param cameraId Identifier stored in each returned object.
/// @return Recognised text objects; empty when the license is invalid, the
///         model is not initialized or the image is empty / too small. If an
///         exception is raised it is logged and the objects collected so far
///         are returned.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
	// No coarse _mutex — ppOCR->Predict() / engine has its own internal lock
	std::vector<ANSCENTER::OCRObject> OCRObjects;
	if (!_licenseValid) {
		this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
		return OCRObjects;
	}
	if (!_isInitialized) {
		this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
		return OCRObjects;
	}
	try {
		if (Bbox.empty()) {
			// No regions requested: analyse the full frame (this also applies
			// the empty / minimum-size checks).
			return RunInference(input, cameraId);
		}
		if (input.empty()) {
			this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
			return OCRObjects;
		}
		if (IsBelowMinimumSize(input)) {
			return OCRObjects;
		}

		const int frameWidth = input.cols;
		const int frameHeight = input.rows;
		for (const auto& region : Bbox) {
			const int x1 = region.x;
			const int y1 = region.y;
			const int x2 = region.x + region.width;
			const int y2 = region.y + region.height;

			const bool insideFrame = (x1 >= 0) && (y1 >= 0) && (x2 <= frameWidth) && (y2 <= frameHeight);
			const bool hasArea = (x2 > x1) && (y2 > y1);
			if (!insideFrame || !hasArea) {
				continue;
			}

			// The crop is a view into input; the callee clones it before inference.
			const cv::Rect objectPos(cv::Point(x1, y1), cv::Point(x2, y2));
			const cv::Mat croppedObject = input(objectPos);
			std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

			for (auto& detectionObject : cropResults) {
				// Crop results are relative to the crop origin (0,0); shift
				// them back into full-frame coordinates.
				detectionObject.box.x += x1;
				detectionObject.box.y += y1;
				detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
				detectionObject.cameraId = cameraId;
				OCRObjects.push_back(std::move(detectionObject));
			}
		}
		return OCRObjects;
	}
	catch (const std::exception& e) {
		this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
		return OCRObjects;
	}
}

/// @brief Releases the OCR pipeline.
///
/// The base-class destructor runs automatically after this body; it must not
/// be invoked explicitly, otherwise the base sub-object is destroyed twice.
ANSOCR::~ANSOCR() {
	try {
		Destroy();
	}
	catch (const std::exception& e) {
		this->_logger.LogFatal("ANSOCR::~ANSOCR()", e.what(), __FILE__, __LINE__);
	}
}

/// @brief Releases model buffers and the pipeline object.
///
/// Clears the initialized flag first so that a later RunInference() call
/// reports "Model is not initialized" instead of dereferencing a null pipeline.
///
/// @return true on success, false if an exception was raised (and logged).
bool ANSOCR::Destroy() {
	try {
		_isInitialized = false;
		classifier_.ReleaseReusedBuffer();
		detector_.ReleaseReusedBuffer();
		recognizer_.ReleaseReusedBuffer();
		this->ppOCR.reset();
		return true;
	}
	catch (const std::exception& e) {
		this->_logger.LogFatal("ANSOCR::Destroy", e.what(), __FILE__, __LINE__);
		return false;
	}
}

} // namespace ANSCENTER