Refactor project structure

2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions
--- a/modules/ANSOCR/ANSOCR.cpp
+++ b/modules/ANSOCR/ANSOCR.cpp
@@ -0,0 +1,388 @@
+#include "ANSOCR.h"
+#include "Utility.h"
+#include <opencv2/highgui.hpp>
+#include <omp.h>
+namespace ANSCENTER {	
+	/// Builds the FastDeploy PP-OCR pipeline (detector, classifier, recognizer).
+	///
+	/// Steps: call ANSOCRBase::Initialize, force/default the values in _modelConfig,
+	/// check that the three model files exist, create the FastDeploy models on the
+	/// CPU + OpenVINO backend and wire them into a PPOCRv4 pipeline.
+	///
+	/// @param licenseKey        Forwarded to ANSOCRBase::Initialize.
+	/// @param modelConfig       Forwarded to ANSOCRBase::Initialize.
+	/// @param modelZipFilePath  Forwarded to ANSOCRBase::Initialize.
+	/// @param modelZipPassword  Forwarded to ANSOCRBase::Initialize.
+	/// @param engineMode        Forwarded to ANSOCRBase::Initialize; this function itself
+	///                          always selects the CPU + OpenVINO backend.
+	/// @return true only when the assembled pipeline reports Initialized(); false otherwise.
+	///         A missing model file or an exception additionally clears _licenseValid.
+	bool ANSOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
+		const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
+		try
+		{
+			bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
+			if (!result) return false;
+			auto option = fastdeploy::RuntimeOption();
+
+			// These settings are always forced, whatever _modelConfig held before.
+			_modelConfig.precisionType = "fp32";
+			_modelConfig.gpuMemory = 4000;
+			_modelConfig.limitType = "max";
+			_modelConfig.cpuThreads = 10;
+			_modelConfig.tableModelMaxLengh = 488;
+			_modelConfig.detectionScoreMode = "slow";
+			_modelConfig.ensureASCII = true;
+
+			// These settings only receive a default when the current value is not positive.
+			if (_modelConfig.limitSideLen <= 0) _modelConfig.limitSideLen = 960;
+			if (_modelConfig.detectionDBThreshold <= 0) _modelConfig.detectionDBThreshold = 0.3;
+			if (_modelConfig.detectionBoxThreshold <= 0) _modelConfig.detectionBoxThreshold = 0.6;
+			if (_modelConfig.detectionDBUnclipRatio <= 0) _modelConfig.detectionDBUnclipRatio = 1.5;
+
+			if (_modelConfig.clsThreshold <= 0) _modelConfig.clsThreshold = 0.9;
+			if (_modelConfig.clsBatchNumber <= 0) _modelConfig.clsBatchNumber = 1;
+
+			if (_modelConfig.recognizerBatchNum <= 0) _modelConfig.recognizerBatchNum = 6;
+			if (_modelConfig.recoginzerImageHeight <= 0) _modelConfig.recoginzerImageHeight = 48;
+			if (_modelConfig.recoginzerImageWidth <= 0) _modelConfig.recoginzerImageWidth = 320;
+
+			if (_modelConfig.layoutScoreThreshold <= 0) _modelConfig.layoutScoreThreshold = 0.5;
+			if (_modelConfig.layoutNMSThreshold <= 0) _modelConfig.layoutNMSThreshold = 0.5;
+			if (_modelConfig.tableBatchNum <= 0) _modelConfig.tableBatchNum = 1;
+			// (cpuThreads needs no fallback here: it was forced to 10 above.)
+
+			// Only one engine configuration is implemented: CPU through the OpenVINO backend.
+			_modelConfig.userGPU = false;
+			_modelConfig.useTensorRT = false;
+			option.UseCpu();
+			option.UseOpenVINOBackend();
+
+			auto det_option = option;
+			auto cls_option = option;
+			auto rec_option = option;
+
+			if (!FileExist(_modelConfig.detectionModelFile)) {
+				this->_logger.LogFatal("ANSOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
+				_licenseValid = false;
+				return false;
+			}
+
+			if (!FileExist(_modelConfig.clsModelFile)) {
+				this->_logger.LogFatal("ANSOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
+				_licenseValid = false;
+				return false;
+			}
+
+			if (!FileExist(_modelConfig.recognizerModelFile)) {
+				this->_logger.LogFatal("ANSOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
+				_licenseValid = false;
+				return false;
+			}
+
+			// Create FastDeploy Model Instances
+			try {
+				classifier_ = fastdeploy::vision::ocr::Classifier(_modelConfig.clsModelFile, _modelConfig.clsModelParam, cls_option);
+				detector_ = fastdeploy::vision::ocr::DBDetector(_modelConfig.detectionModelFile, _modelConfig.detectionModelParam, det_option);
+				recognizer_ = fastdeploy::vision::ocr::Recognizer(_modelConfig.recognizerModelFile, _modelConfig.recognizerModelParam, _modelConfig.recogizerCharDictionaryPath, rec_option);
+				detector_.GetPreprocessor().SetMaxSideLen(_modelConfig.limitSideLen);
+				detector_.GetPostprocessor().SetDetDBThresh(_modelConfig.detectionDBThreshold);
+				detector_.GetPostprocessor().SetDetDBBoxThresh(_modelConfig.detectionBoxThreshold);
+				detector_.GetPostprocessor().SetDetDBUnclipRatio(_modelConfig.detectionDBUnclipRatio);
+				detector_.GetPostprocessor().SetDetDBScoreMode(_modelConfig.detectionScoreMode);
+				// Pass the flag through as-is: 1 enables dilation, 0 disables it.
+				// (The previous code passed 0 when useDilation was true and 1 otherwise.)
+				detector_.GetPostprocessor().SetUseDilation(_modelConfig.useDilation ? 1 : 0);
+				classifier_.GetPostprocessor().SetClsThresh(_modelConfig.clsThreshold);
+
+				const bool modelsReady = detector_.Initialized() &&
+					classifier_.Initialized() &&
+					recognizer_.Initialized();
+				if (!modelsReady) {
+					this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
+					return false;
+				}
+
+				// The pipeline keeps pointers to the three member models.
+				this->ppOCR = std::make_unique<fastdeploy::pipeline::PPOCRv4>(&detector_, &classifier_, &recognizer_);
+				this->ppOCR->SetClsBatchSize(_modelConfig.clsBatchNumber);
+				this->ppOCR->SetRecBatchSize(_modelConfig.recognizerBatchNum);
+				_isInitialized = this->ppOCR->Initialized();
+				return _isInitialized;
+			}
+			catch (const std::exception& e) {
+				// Keep the underlying reason in the log instead of discarding it.
+				_licenseValid = false;
+				this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
+				return false;
+			}
+			catch (...) {
+				_licenseValid = false;
+				this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
+				return false;
+			}
+		}
+		catch (const std::exception& e) {
+			// Handle any other exception that occurs during initialization
+			this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
+			_licenseValid = false;
+			return false;
+		}
+	}
+
+
+
+	/// Convenience overload: runs OCR on the whole image using the camera id "OCRCam".
+	/// Returns an empty list without running inference when the image is empty or
+	/// either dimension is below 10 pixels.
+	std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input) {
+		const bool unusable = input.empty() || (input.cols < 10) || (input.rows < 10);
+		if (unusable) {
+			return std::vector<ANSCENTER::OCRObject>();
+		}
+		return RunInference(input, "OCRCam");
+	}
+
+	/// Runs the PP-OCR pipeline on the whole image.
+	///
+	/// Holds _mutex for the duration of the call. For every detected text box one
+	/// OCRObject is produced: box/polygon from the box corner points, className from the
+	/// recognised text, confidence from the recognition score, classId from the
+	/// classifier label, and extraInfo as "cls label:<label>;cls score:<score>".
+	///
+	/// @param input     Image to process; empty images or images with a side below 10 px
+	///                  yield an empty result.
+	/// @param cameraId  Copied into every returned object.
+	/// @return Detected text objects; empty when the licence is invalid, the model is not
+	///         initialized, the input is rejected, prediction fails or an exception occurs.
+	std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
+		std::lock_guard<std::recursive_mutex> lock(_mutex);
+		std::vector<ANSCENTER::OCRObject> OCRObjects;
+		if (!_licenseValid) {
+			this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+		// Also guard against a pipeline that has been released while the flag is still set.
+		if (!_isInitialized || !this->ppOCR) {
+			this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+
+		try {
+			if (input.empty()) {
+				this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
+				return OCRObjects;
+			}
+			if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
+
+			// Predict takes a mutable image pointer, so work on a private copy of the const input.
+			cv::Mat im = input.clone();
+			fastdeploy::vision::OCRResult res_ocr;
+			if (!this->ppOCR->Predict(&im, &res_ocr)) {
+				this->_logger.LogError("ANSOCR::RunInference", "OCR prediction failed", __FILE__, __LINE__);
+				return OCRObjects;
+			}
+
+			// The per-box vectors are indexed in parallel below; refuse a result where they disagree.
+			const std::size_t detections = res_ocr.boxes.size();
+			if ((res_ocr.text.size() < detections) || (res_ocr.rec_scores.size() < detections) ||
+				(res_ocr.cls_labels.size() < detections) || (res_ocr.cls_scores.size() < detections)) {
+				this->_logger.LogError("ANSOCR::RunInference", "Inconsistent OCR result", __FILE__, __LINE__);
+				return OCRObjects;
+			}
+
+			OCRObjects.reserve(detections);
+			for (std::size_t n = 0; n < detections; ++n) { // number of detections
+				// Each box holds four corner points stored as x,y pairs (8 values).
+				cv::Point corners[4];
+				corners[0] = cv::Point(static_cast<int>(res_ocr.boxes[n][0]), static_cast<int>(res_ocr.boxes[n][1]));
+				corners[1] = cv::Point(static_cast<int>(res_ocr.boxes[n][2]), static_cast<int>(res_ocr.boxes[n][3]));
+				corners[2] = cv::Point(static_cast<int>(res_ocr.boxes[n][4]), static_cast<int>(res_ocr.boxes[n][5]));
+				// Fourth corner: y is element 7 (the previous code read element 6 twice).
+				corners[3] = cv::Point(static_cast<int>(res_ocr.boxes[n][6]), static_cast<int>(res_ocr.boxes[n][7]));
+
+				ANSCENTER::OCRObject ocrObject;
+				// The rectangle is derived from the first three corners only.
+				ocrObject.box.x = corners[0].x;
+				ocrObject.box.y = corners[0].y;
+				ocrObject.box.width = corners[1].x - corners[0].x;
+				ocrObject.box.height = corners[2].y - corners[1].y;
+				ocrObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(ocrObject.box, input.cols, input.rows);
+				ocrObject.classId = res_ocr.cls_labels[n];
+				ocrObject.confidence = res_ocr.rec_scores[n];
+				ocrObject.className = res_ocr.text[n];
+				// Add extra information for cls score  cls label
+				ocrObject.extraInfo = "cls label:" +
+					std::to_string(res_ocr.cls_labels[n]) +
+					";" +
+					"cls score:" + std::to_string(res_ocr.cls_scores[n]);
+				ocrObject.cameraId = cameraId;
+				OCRObjects.push_back(ocrObject);
+			}
+			return OCRObjects;
+		}
+		catch (const std::exception& e) {
+			this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
+			return OCRObjects;
+		}
+	}
+
+
+	/// Runs OCR inside each given region of the image, or on the whole image when
+	/// no regions are supplied. Every returned object carries the camera id "OCRCAM".
+	///
+	/// Holds _mutex (recursive) for the duration of the call, including the nested
+	/// RunInference calls.
+	///
+	/// @param input  Image to process; empty images or images with a side below 10 px
+	///               yield an empty result.
+	/// @param Bbox   Regions in image coordinates. Regions that are not fully inside the
+	///               image, or have a non-positive width/height, are skipped.
+	/// @return Detected text objects with boxes and polygons expressed relative to the
+	///         full image; empty on licence/initialization failure or rejected input.
+	std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
+		std::lock_guard<std::recursive_mutex> lock(_mutex);
+		std::vector<ANSCENTER::OCRObject> OCRObjects;
+		if (!_licenseValid) {
+			this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+		if (!_isInitialized) {
+			this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+		try {
+			// Validate the image once, for both the region path and the whole-image path.
+			if (input.empty()) {
+				this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
+				return OCRObjects;
+			}
+			if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
+
+			if (Bbox.empty()) {
+				// No regions: same as the single-image overload with the fixed camera id.
+				return RunInference(input, std::string("OCRCAM"));
+			}
+
+			const int fWidth = input.cols;
+			const int fHeight = input.rows;
+			for (const cv::Rect& region : Bbox) {
+				const int x1 = region.x;
+				const int y1 = region.y;
+				const int x2 = region.x + region.width;
+				const int y2 = region.y + region.height;
+				// Skip degenerate regions and regions that leave the image.
+				if ((region.width <= 0) || (region.height <= 0)) continue;
+				if ((x1 < 0) || (y1 < 0) || (x2 > fWidth) || (y2 > fHeight)) continue;
+
+				// The crop is only a view here; the callee makes its own copy before predicting.
+				const cv::Mat croppedObject = input(region);
+				std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
+				for (ANSCENTER::OCRObject& detectionObject : OCRTempObjects) {
+					// Correct bounding box position as the croppedObject x,y will be original (0,0)
+					detectionObject.box.x += x1;
+					detectionObject.box.y += y1;
+					detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
+					detectionObject.cameraId = "OCRCAM";
+					OCRObjects.push_back(detectionObject);
+				}
+			}
+			return OCRObjects;
+		}
+		catch (const std::exception& e) {
+			this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
+			return OCRObjects;
+		}
+	}
+
+
+	/// Runs OCR inside each given region of the image, or on the whole image when
+	/// no regions are supplied, tagging every result with the caller's camera id.
+	///
+	/// Holds _mutex (recursive) for the duration of the call, including the nested
+	/// RunInference calls.
+	///
+	/// @param input     Image to process; empty images or images with a side below 10 px
+	///                  yield an empty result.
+	/// @param Bbox      Regions in image coordinates. Regions that are not fully inside the
+	///                  image, or have a non-positive width/height, are skipped.
+	/// @param cameraId  Copied into every returned object.
+	/// @return Detected text objects with boxes and polygons expressed relative to the
+	///         full image; empty on licence/initialization failure or rejected input.
+	std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
+		std::lock_guard<std::recursive_mutex> lock(_mutex);
+		std::vector<ANSCENTER::OCRObject> OCRObjects;
+		if (!_licenseValid) {
+			this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+		if (!_isInitialized) {
+			this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
+			return OCRObjects;
+		}
+		try {
+			// Validate the image once, for both the region path and the whole-image path.
+			if (input.empty()) {
+				this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
+				return OCRObjects;
+			}
+			if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;
+
+			if (Bbox.empty()) {
+				// No regions: same as the single-image overload, which also fills in the polygon.
+				return RunInference(input, cameraId);
+			}
+
+			const int fWidth = input.cols;
+			const int fHeight = input.rows;
+			for (const cv::Rect& region : Bbox) {
+				const int x1 = region.x;
+				const int y1 = region.y;
+				const int x2 = region.x + region.width;
+				const int y2 = region.y + region.height;
+				// Skip degenerate regions and regions that leave the image.
+				if ((region.width <= 0) || (region.height <= 0)) continue;
+				if ((x1 < 0) || (y1 < 0) || (x2 > fWidth) || (y2 > fHeight)) continue;
+
+				// The crop is only a view here; the callee makes its own copy before predicting.
+				const cv::Mat croppedObject = input(region);
+				std::vector<ANSCENTER::OCRObject> OCRTempObjects = RunInference(croppedObject);
+				for (ANSCENTER::OCRObject& detectionObject : OCRTempObjects) {
+					// Correct bounding box position as the croppedObject x,y will be original (0,0)
+					detectionObject.box.x += x1;
+					detectionObject.box.y += y1;
+					// Recompute the polygon against the full image so it matches the shifted box.
+					detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
+					detectionObject.cameraId = cameraId;
+					OCRObjects.push_back(detectionObject);
+				}
+			}
+			return OCRObjects;
+		}
+		catch (const std::exception& e) {
+			this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
+			return OCRObjects;
+		}
+	}
+	/// Releases the OCR resources through Destroy(). Never lets an exception escape.
+	ANSOCR::~ANSOCR() {
+		try {
+			Destroy();
+		}
+		catch (const std::exception& e) {
+			this->_logger.LogFatal("ANSOCR::~ANSOCR()", e.what(), __FILE__, __LINE__);
+		}
+		catch (...) {
+			// A destructor must not throw; nothing more can be done here.
+		}
+		// The ANSOCRBase destructor runs automatically after this body. Calling it
+		// explicitly as well would destroy the base sub-object twice.
+	}
+	/// Releases the reusable buffers of the three models and drops the pipeline.
+	///
+	/// Takes _mutex, the same lock the RunInference overloads hold while using ppOCR,
+	/// and clears _isInitialized so later RunInference calls return early instead of
+	/// dereferencing the released pipeline.
+	///
+	/// @return true on success, false if an exception was thrown while releasing.
+	bool ANSOCR::Destroy() {
+		std::lock_guard<std::recursive_mutex> lock(_mutex);
+		try {
+			_isInitialized = false;
+			classifier_.ReleaseReusedBuffer();
+			detector_.ReleaseReusedBuffer();
+			recognizer_.ReleaseReusedBuffer();
+			if (ppOCR) this->ppOCR.reset();
+			return true;
+		}
+		catch (const std::exception& e) {
+			this->_logger.LogFatal("ANSOCR::Destroy", e.what(), __FILE__, __LINE__);
+			return false;
+		}
+	}
+};
+