Refactor project structure

This commit is contained in:
2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions

388
modules/ANSOCR/ANSOCR.cpp Normal file
View File

@@ -0,0 +1,388 @@
#include "ANSOCR.h"
#include "Utility.h"
#include <opencv2/highgui.hpp>
#include <omp.h>
namespace ANSCENTER {
bool ANSOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
try
{
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
if (!result) return false;
auto option = fastdeploy::RuntimeOption();
// Add default values to modelConfig if required.
_modelConfig.precisionType = "fp32";
_modelConfig.gpuMemory = 4000;
_modelConfig.limitType = "max";
_modelConfig.cpuThreads = 10;
_modelConfig.tableModelMaxLengh = 488;
_modelConfig.detectionScoreMode = "slow";
_modelConfig.ensureASCII = true;
if (_modelConfig.limitSideLen <= 0) _modelConfig.limitSideLen = 960;
if (_modelConfig.detectionDBThreshold <= 0) _modelConfig.detectionDBThreshold = 0.3;
if (_modelConfig.detectionBoxThreshold <= 0) _modelConfig.detectionBoxThreshold = 0.6;
if (_modelConfig.detectionDBUnclipRatio <= 0) _modelConfig.detectionDBUnclipRatio = 1.5;
if (_modelConfig.clsThreshold <= 0) _modelConfig.clsThreshold = 0.9;
if (_modelConfig.clsBatchNumber <= 0) _modelConfig.clsBatchNumber = 1;
if (_modelConfig.recognizerBatchNum <= 0) _modelConfig.recognizerBatchNum = 6;
if (_modelConfig.recoginzerImageHeight <= 0) _modelConfig.recoginzerImageHeight = 48;
if (_modelConfig.recoginzerImageWidth <= 0) _modelConfig.recoginzerImageWidth = 320;
if (_modelConfig.layoutScoreThreshold <= 0) _modelConfig.layoutScoreThreshold = 0.5;
if (_modelConfig.layoutNMSThreshold <= 0) _modelConfig.layoutNMSThreshold = 0.5;
if (_modelConfig.tableBatchNum <= 0) _modelConfig.tableBatchNum = 1;
if (_modelConfig.cpuThreads <= 0) _modelConfig.cpuThreads = 10;
// Handle different engine modes
// Use CPU
_modelConfig.userGPU = false;
_modelConfig.useTensorRT = false;
option.UseCpu();
option.UseOpenVINOBackend();
auto det_option = option;
auto cls_option = option;
auto rec_option = option;
if (!FileExist(_modelConfig.detectionModelFile)) {
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
_licenseValid = false;
return false;
}
if (!FileExist(_modelConfig.clsModelFile)) {
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
_licenseValid = false;
return false;
}
if (!FileExist(_modelConfig.recognizerModelFile)) {
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
_licenseValid = false;
return false;
}
// Create FastDeploy Model Instances
try {
classifier_ = fastdeploy::vision::ocr::Classifier(_modelConfig.clsModelFile, _modelConfig.clsModelParam, cls_option);
detector_ = fastdeploy::vision::ocr::DBDetector(_modelConfig.detectionModelFile, _modelConfig.detectionModelParam, det_option);
recognizer_ = fastdeploy::vision::ocr::Recognizer(_modelConfig.recognizerModelFile, _modelConfig.recognizerModelParam, _modelConfig.recogizerCharDictionaryPath, rec_option);
detector_.GetPreprocessor().SetMaxSideLen(_modelConfig.limitSideLen);
detector_.GetPostprocessor().SetDetDBThresh(_modelConfig.detectionDBThreshold);
detector_.GetPostprocessor().SetDetDBBoxThresh(_modelConfig.detectionBoxThreshold);
detector_.GetPostprocessor().SetDetDBUnclipRatio(_modelConfig.detectionDBUnclipRatio);
detector_.GetPostprocessor().SetDetDBScoreMode(_modelConfig.detectionScoreMode);
if (_modelConfig.useDilation) detector_.GetPostprocessor().SetUseDilation(0);
else detector_.GetPostprocessor().SetUseDilation(1);
classifier_.GetPostprocessor().SetClsThresh(_modelConfig.clsThreshold);
if (detector_.Initialized() &&
classifier_.Initialized() &&
recognizer_.Initialized())
{
this->ppOCR = std::make_unique<fastdeploy::pipeline::PPOCRv4>(&detector_, &classifier_, &recognizer_);
this->ppOCR->SetClsBatchSize(_modelConfig.clsBatchNumber);
this->ppOCR->SetRecBatchSize(_modelConfig.recognizerBatchNum);
_isInitialized = this->ppOCR->Initialized();
return _isInitialized;
}
else {
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
return false;
}
}
catch (...) {
_licenseValid = false;
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
return false;
}
}
catch (std::exception& e) {
// Handle any other exception that occurs during initialization
this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
_licenseValid = false;
return false;
}
}
/// Convenience overload: runs OCR on `input` with the camera id "OCRCam".
///
/// @param input Image to analyse.
/// @return The result of RunInference(input, "OCRCam"), or an empty list when the
///         image is empty or smaller than 10 px in either dimension.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input) {
    const bool unusable = input.empty() || (input.cols < 10) || (input.rows < 10);
    if (unusable) {
        return std::vector<ANSCENTER::OCRObject>();
    }
    return RunInference(input, "OCRCam");
}
/// Runs the OCR pipeline on a whole image.
///
/// The call is serialized on `_mutex`. The image is cloned before it is handed to
/// the pipeline, so `input` itself is not modified by this function.
///
/// @param input    Image to analyse.
/// @param cameraId Identifier copied into every returned object.
/// @return One OCRObject per detected text box. The list is empty when the license
///         is invalid, the engine is not initialized, the image is empty or smaller
///         than 10 px in either dimension, the pipeline reports failure, or an
///         exception derived from std::exception is caught.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    // Also guard against a released pipeline so a call after Destroy() cannot
    // dereference a null pointer.
    if (!_isInitialized || !ppOCR) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        cv::Mat im = input.clone();
        fastdeploy::vision::OCRResult res_ocr;
        if (!this->ppOCR->Predict(&im, &res_ocr)) {
            this->_logger.LogError("ANSOCR::RunInference", "OCR prediction failed", __FILE__, __LINE__);
            return OCRObjects;
        }

        const size_t detectionCount = res_ocr.boxes.size();
        OCRObjects.reserve(detectionCount);
        for (size_t n = 0; n < detectionCount; ++n) {
            // Each box is a flat list of corner coordinates (x0,y0,x1,y1,x2,y2,x3,y3).
            // Only the first three corners are needed to derive the rectangle below.
            const auto& quad = res_ocr.boxes[n];
            const cv::Point corner0(static_cast<int>(quad[0]), static_cast<int>(quad[1]));
            const cv::Point corner1(static_cast<int>(quad[2]), static_cast<int>(quad[3]));
            const cv::Point corner2(static_cast<int>(quad[4]), static_cast<int>(quad[5]));

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box.x = corner0.x;
            ocrObject.box.y = corner0.y;
            ocrObject.box.width = corner1.x - corner0.x;
            ocrObject.box.height = corner2.y - corner1.y;
            ocrObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(ocrObject.box, input.cols, input.rows);

            // The per-box result vectors are read by the same index as `boxes`.
            // Fall back to neutral values rather than reading out of bounds
            // if one of them turns out to be shorter.
            const auto clsLabel = (n < res_ocr.cls_labels.size()) ? res_ocr.cls_labels[n] : 0;
            const auto clsScore = (n < res_ocr.cls_scores.size()) ? res_ocr.cls_scores[n] : 0.0f;
            ocrObject.classId = clsLabel;
            ocrObject.confidence = (n < res_ocr.rec_scores.size()) ? res_ocr.rec_scores[n] : 0.0f;
            ocrObject.className = (n < res_ocr.text.size()) ? res_ocr.text[n] : std::string();
            // Expose the classifier output as free-form extra information.
            ocrObject.extraInfo = "cls label:" + std::to_string(clsLabel) +
                ";" +
                "cls score:" + std::to_string(clsScore);
            ocrObject.cameraId = cameraId;
            OCRObjects.push_back(ocrObject);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
/// Runs OCR inside each given region of an image, or on the whole image when no
/// regions are given. Every returned object carries the camera id "OCRCAM".
///
/// @param input Image to analyse.
/// @param Bbox  Regions of interest in `input` pixel coordinates. Regions with a
///              non-positive size or that are not fully inside the image are skipped.
///              When empty, the whole image is processed.
/// @return Detected text objects with boxes and polygons expressed relative to the
///         full image. Empty when the license is invalid, the engine is not
///         initialized, or the image is empty or smaller than 10 px in either
///         dimension. If an exception is caught, the objects collected so far are
///         returned.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (Bbox.empty()) {
            // No regions: reuse the whole-image overload, which also validates the input.
            // _mutex is recursive, so re-entering while holding the lock is fine.
            return RunInference(input, std::string("OCRCAM"));
        }
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        const int fWidth = input.cols;
        const int fHeight = input.rows;
        for (const cv::Rect& region : Bbox) {
            // A non-positive size would otherwise be normalized by cv::Rect(Point, Point)
            // into a rectangle that can fall outside the image.
            if ((region.width <= 0) || (region.height <= 0)) continue;
            const int x1 = region.x;
            const int y1 = region.y;
            const int x2 = region.x + region.width;
            const int y2 = region.y + region.height;
            if ((x1 < 0) || (y1 < 0) || (x2 > fWidth) || (y2 > fHeight)) continue;

            // The crop is only a view into `input`; the whole-image overload clones
            // before running the pipeline, so no deep copy of the frame is needed here.
            const cv::Rect objectPos(cv::Point(x1, y1), cv::Point(x2, y2));
            const cv::Mat croppedObject = input(objectPos);
            const std::vector<ANSCENTER::OCRObject> regionObjects = RunInference(croppedObject);
            for (const ANSCENTER::OCRObject& regionObject : regionObjects) {
                ANSCENTER::OCRObject detectionObject = regionObject;
                // Results are relative to the crop origin; shift them back into frame coordinates.
                detectionObject.box.x = regionObject.box.x + x1;
                detectionObject.box.y = regionObject.box.y + y1;
                detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                detectionObject.cameraId = "OCRCAM";
                OCRObjects.push_back(detectionObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
/// Runs OCR inside each given region of an image, or on the whole image when no
/// regions are given, tagging every returned object with `cameraId`.
///
/// @param input    Image to analyse.
/// @param Bbox     Regions of interest in `input` pixel coordinates. Regions with a
///                 non-positive size or that are not fully inside the image are
///                 skipped. When empty, the whole image is processed.
/// @param cameraId Identifier copied into every returned object.
/// @return Detected text objects with boxes and polygons expressed relative to the
///         full image. Empty when the license is invalid, the engine is not
///         initialized, or the image is empty or smaller than 10 px in either
///         dimension. If an exception is caught, the objects collected so far are
///         returned.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (Bbox.empty()) {
            // No regions: reuse the whole-image overload, which validates the input
            // and fills in the polygon. _mutex is recursive, so re-entering is fine.
            return RunInference(input, cameraId);
        }
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        const int fWidth = input.cols;
        const int fHeight = input.rows;
        for (const cv::Rect& region : Bbox) {
            // A non-positive size would otherwise be normalized by cv::Rect(Point, Point)
            // into a rectangle that can fall outside the image.
            if ((region.width <= 0) || (region.height <= 0)) continue;
            const int x1 = region.x;
            const int y1 = region.y;
            const int x2 = region.x + region.width;
            const int y2 = region.y + region.height;
            if ((x1 < 0) || (y1 < 0) || (x2 > fWidth) || (y2 > fHeight)) continue;

            // The crop is only a view into `input`; the whole-image overload clones
            // before running the pipeline, so no deep copy of the frame is needed here.
            const cv::Rect objectPos(cv::Point(x1, y1), cv::Point(x2, y2));
            const cv::Mat croppedObject = input(objectPos);
            const std::vector<ANSCENTER::OCRObject> regionObjects = RunInference(croppedObject);
            for (const ANSCENTER::OCRObject& regionObject : regionObjects) {
                ANSCENTER::OCRObject detectionObject = regionObject;
                // Results are relative to the crop origin; shift them back into frame coordinates.
                detectionObject.box.x = regionObject.box.x + x1;
                detectionObject.box.y = regionObject.box.y + y1;
                // The polygon returned for the crop is relative to the crop as well,
                // so rebuild it from the translated box against the full frame size.
                detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                detectionObject.cameraId = cameraId;
                OCRObjects.push_back(detectionObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
/// Releases the OCR pipeline resources via Destroy().
///
/// The base-class destructor is invoked automatically once this body finishes, so it
/// must not be called explicitly here: doing so would destroy the base sub-object twice.
ANSOCR::~ANSOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::~ANSOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // Nothing may escape a destructor; there is no message to report here.
    }
}
bool ANSOCR::Destroy() {
try {
classifier_.ReleaseReusedBuffer();
detector_.ReleaseReusedBuffer();
recognizer_.ReleaseReusedBuffer();
if(ppOCR)this->ppOCR.reset();
return true;
}
catch (std::exception& e) {
this->_logger.LogFatal("ANSOCR::Destroy", e.what(), __FILE__, __LINE__);
return false;
}
}
};