Refactor project structure
This commit is contained in:
388
modules/ANSOCR/ANSOCR.cpp
Normal file
388
modules/ANSOCR/ANSOCR.cpp
Normal file
@@ -0,0 +1,388 @@
|
||||
#include "ANSOCR.h"
|
||||
#include "Utility.h"
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <omp.h>
|
||||
namespace ANSCENTER {
|
||||
bool ANSOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
|
||||
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
||||
try
|
||||
{
|
||||
bool result = ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
||||
if (!result) return false;
|
||||
auto option = fastdeploy::RuntimeOption();
|
||||
// Add default values to modelConfig if required.
|
||||
_modelConfig.precisionType = "fp32";
|
||||
_modelConfig.gpuMemory = 4000;
|
||||
_modelConfig.limitType = "max";
|
||||
_modelConfig.cpuThreads = 10;
|
||||
_modelConfig.tableModelMaxLengh = 488;
|
||||
_modelConfig.detectionScoreMode = "slow";
|
||||
_modelConfig.ensureASCII = true;
|
||||
|
||||
if (_modelConfig.limitSideLen <= 0) _modelConfig.limitSideLen = 960;
|
||||
if (_modelConfig.detectionDBThreshold <= 0) _modelConfig.detectionDBThreshold = 0.3;
|
||||
if (_modelConfig.detectionBoxThreshold <= 0) _modelConfig.detectionBoxThreshold = 0.6;
|
||||
if (_modelConfig.detectionDBUnclipRatio <= 0) _modelConfig.detectionDBUnclipRatio = 1.5;
|
||||
|
||||
if (_modelConfig.clsThreshold <= 0) _modelConfig.clsThreshold = 0.9;
|
||||
if (_modelConfig.clsBatchNumber <= 0) _modelConfig.clsBatchNumber = 1;
|
||||
|
||||
if (_modelConfig.recognizerBatchNum <= 0) _modelConfig.recognizerBatchNum = 6;
|
||||
if (_modelConfig.recoginzerImageHeight <= 0) _modelConfig.recoginzerImageHeight = 48;
|
||||
if (_modelConfig.recoginzerImageWidth <= 0) _modelConfig.recoginzerImageWidth = 320;
|
||||
|
||||
if (_modelConfig.layoutScoreThreshold <= 0) _modelConfig.layoutScoreThreshold = 0.5;
|
||||
if (_modelConfig.layoutNMSThreshold <= 0) _modelConfig.layoutNMSThreshold = 0.5;
|
||||
if (_modelConfig.tableBatchNum <= 0) _modelConfig.tableBatchNum = 1;
|
||||
if (_modelConfig.cpuThreads <= 0) _modelConfig.cpuThreads = 10;
|
||||
|
||||
// Handle different engine modes
|
||||
|
||||
// Use CPU
|
||||
_modelConfig.userGPU = false;
|
||||
_modelConfig.useTensorRT = false;
|
||||
option.UseCpu();
|
||||
option.UseOpenVINOBackend();
|
||||
|
||||
auto det_option = option;
|
||||
auto cls_option = option;
|
||||
auto rec_option = option;
|
||||
|
||||
|
||||
if (!FileExist(_modelConfig.detectionModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.clsModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid classifier model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Invalid recognizer model file", __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
// Create FastDeploy Model Instances
|
||||
try {
|
||||
classifier_ = fastdeploy::vision::ocr::Classifier(_modelConfig.clsModelFile, _modelConfig.clsModelParam, cls_option);
|
||||
detector_ = fastdeploy::vision::ocr::DBDetector(_modelConfig.detectionModelFile, _modelConfig.detectionModelParam, det_option);
|
||||
recognizer_ = fastdeploy::vision::ocr::Recognizer(_modelConfig.recognizerModelFile, _modelConfig.recognizerModelParam, _modelConfig.recogizerCharDictionaryPath, rec_option);
|
||||
detector_.GetPreprocessor().SetMaxSideLen(_modelConfig.limitSideLen);
|
||||
detector_.GetPostprocessor().SetDetDBThresh(_modelConfig.detectionDBThreshold);
|
||||
detector_.GetPostprocessor().SetDetDBBoxThresh(_modelConfig.detectionBoxThreshold);
|
||||
detector_.GetPostprocessor().SetDetDBUnclipRatio(_modelConfig.detectionDBUnclipRatio);
|
||||
detector_.GetPostprocessor().SetDetDBScoreMode(_modelConfig.detectionScoreMode);
|
||||
if (_modelConfig.useDilation) detector_.GetPostprocessor().SetUseDilation(0);
|
||||
else detector_.GetPostprocessor().SetUseDilation(1);
|
||||
classifier_.GetPostprocessor().SetClsThresh(_modelConfig.clsThreshold);
|
||||
if (detector_.Initialized() &&
|
||||
classifier_.Initialized() &&
|
||||
recognizer_.Initialized())
|
||||
{
|
||||
this->ppOCR = std::make_unique<fastdeploy::pipeline::PPOCRv4>(&detector_, &classifier_, &recognizer_);
|
||||
this->ppOCR->SetClsBatchSize(_modelConfig.clsBatchNumber);
|
||||
this->ppOCR->SetRecBatchSize(_modelConfig.recognizerBatchNum);
|
||||
_isInitialized = this->ppOCR->Initialized();
|
||||
return _isInitialized;
|
||||
}
|
||||
else {
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (...) {
|
||||
_licenseValid = false;
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
// Handle any other exception that occurs during initialization
|
||||
this->_logger.LogFatal("ANSOCR::Initialize", e.what(), __FILE__, __LINE__);
|
||||
_licenseValid = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Convenience overload: runs OCR on the whole image and tags the results
/// with the camera id "OCRCam".
///
/// @param input  Image to analyse.
/// @return Recognised text objects; empty when the image is empty or either
///         side is shorter than 10 pixels.
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input) {
    // Reject unusable frames up front.
    const bool unusable = input.empty() || (input.cols < 10) || (input.rows < 10);
    if (unusable) {
        return std::vector<ANSCENTER::OCRObject>();
    }
    return RunInference(input, "OCRCam");
}
|
||||
|
||||
/// Runs the OCR pipeline on the whole image.
///
/// @param input     Image to analyse; it is cloned before being handed to the
///                  pipeline, so the caller's data is not modified.
/// @param cameraId  Identifier copied into every returned object.
/// @return One OCRObject per recognised text box. Empty when the license is
///         invalid, the engine is not initialized, the image is empty or has a
///         side shorter than 10 pixels, prediction fails, or an exception is
///         raised (failures are logged).
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    // Also guard the pipeline pointer itself so a released pipeline is never dereferenced.
    if (!_isInitialized || !this->ppOCR) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        // Predict takes a mutable image pointer, so work on a private copy.
        cv::Mat im = input.clone();
        fastdeploy::vision::OCRResult res_ocr;
        if (!this->ppOCR->Predict(&im, &res_ocr)) {
            // A failed prediction may leave the result only partially filled.
            this->_logger.LogError("ANSOCR::RunInference", "OCR prediction failed", __FILE__, __LINE__);
            return OCRObjects;
        }

        // The result stores its fields as parallel arrays; never index past the shortest one.
        std::size_t count = res_ocr.boxes.size();
        if (res_ocr.text.size() < count) count = res_ocr.text.size();
        if (res_ocr.rec_scores.size() < count) count = res_ocr.rec_scores.size();
        if (res_ocr.cls_labels.size() < count) count = res_ocr.cls_labels.size();
        if (res_ocr.cls_scores.size() < count) count = res_ocr.cls_scores.size();

        OCRObjects.reserve(count);
        for (std::size_t n = 0; n < count; ++n) {
            // Each box is a flat list of corner coordinates: x0,y0,x1,y1,x2,y2,...
            // NOTE(review): corners are presumably ordered top-left, top-right,
            // bottom-right; the rectangle below relies on that ordering.
            const auto& quad = res_ocr.boxes[n];
            const cv::Point corner0(static_cast<int>(quad[0]), static_cast<int>(quad[1]));
            const cv::Point corner1(static_cast<int>(quad[2]), static_cast<int>(quad[3]));
            const cv::Point corner2(static_cast<int>(quad[4]), static_cast<int>(quad[5]));
            // The fourth corner was previously built from index 6 twice and never
            // used; it is not needed for the rectangle and is no longer constructed.

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box.x = corner0.x;
            ocrObject.box.y = corner0.y;
            ocrObject.box.width = corner1.x - corner0.x;
            ocrObject.box.height = corner2.y - corner1.y;
            ocrObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(ocrObject.box, input.cols, input.rows);
            ocrObject.classId = res_ocr.cls_labels[n];
            ocrObject.confidence = res_ocr.rec_scores[n];
            ocrObject.className = res_ocr.text[n];
            // Classifier label and score are exposed as extra information.
            ocrObject.extraInfo = "cls label:" +
                std::to_string(res_ocr.cls_labels[n]) +
                ";" +
                "cls score:" + std::to_string(res_ocr.cls_scores[n]);
            ocrObject.cameraId = cameraId;
            OCRObjects.push_back(ocrObject);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
|
||||
/// Runs OCR inside each of the given regions of the image, or on the whole
/// image when no regions are supplied. Results are tagged with the camera id
/// "OCRCAM".
///
/// @param input  Image to analyse.
/// @param Bbox   Regions of interest in image coordinates. Regions that are
///               degenerate or not fully inside the image are skipped.
/// @return Recognised text objects with boxes and polygons expressed relative
///         to the full image. Empty on any failure (failures are logged).
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (Bbox.empty()) {
            // No regions: analyse the whole frame. The single-image overload performs
            // the input validation and result conversion; the mutex is recursive, so
            // re-entering while holding the lock is safe.
            return RunInference(input, std::string("OCRCAM"));
        }

        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        const int frameWidth = input.cols;
        const int frameHeight = input.rows;

        // Bbox is const, so iterate with a const reference rather than a mutable iterator.
        for (const cv::Rect& region : Bbox) {
            // Skip degenerate regions and regions that are not fully inside the frame.
            if ((region.width <= 0) || (region.height <= 0)) continue;
            const int x1 = region.x;
            const int y1 = region.y;
            const int x2 = region.x + region.width;
            const int y2 = region.y + region.height;
            if ((x1 < 0) || (y1 < 0) || (x2 > frameWidth) || (y2 > frameHeight)) continue;

            // The crop is a view into the input; the single-image overload clones it
            // before running the pipeline, so no full-frame copy is needed here.
            const cv::Mat croppedObject = input(region);
            std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

            for (ANSCENTER::OCRObject& detectionObject : cropResults) {
                // Results are relative to the crop origin; shift them back into frame coordinates.
                detectionObject.box.x += x1;
                detectionObject.box.y += y1;
                detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                detectionObject.cameraId = "OCRCAM";
                OCRObjects.push_back(detectionObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
|
||||
|
||||
/// Runs OCR inside each of the given regions of the image, or on the whole
/// image when no regions are supplied.
///
/// @param input     Image to analyse.
/// @param Bbox      Regions of interest in image coordinates. Regions that are
///                  degenerate or not fully inside the image are skipped.
/// @param cameraId  Identifier copied into every returned object.
/// @return Recognised text objects with boxes and polygons expressed relative
///         to the full image. Empty on any failure (failures are logged).
std::vector<ANSCENTER::OCRObject> ANSOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
    std::lock_guard<std::recursive_mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (Bbox.empty()) {
            // No regions: analyse the whole frame. The single-image overload performs
            // the input validation and result conversion (including the polygon); the
            // mutex is recursive, so re-entering while holding the lock is safe.
            return RunInference(input, cameraId);
        }

        if (input.empty()) {
            this->_logger.LogError("ANSOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        const int frameWidth = input.cols;
        const int frameHeight = input.rows;

        // Bbox is const, so iterate with a const reference rather than a mutable iterator.
        for (const cv::Rect& region : Bbox) {
            // Skip degenerate regions and regions that are not fully inside the frame.
            if ((region.width <= 0) || (region.height <= 0)) continue;
            const int x1 = region.x;
            const int y1 = region.y;
            const int x2 = region.x + region.width;
            const int y2 = region.y + region.height;
            if ((x1 < 0) || (y1 < 0) || (x2 > frameWidth) || (y2 > frameHeight)) continue;

            // The crop is a view into the input; the single-image overload clones it
            // before running the pipeline, so no full-frame copy is needed here.
            const cv::Mat croppedObject = input(region);
            std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

            for (ANSCENTER::OCRObject& detectionObject : cropResults) {
                // Results are relative to the crop origin; shift them back into frame coordinates.
                detectionObject.box.x += x1;
                detectionObject.box.y += y1;
                // Recompute the polygon against the full frame: the one copied from the
                // crop result was normalised against the crop size.
                detectionObject.polygon = ANSOCRUtility::RectToNormalizedPolygon(detectionObject.box, input.cols, input.rows);
                detectionObject.cameraId = cameraId;
                OCRObjects.push_back(detectionObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
||||
/// Releases the OCR resources via Destroy().
///
/// The base-class destructor is invoked automatically once this body finishes.
/// It must not be called explicitly here: doing so would run it twice on the
/// same object, which is undefined behaviour.
ANSOCR::~ANSOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSOCR::~ANSOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        // A destructor must never let an exception escape.
    }
}
|
||||
bool ANSOCR::Destroy() {
|
||||
try {
|
||||
classifier_.ReleaseReusedBuffer();
|
||||
detector_.ReleaseReusedBuffer();
|
||||
recognizer_.ReleaseReusedBuffer();
|
||||
if(ppOCR)this->ppOCR.reset();
|
||||
return true;
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
this->_logger.LogFatal("ANSOCR::Destroy", e.what(), __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user