// Listing metadata (file-viewer residue): 423 lines, 15 KiB, C++
#include "ANSOdOCR.h"
|
|
#include "Utility.h"
|
|
#include <opencv2/highgui.hpp>
|
|
#include <omp.h>
|
|
#include "ANSYOLOOD.h"
|
|
#include "ANSTENSORRTOD.h"
|
|
namespace ANSCENTER {
|
|
bool ANSODOCR::Initialize(std::string licenseKey, OCRModelConfig modelConfig,
|
|
const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
|
|
try
|
|
{
|
|
bool result = ANSOCRBase::Init(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
|
|
if (!result) return false;
|
|
|
|
// Check if detector and ocr model files exist
|
|
_modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "lpd.onnx");
|
|
_modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ocr.onnx");
|
|
|
|
if (!FileExist(_modelConfig.detectionModelFile)) {
|
|
this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
|
|
_licenseValid = false;
|
|
return false;
|
|
}
|
|
|
|
if (!FileExist(_modelConfig.recognizerModelFile)) {
|
|
this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid OCR recognizer model file", __FILE__, __LINE__);
|
|
_licenseValid = false;
|
|
return false;
|
|
}
|
|
try {
|
|
|
|
// Check the hardware type
|
|
engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();// EngineType::CPU;//
|
|
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
|
|
// Use TensorRT YoloV11
|
|
this->_lpDetector = std::make_unique <ANSCENTER::TENSORRTOD>();
|
|
this->_ocrDetector = std::make_unique<ANSCENTER::TENSORRTOD>();
|
|
|
|
}
|
|
else {
|
|
// Use ONNX YoloV11
|
|
this->_lpDetector = std::make_unique <ANSCENTER::YOLOOD>();
|
|
this->_ocrDetector = std::make_unique<ANSCENTER::YOLOOD>();
|
|
}
|
|
|
|
// Run initialization
|
|
_ocrModelConfig.detectionScoreThreshold = modelConfig.clsThreshold;
|
|
_lpdmodelConfig.detectionScoreThreshold = modelConfig.detectionBoxThreshold;
|
|
if (_lpdmodelConfig.detectionScoreThreshold < 0.25)_lpdmodelConfig.detectionScoreThreshold = 0.25;
|
|
if (_ocrModelConfig.detectionScoreThreshold < 0.25)_ocrModelConfig.detectionScoreThreshold = 0.25;
|
|
_lpdmodelConfig.modelConfThreshold = 0.5;
|
|
_lpdmodelConfig.modelMNSThreshold = 0.5;
|
|
_ocrModelConfig.modelConfThreshold = 0.5;
|
|
_ocrModelConfig.modelMNSThreshold = 0.5;
|
|
|
|
if (!this->_lpDetector->LoadModelFromFolder("", _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels)) {
|
|
return false;
|
|
}
|
|
|
|
if (!this->_ocrDetector->LoadModelFromFolder("", _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels)) {
|
|
return false;
|
|
}
|
|
return _isInitialized;
|
|
}
|
|
catch (...) {
|
|
_licenseValid = false;
|
|
this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
catch (std::exception& e) {
|
|
// Handle any other exception that occurs during initialization
|
|
this->_logger.LogFatal("ANSODOCR::Initialize", e.what(), __FILE__, __LINE__);
|
|
_licenseValid = false;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/// @brief Convenience overload: runs OCR on @p input using the fixed camera id "OCRCPUCAM".
/// @param input Image to process.
/// @return An empty vector when @p input is empty or smaller than 10 pixels in
///         either dimension; otherwise whatever the (input, cameraId) overload returns.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input) {
    const bool usable = !input.empty() && input.cols >= 10 && input.rows >= 10;
    if (!usable) {
        return std::vector<ANSCENTER::OCRObject>();
    }
    return RunInference(input, "OCRCPUCAM");
}
|
|
|
|
/// @brief Runs OCR on a whole image and tags every result with @p cameraId.
///
/// Holds _mutex for the whole call. Single-channel input is converted to BGR
/// before being handed to the OCR engine; other input is cloned.
///
/// @param input    Image to process; must be non-empty and at least 10x10.
/// @param cameraId Copied into the cameraId field of every returned object.
/// @return One OCRObject per accepted OCR result. Results whose polygon does not
///         have exactly 4 points, or whose clamped box is not larger than 1 pixel
///         in both dimensions, are skipped (and logged). Returns an empty vector
///         when the license is invalid, the model is not initialized, the input
///         is unusable, or a required engine pointer is null. If an exception is
///         caught, it is logged and the objects collected so far are returned.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::string cameraId) {
    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSODOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        // Convert grayscale to BGR if necessary
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        // Guard the pointer that is actually dereferenced below (ppocr) as well
        // as the detector; checking only _ocrDetector let a null ppocr through.
        if (!this->_ocrDetector || !ppocr) {
            this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);
        OCRObjects.reserve(res_ocr.size());

        for (const auto& prediction : res_ocr) {
            if (prediction.box.size() != 4) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            cv::Point rook_points[4];
            for (size_t m = 0; m < 4; ++m) {
                rook_points[m] = cv::Point(
                    static_cast<int>(prediction.box[m][0]),
                    static_cast<int>(prediction.box[m][1])
                );
            }

            // The box is built from polygon points 0, 1 and 2:
            // origin = point 0, width = x1 - x0, height = y2 - y1.
            int x = std::max(0, rook_points[0].x);
            int y = std::max(0, rook_points[0].y);
            int width = rook_points[1].x - rook_points[0].x;
            int height = rook_points[2].y - rook_points[1].y;

            // Clamp width and height to the image
            width = std::max(1, std::min(im.cols - x, width));
            height = std::max(1, std::min(im.rows - y, height));

            // Skip invalid boxes
            if (width <= 1 || height <= 1) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = prediction.cls_label;
            ocrObject.confidence = prediction.score;
            ocrObject.className = prediction.text;
            ocrObject.extraInfo = "cls label: " + std::to_string(prediction.cls_label)
                + "; cls score: " + std::to_string(prediction.cls_score);
            ocrObject.cameraId = cameraId;

            OCRObjects.push_back(ocrObject);
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }

    return OCRObjects;
}
|
|
|
|
|
|
/// @brief Runs OCR either on each region in @p Bbox or, when @p Bbox is empty,
///        on the whole image.
///
/// Region mode: every box is clamped to the frame, boxes smaller than 5x5 after
/// clamping are ignored, each crop is passed to the single-image RunInference
/// overload, and the returned boxes are translated back into frame coordinates.
///
/// Whole-image mode: the OCR engine is called directly on the (BGR) frame.
///
/// Locking: _mutex is a std::mutex, which must not be locked again by the thread
/// that already owns it. The single-image overload used in region mode locks
/// _mutex itself, so the lock taken here is released before those nested calls.
///
/// @param input Image to process; must be non-empty and at least 10x10.
/// @param Bbox  Regions of interest in @p input coordinates; may be empty.
/// @return Recognized objects with boxes in @p input coordinates; empty on
///         invalid license, uninitialized model or unusable input. A caught
///         std::exception is logged and the objects collected so far are returned.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox) {
    // unique_lock instead of lock_guard: the lock has to be dropped early in region mode.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        // Convert grayscale images to 3-channel BGR if needed (shared by both modes)
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input.clone();
        }
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;

        if (!Bbox.empty()) {
            // Only the local frame is touched from here on; release the lock so the
            // nested RunInference calls can acquire _mutex without self-deadlock.
            lock.unlock();

            for (const cv::Rect& region : Bbox) {
                const int x1 = std::max(0, region.x);
                const int y1 = std::max(0, region.y);
                const int width = std::min(fWidth - x1, region.width);
                const int height = std::min(fHeight - y1, region.height);
                if ((width < 5) || (height < 5)) continue;

                // Get cropped object (a view into frame, no pixel copy here)
                const cv::Mat croppedObject = frame(cv::Rect(x1, y1, width, height));
                const std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

                for (const ANSCENTER::OCRObject& found : cropResults) {
                    ANSCENTER::OCRObject detectionObject = found;
                    // Crop-local coordinates start at (0,0); shift back into the frame.
                    detectionObject.box.x = std::max(0, found.box.x + x1);
                    detectionObject.box.y = std::max(0, found.box.y + y1);
                    detectionObject.box.width = std::min(fWidth - detectionObject.box.x, found.box.width);
                    detectionObject.box.height = std::min(fHeight - detectionObject.box.y, found.box.height);
                    OCRObjects.push_back(detectionObject);
                }
            }
        }
        else {
            if (!ppocr) {
                this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
                return OCRObjects;
            }

            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
            OCRObjects.reserve(res_ocr.size());

            for (const auto& prediction : res_ocr) { // one entry per detection
                // rook_points holds exactly 4 corners; any other count would
                // overrun the array or leave corners unset.
                if (prediction.box.size() != 4) {
                    this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }

                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; ++m) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(prediction.box[m][0]),
                        static_cast<int>(prediction.box[m][1]));
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = std::max(0, rook_points[0].x);
                ocrObject.box.y = std::max(0, rook_points[0].y);
                ocrObject.box.width = std::min(frame.cols - ocrObject.box.x,
                    rook_points[1].x - rook_points[0].x);
                ocrObject.box.height = std::min(frame.rows - ocrObject.box.y,
                    rook_points[2].y - rook_points[1].y);

                ocrObject.classId = prediction.cls_label;
                ocrObject.confidence = prediction.score;
                ocrObject.className = prediction.text;
                // Extra information: classifier label and score
                ocrObject.extraInfo = "cls label:" +
                    std::to_string(prediction.cls_label) +
                    ";" +
                    "cls score:" + std::to_string(prediction.cls_score);
                OCRObjects.push_back(ocrObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
|
|
|
/// @brief Runs OCR either on each region in @p Bbox or, when @p Bbox is empty,
///        on the whole image, and tags every result with @p cameraId.
///
/// Region mode: every box is clamped to the frame, boxes smaller than 5x5 after
/// clamping are ignored, each crop is passed to the single-image RunInference
/// overload, and the returned boxes are translated back into frame coordinates.
///
/// Whole-image mode: the OCR engine is called directly on the frame. Single-channel
/// input is converted to BGR first in both modes.
///
/// Locking: _mutex is a std::mutex, which must not be locked again by the thread
/// that already owns it. The single-image overload used in region mode locks
/// _mutex itself, so the lock taken here is released before those nested calls.
///
/// @param input    Image to process; must be non-empty and at least 10x10.
/// @param Bbox     Regions of interest in @p input coordinates; may be empty.
/// @param cameraId Written to the cameraId field of every returned object.
/// @return Recognized objects with boxes in @p input coordinates; empty on
///         invalid license, uninitialized model or unusable input. A caught
///         std::exception is logged and the objects collected so far are returned.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, std::string cameraId) {
    // unique_lock instead of lock_guard: the lock has to be dropped early in region mode.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;

    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }

    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        // Convert grayscale images to 3-channel BGR if needed. Done once for both
        // modes so the whole-image path gets the same input format as the region path.
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input.clone();
        }
        const int fWidth = frame.cols;
        const int fHeight = frame.rows;

        if (!Bbox.empty()) {
            // Only the local frame is touched from here on; release the lock so the
            // nested RunInference calls can acquire _mutex without self-deadlock.
            lock.unlock();

            for (const cv::Rect& region : Bbox) {
                const int x1 = std::max(0, region.x);
                const int y1 = std::max(0, region.y);
                const int width = std::min(fWidth - x1, region.width);
                const int height = std::min(fHeight - y1, region.height);
                if ((width < 5) || (height < 5)) continue;

                // Get cropped object (a view into frame, no pixel copy here)
                const cv::Mat croppedObject = frame(cv::Rect(x1, y1, width, height));
                const std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

                for (const ANSCENTER::OCRObject& found : cropResults) {
                    ANSCENTER::OCRObject detectionObject = found;
                    // Crop-local coordinates start at (0,0); shift back into the frame.
                    detectionObject.box.x = std::max(0, found.box.x + x1);
                    detectionObject.box.y = std::max(0, found.box.y + y1);
                    detectionObject.box.width = std::min(fWidth - detectionObject.box.x, found.box.width);
                    detectionObject.box.height = std::min(fHeight - detectionObject.box.y, found.box.height);
                    detectionObject.cameraId = cameraId;
                    OCRObjects.push_back(detectionObject);
                }
            }
        }
        else {
            if (!ppocr) {
                this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
                return OCRObjects;
            }

            std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
            OCRObjects.reserve(res_ocr.size());

            for (const auto& prediction : res_ocr) { // one entry per detection
                // rook_points holds exactly 4 corners; any other count would
                // overrun the array or leave corners unset.
                if (prediction.box.size() != 4) {
                    this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }

                cv::Point rook_points[4];
                for (size_t m = 0; m < 4; ++m) {
                    rook_points[m] = cv::Point(
                        static_cast<int>(prediction.box[m][0]),
                        static_cast<int>(prediction.box[m][1]));
                }

                ANSCENTER::OCRObject ocrObject;
                ocrObject.box.x = std::max(0, rook_points[0].x);
                ocrObject.box.y = std::max(0, rook_points[0].y);
                ocrObject.box.width = std::min(frame.cols - ocrObject.box.x,
                    rook_points[1].x - rook_points[0].x);
                ocrObject.box.height = std::min(frame.rows - ocrObject.box.y,
                    rook_points[2].y - rook_points[1].y);

                ocrObject.classId = prediction.cls_label;
                ocrObject.confidence = prediction.score;
                ocrObject.className = prediction.text;
                // Extra information: classifier label and score
                ocrObject.extraInfo = "cls label:" +
                    std::to_string(prediction.cls_label) +
                    ";" +
                    "cls score:" + std::to_string(prediction.cls_score);
                ocrObject.cameraId = cameraId;
                OCRObjects.push_back(ocrObject);
            }
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
}
|
|
/// @brief Releases the detectors via Destroy().
///
/// The base-class destructor is NOT called explicitly here: the language runs
/// ~ANSOCRBase() automatically after this body finishes, and invoking it by hand
/// as well would destroy the base sub-object twice (undefined behaviour).
/// All exceptions are contained, because a destructor must not let one escape.
ANSODOCR::~ANSODOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", "Unknown exception occurred", __FILE__, __LINE__);
    }
}
|
|
bool ANSODOCR::Destroy() {
|
|
try {
|
|
if (this->_ocrDetector) this->_ocrDetector.reset();
|
|
if (this->_lpDetector) this->_lpDetector.reset();
|
|
return true;
|
|
}
|
|
catch (std::exception& e) {
|
|
this->_logger.LogFatal("ANSODOCR::Destroy", e.what(), __FILE__, __LINE__);
|
|
return false;
|
|
}
|
|
}
|
|
} |