// ANSCORE/ANSOCR/ANSOdOCR.cpp
// (repository viewer metadata: 423 lines, 15 KiB, C++)

#include "ANSOdOCR.h"
#include "Utility.h"
#include <opencv2/highgui.hpp>
#include <omp.h>
#include "ANSYOLOOD.h"
#include "ANSTENSORRTOD.h"
namespace ANSCENTER {
/// @brief Prepares the OCR pipeline: runs the base-class initialization, verifies that
///        the model files exist and loads the two detectors.
///
/// The detector backend is chosen from the hardware reported by
/// ANSLicenseHelper::CheckHardwareInformation(): TENSORRTOD on NVIDIA GPUs,
/// YOLOOD otherwise.
///
/// @param licenseKey        Forwarded to ANSOCRBase::Init.
/// @param modelConfig       Forwarded to ANSOCRBase::Init; its clsThreshold and
///                          detectionBoxThreshold seed the detector score thresholds
///                          (each raised to at least 0.25).
/// @param modelZipFilePath  Forwarded to ANSOCRBase::Init.
/// @param modelZipPassword  Forwarded to ANSOCRBase::Init.
/// @param engineMode        Forwarded to ANSOCRBase::Init.
/// @return true only when the base initialization succeeded, both model files were
///         found and both detectors loaded; false otherwise. Failures are logged and
///         no exception leaves this function.
bool ANSODOCR::Initialize(std::string licenseKey, OCRModelConfig modelConfig,
    const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode) {
    try
    {
        const bool result = ANSOCRBase::Init(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode);
        if (!result) return false;

        // Check if detector and ocr model files exist
        _modelConfig.detectionModelFile = CreateFilePath(_modelFolder, "lpd.onnx");
        _modelConfig.recognizerModelFile = CreateFilePath(_modelFolder, "ocr.onnx");
        if (!FileExist(_modelConfig.detectionModelFile)) {
            this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid detector model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
        if (!FileExist(_modelConfig.recognizerModelFile)) {
            this->_logger.LogFatal("ANSODOCR::Initialize", "Invalid OCR recognizer model file", __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }

        try {
            // Check the hardware type
            engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
            if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
                // Use TensorRT YoloV11
                this->_lpDetector = std::make_unique<ANSCENTER::TENSORRTOD>();
                this->_ocrDetector = std::make_unique<ANSCENTER::TENSORRTOD>();
            }
            else {
                // Use ONNX YoloV11
                this->_lpDetector = std::make_unique<ANSCENTER::YOLOOD>();
                this->_ocrDetector = std::make_unique<ANSCENTER::YOLOOD>();
            }

            // Detector thresholds: caller-supplied scores are floored at 0.25,
            // confidence / NMS thresholds are fixed at 0.5.
            _ocrModelConfig.detectionScoreThreshold = modelConfig.clsThreshold;
            _lpdmodelConfig.detectionScoreThreshold = modelConfig.detectionBoxThreshold;
            if (_lpdmodelConfig.detectionScoreThreshold < 0.25) _lpdmodelConfig.detectionScoreThreshold = 0.25;
            if (_ocrModelConfig.detectionScoreThreshold < 0.25) _ocrModelConfig.detectionScoreThreshold = 0.25;
            _lpdmodelConfig.modelConfThreshold = 0.5;
            _lpdmodelConfig.modelMNSThreshold = 0.5;
            _ocrModelConfig.modelConfThreshold = 0.5;
            _ocrModelConfig.modelMNSThreshold = 0.5;

            if (!this->_lpDetector->LoadModelFromFolder("", _lpdmodelConfig, "lpd", "lpd.names", _modelFolder, _lpdLabels)) {
                this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to load detector model", __FILE__, __LINE__);
                _isInitialized = false;
                return false;
            }
            if (!this->_ocrDetector->LoadModelFromFolder("", _ocrModelConfig, "ocr", "ocr.names", _modelFolder, _ocrLabels)) {
                this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to load OCR recognizer model", __FILE__, __LINE__);
                _isInitialized = false;
                return false;
            }

            // Every RunInference overload refuses to work unless this flag is set,
            // so it must be raised once both detectors are ready.
            _isInitialized = true;
            return true;
        }
        catch (...) {
            _licenseValid = false;
            _isInitialized = false;
            this->_logger.LogFatal("ANSODOCR::Initialize", "Failed to create OCR objects", __FILE__, __LINE__);
            return false;
        }
    }
    catch (const std::exception& e) {
        // Handle any other exception that occurs during initialization
        this->_logger.LogFatal("ANSODOCR::Initialize", e.what(), __FILE__, __LINE__);
        _licenseValid = false;
        return false;
    }
    catch (...) {
        // Non-standard exceptions must not escape either
        this->_logger.LogFatal("ANSODOCR::Initialize", "Unknown exception occurred", __FILE__, __LINE__);
        _licenseValid = false;
        return false;
    }
}
/// @brief Convenience overload that runs OCR on a whole frame using the default
///        camera id "OCRCPUCAM".
/// @param input Frame to analyse.
/// @return Recognised objects; an empty vector (without logging) when the frame is
///         empty or smaller than 10 pixels in either dimension.
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input) {
    // Reject unusable frames up front so the delegate does not log them as errors.
    const bool unusable = input.empty() || input.cols < 10 || input.rows < 10;
    if (unusable) {
        return std::vector<ANSCENTER::OCRObject>();
    }
    return RunInference(input, "OCRCPUCAM");
}
/// @brief Runs OCR on a whole frame and tags every result with @p cameraId.
///
/// Holds _mutex for the full call. Single-channel input is converted to BGR before
/// recognition. Each result's quadrilateral is reduced to a rectangle anchored at
/// its first point and clipped to the image; results whose box does not have four
/// points, or whose clipped box is at most one pixel wide or high, are skipped.
///
/// @param input    Frame to analyse; must be non-empty and at least 10x10 pixels.
/// @param cameraId Identifier copied into every returned object.
/// @return Recognised objects, or an empty vector when the license is invalid, the
///         model is not initialized, the input is unusable, the OCR engine is
///         missing, or an exception occurred (all of which are logged).
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::string cameraId) {
    std::lock_guard<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (input.empty() || input.cols < 10 || input.rows < 10) {
        this->_logger.LogError("ANSODOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        // Guard the object that is actually dereferenced below (ppocr), not only
        // the detector handle.
        if (!this->_ocrDetector || !ppocr) {
            this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        // Convert grayscale to BGR if necessary; otherwise work on a private copy
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        const std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);
        OCRObjects.reserve(res_ocr.size());
        for (const PaddleOCR::OCRPredictResult& res : res_ocr) {
            if (res.box.size() != 4) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Rectangle anchored at point 0; width from points 0->1, height from points 1->2.
            const int rawLeft = static_cast<int>(res.box[0][0]);
            const int rawTop = static_cast<int>(res.box[0][1]);
            const int rawRight = static_cast<int>(res.box[1][0]);
            const int rawBottom = rawTop + (static_cast<int>(res.box[2][1]) - static_cast<int>(res.box[1][1]));

            // Clip both edges, so a box starting outside the image shrinks by the
            // clipped amount instead of keeping its full size.
            const int x = std::max(0, rawLeft);
            const int y = std::max(0, rawTop);
            const int width = std::min(im.cols, rawRight) - x;
            const int height = std::min(im.rows, rawBottom) - y;

            // Skip invalid boxes
            if (width <= 1 || height <= 1) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid bounding box dimension", __FILE__, __LINE__);
                continue;
            }

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box = cv::Rect(x, y, width, height);
            ocrObject.classId = res.cls_label;
            ocrObject.confidence = res.score;
            ocrObject.className = res.text;
            ocrObject.extraInfo = "cls label: " + std::to_string(res.cls_label)
                + "; cls score: " + std::to_string(res.cls_score);
            ocrObject.cameraId = cameraId;
            OCRObjects.push_back(ocrObject);
        }
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
    }
    return OCRObjects;
}
/// @brief Runs OCR either inside each region of @p Bbox or, when @p Bbox is empty,
///        on the whole frame.
///
/// Region mode: every rectangle is clipped to the frame, regions smaller than 5x5
/// after clipping are ignored, the single-image overload is run on each crop and
/// the resulting boxes are translated back into frame coordinates.
/// Full-frame mode: the OCR engine is run on a BGR copy of the frame; results whose
/// box does not have exactly four points are skipped.
///
/// @param input Frame to analyse; frames that are empty or smaller than 10x10 yield
///              an empty result.
/// @param Bbox  Regions of interest in frame coordinates; may be empty.
/// @return Recognised objects in frame coordinates; empty on any failure (failures
///         are logged, no exception leaves this function).
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox) {
    // unique_lock (not lock_guard) because region mode must release the mutex
    // before delegating, see below.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        if (!Bbox.empty()) {
            // The per-crop overload locks _mutex itself. std::mutex is not
            // recursive, so holding it across that call would lock it twice on
            // the same thread; release it first.
            lock.unlock();

            // Convert grayscale images to 3-channel BGR if needed. Otherwise a
            // shallow header is enough: the crops are only read here and the
            // callee makes its own copy.
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input;
            }
            const int fWidth = frame.cols;
            const int fHeight = frame.rows;

            for (const cv::Rect& region : Bbox) {
                // Clip all four edges to the frame
                const int x1 = std::max(0, region.x);
                const int y1 = std::max(0, region.y);
                const int x2 = std::min(fWidth, region.x + region.width);
                const int y2 = std::min(fHeight, region.y + region.height);
                const int width = x2 - x1;
                const int height = y2 - y1;
                if ((width < 5) || (height < 5)) continue;

                // Get cropped object and recognise it
                const cv::Mat croppedObject = frame(cv::Rect(x1, y1, width, height));
                std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

                for (ANSCENTER::OCRObject& detectionObject : cropResults) {
                    // Crop coordinates start at (0,0); shift back into the frame and clip
                    detectionObject.box.x = std::max(0, detectionObject.box.x + x1);
                    detectionObject.box.y = std::max(0, detectionObject.box.y + y1);
                    detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width);
                    detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height);
                    OCRObjects.push_back(detectionObject);
                }
            }
            return OCRObjects;
        }

        // Full-frame mode (mutex still held)
        if (!ppocr) {
            this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        // Convert grayscale images to 3-channel BGR if needed
        cv::Mat frame;
        if (input.channels() == 1) {
            cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
        }
        else {
            frame = input.clone();
        }

        const std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(frame);
        OCRObjects.reserve(res_ocr.size());
        for (const PaddleOCR::OCRPredictResult& res : res_ocr) {
            // Exactly four corner points are required; anything else cannot be
            // mapped onto the rectangle computed below.
            if (res.box.size() != 4) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Rectangle anchored at point 0; width from points 0->1, height from points 1->2.
            const int rawLeft = int(res.box[0][0]);
            const int rawTop = int(res.box[0][1]);
            const int rawRight = int(res.box[1][0]);
            const int rawBottom = rawTop + (int(res.box[2][1]) - int(res.box[1][1]));

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box.x = std::max(0, rawLeft);
            ocrObject.box.y = std::max(0, rawTop);
            ocrObject.box.width = std::min(frame.cols, rawRight) - ocrObject.box.x;
            ocrObject.box.height = std::min(frame.rows, rawBottom) - ocrObject.box.y;
            ocrObject.classId = res.cls_label;
            ocrObject.confidence = res.score;
            ocrObject.className = res.text;
            // Extra information: classifier label and score
            ocrObject.extraInfo = "cls label:" +
                std::to_string(res.cls_label) +
                ";" +
                "cls score:" + std::to_string(res.cls_score);
            OCRObjects.push_back(ocrObject);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return OCRObjects;
    }
}
/// @brief Runs OCR either inside each region of @p Bbox or, when @p Bbox is empty,
///        on the whole frame, tagging every result with @p cameraId.
///
/// Region mode: every rectangle is clipped to the frame, regions smaller than 5x5
/// after clipping are ignored, the single-image overload is run on each crop and
/// the resulting boxes are translated back into frame coordinates.
/// Full-frame mode: the OCR engine is run on a BGR copy of the frame; results whose
/// box does not have exactly four points are skipped.
///
/// @param input    Frame to analyse; frames that are empty or smaller than 10x10
///                 yield an empty result.
/// @param Bbox     Regions of interest in frame coordinates; may be empty.
/// @param cameraId Identifier copied into every returned object.
/// @return Recognised objects in frame coordinates; empty on any failure (failures
///         are logged, no exception leaves this function).
std::vector<ANSCENTER::OCRObject> ANSODOCR::RunInference(const cv::Mat& input, std::vector<cv::Rect> Bbox, std::string cameraId) {
    // unique_lock (not lock_guard) because region mode must release the mutex
    // before delegating, see below.
    std::unique_lock<std::mutex> lock(_mutex);
    std::vector<ANSCENTER::OCRObject> OCRObjects;
    if (!_licenseValid) {
        this->_logger.LogError("ANSODOCR::RunInference", "Invalid License", __FILE__, __LINE__);
        return OCRObjects;
    }
    if (!_isInitialized) {
        this->_logger.LogError("ANSODOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
        return OCRObjects;
    }
    try {
        if (input.empty()) {
            this->_logger.LogError("ANSODOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
            return OCRObjects;
        }
        if ((input.cols < 10) || (input.rows < 10)) return OCRObjects;

        if (!Bbox.empty()) {
            // The per-crop overload locks _mutex itself. std::mutex is not
            // recursive, so holding it across that call would lock it twice on
            // the same thread; release it first.
            lock.unlock();

            // Convert grayscale images to 3-channel BGR if needed. Otherwise a
            // shallow header is enough: the crops are only read here and the
            // callee makes its own copy.
            cv::Mat frame;
            if (input.channels() == 1) {
                cv::cvtColor(input, frame, cv::COLOR_GRAY2BGR);
            }
            else {
                frame = input;
            }
            const int fWidth = frame.cols;
            const int fHeight = frame.rows;

            for (const cv::Rect& region : Bbox) {
                // Clip all four edges to the frame
                const int x1 = std::max(0, region.x);
                const int y1 = std::max(0, region.y);
                const int x2 = std::min(fWidth, region.x + region.width);
                const int y2 = std::min(fHeight, region.y + region.height);
                const int width = x2 - x1;
                const int height = y2 - y1;
                if ((width < 5) || (height < 5)) continue;

                // Get cropped object and recognise it
                const cv::Mat croppedObject = frame(cv::Rect(x1, y1, width, height));
                std::vector<ANSCENTER::OCRObject> cropResults = RunInference(croppedObject);

                for (ANSCENTER::OCRObject& detectionObject : cropResults) {
                    // Crop coordinates start at (0,0); shift back into the frame and clip
                    detectionObject.box.x = std::max(0, detectionObject.box.x + x1);
                    detectionObject.box.y = std::max(0, detectionObject.box.y + y1);
                    detectionObject.box.width = std::min(fWidth - detectionObject.box.x, detectionObject.box.width);
                    detectionObject.box.height = std::min(fHeight - detectionObject.box.y, detectionObject.box.height);
                    // Replace the default id assigned by the single-image overload
                    detectionObject.cameraId = cameraId;
                    OCRObjects.push_back(detectionObject);
                }
            }
            return OCRObjects;
        }

        // Full-frame mode (mutex still held)
        if (!ppocr) {
            this->_logger.LogFatal("ANSODOCR::RunInference", "PPOCR instance is null", __FILE__, __LINE__);
            return OCRObjects;
        }

        // Convert grayscale images to 3-channel BGR if needed, as the other
        // overloads do before invoking the OCR engine.
        cv::Mat im;
        if (input.channels() == 1) {
            cv::cvtColor(input, im, cv::COLOR_GRAY2BGR);
        }
        else {
            im = input.clone();
        }

        const std::vector<PaddleOCR::OCRPredictResult> res_ocr = ppocr->ocr(im);
        OCRObjects.reserve(res_ocr.size());
        for (const PaddleOCR::OCRPredictResult& res : res_ocr) {
            // Exactly four corner points are required; anything else cannot be
            // mapped onto the rectangle computed below.
            if (res.box.size() != 4) {
                this->_logger.LogError("ANSODOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                continue;
            }

            // Rectangle anchored at point 0; width from points 0->1, height from points 1->2.
            const int rawLeft = int(res.box[0][0]);
            const int rawTop = int(res.box[0][1]);
            const int rawRight = int(res.box[1][0]);
            const int rawBottom = rawTop + (int(res.box[2][1]) - int(res.box[1][1]));

            ANSCENTER::OCRObject ocrObject;
            ocrObject.box.x = std::max(0, rawLeft);
            ocrObject.box.y = std::max(0, rawTop);
            ocrObject.box.width = std::min(im.cols, rawRight) - ocrObject.box.x;
            ocrObject.box.height = std::min(im.rows, rawBottom) - ocrObject.box.y;
            ocrObject.classId = res.cls_label;
            ocrObject.confidence = res.score;
            ocrObject.className = res.text;
            // Extra information: classifier label and score
            ocrObject.extraInfo = "cls label:" +
                std::to_string(res.cls_label) +
                ";" +
                "cls score:" + std::to_string(res.cls_score);
            ocrObject.cameraId = cameraId;
            OCRObjects.push_back(ocrObject);
        }
        return OCRObjects;
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::RunInference", e.what(), __FILE__, __LINE__);
        return OCRObjects;
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        return OCRObjects;
    }
}
/// @brief Releases the detectors via Destroy().
///
/// The base-class destructor is deliberately NOT called here: the language runs
/// ~ANSOCRBase() automatically once this body finishes, and calling it explicitly
/// as well would destroy the base subobject twice (undefined behaviour).
/// Exceptions are logged and swallowed because a destructor must not throw.
ANSODOCR::~ANSODOCR() {
    try {
        Destroy();
    }
    catch (const std::exception& e) {
        this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", e.what(), __FILE__, __LINE__);
    }
    catch (...) {
        this->_logger.LogFatal("ANSODOCR::~ANSODOCR()", "Unknown exception occurred", __FILE__, __LINE__);
    }
}
/// @brief Releases both detector instances (OCR detector first, then plate detector).
/// @return true when both were released; false if a std::exception was raised while
///         releasing them (the exception is logged).
bool ANSODOCR::Destroy() {
    bool released = false;
    try {
        // reset() on an already-empty smart pointer does nothing, so no explicit
        // emptiness test is needed before each call.
        this->_ocrDetector.reset();
        this->_lpDetector.reset();
        released = true;
    }
    catch (std::exception& ex) {
        this->_logger.LogFatal("ANSODOCR::Destroy", ex.what(), __FILE__, __LINE__);
    }
    return released;
}
}