#include "ANSOnnxOCR.h"
#include "Utility.h"

// NOTE(review): the target of the third #include and every template argument
// in this file (static_cast<...>, std::vector<...>, std::pair<...>) were lost
// in the copy this revision was prepared from. They were reconstructed from
// how the values are used below; confirm them against ANSOnnxOCR.h.
#include <algorithm>
#include <exception>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/imgproc.hpp>

namespace {

    // Camera id attached to results when the caller does not supply one.
    const char kDefaultCameraId[] = "OCRONNXCAM";

    // Frames with a side shorter than this are rejected before inference.
    const int kMinImageSide = 10;

    // Caller-supplied regions with a clipped side shorter than this are skipped.
    const int kMinCropSide = 5;

    // The two textual layouts historically written to OCRObject::extraInfo.
    // NOTE(review): the layouts differ only in whitespace; both are kept
    // byte-for-byte because consumers may parse them. Consider unifying.
    enum class ExtraInfoStyle {
        Spaced,   // "cls label: N; cls score: S"
        Compact   // "cls label:N;cls score:S"
    };

    // True when either side of the image is below kMinImageSide.
    bool IsTooSmall(const cv::Mat& image) {
        return image.cols < kMinImageSide || image.rows < kMinImageSide;
    }

    // Returns a private 3-channel copy of `input`: single-channel images are
    // expanded with COLOR_GRAY2BGR, anything else is cloned unchanged.
    cv::Mat ToBgrCopy(const cv::Mat& input) {
        cv::Mat bgr;
        if (input.channels() == 1) {
            cv::cvtColor(input, bgr, cv::COLOR_GRAY2BGR);
        }
        else {
            bgr = input.clone();
        }
        return bgr;
    }

    // Converts a 4-corner quad into a rectangle clipped to a cols x rows image.
    // The origin is corner 0, the width is corner1.x - corner0.x and the height
    // is corner2.y - corner1.y. Returns false (leaving `rect` untouched) when the
    // clipped rectangle is 1 pixel or less in either dimension.
    // Precondition: quad.size() == 4 (checked by the callers).
    template <typename Quad>
    bool ClampedRectFromQuad(const Quad& quad, int cols, int rows, cv::Rect& rect) {
        const int x0 = static_cast<int>(quad[0][0]);
        const int y0 = static_cast<int>(quad[0][1]);
        const int x1 = static_cast<int>(quad[1][0]);
        const int y1 = static_cast<int>(quad[1][1]);
        const int y2 = static_cast<int>(quad[2][1]);

        const int x = std::max(0, x0);
        const int y = std::max(0, y0);
        const int width = std::min(cols - x, x1 - x0);
        const int height = std::min(rows - y, y2 - y1);
        if (width <= 1 || height <= 1) {
            return false;
        }
        rect = cv::Rect(x, y, width, height);
        return true;
    }

    // Builds an OCRObject from one engine result and an already-validated box.
    template <typename OcrResult>
    ANSCENTER::OCRObject MakeOcrObject(const OcrResult& result, const cv::Rect& box,
                                       const std::string& cameraId, ExtraInfoStyle style) {
        const std::string label = std::to_string(result.cls_label);
        const std::string score = std::to_string(result.cls_score);

        ANSCENTER::OCRObject object;
        object.box = box;
        object.classId = result.cls_label;
        object.confidence = result.score;
        object.className = result.text;
        object.extraInfo = (style == ExtraInfoStyle::Spaced)
            ? "cls label: " + label + "; cls score: " + score
            : "cls label:" + label + ";cls score:" + score;
        object.cameraId = cameraId;
        return object;
    }

} // namespace

namespace ANSCENTER {

    // Runs the base-class initialization, validates the model files named in
    // _modelConfig, pushes the detection/classification parameters into the
    // engine and initializes it.
    //
    // Returns the engine's initialization result, or false when the base
    // initialization fails, a required model file is missing, the engine
    // instance is null, or an exception is raised. Missing model files and
    // exceptions also clear _licenseValid.
    bool ANSONNXOCR::Initialize(const std::string& licenseKey, OCRModelConfig modelConfig,
                                const std::string& modelZipFilePath, const std::string& modelZipPassword,
                                int engineMode) {
        try {
            if (!ANSOCRBase::Initialize(licenseKey, modelConfig, modelZipFilePath, modelZipPassword, engineMode)) {
                return false;
            }

            // Validate detection model
            if (!FileExist(_modelConfig.detectionModelFile)) {
                this->_logger.LogFatal("ANSONNXOCR::Initialize", "Invalid detector model file: " + _modelConfig.detectionModelFile, __FILE__, __LINE__);
                _licenseValid = false;
                return false;
            }

            // Validate recognizer model
            if (!FileExist(_modelConfig.recognizerModelFile)) {
                this->_logger.LogFatal("ANSONNXOCR::Initialize", "Invalid recognizer model file: " + _modelConfig.recognizerModelFile, __FILE__, __LINE__);
                _licenseValid = false;
                return false;
            }

            // Classifier is optional - controlled by useCLS flag and file existence
            std::string clsModelPath;
            if (_modelConfig.useCLS) {
                clsModelPath = _modelConfig.clsModelFile;
                if (!clsModelPath.empty() && !FileExist(clsModelPath)) {
                    this->_logger.LogWarn("ANSONNXOCR::Initialize", "Classifier model not found, skipping: " + clsModelPath, __FILE__, __LINE__);
                    clsModelPath.clear(); // An empty path tells the engine to skip the classifier
                }
            }
            else {
                this->_logger.LogDebug("ANSONNXOCR::Initialize", "Classifier disabled (useCLS=false)", __FILE__, __LINE__);
            }

            // Destroy() resets _engine, so it can be null when Initialize is called again.
            if (!_engine) {
                this->_logger.LogFatal("ANSONNXOCR::Initialize", "Engine instance is null", __FILE__, __LINE__);
                _isInitialized = false;
                return false;
            }

            try {
                // Configure engine parameters from modelConfig.
                // NOTE(review): the threshold casts are assumed to target float - confirm.
                _engine->SetDetMaxSideLen(_modelConfig.limitSideLen);
                _engine->SetDetDbThresh(static_cast<float>(_modelConfig.detectionDBThreshold));
                _engine->SetDetBoxThresh(static_cast<float>(_modelConfig.detectionBoxThreshold));
                _engine->SetDetUnclipRatio(static_cast<float>(_modelConfig.detectionDBUnclipRatio));
                _engine->SetClsThresh(static_cast<float>(_modelConfig.clsThreshold));
                _engine->SetUseDilation(_modelConfig.useDilation);

                _isInitialized = _engine->Initialize(
                    _modelConfig.detectionModelFile,
                    clsModelPath,
                    _modelConfig.recognizerModelFile,
                    _modelConfig.recogizerCharDictionaryPath);
                return _isInitialized;
            }
            catch (const std::exception& e) {
                _licenseValid = false;
                this->_logger.LogFatal("ANSONNXOCR::Initialize", e.what(), __FILE__, __LINE__);
                return false;
            }
            catch (...) {
                _licenseValid = false;
                this->_logger.LogFatal("ANSONNXOCR::Initialize", "Failed to create ONNX OCR engine", __FILE__, __LINE__);
                return false;
            }
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSONNXOCR::Initialize", e.what(), __FILE__, __LINE__);
            _licenseValid = false;
            return false;
        }
    }

    // Runs OCR on the whole image using the default camera id.
    // Empty or too-small images yield an empty result without logging.
    std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input) {
        if (input.empty() || IsTooSmall(input)) {
            return {};
        }
        return RunInference(input, std::string(kDefaultCameraId));
    }

    // Runs OCR on the whole image and tags every result with `cameraId`.
    // Results whose box does not have 4 corners, or whose clipped box is
    // 1 pixel or less in either dimension, are dropped. Never throws:
    // failures are logged and the results collected so far are returned.
    std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::string& cameraId) {
        // No coarse _mutex — _engine->ocr() has its own internal lock
        std::vector<ANSCENTER::OCRObject> OCRObjects;
        if (!_licenseValid) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid License", __FILE__, __LINE__);
            return OCRObjects;
        }
        if (!_isInitialized) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
            return OCRObjects;
        }
        if (input.empty() || IsTooSmall(input)) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Input image is invalid or too small", __FILE__, __LINE__);
            return OCRObjects;
        }

        try {
            if (!_engine) {
                this->_logger.LogFatal("ANSONNXOCR::RunInference", "Engine instance is null", __FILE__, __LINE__);
                return OCRObjects;
            }

            // Convert grayscale to BGR if necessary; the engine always gets its own copy.
            cv::Mat im = ToBgrCopy(input);
            const auto results = _engine->ocr(im);
            OCRObjects.reserve(results.size());

            for (const auto& result : results) {
                if (result.box.size() != 4) {
                    this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid OCR box size", __FILE__, __LINE__);
                    continue;
                }
                cv::Rect box;
                if (!ClampedRectFromQuad(result.box, im.cols, im.rows, box)) {
                    continue;
                }
                OCRObjects.push_back(MakeOcrObject(result, box, cameraId, ExtraInfoStyle::Spaced));
            }
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSONNXOCR::RunInference", e.what(), __FILE__, __LINE__);
        }
        catch (...) {
            this->_logger.LogFatal("ANSONNXOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
        }
        return OCRObjects;
    }

    // Runs OCR inside each region of `Bbox` (or on the whole image when `Bbox`
    // is empty) using the default camera id for every result.
    std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) {
        return RunInference(input, Bbox, std::string(kDefaultCameraId));
    }

    // Runs OCR inside each region of `Bbox` and returns the results in full-image
    // coordinates, tagged with `cameraId`. When `Bbox` is empty the whole image is
    // processed instead. Never throws: failures are logged and the results
    // collected so far are returned.
    std::vector<ANSCENTER::OCRObject> ANSONNXOCR::RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string& cameraId) {
        // No coarse _mutex — _engine->ocr() has its own internal lock
        std::vector<ANSCENTER::OCRObject> OCRObjects;
        if (!_licenseValid) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Invalid License", __FILE__, __LINE__);
            return OCRObjects;
        }
        if (!_isInitialized) {
            this->_logger.LogError("ANSONNXOCR::RunInference", "Model is not initialized", __FILE__, __LINE__);
            return OCRObjects;
        }

        try {
            if (input.empty()) {
                this->_logger.LogError("ANSONNXOCR::RunInference", "Input image is empty", __FILE__, __LINE__);
                return OCRObjects;
            }
            if (IsTooSmall(input)) {
                return OCRObjects;
            }
            if (!_engine) {
                this->_logger.LogFatal("ANSONNXOCR::RunInference", "Engine instance is null", __FILE__, __LINE__);
                return OCRObjects;
            }

            // Grayscale input is expanded to BGR on both paths below.
            cv::Mat frame = ToBgrCopy(input);

            if (Bbox.empty()) {
                const auto results = _engine->ocr(frame);
                OCRObjects.reserve(results.size());
                for (const auto& result : results) {
                    if (result.box.size() != 4) {
                        continue;
                    }
                    cv::Rect box;
                    // Degenerate boxes are dropped rather than returned with a
                    // zero or negative size.
                    if (!ClampedRectFromQuad(result.box, frame.cols, frame.rows, box)) {
                        continue;
                    }
                    OCRObjects.push_back(MakeOcrObject(result, box, cameraId, ExtraInfoStyle::Compact));
                }
                return OCRObjects;
            }

            const cv::Rect frameBounds(0, 0, frame.cols, frame.rows);
            for (const cv::Rect& region : Bbox) {
                // Intersect with the frame so a region that starts off-image is
                // shortened instead of being shifted.
                const cv::Rect roi = region & frameBounds;
                if (roi.width < kMinCropSide || roi.height < kMinCropSide) {
                    continue;
                }

                // The single-argument overload returns nothing (without logging)
                // for crops with a side below kMinImageSide.
                const cv::Mat crop = frame(roi);
                std::vector<ANSCENTER::OCRObject> cropObjects = RunInference(crop);

                for (ANSCENTER::OCRObject& object : cropObjects) {
                    // Translate from crop coordinates back to frame coordinates.
                    object.box.x += roi.x;
                    object.box.y += roi.y;
                    object.box &= frameBounds;
                    object.cameraId = cameraId;
                    OCRObjects.push_back(std::move(object));
                }
            }
            return OCRObjects;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSONNXOCR::RunInference", e.what(), __FILE__, __LINE__);
            return OCRObjects;
        }
        catch (...) {
            this->_logger.LogFatal("ANSONNXOCR::RunInference", "Unknown exception occurred", __FILE__, __LINE__);
            return OCRObjects;
        }
    }

    // Releases the engine. No exception is allowed to leave the destructor.
    ANSONNXOCR::~ANSONNXOCR() {
        try {
            Destroy();
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSONNXOCR::~ANSONNXOCR()", e.what(), __FILE__, __LINE__);
        }
        catch (...) {
            // Nothing useful can be reported here; swallow to keep the destructor non-throwing.
        }
    }

    // Resets the engine pointer. Returns false only when doing so raises a
    // std::exception, which is logged.
    bool ANSONNXOCR::Destroy() {
        try {
            if (_engine) {
                _engine.reset();
            }
            return true;
        }
        catch (const std::exception& e) {
            this->_logger.LogFatal("ANSONNXOCR::Destroy", e.what(), __FILE__, __LINE__);
            return false;
        }
    }

    // Runs recognition only (no detection) on an already-cropped image and
    // returns {text, score}. Returns {"", 0.0f} when the model is not
    // initialized, the engine is null or the image is empty.
    std::pair<std::string, float> ANSONNXOCR::RecognizeText(const cv::Mat& croppedImage) {
        // No coarse _mutex — _engine->recognizeOnly() has its own internal lock
        if (!_isInitialized || !_engine || croppedImage.empty()) {
            return {"", 0.0f};
        }
        auto result = _engine->recognizeOnly(croppedImage);
        return {result.text, result.score};
    }

} // namespace ANSCENTER