Files
ANSCORE/modules/ANSOCR/ANSOCRBase.h

170 lines
8.8 KiB
C
Raw Normal View History

2026-03-28 16:54:11 +11:00
#ifndef ANSOCRBASE_H
#define ANSOCRBASE_H
#define ANSOCR_API __declspec(dllexport)
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <vector>
#include "LabVIEWHeader/extcode.h"
#include "ANSLicense.h"
namespace ANSCENTER {
// Configuration bundle passed to ANSOCRBase::Init / Initialize.
//
// Every member carries a default so that a default-constructed config is
// fully initialized. Several member names contain spelling mistakes
// (userGPU, recogizerCharDictionaryPath, layourDictionaryPath,
// recoginzerImage*, tableModelMaxLengh); they are kept as-is because the
// names are part of the public interface.
// Member order is preserved on purpose: it determines the object layout.
struct OCRModelConfig {
    // --- Runtime / device options ---
    bool userGPU = true;
    bool useTensorRT = false;
    int gpuId = 0;
    int gpuMemory = 4000;          // NOTE(review): unit not stated here (presumably MB) - confirm
    int cpuThreads = 10;
    bool enableMKLDNN = false;
    bool ensureASCII = true;
    // Value-initialized so a default-constructed config never holds an
    // indeterminate language value (it was the only member left uninitialized).
    OCRLanguage ocrLanguage{};
    std::string precisionType;
    std::string ocrType;
    std::string limitType;

    // --- Pipeline stage switches ---
    bool useDetector = true;
    bool useRecognizer = true;
    bool useCLS = true;
    bool useTable = false;
    bool useLayout = false;

    // --- Detection model locations ---
    std::string detectionModelDir;
    std::string detectionModelFile;
    std::string detectionModelParam;
    std::string detectionScoreMode;

    // --- Recognizer model locations ---
    std::string recognizerModelDir;
    std::string recognizerModelFile;
    std::string recognizerModelParam;
    std::string recogizerCharDictionaryPath;

    // --- Classifier (CLS) model locations ---
    std::string clsModelDir;
    std::string clsModelFile;
    std::string clsModelParam;

    // --- Layout / table model locations ---
    std::string layoutModelDir;
    std::string layourDictionaryPath;
    std::string tableModelDir;
    std::string tableCharDictionaryPath;

    // --- Detection tuning ---
    int limitSideLen = 960;
    double detectionDBThreshold = 0.3;
    double detectionBoxThreshold = 0.6;
    double detectionDBUnclipRatio = 1.5;
    bool useDilation = false;

    // --- Classifier tuning ---
    bool useAngleCLS = false;
    double clsThreshold = 0.9;
    int clsBatchNumber = 1;

    // --- Recognizer tuning ---
    int recognizerBatchNum = 6;
    int recoginzerImageHeight = 48;
    int recoginzerImageWidth = 320;

    // --- Layout / table tuning ---
    double layoutScoreThreshold = 0.5;
    double layoutNMSThreshold = 0.5;
    int tableModelMaxLengh = 488;
    int tableBatchNum = 1;
    bool mergeNoSpanStructure = true;
};
// One OCR result entry as returned by ANSOCRBase::RunInference.
struct OCRObject
{
    int classId = 0;
    int trackId = 0;
    std::string className;
    float confidence = 0.0f;
    cv::Rect box;
    std::vector<cv::Point2f> polygon;   // polygon points: x1,y1, x2,y2, x3,y3, x4,y4
    std::vector<float> kps;             // keypoints
    cv::Mat mask;                       // mask image
    std::string extraInfo;              // additional information, e.g. facial recognition
    std::string cameraId;
    // std::string attributes;          // (disabled) attributes information as a JSON string
};
// Abstract base class of the OCR engines exposed through this header.
//
// Concrete engines implement the RunInference overloads and Destroy().
// The C API declared further down in this file refers to engines through
// ANSOCRBase pointers.
class ANSOCR_API ANSOCRBase {
protected:
    bool _licenseValid{ false };
    bool _isInitialized{ false };
    std::string _licenseKey;
    std::string _modelFolder;
    std::string _modelConfigFile;
    OCRModelConfig _modelConfig;
    // 0: Auto detect, 1 GPU, 2 CPU. Defaults to 0 so the member is never
    // read while indeterminate.
    int _engineMode{ 0 };
    SPDLogger& _logger = SPDLogger::GetInstance("OCR", false);

    void CheckLicense();

    // Non-virtual initialization helper; takes the same arguments as Initialize().
    [[nodiscard]] bool Init(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode);

public:
    // Sets up the engine from a license key, a configuration and a
    // (password-protected) model zip file. Returns a success flag that
    // must not be ignored.
    [[nodiscard]] virtual bool Initialize(const std::string& licenseKey, OCRModelConfig modelConfig, const std::string& modelZipFilePath, const std::string& modelZipPassword, int engineMode);

    // Runs OCR on `input` and returns the recognized objects.
    [[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input) = 0;
    // Same as above, additionally taking a camera identifier.
    [[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::string& cameraId) = 0;
    // Runs OCR using the supplied bounding boxes within `input`.
    [[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox) = 0;
    // Bounding-box variant that additionally takes a camera identifier.
    [[nodiscard]] virtual std::vector<ANSCENTER::OCRObject> RunInference(const cv::Mat& input, const std::vector<cv::Rect>& Bbox, const std::string &cameraId) = 0;

    // Releases engine resources; implemented by each concrete engine.
    [[nodiscard]] virtual bool Destroy() = 0;

    // Virtual so that deleting a concrete engine through an ANSOCRBase*
    // runs the derived destructor (a non-virtual destructor here makes that
    // undefined behavior). The previous body was an empty try/catch and did
    // no work, so defaulting it preserves behavior.
    // NOTE(review): this adds an entry to the class's virtual table; rebuild
    // every module that includes this header.
    virtual ~ANSOCRBase() = default;
};
// Collection of static helper functions for OCR results.
// Only declarations are visible here; the behavior notes below are inferred
// from the names and signatures and should be confirmed against the
// implementation.
class ANSOCRUtility
{
public:
    // Presumably serializes a list of detections into a JSON string.
    [[nodiscard]] static std::string OCRDetectionToJsonString(
        const std::vector<OCRObject>& dets);

    // Presumably parses a textual list of bounding boxes into rectangles.
    [[nodiscard]] static std::vector<cv::Rect> GetBoundingBoxes(
        const std::string& strBBoxes);

    // Presumably formats polygon points as text.
    [[nodiscard]] static std::string PolygonToString(
        const std::vector<cv::Point2f>& polygon);

    // Presumably converts a rectangle to a polygon normalized by the
    // given image dimensions.
    [[nodiscard]] static std::vector<cv::Point2f> RectToNormalizedPolygon(
        const cv::Rect& rect,
        float imageWidth,
        float imageHeight);

    // Presumably formats keypoint values as text.
    [[nodiscard]] static std::string KeypointsToString(
        const std::vector<float>& kps);
};
}
extern "C" {

// Original signature — kept backward compatible with third-party apps built
// against the older DLL.
ANSOCR_API int CreateANSOCRHandle(
    ANSCENTER::ANSOCRBase** Handle,
    const char* licenseKey,
    const char* modelFilePath,
    const char* modelFileZipPassword,
    int language,
    int engineMode,
    int gpuId = 0,
    double detectorDBThreshold = 0.3,
    double detectorDBBoxThreshold = 0.6,
    double detectorDBUnclipRatio = 1.5,
    double classifierThreshold = 0.9,
    int useDilation = 0);

// Extended version that adds the limitSideLen parameter — new callers should
// use this one.
ANSOCR_API int CreateANSOCRHandleEx(
    ANSCENTER::ANSOCRBase** Handle,
    const char* licenseKey,
    const char* modelFilePath,
    const char* modelFileZipPassword,
    int language,
    int engineMode,
    int gpuId = 0,
    double detectorDBThreshold = 0.3,
    double detectorDBBoxThreshold = 0.6,
    double detectorDBUnclipRatio = 1.5,
    double classifierThreshold = 0.9,
    int useDilation = 0,
    int limitSideLen = 960);

} // extern "C"
// Inference entry points that hand the result back as std::string (or via a
// std::string out-parameter), plus the handle release function.
// NOTE(review): these have C linkage but use C++ types (std::string,
// cv::Mat) in their signatures, so they are only usable from C++ callers.
extern "C" {

ANSOCR_API std::string RunInference(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength);

ANSOCR_API std::string RunInferenceWithCamID(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* cameraId);

ANSOCR_API int RunInferenceCV(
    ANSCENTER::ANSOCRBase** Handle,
    const cv::Mat &image,
    std::string &ocrResult);

ANSOCR_API std::string RunInferenceBinary(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_bytes,
    unsigned int width,
    unsigned int height);

ANSOCR_API int ReleaseANSOCRHandle(
    ANSCENTER::ANSOCRBase** Handle);

ANSOCR_API std::string RunInferenceImagePath(
    ANSCENTER::ANSOCRBase** Handle,
    const char* imageFilePath);

ANSOCR_API std::string RunInferenceInCroppedImages(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* strBboxes);

ANSOCR_API std::string RunInferenceInCroppedImagesWithCamID(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* strBboxes,
    const char* cameraId);

} // extern "C"
// LabVIEW API: each function returns an int and takes LStrHandle
// parameters (detectionResult, imageStr) rather than returning std::string.
extern "C" {

ANSOCR_API int RunInference_LV(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    LStrHandle detectionResult);

ANSOCR_API int RunInference_LVWithCamID(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* cameraId,
    LStrHandle detectionResult);

ANSOCR_API int RunInferenceBinary_LV(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_bytes,
    unsigned int width,
    unsigned int height,
    LStrHandle detectionResult);

ANSOCR_API int RunInferenceImagePath_LV(
    ANSCENTER::ANSOCRBase** Handle,
    const char* imageFilePath,
    LStrHandle detectionResult);

ANSOCR_API int ANSOCRUnitTest(
    const char* modelFilePath,
    const char* imageFilePath,
    LStrHandle detectionResult);

ANSOCR_API int RunInferenceInCroppedImages_LV(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* strBboxes,
    LStrHandle detectionResult);

ANSOCR_API int RunInferenceInCroppedImages_LVWithCamID(
    ANSCENTER::ANSOCRBase** Handle,
    unsigned char* jpeg_string,
    int32 bufferLength,
    const char* strBboxes,
    const char* cameraId,
    LStrHandle detectionResult);

ANSOCR_API int RunInferenceComplete_LV(
    ANSCENTER::ANSOCRBase** Handle,
    cv::Mat** cvImage,
    const char* cameraId,
    int getJpegString,
    int jpegImageSize,
    LStrHandle detectionResult,
    LStrHandle imageStr);

ANSOCR_API int RunInferencesComplete_LV(
    ANSCENTER::ANSOCRBase** Handle,
    cv::Mat** cvImage,
    const char* cameraId,
    int maxImageSize,
    const char* strBboxes,
    LStrHandle detectionResult);

} // extern "C"
// V2 Create / Release — the handle travels as a uint64_t by value
// (no pointer-to-pointer). Unlike the original create functions, these
// declare no default arguments.
extern "C" {

ANSOCR_API uint64_t CreateANSOCRHandleEx_V2(
    const char* licenseKey,
    const char* modelFilePath,
    const char* modelFileZipPassword,
    int language,
    int engineMode,
    int gpuId,
    double detectorDBThreshold,
    double detectorDBBoxThreshold,
    double detectorDBUnclipRatio,
    double classifierThreshold,
    int useDilation,
    int limitSideLen);

ANSOCR_API uint64_t CreateANSOCRHandle_V2(
    const char* licenseKey,
    const char* modelFilePath,
    const char* modelFileZipPassword,
    int language,
    int engineMode,
    int gpuId,
    double detectorDBThreshold,
    double detectorDBBoxThreshold,
    double detectorDBUnclipRatio,
    double classifierThreshold,
    int useDilation);

ANSOCR_API int ReleaseANSOCRHandle_V2(uint64_t handleVal);

} // extern "C"
#endif