Initial OCR to support ALPR mode with country support
This commit is contained in:
@@ -16,6 +16,12 @@
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <ANSOCRBase.h>
|
||||
#include "C:/ANSLibs/nlohmann/json.hpp"
|
||||
|
||||
#ifdef WIN32
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
const char sep = '\\';
|
||||
@@ -193,6 +199,70 @@ struct ImageViewerState {
|
||||
bool dirty = true;
|
||||
};
|
||||
|
||||
#ifdef WIN32
|
||||
// Render Unicode text onto a cv::Mat using Windows GDI
|
||||
static void putTextUnicode(cv::Mat& img, const std::string& text, cv::Point org,
|
||||
double fontScale, cv::Scalar color, int thickness) {
|
||||
// Convert UTF-8 to wide string
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, nullptr, 0);
|
||||
std::wstring wtext(wlen - 1, 0);
|
||||
MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, &wtext[0], wlen);
|
||||
|
||||
// Create a compatible DC and bitmap
|
||||
HDC hdc = CreateCompatibleDC(nullptr);
|
||||
int fontHeight = (int)(fontScale * 30); // approximate pixel height
|
||||
|
||||
HFONT hFont = CreateFontW(fontHeight, 0, 0, 0,
|
||||
(thickness > 2) ? FW_BOLD : FW_NORMAL,
|
||||
FALSE, FALSE, FALSE,
|
||||
DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS,
|
||||
ANTIALIASED_QUALITY, DEFAULT_PITCH | FF_SWISS, L"Yu Gothic UI");
|
||||
HFONT hOldFont = (HFONT)SelectObject(hdc, hFont);
|
||||
|
||||
// Measure text size
|
||||
SIZE sz;
|
||||
GetTextExtentPoint32W(hdc, wtext.c_str(), (int)wtext.size(), &sz);
|
||||
|
||||
// Create a DIB section so we can read pixels back
|
||||
BITMAPINFO bmi = {};
|
||||
bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
|
||||
bmi.bmiHeader.biWidth = sz.cx;
|
||||
bmi.bmiHeader.biHeight = -sz.cy; // top-down
|
||||
bmi.bmiHeader.biPlanes = 1;
|
||||
bmi.bmiHeader.biBitCount = 32;
|
||||
bmi.bmiHeader.biCompression = BI_RGB;
|
||||
void* bits = nullptr;
|
||||
HBITMAP hBmp = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0);
|
||||
HBITMAP hOldBmp = (HBITMAP)SelectObject(hdc, hBmp);
|
||||
|
||||
// Draw text onto the bitmap
|
||||
SetBkMode(hdc, TRANSPARENT);
|
||||
SetTextColor(hdc, RGB((int)color[2], (int)color[1], (int)color[0])); // BGR to RGB
|
||||
TextOutW(hdc, 0, 0, wtext.c_str(), (int)wtext.size());
|
||||
|
||||
// Copy rendered text onto the cv::Mat
|
||||
cv::Mat textImg(sz.cy, sz.cx, CV_8UC4, bits);
|
||||
for (int row = 0; row < sz.cy; ++row) {
|
||||
for (int col = 0; col < sz.cx; ++col) {
|
||||
cv::Vec4b px = textImg.at<cv::Vec4b>(row, col);
|
||||
if (px[0] != 0 || px[1] != 0 || px[2] != 0) {
|
||||
int dy = org.y + row;
|
||||
int dx = org.x + col;
|
||||
if (dy >= 0 && dy < img.rows && dx >= 0 && dx < img.cols) {
|
||||
img.at<cv::Vec3b>(dy, dx) = cv::Vec3b(px[0], px[1], px[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SelectObject(hdc, hOldBmp);
|
||||
SelectObject(hdc, hOldFont);
|
||||
DeleteObject(hBmp);
|
||||
DeleteObject(hFont);
|
||||
DeleteDC(hdc);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void onViewerMouse(int event, int x, int y, int flags, void* userdata) {
|
||||
ImageViewerState& s = *(ImageViewerState*)userdata;
|
||||
if (event == cv::EVENT_MOUSEWHEEL) {
|
||||
@@ -234,23 +304,27 @@ int TestOCRv5mage() {
|
||||
std::cout << "Current working directory: " << currentPath << std::endl;
|
||||
std::string licenseKey = "";
|
||||
std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip";
|
||||
std::string imagePath = "E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp";
|
||||
std::string imagePath = "C:\\Programs\\ModelTraining\\JALPR\\data\\20260329_174127_834.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp";
|
||||
|
||||
int language = 0; // CUSTOM
|
||||
int engine = 1;// GPU
|
||||
int engine = 0;// GPU
|
||||
|
||||
// For high-resolution images with PP-OCRv5 server models, use higher limitSideLen
|
||||
// (default 960 downscales large images too aggressively, missing small text)
|
||||
int gpuId = 0;
|
||||
double detDBThresh = 0.3, detBoxThresh = 0.6, detUnclipRatio = 1.5;
|
||||
double detDBThresh = 0.5, detBoxThresh = 0.3, detUnclipRatio = 1.2;
|
||||
double clsThresh = 0.9;
|
||||
int useDilation = 0;
|
||||
int useDilation = 1;
|
||||
int limitSideLen = 2560; // 2560 Higher resolution for server-grade detection
|
||||
|
||||
int createResult = CreateANSOCRHandleEx(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "",
|
||||
language, engine, gpuId, detDBThresh, detBoxThresh, detUnclipRatio, clsThresh, useDilation, limitSideLen);
|
||||
std::cout << "ANSOCR Engine Creation:" << createResult << std::endl;
|
||||
|
||||
// Enable ALPR mode with Japanese plate format
|
||||
SetANSOCRMode(&infHandle, 1); // OCR_ALPR
|
||||
SetANSOCRALPRCountry(&infHandle, 0); // ALPR_JAPAN
|
||||
|
||||
cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR);
|
||||
if (input.empty()) {
|
||||
std::cerr << "Failed to load image: " << imagePath << std::endl;
|
||||
@@ -269,7 +343,7 @@ int TestOCRv5mage() {
|
||||
auto warmupEnd = std::chrono::high_resolution_clock::now();
|
||||
double warmupMs = std::chrono::duration<double, std::milli>(warmupEnd - warmupStart).count();
|
||||
std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl;
|
||||
std::cout << "Result:" << detectionResult << std::endl;
|
||||
std::cout << "ALPR Result:" << detectionResult << std::endl;
|
||||
|
||||
// --- Benchmark: run N iterations and report stats ---
|
||||
const int benchmarkIterations = 10;
|
||||
@@ -305,24 +379,50 @@ int TestOCRv5mage() {
|
||||
int textOffset = 8;
|
||||
|
||||
if (!detectionResult.empty()) {
|
||||
pt.clear();
|
||||
std::stringstream ss;
|
||||
ss.clear();
|
||||
ss << detectionResult;
|
||||
boost::property_tree::read_json(ss, pt);
|
||||
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
|
||||
{
|
||||
const boost::property_tree::ptree& result = child.second;
|
||||
const auto class_id = GetData<int>(result, "class_id");
|
||||
const auto class_name = GetData<std::string>(result, "class_name");
|
||||
const auto x = GetData<float>(result, "x");
|
||||
const auto y = GetData<float>(result, "y");
|
||||
const auto w = GetData<float>(result, "width");
|
||||
const auto h = GetData<float>(result, "height");
|
||||
cv::rectangle(frame, cv::Rect((int)x, (int)y, (int)w, (int)h),
|
||||
// Use nlohmann::json for proper parsing of nested alpr_info
|
||||
nlohmann::json jsonResult = nlohmann::json::parse(detectionResult);
|
||||
for (const auto& result : jsonResult["results"]) {
|
||||
const std::string class_name = result.value("class_name", "");
|
||||
const int x = std::stoi(result.value("x", "0"));
|
||||
const int y = std::stoi(result.value("y", "0"));
|
||||
const int w = std::stoi(result.value("width", "0"));
|
||||
const int h = std::stoi(result.value("height", "0"));
|
||||
|
||||
cv::rectangle(frame, cv::Rect(x, y, w, h),
|
||||
cv::Scalar(0, 255, 0), boxThickness);
|
||||
cv::putText(frame, class_name, cv::Point((int)x, (int)y - textOffset),
|
||||
|
||||
// Display ALPR structured info if available
|
||||
std::string displayText = class_name;
|
||||
if (result.contains("alpr_info")) {
|
||||
const auto& alpr = result["alpr_info"];
|
||||
std::cout << "\n=== ALPR Result ===" << std::endl;
|
||||
std::cout << " Format: " << alpr.value("format", "") << std::endl;
|
||||
std::cout << " Valid: " << (alpr.value("valid", false) ? "YES" : "NO") << std::endl;
|
||||
std::cout << " Region: " << alpr.value("region", "") << std::endl;
|
||||
std::cout << " Classification: " << alpr.value("classification", "") << std::endl;
|
||||
std::cout << " Kana: " << alpr.value("kana", "") << std::endl;
|
||||
std::cout << " Designation: " << alpr.value("designation", "") << std::endl;
|
||||
std::cout << " Full Plate: " << class_name << std::endl;
|
||||
|
||||
// Build a compact display string for the viewer
|
||||
displayText = alpr.value("region", "") + " " +
|
||||
alpr.value("classification", "") + " " +
|
||||
alpr.value("kana", "") + " " +
|
||||
alpr.value("designation", "");
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
{
|
||||
int textH = (int)(fontScale * 30);
|
||||
int ty = y - textOffset - textH;
|
||||
if (ty < 0) ty = y + boxThickness + 2;
|
||||
putTextUnicode(frame, displayText, cv::Point(x, ty),
|
||||
fontScale, cv::Scalar(0, 0, 255), fontThickness);
|
||||
}
|
||||
#else
|
||||
cv::putText(frame, displayText, cv::Point(x, y - textOffset),
|
||||
cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 255), fontThickness, cv::LINE_AA);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -384,15 +484,21 @@ int TestOCRv5mage() {
|
||||
if (cv::getWindowProperty(winName, cv::WND_PROP_VISIBLE) < 1) break;
|
||||
}
|
||||
|
||||
// Release OCR handle BEFORE OpenCV cleanup to avoid CUDA teardown errors
|
||||
// (TensorRT needs the CUDA context alive to free GPU resources cleanly)
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
cv::destroyAllWindows();
|
||||
frame.release();
|
||||
input.release();
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef WIN32
|
||||
SetConsoleOutputCP(CP_UTF8);
|
||||
SetConsoleCP(CP_UTF8);
|
||||
#endif
|
||||
TestOCRv5mage();
|
||||
|
||||
//ANSOCR_VideoTest();
|
||||
|
||||
Reference in New Issue
Block a user