#include #include #include "boost/property_tree/ptree.hpp" #include "boost/property_tree/json_parser.hpp" #include "boost/foreach.hpp" #include "boost/optional.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include "C:/ANSLibs/nlohmann/json.hpp" #ifdef WIN32 #define NOMINMAX #include #endif #ifdef WIN32 const char sep = '\\'; #else const char sep = '/'; #endif using namespace cv; template T GetData(const boost::property_tree::ptree& pt, const std::string& key) { T ret; if (boost::optional data = pt.get_optional(key)) { ret = data.get(); } return ret; } unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh) { int size = int(image.total() * image.elemSize()); std::cout << "size:" << size << std::endl; unsigned char* bytes = new unsigned char[size]; // you will have to delete[] that later std::memcpy(bytes, image.data, size * sizeof(unsigned char)); bufferLengh = size * sizeof(unsigned char); return bytes; } int TestOCRImage() { ANSCENTER::ANSOCRBase* infHandle = nullptr; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; std::filesystem::path currentPath = std::filesystem::current_path(); std::cout << "Current working directory: " << currentPath << std::endl; std::string licenseKey = ""; std::string modelFilePath = currentPath.string() + "\\ansocrmodels.zip"; std::string imagePath = currentPath.string() + "\\ocrsample.png"; std::string defaultDir = "C:\\Programs\\DemoAssets\\ANSAIModels"; if (!std::filesystem::exists(modelFilePath)) modelFilePath = defaultDir + "\\ANS_GenericOCR_v1.0.zip"; if (!std::filesystem::exists(imagePath)) imagePath = defaultDir + "\\ocrsample.png"; imagePath = "C:\\Projects\\ANSVIS\\Documentation\\TestImages\\OCR\\ocrsample.png"; int language = 0; // CUSTOM int engine = 0; int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine); std::cout << "ANSOCR Engine Creation:" << createResult << std::endl; cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR); cv::Mat frame = input.clone(); int height = frame.rows; int width = frame.cols; unsigned int bufferLength = 0; unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength); std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height); std::cout << "Result:" << detectionResult; delete jpeg_string; if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA); } } cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller frame.release(); ReleaseANSOCRHandle(&infHandle); return 0; } int ANSOCR_VideoTest() { // Get the current working directory std::filesystem::path currentPath = std::filesystem::current_path(); // Print the current working directory std::cout << "Current working directory: " << currentPath << std::endl; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; ANSCENTER::ANSOCRBase* infHandle; std::string licenseKey = ""; std::string modelFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericOCR_v1.0.zip"; std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\ALPR1.mp4"; cv::VideoCapture capture(videoFilePath); if (!capture.isOpened()) { printf("could not read this video file...\n"); return -1; } int language = 0;// CUSTOM int engine = 0; int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine); while (true) { cv::Mat frame; if (!capture.read(frame)) // if not success, break loop { std::cout << "\n Cannot read the video file. please check your video.\n"; break; } auto start = std::chrono::system_clock::now(); unsigned int bufferLength = 0; unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength); int height = frame.rows; int width = frame.cols; std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height); if (!detectionResult.empty()) { pt.clear(); std::stringstream ss; ss.clear(); ss << detectionResult; boost::property_tree::read_json(ss, pt); BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results")) { const boost::property_tree::ptree& result = child.second; const auto class_id = GetData(result, "class_id"); const auto class_name = GetData(result, "class_name"); const auto x = GetData(result, "x"); const auto y = GetData(result, "y"); const auto width = GetData(result, "width"); const auto height = GetData(result, "height"); cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2); cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5), 0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA); } } auto end = std::chrono::system_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); printf("Time = %lld ms\n", static_cast(elapsed.count())); // cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller cv::imshow("ANSOCR", frame); if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit { std::cout << "End of inserting faces.\n"; } frame.release(); delete jpeg_string; } capture.release(); cv::destroyAllWindows(); ReleaseANSOCRHandle(&infHandle); } // Viewer state for zoom/pan struct ImageViewerState { cv::Mat image; // Full-resolution annotated image double zoom = 1.0; // 1.0 = fit-to-screen double panX = 0.0; // Top-left corner in original image coords double panY = 0.0; int dispW, dispH; // Display window size (pixels) double fitScale; // Base scale to fit image into window bool dragging = false; int dragX0, dragY0; double panX0, panY0; bool dirty = true; }; #ifdef WIN32 // Render Unicode text onto a cv::Mat using Windows GDI static void putTextUnicode(cv::Mat& img, const std::string& text, cv::Point org, double fontScale, cv::Scalar color, int thickness) { // Convert UTF-8 to wide string int wlen = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, nullptr, 0); std::wstring wtext(wlen - 1, 0); MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, &wtext[0], wlen); // Create a compatible DC and bitmap HDC hdc = CreateCompatibleDC(nullptr); int fontHeight = (int)(fontScale * 30); // approximate pixel height HFONT hFont = CreateFontW(fontHeight, 0, 0, 0, (thickness > 2) ? FW_BOLD : FW_NORMAL, FALSE, FALSE, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS, ANTIALIASED_QUALITY, DEFAULT_PITCH | FF_SWISS, L"Yu Gothic UI"); HFONT hOldFont = (HFONT)SelectObject(hdc, hFont); // Measure text size SIZE sz; GetTextExtentPoint32W(hdc, wtext.c_str(), (int)wtext.size(), &sz); // Create a DIB section so we can read pixels back BITMAPINFO bmi = {}; bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); bmi.bmiHeader.biWidth = sz.cx; bmi.bmiHeader.biHeight = -sz.cy; // top-down bmi.bmiHeader.biPlanes = 1; bmi.bmiHeader.biBitCount = 32; bmi.bmiHeader.biCompression = BI_RGB; void* bits = nullptr; HBITMAP hBmp = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0); HBITMAP hOldBmp = (HBITMAP)SelectObject(hdc, hBmp); // Draw text onto the bitmap SetBkMode(hdc, TRANSPARENT); SetTextColor(hdc, RGB((int)color[2], (int)color[1], (int)color[0])); // BGR to RGB TextOutW(hdc, 0, 0, wtext.c_str(), (int)wtext.size()); // Copy rendered text onto the cv::Mat cv::Mat textImg(sz.cy, sz.cx, CV_8UC4, bits); for (int row = 0; row < sz.cy; ++row) { for (int col = 0; col < sz.cx; ++col) { cv::Vec4b px = textImg.at(row, col); if (px[0] != 0 || px[1] != 0 || px[2] != 0) { int dy = org.y + row; int dx = org.x + col; if (dy >= 0 && dy < img.rows && dx >= 0 && dx < img.cols) { img.at(dy, dx) = cv::Vec3b(px[0], px[1], px[2]); } } } } SelectObject(hdc, hOldBmp); SelectObject(hdc, hOldFont); DeleteObject(hBmp); DeleteObject(hFont); DeleteDC(hdc); } #endif static void onViewerMouse(int event, int x, int y, int flags, void* userdata) { ImageViewerState& s = *(ImageViewerState*)userdata; if (event == cv::EVENT_MOUSEWHEEL) { double factor = (cv::getMouseWheelDelta(flags) > 0) ? 1.25 : 0.8; // Zoom centered on mouse cursor position double sc = s.fitScale * s.zoom; double imgX = s.panX + x / sc; double imgY = s.panY + y / sc; s.zoom = std::clamp(s.zoom * factor, 0.2, 50.0); double newSc = s.fitScale * s.zoom; s.panX = imgX - x / newSc; s.panY = imgY - y / newSc; s.dirty = true; } else if (event == cv::EVENT_LBUTTONDOWN) { s.dragging = true; s.dragX0 = x; s.dragY0 = y; s.panX0 = s.panX; s.panY0 = s.panY; } else if (event == cv::EVENT_MOUSEMOVE && s.dragging) { double sc = s.fitScale * s.zoom; s.panX = s.panX0 - (x - s.dragX0) / sc; s.panY = s.panY0 - (y - s.dragY0) / sc; s.dirty = true; } else if (event == cv::EVENT_LBUTTONUP) { s.dragging = false; } } int TestOCRv5mage() { ANSCENTER::ANSOCRBase* infHandle = nullptr; boost::property_tree::ptree root; boost::property_tree::ptree detectionObjects; boost::property_tree::ptree pt; std::filesystem::path currentPath = std::filesystem::current_path(); std::cout << "Current working directory: " << currentPath << std::endl; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip"; std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\0b9b013343f0bd8c7809653dfab16eac_jpeg.rf.1438e1237023ad7a254605942193df99.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp"; int language = 0; // CUSTOM int engine = 1;// GPU // For high-resolution images with PP-OCRv5 server models, use higher limitSideLen // (default 960 downscales large images too aggressively, missing small text) int gpuId = 0; double detDBThresh = 0.5, detBoxThresh = 0.3, detUnclipRatio = 1.2; double clsThresh = 0.9; int useDilation = 1; int limitSideLen = 2560; // 2560 Higher resolution for server-grade detection int createResult = CreateANSOCRHandleEx(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine, gpuId, detDBThresh, detBoxThresh, detUnclipRatio, clsThresh, useDilation, limitSideLen); std::cout << "ANSOCR Engine Creation:" << createResult << std::endl; // Enable ALPR mode with Japanese plate format SetANSOCRMode(&infHandle, 1); // OCR_ALPR SetANSOCRALPRCountry(&infHandle, 0); // ALPR_JAPAN cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR); if (input.empty()) { std::cerr << "Failed to load image: " << imagePath << std::endl; ReleaseANSOCRHandle(&infHandle); return -1; } cv::Mat frame = input.clone(); int height = frame.rows; int width = frame.cols; unsigned int bufferLength = 0; unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength); // --- Warmup run (first run includes GPU kernel compilation / cache warmup) --- auto warmupStart = std::chrono::high_resolution_clock::now(); std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height); auto warmupEnd = std::chrono::high_resolution_clock::now(); double warmupMs = std::chrono::duration(warmupEnd - warmupStart).count(); std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl; std::cout << "ALPR Result:" << detectionResult << std::endl; // --- Benchmark: run N iterations and report stats --- const int benchmarkIterations = 10; std::vector times; times.reserve(benchmarkIterations); for (int i = 0; i < benchmarkIterations; ++i) { auto t0 = std::chrono::high_resolution_clock::now(); std::string result = RunInferenceBinary(&infHandle, jpeg_string, width, height); auto t1 = std::chrono::high_resolution_clock::now(); double ms = std::chrono::duration(t1 - t0).count(); times.push_back(ms); std::cout << " Run " << (i + 1) << "/" << benchmarkIterations << ": " << ms << " ms" << std::endl; } std::sort(times.begin(), times.end()); double sum = std::accumulate(times.begin(), times.end(), 0.0); double avg = sum / benchmarkIterations; double median = (benchmarkIterations % 2 == 0) ? (times[benchmarkIterations / 2 - 1] + times[benchmarkIterations / 2]) / 2.0 : times[benchmarkIterations / 2]; std::cout << "\n=== Benchmark (" << benchmarkIterations << " runs) ===" << std::endl; std::cout << " Avg: " << avg << " ms" << std::endl; std::cout << " Median: " << median << " ms" << std::endl; std::cout << " Min: " << times.front() << " ms" << std::endl; std::cout << " Max: " << times.back() << " ms" << std::endl; std::cout << " FPS: " << (1000.0 / avg) << std::endl; delete[] jpeg_string; // Draw OCR results on frame — 1.5x of original (was fontScale=1.5, thickness=3, offset=5) double fontScale = 2.25; // 1.5 * 1.5 int boxThickness = 3; int fontThickness = 5; // ceil(3 * 1.5) int textOffset = 8; if (!detectionResult.empty()) { // Use nlohmann::json for proper parsing of nested alpr_info nlohmann::json jsonResult = nlohmann::json::parse(detectionResult); for (const auto& result : jsonResult["results"]) { const std::string class_name = result.value("class_name", ""); const int x = std::stoi(result.value("x", "0")); const int y = std::stoi(result.value("y", "0")); const int w = std::stoi(result.value("width", "0")); const int h = std::stoi(result.value("height", "0")); cv::rectangle(frame, cv::Rect(x, y, w, h), cv::Scalar(0, 255, 0), boxThickness); // Display ALPR structured info if available std::string displayText = class_name; if (result.contains("alpr_info")) { const auto& alpr = result["alpr_info"]; std::cout << "\n=== ALPR Result ===" << std::endl; std::cout << " Format: " << alpr.value("format", "") << std::endl; std::cout << " Valid: " << (alpr.value("valid", false) ? "YES" : "NO") << std::endl; std::cout << " Region: " << alpr.value("region", "") << std::endl; std::cout << " Classification: " << alpr.value("classification", "") << std::endl; std::cout << " Kana: " << alpr.value("kana", "") << std::endl; std::cout << " Designation: " << alpr.value("designation", "") << std::endl; std::cout << " Full Plate: " << class_name << std::endl; // Build a compact display string for the viewer displayText = alpr.value("region", "") + " " + alpr.value("classification", "") + " " + alpr.value("kana", "") + " " + alpr.value("designation", ""); } #ifdef WIN32 { int textH = (int)(fontScale * 30); int ty = y - textOffset - textH; if (ty < 0) ty = y + boxThickness + 2; putTextUnicode(frame, displayText, cv::Point(x, ty), fontScale, cv::Scalar(0, 0, 255), fontThickness); } #else cv::putText(frame, displayText, cv::Point(x, y - textOffset), cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 255), fontThickness, cv::LINE_AA); #endif } } // === Interactive Image Viewer (zoom/pan) === ImageViewerState vs; vs.image = frame; // Calculate scale to fit image into ~80% of a 1920x1080 screen const int maxWinW = 1600, maxWinH = 900; double scaleX = (double)maxWinW / frame.cols; double scaleY = (double)maxWinH / frame.rows; vs.fitScale = std::min(scaleX, scaleY); if (vs.fitScale > 1.0) vs.fitScale = 1.0; // Don't upscale small images vs.dispW = (int)(frame.cols * vs.fitScale); vs.dispH = (int)(frame.rows * vs.fitScale); const std::string winName = "ANSOCR [Scroll=Zoom | Drag=Pan | R=Reset | ESC=Quit]"; cv::namedWindow(winName, cv::WINDOW_AUTOSIZE); cv::setMouseCallback(winName, onViewerMouse, &vs); while (true) { if (vs.dirty) { double sc = vs.fitScale * vs.zoom; int srcW = std::min((int)(vs.dispW / sc), vs.image.cols); int srcH = std::min((int)(vs.dispH / sc), vs.image.rows); if (srcW <= 0) srcW = 1; if (srcH <= 0) srcH = 1; int sx = std::clamp((int)vs.panX, 0, std::max(0, vs.image.cols - srcW)); int sy = std::clamp((int)vs.panY, 0, std::max(0, vs.image.rows - srcH)); vs.panX = sx; vs.panY = sy; cv::Mat roi = vs.image(cv::Rect(sx, sy, srcW, srcH)); cv::Mat display; cv::resize(roi, display, cv::Size(vs.dispW, vs.dispH), 0, 0, (sc >= 1.0) ? cv::INTER_LINEAR : cv::INTER_AREA); // Overlay zoom info cv::putText(display, cv::format("Zoom: %.1fx (%dx%d)", vs.zoom, vs.image.cols, vs.image.rows), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 0), 2); cv::imshow(winName, display); vs.dirty = false; } int key = cv::waitKey(30) & 0xFF; if (key == 27) break; // ESC to quit if (key == 'r' || key == 'R') { vs.zoom = 1.0; vs.panX = 0; vs.panY = 0; vs.dirty = true; // Reset view } if (key == '+' || key == '=') { vs.zoom = std::min(vs.zoom * 1.25, 50.0); vs.dirty = true; // Keyboard zoom in } if (key == '-' || key == '_') { vs.zoom = std::max(vs.zoom * 0.8, 0.2); vs.dirty = true; // Keyboard zoom out } // Quit when user closes the window (clicks X button) if (cv::getWindowProperty(winName, cv::WND_PROP_VISIBLE) < 1) break; } // Release OCR handle BEFORE OpenCV cleanup to avoid CUDA teardown errors // (TensorRT needs the CUDA context alive to free GPU resources cleanly) ReleaseANSOCRHandle(&infHandle); cv::destroyAllWindows(); frame.release(); input.release(); return 0; } int main() { #ifdef WIN32 SetConsoleOutputCP(CP_UTF8); SetConsoleCP(CP_UTF8); #endif TestOCRv5mage(); //ANSOCR_VideoTest(); // TestOCRImage(); /* for (int i = 0; i < 20; i++) { TestOCRImage(); }*/ return 0; }