ANSCORE/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp

#include <iostream>
#include <opencv2/highgui.hpp>
#include "boost/property_tree/ptree.hpp"
#include "boost/property_tree/json_parser.hpp"
#include "boost/foreach.hpp"
#include "boost/optional.hpp"
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <iostream>
#include <vector>
#include <numeric>
#include <string>
#include <functional>
#include <filesystem>
#include <algorithm>
#include <chrono>
#include <ANSOCRBase.h>
#include "C:/ANSLibs/nlohmann/json.hpp"

#ifdef WIN32
#define NOMINMAX
#include <windows.h>
#endif

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

using namespace cv;
template <typename T>
T GetData(const boost::property_tree::ptree& pt, const std::string& key)
{
    T ret;
    if (boost::optional<T> data = pt.get_optional<T>(key))
    {
        ret = data.get();
    }
    return ret;
}
unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh)
{
    int size = int(image.total() * image.elemSize());
    std::cout << "size:" << size << std::endl;
    unsigned char* bytes = new unsigned char[size];  // you will have to delete[] that later
    std::memcpy(bytes, image.data, size * sizeof(unsigned char));
    bufferLengh = size * sizeof(unsigned char);
    return bytes;
}
int TestOCRImage() {

    ANSCENTER::ANSOCRBase* infHandle = nullptr;

    boost::property_tree::ptree root;
    boost::property_tree::ptree detectionObjects;
    boost::property_tree::ptree pt;
    std::filesystem::path currentPath = std::filesystem::current_path();
    std::cout << "Current working directory: " << currentPath << std::endl;
    std::string licenseKey = "";
    std::string modelFilePath = currentPath.string() + "\\ansocrmodels.zip";
    std::string imagePath = currentPath.string() + "\\ocrsample.png";
    std::string defaultDir = "C:\\Programs\\DemoAssets\\ANSAIModels";
    if (!std::filesystem::exists(modelFilePath)) modelFilePath = defaultDir + "\\ANS_GenericOCR_v1.0.zip";
    if (!std::filesystem::exists(imagePath)) imagePath = defaultDir + "\\ocrsample.png";
    imagePath = "C:\\Projects\\ANSVIS\\Documentation\\TestImages\\OCR\\ocrsample.png";

    int language = 0;  // CUSTOM
    int engine = 0;

    int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine);
    std::cout << "ANSOCR Engine Creation:" << createResult << std::endl;

    cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR);
    cv::Mat frame = input.clone();
    int height = frame.rows;
    int width = frame.cols;
    unsigned int bufferLength = 0;
    unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);
    std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);
    std::cout << "Result:" << detectionResult;
    delete jpeg_string;
    if (!detectionResult.empty()) {
        pt.clear();
        std::stringstream ss;
        ss.clear();
        ss << detectionResult;
        boost::property_tree::read_json(ss, pt);
        BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
        {
            const boost::property_tree::ptree& result = child.second;
            const auto class_id = GetData<int>(result, "class_id");
            const auto class_name = GetData<std::string>(result, "class_name");
            const auto x = GetData<float>(result, "x");
            const auto y = GetData<float>(result, "y");
            const auto width = GetData<float>(result, "width");
            const auto height = GetData<float>(result, "height");
            cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2);
            cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5),
                0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA);
        }
    }
    cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2));  // to half size or even smaller
    frame.release();


    ReleaseANSOCRHandle(&infHandle);
    return 0;

}

int ANSOCR_VideoTest() {
    // Get the current working directory
    std::filesystem::path currentPath = std::filesystem::current_path();
    // Print the current working directory
    std::cout << "Current working directory: " << currentPath << std::endl;
    boost::property_tree::ptree root;
    boost::property_tree::ptree detectionObjects;
    boost::property_tree::ptree pt;

    ANSCENTER::ANSOCRBase* infHandle;
    std::string licenseKey = "";
    std::string modelFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericOCR_v1.0.zip";
    std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\ALPR1.mp4";
    cv::VideoCapture capture(videoFilePath);
    if (!capture.isOpened()) {
        printf("could not read this video file...\n");
        return -1;
    }
    int language = 0;// CUSTOM
    int engine = 0;
    int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine);

    while (true)
    {
        cv::Mat frame;
        if (!capture.read(frame)) // if not success, break loop
        {
            std::cout << "\n Cannot read the video file. please check your video.\n";
            break;
        }
        auto start = std::chrono::system_clock::now();
        unsigned int bufferLength = 0;
        unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);
        int height = frame.rows;
        int width = frame.cols;
        std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);

        if (!detectionResult.empty()) {
            pt.clear();
            std::stringstream ss;
            ss.clear();
            ss << detectionResult;
            boost::property_tree::read_json(ss, pt);
            BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
            {
                const boost::property_tree::ptree& result = child.second;
                const auto class_id = GetData<int>(result, "class_id");
                const auto class_name = GetData<std::string>(result, "class_name");
                const auto x = GetData<float>(result, "x");
                const auto y = GetData<float>(result, "y");
                const auto width = GetData<float>(result, "width");
                const auto height = GetData<float>(result, "height");
                cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2);
                cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5),
                    0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA);
            }
        }

        auto end = std::chrono::system_clock::now();
        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
        printf("Time = %lld ms\n", static_cast<long long int>(elapsed.count()));
      //  cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller
        cv::imshow("ANSOCR", frame);
        if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit
        {
            std::cout << "End of inserting faces.\n";
        }
        frame.release();
        delete jpeg_string;
    }
    capture.release();
    cv::destroyAllWindows();
    ReleaseANSOCRHandle(&infHandle);
}

// Viewer state for zoom/pan
struct ImageViewerState {
    cv::Mat image;          // Full-resolution annotated image
    double zoom = 1.0;      // 1.0 = fit-to-screen
    double panX = 0.0;      // Top-left corner in original image coords
    double panY = 0.0;
    int dispW, dispH;       // Display window size (pixels)
    double fitScale;        // Base scale to fit image into window
    bool dragging = false;
    int dragX0, dragY0;
    double panX0, panY0;
    bool dirty = true;
};

#ifdef WIN32
// Render Unicode text onto a cv::Mat using Windows GDI
static void putTextUnicode(cv::Mat& img, const std::string& text, cv::Point org,
                           double fontScale, cv::Scalar color, int thickness) {
    // Convert UTF-8 to wide string
    int wlen = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, nullptr, 0);
    std::wstring wtext(wlen - 1, 0);
    MultiByteToWideChar(CP_UTF8, 0, text.c_str(), -1, &wtext[0], wlen);

    // Create a compatible DC and bitmap
    HDC hdc = CreateCompatibleDC(nullptr);
    int fontHeight = (int)(fontScale * 30);  // approximate pixel height

    HFONT hFont = CreateFontW(fontHeight, 0, 0, 0,
        (thickness > 2) ? FW_BOLD : FW_NORMAL,
        FALSE, FALSE, FALSE,
        DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS,
        ANTIALIASED_QUALITY, DEFAULT_PITCH | FF_SWISS, L"Yu Gothic UI");
    HFONT hOldFont = (HFONT)SelectObject(hdc, hFont);

    // Measure text size
    SIZE sz;
    GetTextExtentPoint32W(hdc, wtext.c_str(), (int)wtext.size(), &sz);

    // Create a DIB section so we can read pixels back
    BITMAPINFO bmi = {};
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmi.bmiHeader.biWidth = sz.cx;
    bmi.bmiHeader.biHeight = -sz.cy;  // top-down
    bmi.bmiHeader.biPlanes = 1;
    bmi.bmiHeader.biBitCount = 32;
    bmi.bmiHeader.biCompression = BI_RGB;
    void* bits = nullptr;
    HBITMAP hBmp = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0);
    HBITMAP hOldBmp = (HBITMAP)SelectObject(hdc, hBmp);

    // Draw text onto the bitmap
    SetBkMode(hdc, TRANSPARENT);
    SetTextColor(hdc, RGB((int)color[2], (int)color[1], (int)color[0]));  // BGR to RGB
    TextOutW(hdc, 0, 0, wtext.c_str(), (int)wtext.size());

    // Copy rendered text onto the cv::Mat
    cv::Mat textImg(sz.cy, sz.cx, CV_8UC4, bits);
    for (int row = 0; row < sz.cy; ++row) {
        for (int col = 0; col < sz.cx; ++col) {
            cv::Vec4b px = textImg.at<cv::Vec4b>(row, col);
            if (px[0] != 0 || px[1] != 0 || px[2] != 0) {
                int dy = org.y + row;
                int dx = org.x + col;
                if (dy >= 0 && dy < img.rows && dx >= 0 && dx < img.cols) {
                    img.at<cv::Vec3b>(dy, dx) = cv::Vec3b(px[0], px[1], px[2]);
                }
            }
        }
    }

    SelectObject(hdc, hOldBmp);
    SelectObject(hdc, hOldFont);
    DeleteObject(hBmp);
    DeleteObject(hFont);
    DeleteDC(hdc);
}
#endif

static void onViewerMouse(int event, int x, int y, int flags, void* userdata) {
    ImageViewerState& s = *(ImageViewerState*)userdata;
    if (event == cv::EVENT_MOUSEWHEEL) {
        double factor = (cv::getMouseWheelDelta(flags) > 0) ? 1.25 : 0.8;
        // Zoom centered on mouse cursor position
        double sc = s.fitScale * s.zoom;
        double imgX = s.panX + x / sc;
        double imgY = s.panY + y / sc;
        s.zoom = std::clamp(s.zoom * factor, 0.2, 50.0);
        double newSc = s.fitScale * s.zoom;
        s.panX = imgX - x / newSc;
        s.panY = imgY - y / newSc;
        s.dirty = true;
    }
    else if (event == cv::EVENT_LBUTTONDOWN) {
        s.dragging = true;
        s.dragX0 = x; s.dragY0 = y;
        s.panX0 = s.panX; s.panY0 = s.panY;
    }
    else if (event == cv::EVENT_MOUSEMOVE && s.dragging) {
        double sc = s.fitScale * s.zoom;
        s.panX = s.panX0 - (x - s.dragX0) / sc;
        s.panY = s.panY0 - (y - s.dragY0) / sc;
        s.dirty = true;
    }
    else if (event == cv::EVENT_LBUTTONUP) {
        s.dragging = false;
    }
}

int TestOCRv5mage() {

    ANSCENTER::ANSOCRBase* infHandle = nullptr;

    boost::property_tree::ptree root;
    boost::property_tree::ptree detectionObjects;
    boost::property_tree::ptree pt;
    std::filesystem::path currentPath = std::filesystem::current_path();
    std::cout << "Current working directory: " << currentPath << std::endl;
    std::string licenseKey = "";
    std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip";
    std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\0b9b013343f0bd8c7809653dfab16eac_jpeg.rf.1438e1237023ad7a254605942193df99.jpg";//"E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp";

    int language = 0;  // CUSTOM
    int engine = 1;// GPU

    // For high-resolution images with PP-OCRv5 server models, use higher limitSideLen
    // (default 960 downscales large images too aggressively, missing small text)
    int gpuId = 0;
    double detDBThresh = 0.5, detBoxThresh = 0.3, detUnclipRatio = 1.2;
    double clsThresh = 0.9;
    int useDilation = 1;
    int limitSideLen = 2560;  // 2560 Higher resolution for server-grade detection

    int createResult = CreateANSOCRHandleEx(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "",
        language, engine, gpuId, detDBThresh, detBoxThresh, detUnclipRatio, clsThresh, useDilation, limitSideLen);
    std::cout << "ANSOCR Engine Creation:" << createResult << std::endl;

    // Enable ALPR mode with Japanese plate format
    SetANSOCRMode(&infHandle, 1);           // OCR_ALPR
    SetANSOCRCountry(&infHandle, 5);        // JAPAN

    cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR);
    if (input.empty()) {
        std::cerr << "Failed to load image: " << imagePath << std::endl;
        ReleaseANSOCRHandle(&infHandle);
        return -1;
    }
    cv::Mat frame = input.clone();
    int height = frame.rows;
    int width = frame.cols;
    unsigned int bufferLength = 0;
    unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);

    // --- Warmup run (first run includes GPU kernel compilation / cache warmup) ---
    auto warmupStart = std::chrono::high_resolution_clock::now();
    std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);
    auto warmupEnd = std::chrono::high_resolution_clock::now();
    double warmupMs = std::chrono::duration<double, std::milli>(warmupEnd - warmupStart).count();
    std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl;
    std::cout << "ALPR Result:" << detectionResult << std::endl;

    // --- Benchmark: run N iterations and report stats ---
    const int benchmarkIterations = 10;
    std::vector<double> times;
    times.reserve(benchmarkIterations);
    for (int i = 0; i < benchmarkIterations; ++i) {
        auto t0 = std::chrono::high_resolution_clock::now();
        std::string result = RunInferenceBinary(&infHandle, jpeg_string, width, height);
        auto t1 = std::chrono::high_resolution_clock::now();
        double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
        times.push_back(ms);
        std::cout << "  Run " << (i + 1) << "/" << benchmarkIterations << ": " << ms << " ms" << std::endl;
    }
    std::sort(times.begin(), times.end());
    double sum = std::accumulate(times.begin(), times.end(), 0.0);
    double avg = sum / benchmarkIterations;
    double median = (benchmarkIterations % 2 == 0)
        ? (times[benchmarkIterations / 2 - 1] + times[benchmarkIterations / 2]) / 2.0
        : times[benchmarkIterations / 2];
    std::cout << "\n=== Benchmark (" << benchmarkIterations << " runs) ===" << std::endl;
    std::cout << "  Avg:    " << avg << " ms" << std::endl;
    std::cout << "  Median: " << median << " ms" << std::endl;
    std::cout << "  Min:    " << times.front() << " ms" << std::endl;
    std::cout << "  Max:    " << times.back() << " ms" << std::endl;
    std::cout << "  FPS:    " << (1000.0 / avg) << std::endl;

    delete[] jpeg_string;

    // Draw OCR results on frame — 1.5x of original (was fontScale=1.5, thickness=3, offset=5)
    double fontScale = 2.25;   // 1.5 * 1.5
    int boxThickness = 3;
    int fontThickness = 5;     // ceil(3 * 1.5)
    int textOffset = 8;

    if (!detectionResult.empty()) {
        // Use nlohmann::json for proper parsing of nested alpr_info
        nlohmann::json jsonResult = nlohmann::json::parse(detectionResult);
        for (const auto& result : jsonResult["results"]) {
            const std::string class_name = result.value("class_name", "");
            const int x = std::stoi(result.value("x", "0"));
            const int y = std::stoi(result.value("y", "0"));
            const int w = std::stoi(result.value("width", "0"));
            const int h = std::stoi(result.value("height", "0"));

            cv::rectangle(frame, cv::Rect(x, y, w, h),
                cv::Scalar(0, 255, 0), boxThickness);

            // Display ALPR structured info from extra_info field
            std::string displayText = class_name;
            std::string extraInfo = result.value("extra_info", "");
            if (!extraInfo.empty()) {
                try {
                    nlohmann::json alpr = nlohmann::json::parse(extraInfo);
                    if (alpr.contains("format")) {
                        std::cout << "\n=== ALPR Result ===" << std::endl;
                        std::cout << "  Format:         " << alpr.value("format", "") << std::endl;
                        std::cout << "  Valid:           " << (alpr.value("valid", false) ? "YES" : "NO") << std::endl;
                        std::cout << "  Region:          " << alpr.value("region", "") << std::endl;
                        std::cout << "  Classification:  " << alpr.value("classification", "") << std::endl;
                        std::cout << "  Kana:            " << alpr.value("kana", "") << std::endl;
                        std::cout << "  Designation:     " << alpr.value("designation", "") << std::endl;
                        std::cout << "  Full Plate:      " << class_name << std::endl;

                        displayText = alpr.value("region", "") + " " +
                            alpr.value("classification", "") + " " +
                            alpr.value("kana", "") + " " +
                            alpr.value("designation", "");
                    }
                } catch (...) {}
            }

#ifdef WIN32
            {
                int textH = (int)(fontScale * 30);
                int ty = y - textOffset - textH;
                if (ty < 0) ty = y + boxThickness + 2;
                putTextUnicode(frame, displayText, cv::Point(x, ty),
                    fontScale, cv::Scalar(0, 0, 255), fontThickness);
            }
#else
            cv::putText(frame, displayText, cv::Point(x, y - textOffset),
                cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 255), fontThickness, cv::LINE_AA);
#endif
        }
    }

    // === Interactive Image Viewer (zoom/pan) ===
    ImageViewerState vs;
    vs.image = frame;

    // Calculate scale to fit image into ~80% of a 1920x1080 screen
    const int maxWinW = 1600, maxWinH = 900;
    double scaleX = (double)maxWinW / frame.cols;
    double scaleY = (double)maxWinH / frame.rows;
    vs.fitScale = std::min(scaleX, scaleY);
    if (vs.fitScale > 1.0) vs.fitScale = 1.0;  // Don't upscale small images
    vs.dispW = (int)(frame.cols * vs.fitScale);
    vs.dispH = (int)(frame.rows * vs.fitScale);

    const std::string winName = "ANSOCR  [Scroll=Zoom | Drag=Pan | R=Reset | ESC=Quit]";
    cv::namedWindow(winName, cv::WINDOW_AUTOSIZE);
    cv::setMouseCallback(winName, onViewerMouse, &vs);

    while (true) {
        if (vs.dirty) {
            double sc = vs.fitScale * vs.zoom;
            int srcW = std::min((int)(vs.dispW / sc), vs.image.cols);
            int srcH = std::min((int)(vs.dispH / sc), vs.image.rows);
            if (srcW <= 0) srcW = 1;
            if (srcH <= 0) srcH = 1;
            int sx = std::clamp((int)vs.panX, 0, std::max(0, vs.image.cols - srcW));
            int sy = std::clamp((int)vs.panY, 0, std::max(0, vs.image.rows - srcH));
            vs.panX = sx;
            vs.panY = sy;

            cv::Mat roi = vs.image(cv::Rect(sx, sy, srcW, srcH));
            cv::Mat display;
            cv::resize(roi, display, cv::Size(vs.dispW, vs.dispH), 0, 0,
                (sc >= 1.0) ? cv::INTER_LINEAR : cv::INTER_AREA);

            // Overlay zoom info
            cv::putText(display, cv::format("Zoom: %.1fx  (%dx%d)", vs.zoom, vs.image.cols, vs.image.rows),
                cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 0), 2);

            cv::imshow(winName, display);
            vs.dirty = false;
        }

        int key = cv::waitKey(30) & 0xFF;
        if (key == 27) break;  // ESC to quit
        if (key == 'r' || key == 'R') {
            vs.zoom = 1.0; vs.panX = 0; vs.panY = 0; vs.dirty = true;  // Reset view
        }
        if (key == '+' || key == '=') {
            vs.zoom = std::min(vs.zoom * 1.25, 50.0); vs.dirty = true;  // Keyboard zoom in
        }
        if (key == '-' || key == '_') {
            vs.zoom = std::max(vs.zoom * 0.8, 0.2); vs.dirty = true;  // Keyboard zoom out
        }

        // Quit when user closes the window (clicks X button)
        if (cv::getWindowProperty(winName, cv::WND_PROP_VISIBLE) < 1) break;
    }

    // Release OCR handle BEFORE OpenCV cleanup to avoid CUDA teardown errors
    // (TensorRT needs the CUDA context alive to free GPU resources cleanly)
    ReleaseANSOCRHandle(&infHandle);
    cv::destroyAllWindows();
    frame.release();
    input.release();
    return 0;
}

int main()
{
#ifdef WIN32
    SetConsoleOutputCP(CP_UTF8);
    SetConsoleCP(CP_UTF8);
#endif
    TestOCRv5mage();

	//ANSOCR_VideoTest();
   // TestOCRImage();
 /*   for (int i = 0; i < 20; i++) {
		TestOCRImage();
    }*/
     return 0;
}