Add unit tests
This commit is contained in:
404
tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp
Normal file
404
tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp
Normal file
@@ -0,0 +1,404 @@
|
||||
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <exception>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/core.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

#include "boost/property_tree/ptree.hpp"
#include "boost/property_tree/json_parser.hpp"
#include "boost/foreach.hpp"
#include "boost/optional.hpp"

#include <ANSOCRBase.h>
|
||||
|
||||
// Native path separator for the build target.
// `_WIN32` is the macro Windows compilers predefine themselves; plain `WIN32`
// only exists when a project setting or <windows.h> defines it, so it is kept
// purely as a fallback for builds that relied on the old spelling.
#if defined(_WIN32) || defined(WIN32)
constexpr char sep = '\\';
#else
constexpr char sep = '/';
#endif
|
||||
|
||||
using namespace cv;
|
||||
template <typename T>
|
||||
T GetData(const boost::property_tree::ptree& pt, const std::string& key)
|
||||
{
|
||||
T ret;
|
||||
if (boost::optional<T> data = pt.get_optional<T>(key))
|
||||
{
|
||||
ret = data.get();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
/// Copies the pixel data of `image` into a freshly allocated byte buffer.
///
/// @param image        Source matrix (taken by value; only the header is copied).
/// @param bufferLengh  Receives the number of bytes written to the buffer.
/// @return Buffer allocated with `new[]`; the caller owns it and must release
///         it with `delete[]`.
unsigned char* CVMatToBytes(cv::Mat image, unsigned int& bufferLengh)
{
    // A Mat that views a sub-region may have gaps between rows. A single
    // memcpy is only valid on densely packed data, so take a packed copy first.
    if (!image.empty() && !image.isContinuous())
    {
        image = image.clone();
    }

    // size_t avoids the signed overflow the previous `int` computation could hit.
    const std::size_t size = image.total() * image.elemSize();
    std::cout << "size:" << size << std::endl;

    unsigned char* bytes = new unsigned char[size];
    if (size > 0)
    {
        // Skipped for an empty Mat, whose data pointer is null.
        std::memcpy(bytes, image.data, size);
    }
    bufferLengh = static_cast<unsigned int>(size);
    return bytes;
}
|
||||
int TestOCRImage() {
|
||||
|
||||
ANSCENTER::ANSOCRBase* infHandle = nullptr;
|
||||
|
||||
boost::property_tree::ptree root;
|
||||
boost::property_tree::ptree detectionObjects;
|
||||
boost::property_tree::ptree pt;
|
||||
std::filesystem::path currentPath = std::filesystem::current_path();
|
||||
std::cout << "Current working directory: " << currentPath << std::endl;
|
||||
std::string licenseKey = "";
|
||||
std::string modelFilePath = currentPath.string() + "\\ansocrmodels.zip";
|
||||
std::string imagePath = currentPath.string() + "\\ocrsample.png";
|
||||
std::string defaultDir = "C:\\Programs\\DemoAssets\\ANSAIModels";
|
||||
if (!std::filesystem::exists(modelFilePath)) modelFilePath = defaultDir + "\\ANS_GenericOCR_v1.0.zip";
|
||||
if (!std::filesystem::exists(imagePath)) imagePath = defaultDir + "\\ocrsample.png";
|
||||
imagePath = "C:\\Projects\\ANSVIS\\Documentation\\TestImages\\OCR\\ocrsample.png";
|
||||
|
||||
int language = 0; // CUSTOM
|
||||
int engine = 0;
|
||||
|
||||
int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine);
|
||||
std::cout << "ANSOCR Engine Creation:" << createResult << std::endl;
|
||||
|
||||
cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR);
|
||||
cv::Mat frame = input.clone();
|
||||
int height = frame.rows;
|
||||
int width = frame.cols;
|
||||
unsigned int bufferLength = 0;
|
||||
unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);
|
||||
std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);
|
||||
std::cout << "Result:" << detectionResult;
|
||||
delete jpeg_string;
|
||||
if (!detectionResult.empty()) {
|
||||
pt.clear();
|
||||
std::stringstream ss;
|
||||
ss.clear();
|
||||
ss << detectionResult;
|
||||
boost::property_tree::read_json(ss, pt);
|
||||
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
|
||||
{
|
||||
const boost::property_tree::ptree& result = child.second;
|
||||
const auto class_id = GetData<int>(result, "class_id");
|
||||
const auto class_name = GetData<std::string>(result, "class_name");
|
||||
const auto x = GetData<float>(result, "x");
|
||||
const auto y = GetData<float>(result, "y");
|
||||
const auto width = GetData<float>(result, "width");
|
||||
const auto height = GetData<float>(result, "height");
|
||||
cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2);
|
||||
cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5),
|
||||
0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA);
|
||||
}
|
||||
}
|
||||
cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller
|
||||
frame.release();
|
||||
|
||||
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int ANSOCR_VideoTest() {
|
||||
// Get the current working directory
|
||||
std::filesystem::path currentPath = std::filesystem::current_path();
|
||||
// Print the current working directory
|
||||
std::cout << "Current working directory: " << currentPath << std::endl;
|
||||
boost::property_tree::ptree root;
|
||||
boost::property_tree::ptree detectionObjects;
|
||||
boost::property_tree::ptree pt;
|
||||
|
||||
ANSCENTER::ANSOCRBase* infHandle;
|
||||
std::string licenseKey = "";
|
||||
std::string modelFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\ANS_GenericOCR_v1.0.zip";
|
||||
std::string videoFilePath = "C:\\Programs\\DemoAssets\\Videos\\ALRP\\ALPR1.mp4";
|
||||
cv::VideoCapture capture(videoFilePath);
|
||||
if (!capture.isOpened()) {
|
||||
printf("could not read this video file...\n");
|
||||
return -1;
|
||||
}
|
||||
int language = 0;// CUSTOM
|
||||
int engine = 0;
|
||||
int createResult = CreateANSOCRHandle(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "", language, engine);
|
||||
|
||||
while (true)
|
||||
{
|
||||
cv::Mat frame;
|
||||
if (!capture.read(frame)) // if not success, break loop
|
||||
{
|
||||
std::cout << "\n Cannot read the video file. please check your video.\n";
|
||||
break;
|
||||
}
|
||||
auto start = std::chrono::system_clock::now();
|
||||
unsigned int bufferLength = 0;
|
||||
unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);
|
||||
int height = frame.rows;
|
||||
int width = frame.cols;
|
||||
std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);
|
||||
|
||||
if (!detectionResult.empty()) {
|
||||
pt.clear();
|
||||
std::stringstream ss;
|
||||
ss.clear();
|
||||
ss << detectionResult;
|
||||
boost::property_tree::read_json(ss, pt);
|
||||
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
|
||||
{
|
||||
const boost::property_tree::ptree& result = child.second;
|
||||
const auto class_id = GetData<int>(result, "class_id");
|
||||
const auto class_name = GetData<std::string>(result, "class_name");
|
||||
const auto x = GetData<float>(result, "x");
|
||||
const auto y = GetData<float>(result, "y");
|
||||
const auto width = GetData<float>(result, "width");
|
||||
const auto height = GetData<float>(result, "height");
|
||||
cv::rectangle(frame, cv::Rect(x, y, width, height), 123, 2);
|
||||
cv::putText(frame, cv::format("%s", class_name), cv::Point(x, y - 5),
|
||||
0, 2.0, cv::Scalar(0, 0, 255), 3, cv::LINE_AA);
|
||||
}
|
||||
}
|
||||
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
printf("Time = %lld ms\n", static_cast<long long int>(elapsed.count()));
|
||||
// cv::resize(frame, frame, cv::Size(frame.cols / 2, frame.rows / 2)); // to half size or even smaller
|
||||
cv::imshow("ANSOCR", frame);
|
||||
if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit
|
||||
{
|
||||
std::cout << "End of inserting faces.\n";
|
||||
}
|
||||
frame.release();
|
||||
delete jpeg_string;
|
||||
}
|
||||
capture.release();
|
||||
cv::destroyAllWindows();
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
}
|
||||
|
||||
// Viewer state for zoom/pan
|
||||
struct ImageViewerState {
|
||||
cv::Mat image; // Full-resolution annotated image
|
||||
double zoom = 1.0; // 1.0 = fit-to-screen
|
||||
double panX = 0.0; // Top-left corner in original image coords
|
||||
double panY = 0.0;
|
||||
int dispW, dispH; // Display window size (pixels)
|
||||
double fitScale; // Base scale to fit image into window
|
||||
bool dragging = false;
|
||||
int dragX0, dragY0;
|
||||
double panX0, panY0;
|
||||
bool dirty = true;
|
||||
};
|
||||
|
||||
static void onViewerMouse(int event, int x, int y, int flags, void* userdata) {
|
||||
ImageViewerState& s = *(ImageViewerState*)userdata;
|
||||
if (event == cv::EVENT_MOUSEWHEEL) {
|
||||
double factor = (cv::getMouseWheelDelta(flags) > 0) ? 1.25 : 0.8;
|
||||
// Zoom centered on mouse cursor position
|
||||
double sc = s.fitScale * s.zoom;
|
||||
double imgX = s.panX + x / sc;
|
||||
double imgY = s.panY + y / sc;
|
||||
s.zoom = std::clamp(s.zoom * factor, 0.2, 50.0);
|
||||
double newSc = s.fitScale * s.zoom;
|
||||
s.panX = imgX - x / newSc;
|
||||
s.panY = imgY - y / newSc;
|
||||
s.dirty = true;
|
||||
}
|
||||
else if (event == cv::EVENT_LBUTTONDOWN) {
|
||||
s.dragging = true;
|
||||
s.dragX0 = x; s.dragY0 = y;
|
||||
s.panX0 = s.panX; s.panY0 = s.panY;
|
||||
}
|
||||
else if (event == cv::EVENT_MOUSEMOVE && s.dragging) {
|
||||
double sc = s.fitScale * s.zoom;
|
||||
s.panX = s.panX0 - (x - s.dragX0) / sc;
|
||||
s.panY = s.panY0 - (y - s.dragY0) / sc;
|
||||
s.dirty = true;
|
||||
}
|
||||
else if (event == cv::EVENT_LBUTTONUP) {
|
||||
s.dragging = false;
|
||||
}
|
||||
}
|
||||
|
||||
int TestOCRv5mage() {
|
||||
|
||||
ANSCENTER::ANSOCRBase* infHandle = nullptr;
|
||||
|
||||
boost::property_tree::ptree root;
|
||||
boost::property_tree::ptree detectionObjects;
|
||||
boost::property_tree::ptree pt;
|
||||
std::filesystem::path currentPath = std::filesystem::current_path();
|
||||
std::cout << "Current working directory: " << currentPath << std::endl;
|
||||
std::string licenseKey = "";
|
||||
std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericOCR_v2.0.zip";
|
||||
std::string imagePath = "E:\\Programs\\DemoAssets\\Images\\OCR\\ref3_000.bmp";
|
||||
|
||||
int language = 0; // CUSTOM
|
||||
int engine = 1;// GPU
|
||||
|
||||
// For high-resolution images with PP-OCRv5 server models, use higher limitSideLen
|
||||
// (default 960 downscales large images too aggressively, missing small text)
|
||||
int gpuId = 0;
|
||||
double detDBThresh = 0.3, detBoxThresh = 0.6, detUnclipRatio = 1.5;
|
||||
double clsThresh = 0.9;
|
||||
int useDilation = 0;
|
||||
int limitSideLen = 2560; // 2560 Higher resolution for server-grade detection
|
||||
|
||||
int createResult = CreateANSOCRHandleEx(&infHandle, licenseKey.c_str(), modelFilePath.c_str(), "",
|
||||
language, engine, gpuId, detDBThresh, detBoxThresh, detUnclipRatio, clsThresh, useDilation, limitSideLen);
|
||||
std::cout << "ANSOCR Engine Creation:" << createResult << std::endl;
|
||||
|
||||
cv::Mat input = cv::imread(imagePath, cv::IMREAD_COLOR);
|
||||
if (input.empty()) {
|
||||
std::cerr << "Failed to load image: " << imagePath << std::endl;
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
return -1;
|
||||
}
|
||||
cv::Mat frame = input.clone();
|
||||
int height = frame.rows;
|
||||
int width = frame.cols;
|
||||
unsigned int bufferLength = 0;
|
||||
unsigned char* jpeg_string = CVMatToBytes(frame, bufferLength);
|
||||
|
||||
// --- Warmup run (first run includes GPU kernel compilation / cache warmup) ---
|
||||
auto warmupStart = std::chrono::high_resolution_clock::now();
|
||||
std::string detectionResult = RunInferenceBinary(&infHandle, jpeg_string, width, height);
|
||||
auto warmupEnd = std::chrono::high_resolution_clock::now();
|
||||
double warmupMs = std::chrono::duration<double, std::milli>(warmupEnd - warmupStart).count();
|
||||
std::cout << "Warmup inference: " << warmupMs << " ms" << std::endl;
|
||||
std::cout << "Result:" << detectionResult << std::endl;
|
||||
|
||||
// --- Benchmark: run N iterations and report stats ---
|
||||
const int benchmarkIterations = 10;
|
||||
std::vector<double> times;
|
||||
times.reserve(benchmarkIterations);
|
||||
for (int i = 0; i < benchmarkIterations; ++i) {
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
std::string result = RunInferenceBinary(&infHandle, jpeg_string, width, height);
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||
times.push_back(ms);
|
||||
std::cout << " Run " << (i + 1) << "/" << benchmarkIterations << ": " << ms << " ms" << std::endl;
|
||||
}
|
||||
std::sort(times.begin(), times.end());
|
||||
double sum = std::accumulate(times.begin(), times.end(), 0.0);
|
||||
double avg = sum / benchmarkIterations;
|
||||
double median = (benchmarkIterations % 2 == 0)
|
||||
? (times[benchmarkIterations / 2 - 1] + times[benchmarkIterations / 2]) / 2.0
|
||||
: times[benchmarkIterations / 2];
|
||||
std::cout << "\n=== Benchmark (" << benchmarkIterations << " runs) ===" << std::endl;
|
||||
std::cout << " Avg: " << avg << " ms" << std::endl;
|
||||
std::cout << " Median: " << median << " ms" << std::endl;
|
||||
std::cout << " Min: " << times.front() << " ms" << std::endl;
|
||||
std::cout << " Max: " << times.back() << " ms" << std::endl;
|
||||
std::cout << " FPS: " << (1000.0 / avg) << std::endl;
|
||||
|
||||
delete[] jpeg_string;
|
||||
|
||||
// Draw OCR results on frame — 1.5x of original (was fontScale=1.5, thickness=3, offset=5)
|
||||
double fontScale = 2.25; // 1.5 * 1.5
|
||||
int boxThickness = 3;
|
||||
int fontThickness = 5; // ceil(3 * 1.5)
|
||||
int textOffset = 8;
|
||||
|
||||
if (!detectionResult.empty()) {
|
||||
pt.clear();
|
||||
std::stringstream ss;
|
||||
ss.clear();
|
||||
ss << detectionResult;
|
||||
boost::property_tree::read_json(ss, pt);
|
||||
BOOST_FOREACH(const boost::property_tree::ptree::value_type & child, pt.get_child("results"))
|
||||
{
|
||||
const boost::property_tree::ptree& result = child.second;
|
||||
const auto class_id = GetData<int>(result, "class_id");
|
||||
const auto class_name = GetData<std::string>(result, "class_name");
|
||||
const auto x = GetData<float>(result, "x");
|
||||
const auto y = GetData<float>(result, "y");
|
||||
const auto w = GetData<float>(result, "width");
|
||||
const auto h = GetData<float>(result, "height");
|
||||
cv::rectangle(frame, cv::Rect((int)x, (int)y, (int)w, (int)h),
|
||||
cv::Scalar(0, 255, 0), boxThickness);
|
||||
cv::putText(frame, class_name, cv::Point((int)x, (int)y - textOffset),
|
||||
cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 255), fontThickness, cv::LINE_AA);
|
||||
}
|
||||
}
|
||||
|
||||
// === Interactive Image Viewer (zoom/pan) ===
|
||||
ImageViewerState vs;
|
||||
vs.image = frame;
|
||||
|
||||
// Calculate scale to fit image into ~80% of a 1920x1080 screen
|
||||
const int maxWinW = 1600, maxWinH = 900;
|
||||
double scaleX = (double)maxWinW / frame.cols;
|
||||
double scaleY = (double)maxWinH / frame.rows;
|
||||
vs.fitScale = std::min(scaleX, scaleY);
|
||||
if (vs.fitScale > 1.0) vs.fitScale = 1.0; // Don't upscale small images
|
||||
vs.dispW = (int)(frame.cols * vs.fitScale);
|
||||
vs.dispH = (int)(frame.rows * vs.fitScale);
|
||||
|
||||
const std::string winName = "ANSOCR [Scroll=Zoom | Drag=Pan | R=Reset | ESC=Quit]";
|
||||
cv::namedWindow(winName, cv::WINDOW_AUTOSIZE);
|
||||
cv::setMouseCallback(winName, onViewerMouse, &vs);
|
||||
|
||||
while (true) {
|
||||
if (vs.dirty) {
|
||||
double sc = vs.fitScale * vs.zoom;
|
||||
int srcW = std::min((int)(vs.dispW / sc), vs.image.cols);
|
||||
int srcH = std::min((int)(vs.dispH / sc), vs.image.rows);
|
||||
if (srcW <= 0) srcW = 1;
|
||||
if (srcH <= 0) srcH = 1;
|
||||
int sx = std::clamp((int)vs.panX, 0, std::max(0, vs.image.cols - srcW));
|
||||
int sy = std::clamp((int)vs.panY, 0, std::max(0, vs.image.rows - srcH));
|
||||
vs.panX = sx;
|
||||
vs.panY = sy;
|
||||
|
||||
cv::Mat roi = vs.image(cv::Rect(sx, sy, srcW, srcH));
|
||||
cv::Mat display;
|
||||
cv::resize(roi, display, cv::Size(vs.dispW, vs.dispH), 0, 0,
|
||||
(sc >= 1.0) ? cv::INTER_LINEAR : cv::INTER_AREA);
|
||||
|
||||
// Overlay zoom info
|
||||
cv::putText(display, cv::format("Zoom: %.1fx (%dx%d)", vs.zoom, vs.image.cols, vs.image.rows),
|
||||
cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 0), 2);
|
||||
|
||||
cv::imshow(winName, display);
|
||||
vs.dirty = false;
|
||||
}
|
||||
|
||||
int key = cv::waitKey(30) & 0xFF;
|
||||
if (key == 27) break; // ESC to quit
|
||||
if (key == 'r' || key == 'R') {
|
||||
vs.zoom = 1.0; vs.panX = 0; vs.panY = 0; vs.dirty = true; // Reset view
|
||||
}
|
||||
if (key == '+' || key == '=') {
|
||||
vs.zoom = std::min(vs.zoom * 1.25, 50.0); vs.dirty = true; // Keyboard zoom in
|
||||
}
|
||||
if (key == '-' || key == '_') {
|
||||
vs.zoom = std::max(vs.zoom * 0.8, 0.2); vs.dirty = true; // Keyboard zoom out
|
||||
}
|
||||
|
||||
// Quit when user closes the window (clicks X button)
|
||||
if (cv::getWindowProperty(winName, cv::WND_PROP_VISIBLE) < 1) break;
|
||||
}
|
||||
|
||||
cv::destroyAllWindows();
|
||||
frame.release();
|
||||
input.release();
|
||||
ReleaseANSOCRHandle(&infHandle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
TestOCRv5mage();
|
||||
|
||||
//ANSOCR_VideoTest();
|
||||
// TestOCRImage();
|
||||
/* for (int i = 0; i < 20; i++) {
|
||||
TestOCRImage();
|
||||
}*/
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user