From 808df4656d9394330017cf5fdc337f4d84724170 Mon Sep 17 00:00:00 2001 From: Tuan Nghia Nguyen Date: Wed, 15 Apr 2026 23:00:19 +1000 Subject: [PATCH] Fix ALPR Batch and memory leak --- .claude/settings.local.json | 16 +- cmake/Dependencies.cmake | 4 +- modules/ANSCV/ANSOpenCV.cpp | 1011 ++++++++++++++++++++- modules/ANSCV/ANSOpenCV.h | 24 + modules/ANSLPR/ANSLPR_OCR.cpp | 756 ++++++++++++++- modules/ANSLPR/ANSLPR_OCR.h | 73 ++ tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp | 12 +- tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp | 4 +- 8 files changed, 1846 insertions(+), 54 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 1c8e0c7..ab66893 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -151,7 +151,21 @@ "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . --target ANSOCR 2>&1 | Select-Object -Last 60 }')", "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . --target ANSLPR 2>&1 | Select-Object -Last 40 }')", "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . 
--target ANSLPR 2>&1 | Select-Object -Last 30 }')", - "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; dumpbin /exports '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release\\\\bin\\\\ANSLPR.dll'\\\\'' 2>&1 | Select-String '\\\\''RunInferencesBatch'\\\\'' }')" + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; dumpbin /exports '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release\\\\bin\\\\ANSLPR.dll'\\\\'' 2>&1 | Select-String '\\\\''RunInferencesBatch'\\\\'' }')", + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; dumpbin /exports '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release\\\\bin\\\\ANSLPR.dll'\\\\'' 2>&1 | Select-String '\\\\''RunInferencesBatch|RectifyPlateROI|RecoverKanaFromBottomHalf'\\\\'' }')", + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . 
--target ANSLPR 2>&1 | Select-Object -Last 20 }')", + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; \\(Get-Item '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\modules\\\\ANSLPR\\\\ANSLPR_OCR.cpp'\\\\''\\).LastWriteTime; \\(Get-Item '\\\\''.\\\\bin\\\\ANSLPR.dll'\\\\''\\).LastWriteTime }')", + "Bash(powershell -Command \"\\(Get-Item 'C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\modules\\\\ANSLPR\\\\ANSLPR_OCR.cpp'\\).LastWriteTime = Get-Date\")", + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . --target ANSLPR 2>&1 | Select-Object -Last 8 }')", + "Bash(powershell -Command '& { & '\\\\''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\Common7\\\\Tools\\\\Launch-VsDevShell.ps1'\\\\'' -Arch amd64 -HostArch amd64 > $null 2>&1; Set-Location '\\\\''C:\\\\Projects\\\\CLionProjects\\\\ANSCORE\\\\cmake-build-release'\\\\''; cmake --build . 
--target ANSLPR 2>&1 | Select-Object -Last 6 }')", + "Bash(tasklist /M ANSLPR.dll)", + "Bash(cmd.exe /c \"tasklist /M ANSLPR.dll\")", + "Read(//c/ANSLibs/**)", + "Read(//c/ANSLibs/ffmpeg/**)", + "Bash(where ffmpeg:*)", + "Bash(grep -n \"ImagesToMP4FF\\\\|//bool ANSOPENCV::ImagesToMP4\" \"C:/Projects/CLionProjects/ANSCORE/modules/ANSCV/ANSOpenCV.cpp\")", + "Read(//c/Windows/System32/**)", + "Read(//c//**)" ] } } diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 8619c95..acd7c67 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -168,12 +168,12 @@ else() if(WIN32) target_link_libraries(ffmpeg INTERFACE avcodec.lib avdevice.lib avfilter.lib avformat.lib - avutil.lib postproc.lib swresample.lib swscale.lib + avutil.lib swresample.lib swscale.lib ) else() target_link_libraries(ffmpeg INTERFACE avcodec avdevice avfilter avformat - avutil postproc swresample swscale + avutil swresample swscale ) endif() message(STATUS "FFmpeg: using ANSLibs at ${FFMPEG_INCLUDE_DIR}") diff --git a/modules/ANSCV/ANSOpenCV.cpp b/modules/ANSCV/ANSOpenCV.cpp index c1f8f36..8540acf 100644 --- a/modules/ANSCV/ANSOpenCV.cpp +++ b/modules/ANSCV/ANSOpenCV.cpp @@ -2,6 +2,7 @@ #include "ANSMatRegistry.h" #include #include +#include #include #include #include "boost/property_tree/ptree.hpp" @@ -35,6 +36,9 @@ extern "C" { #include #include #include +#include +#include +#include } std::mutex imageMutex; // Global mutex for thread safety std::timed_mutex timeImageMutex; @@ -1907,31 +1911,104 @@ namespace ANSCENTER mp4OutputPath += ".mp4"; } - // Try codecs in order of preference + // ---- libx264 tuning for smaller files at preserved quality ---- + // OpenCV's FFmpeg wrapper (>= 4.6) reads OPENCV_FFMPEG_WRITER_OPTIONS + // at open() time and forwards the options to the encoder. Key points: + // + // video_codec;libx264 — FORCE libx264 encoder by name. 
On Windows, + // the default H.264 encoder registered in opencv_videoio_ffmpeg is + // often OpenH264, which silently ignores crf/preset/tune. Without + // this override, the tuning below has no effect. + // crf;26 — smaller than default 23, still visually good + // preset;slow — better compression efficiency + // tune;stillimage — optimised for mostly-static frames + // movflags;+faststart — moov atom at front (good for HTTP streaming) + // + // We set the env var only around the H.264 codec attempts and restore + // it immediately so the MP4V/MJPG fallback path is never polluted by + // video_codec;libx264 (which would be a codec-id mismatch). + constexpr const char* kWriterOptsEnv = "OPENCV_FFMPEG_WRITER_OPTIONS"; + constexpr const char* kX264WriterOpts = + "video_codec;libx264|crf;26|preset;slow|tune;stillimage|movflags;+faststart"; + + std::string prevWriterOpts; + bool hadPrevWriterOpts = false; + if (const char* prev = std::getenv(kWriterOptsEnv)) { + prevWriterOpts = prev; + hadPrevWriterOpts = true; + } + + auto setX264Opts = [&]() { + _putenv_s(kWriterOptsEnv, kX264WriterOpts); + }; + auto restoreOpts = [&]() { + if (hadPrevWriterOpts) { + _putenv_s(kWriterOptsEnv, prevWriterOpts.c_str()); + } else { + _putenv_s(kWriterOptsEnv, ""); + } + }; + + // Try codecs in order of preference. + // avc1 / H264 / x264 route to libx264 via FFmpeg when forced via the + // video_codec option above. MP4V and MJPG are last-resort fallbacks — + // they produce substantially larger files. 
const std::vector> codecs = { - {"x264", cv::VideoWriter::fourcc('x', '2', '6', '4')}, + {"avc1", cv::VideoWriter::fourcc('a', 'v', 'c', '1')}, {"H264", cv::VideoWriter::fourcc('H', '2', '6', '4')}, + {"x264", cv::VideoWriter::fourcc('x', '2', '6', '4')}, {"MP4V", cv::VideoWriter::fourcc('M', 'P', '4', 'V')}, {"MJPG", cv::VideoWriter::fourcc('M', 'J', 'P', 'G')} }; bool codecFound = false; + std::string usedCodec; for (const auto& [name, fourcc] : codecs) { + const bool isH264Family = + (name == "avc1" || name == "H264" || name == "x264"); + + if (isH264Family) { + setX264Opts(); + } else { + restoreOpts(); + } + videoWriter.open(mp4OutputPath, fourcc, fps, cv::Size(videoWidth, videoHeight), true); + if (videoWriter.isOpened()) { - std::cout << "Using codec: " << name << std::endl; + std::cout << "Using codec: " << name + << (isH264Family ? " (libx264 forced, crf=26, preset=slow, tune=stillimage)" : "") + << std::endl; + usedCodec = name; codecFound = true; break; } videoWriter.release(); } + // Always restore the env var after we're done — don't leak the + // libx264 override into the rest of the process. + restoreOpts(); + if (!codecFound) { std::cerr << "Error: Could not open video writer with any codec!" << std::endl; return false; } + // Warn loudly if we fell through to a non-H.264 fallback — these + // produce files many times larger than H.264 at similar quality. + if (usedCodec == "MP4V" || usedCodec == "MJPG") { + std::cerr << "Warning: H.264 (libx264) encoder unavailable, fell back to " + << usedCodec << ". Output file will be significantly larger. " + << "Check that opencv_videoio_ffmpeg is present and that the " + << "bundled FFmpeg was built with libx264 support." << std::endl; + } + + // Hint for non-FFmpeg backends (e.g. MJPG fallback on some platforms). + // Ignored by libx264 which is controlled via the env var above. 
+ videoWriter.set(cv::VIDEOWRITER_PROP_QUALITY, 85.0); + // Pre-allocate reusable matrix cv::Mat img; cv::Mat resizedImg; @@ -1976,6 +2053,824 @@ namespace ANSCENTER return false; } } + // ================================================================ + // ImagesToMP4FF — Direct FFmpeg (libav*) encoder pipeline + // ================================================================ + // Encoder preference: libx265 (HEVC) > libx264 (H.264) > mpeg4. + // Produces substantially smaller files than ImagesToMP4 at + // equivalent quality because we drive libx265/libx264 directly, + // bypassing OpenCV's VideoWriter + opencv_videoio_ffmpeg wrapper + // (which on Windows often ends up using OpenH264 with no tunables). + // + // Threading: mutex-per-output-file, independent of ImagesToMP4's + // mutex map, so the two functions can coexist without interference. + // ================================================================ + bool ANSOPENCV::ImagesToMP4FF(const std::string& imageFolder, + const std::string& outputVideoPath, + int maxWidth, int fps) { + + // ---- Per-output-file mutex (independent of ImagesToMP4's map) ---- + static std::mutex mapMutexFF; + static std::map> fileMutexesFF; + + std::shared_ptr fileMutex; + { + std::lock_guard lock(mapMutexFF); + std::string canonicalPath = std::filesystem::canonical( + std::filesystem::path(outputVideoPath).parent_path()).string() + + "/" + std::filesystem::path(outputVideoPath).filename().string(); + + if (fileMutexesFF.find(canonicalPath) == fileMutexesFF.end()) { + fileMutexesFF[canonicalPath] = std::make_unique(); + } + fileMutex = std::shared_ptr( + fileMutexesFF[canonicalPath].get(), [](std::timed_mutex*) {}); + } + + std::unique_lock lock(*fileMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Another thread is writing to " << outputVideoPath << std::endl; + return false; + } + + // ---- RAII bag for FFmpeg resources 
------------------------------- + struct FFState { + AVFormatContext* fmt_ctx = nullptr; + AVCodecContext* codec_ctx = nullptr; + AVFrame* frame = nullptr; + AVPacket* pkt = nullptr; + SwsContext* sws = nullptr; + ~FFState() { + if (sws) { sws_freeContext(sws); sws = nullptr; } + if (frame) { av_frame_free(&frame); } + if (pkt) { av_packet_free(&pkt); } + if (codec_ctx) { avcodec_free_context(&codec_ctx); } + if (fmt_ctx) { + if (fmt_ctx->pb && !(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&fmt_ctx->pb); + } + avformat_free_context(fmt_ctx); + fmt_ctx = nullptr; + } + } + } ff; + + auto ffErr = [](int err) -> std::string { + char buf[AV_ERROR_MAX_STRING_SIZE] = {0}; + av_strerror(err, buf, sizeof(buf)); + return std::string(buf); + }; + + try { + // Clamp FPS to [1, 60] + fps = max(1, min(60, fps)); + + // ---- Collect image files ---- + std::vector imageFiles; + const std::vector extensions = { "*.jpg", "*.jpeg", "*.png", "*.bmp" }; + for (const auto& ext : extensions) { + std::vector temp; + cv::glob(imageFolder + "/" + ext, temp, false); + imageFiles.insert(imageFiles.end(), + std::make_move_iterator(temp.begin()), + std::make_move_iterator(temp.end())); + } + if (imageFiles.empty()) { + std::cerr << "Error: No images found in folder: " << imageFolder << std::endl; + return false; + } + std::sort(imageFiles.begin(), imageFiles.end()); + + // Cap at 5 minutes max duration + const int maxFrames = fps * 300; + if (static_cast(imageFiles.size()) > maxFrames) { + std::cout << "Warning: Truncating from " << imageFiles.size() + << " to " << maxFrames << " images (5-minute limit at " + << fps << " FPS)" << std::endl; + imageFiles.resize(maxFrames); + } + const int numImages = static_cast(imageFiles.size()); + + // ---- First image -> dimensions ---- + cv::Mat firstImage = cv::imread(imageFiles[0], cv::IMREAD_COLOR); + if (firstImage.empty()) { + std::cerr << "Error: Could not read first image: " << imageFiles[0] << std::endl; + return false; + } + + int 
videoWidth = firstImage.cols; + int videoHeight = firstImage.rows; + bool needsResize = false; + if (maxWidth > 0 && firstImage.cols > maxWidth) { + double scale = static_cast(maxWidth) / firstImage.cols; + videoWidth = static_cast(std::round(firstImage.cols * scale)); + videoHeight = static_cast(std::round(firstImage.rows * scale)); + needsResize = true; + } + // Force even dims (required for YUV420P) + videoWidth = (videoWidth / 2) * 2; + videoHeight = (videoHeight / 2) * 2; + if (videoWidth < 2 || videoHeight < 2) { + std::cerr << "Error: Resulting video dimensions too small: " + << videoWidth << "x" << videoHeight << std::endl; + return false; + } + + std::cout << "[FF Thread " << std::this_thread::get_id() << "] " + << "Image: " << firstImage.cols << "x" << firstImage.rows + << " -> Video: " << videoWidth << "x" << videoHeight + << " | " << numImages << " frames @ " << fps << " FPS" + << " (~" << (numImages / fps) << "s)" << std::endl; + firstImage.release(); + + // Ensure .mp4 extension + std::string mp4OutputPath = outputVideoPath; + if (mp4OutputPath.size() < 4 || + mp4OutputPath.substr(mp4OutputPath.size() - 4) != ".mp4") { + mp4OutputPath += ".mp4"; + } + + // ---- Encoder selection ---- + struct EncChoice { + const char* name; + const char* display; + const char* crf; // empty = no crf (bitrate-targeted) + const char* preset; // empty = no preset + const char* tune; // empty = no tune + bool isHEVC; + }; + const std::vector encChoices = { + // HEVC: CRF 28 ≈ H.264 CRF 23 in perceived quality. + // libx265 has no 'stillimage' tune; default is fine. + { "libx265", "HEVC/H.265 (libx265)", "28", "slow", "", true }, + // H.264: CRF 26 with stillimage tune for slideshow content. + { "libx264", "H.264 (libx264)", "26", "slow", "stillimage", false }, + // MPEG-4 Part 2 fallback: uses bitrate, not CRF. Larger output. 
+ { "mpeg4", "MPEG-4 Part 2 (native FFmpeg)", "", "", "", false }, + }; + + const AVCodec* codec = nullptr; + const EncChoice* chosen = nullptr; + for (const auto& e : encChoices) { + if (const AVCodec* c = avcodec_find_encoder_by_name(e.name)) { + codec = c; + chosen = &e; + break; + } + } + if (!codec) { + std::cerr << "[FFmpeg] Error: no suitable encoder available " + "(tried libx265, libx264, mpeg4). Bundled FFmpeg was built " + "without any of these encoders." << std::endl; + return false; + } + std::cout << "[FFmpeg] Using encoder: " << chosen->display; + if (chosen->crf[0]) std::cout << " crf=" << chosen->crf; + if (chosen->preset[0]) std::cout << " preset=" << chosen->preset; + if (chosen->tune[0]) std::cout << " tune=" << chosen->tune; + std::cout << std::endl; + + int ret = 0; + + // ---- 1. Allocate output format context (MP4 muxer) ---- + ret = avformat_alloc_output_context2(&ff.fmt_ctx, nullptr, "mp4", mp4OutputPath.c_str()); + if (ret < 0 || !ff.fmt_ctx) { + std::cerr << "[FFmpeg] avformat_alloc_output_context2 failed: " << ffErr(ret) << std::endl; + return false; + } + + // ---- 2. New stream (owned by fmt_ctx) ---- + AVStream* stream = avformat_new_stream(ff.fmt_ctx, nullptr); + if (!stream) { + std::cerr << "[FFmpeg] avformat_new_stream failed" << std::endl; + return false; + } + + // ---- 3. Codec context ---- + ff.codec_ctx = avcodec_alloc_context3(codec); + if (!ff.codec_ctx) { + std::cerr << "[FFmpeg] avcodec_alloc_context3 failed" << std::endl; + return false; + } + ff.codec_ctx->width = videoWidth; + ff.codec_ctx->height = videoHeight; + ff.codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; + ff.codec_ctx->time_base = AVRational{ 1, fps }; + ff.codec_ctx->framerate = AVRational{ fps, 1 }; + ff.codec_ctx->gop_size = fps * 2; // keyframe every ~2s + ff.codec_ctx->max_b_frames = 2; + + // HEVC in MP4: force 'hvc1' codec tag so QuickTime and Apple + // players accept the file. Without this, libx265 defaults to + // 'hev1' which some players refuse. 
+ if (chosen->isHEVC) { + ff.codec_ctx->codec_tag = MKTAG('h', 'v', 'c', '1'); + } + + // MP4 muxer requires global header flag for most codecs + if (ff.fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) { + ff.codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + // MPEG-4 fallback: set a modest target bitrate (no CRF support) + if (std::strcmp(chosen->name, "mpeg4") == 0) { + ff.codec_ctx->bit_rate = 1500000; // ~1.5 Mbps + } + + // ---- 4. Encoder-private options ---- + AVDictionary* encOpts = nullptr; + if (chosen->crf[0]) av_dict_set(&encOpts, "crf", chosen->crf, 0); + if (chosen->preset[0]) av_dict_set(&encOpts, "preset", chosen->preset, 0); + if (chosen->tune[0]) av_dict_set(&encOpts, "tune", chosen->tune, 0); + + // ---- 5. Open encoder ---- + ret = avcodec_open2(ff.codec_ctx, codec, &encOpts); + if (ret < 0) { + std::cerr << "[FFmpeg] avcodec_open2 failed: " << ffErr(ret) << std::endl; + av_dict_free(&encOpts); + return false; + } + // Report any options the encoder silently ignored + if (encOpts) { + AVDictionaryEntry* e = nullptr; + while ((e = av_dict_get(encOpts, "", e, AV_DICT_IGNORE_SUFFIX))) { + std::cerr << "[FFmpeg] Warning: encoder ignored option " + << e->key << "=" << e->value << std::endl; + } + av_dict_free(&encOpts); + } + + // ---- 6. Copy codec params -> stream ---- + ret = avcodec_parameters_from_context(stream->codecpar, ff.codec_ctx); + if (ret < 0) { + std::cerr << "[FFmpeg] avcodec_parameters_from_context failed: " << ffErr(ret) << std::endl; + return false; + } + stream->time_base = ff.codec_ctx->time_base; + + // ---- 7. Open output file ---- + if (!(ff.fmt_ctx->oformat->flags & AVFMT_NOFILE)) { + ret = avio_open(&ff.fmt_ctx->pb, mp4OutputPath.c_str(), AVIO_FLAG_WRITE); + if (ret < 0) { + std::cerr << "[FFmpeg] avio_open('" << mp4OutputPath << "') failed: " << ffErr(ret) << std::endl; + return false; + } + } + + // ---- 8. 
Write MP4 header with +faststart ---- + { + AVDictionary* muxOpts = nullptr; + av_dict_set(&muxOpts, "movflags", "+faststart", 0); + ret = avformat_write_header(ff.fmt_ctx, &muxOpts); + av_dict_free(&muxOpts); + if (ret < 0) { + std::cerr << "[FFmpeg] avformat_write_header failed: " << ffErr(ret) << std::endl; + return false; + } + } + + // ---- 9. Allocate AVFrame (YUV420P) + AVPacket ---- + ff.frame = av_frame_alloc(); + ff.pkt = av_packet_alloc(); + if (!ff.frame || !ff.pkt) { + std::cerr << "[FFmpeg] av_frame_alloc / av_packet_alloc failed" << std::endl; + return false; + } + ff.frame->format = AV_PIX_FMT_YUV420P; + ff.frame->width = videoWidth; + ff.frame->height = videoHeight; + ret = av_frame_get_buffer(ff.frame, 0); + if (ret < 0) { + std::cerr << "[FFmpeg] av_frame_get_buffer failed: " << ffErr(ret) << std::endl; + return false; + } + + // ---- 10. BGR24 -> YUV420P converter ---- + ff.sws = sws_getContext( + videoWidth, videoHeight, AV_PIX_FMT_BGR24, + videoWidth, videoHeight, AV_PIX_FMT_YUV420P, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (!ff.sws) { + std::cerr << "[FFmpeg] sws_getContext failed" << std::endl; + return false; + } + + // ---- Helper: drain any packets the encoder has ready ---- + auto drainPackets = [&]() -> bool { + for (;;) { + int r = avcodec_receive_packet(ff.codec_ctx, ff.pkt); + if (r == AVERROR(EAGAIN) || r == AVERROR_EOF) return true; + if (r < 0) { + std::cerr << "[FFmpeg] avcodec_receive_packet failed: " << ffErr(r) << std::endl; + return false; + } + av_packet_rescale_ts(ff.pkt, ff.codec_ctx->time_base, stream->time_base); + ff.pkt->stream_index = stream->index; + r = av_interleaved_write_frame(ff.fmt_ctx, ff.pkt); + av_packet_unref(ff.pkt); + if (r < 0) { + std::cerr << "[FFmpeg] av_interleaved_write_frame failed: " << ffErr(r) << std::endl; + return false; + } + } + }; + + // ---- 11. 
Encoding loop ---- + cv::Mat img; + cv::Mat resizedImg; + int64_t framesEncoded = 0; + + for (int i = 0; i < numImages; ++i) { + img = cv::imread(imageFiles[i], cv::IMREAD_COLOR); + if (img.empty()) { + std::cerr << "Warning: Could not read: " << imageFiles[i] << std::endl; + continue; + } + + cv::Mat* src = &img; + if (needsResize || img.cols != videoWidth || img.rows != videoHeight) { + cv::resize(img, resizedImg, cv::Size(videoWidth, videoHeight), + 0, 0, cv::INTER_AREA); + src = &resizedImg; + } + + ret = av_frame_make_writable(ff.frame); + if (ret < 0) { + std::cerr << "[FFmpeg] av_frame_make_writable failed: " << ffErr(ret) << std::endl; + return false; + } + + const uint8_t* srcSlices[4] = { src->data, nullptr, nullptr, nullptr }; + int srcStride[4] = { static_cast(src->step[0]), 0, 0, 0 }; + sws_scale(ff.sws, srcSlices, srcStride, 0, videoHeight, + ff.frame->data, ff.frame->linesize); + + ff.frame->pts = framesEncoded; + + ret = avcodec_send_frame(ff.codec_ctx, ff.frame); + if (ret < 0) { + std::cerr << "[FFmpeg] avcodec_send_frame failed: " << ffErr(ret) << std::endl; + return false; + } + if (!drainPackets()) return false; + + framesEncoded++; + img.release(); + } + + // ---- 12. Flush encoder ---- + ret = avcodec_send_frame(ff.codec_ctx, nullptr); + if (ret < 0 && ret != AVERROR_EOF) { + std::cerr << "[FFmpeg] flush send_frame failed: " << ffErr(ret) << std::endl; + return false; + } + if (!drainPackets()) return false; + + // ---- 13. 
Write trailer (finalises moov; with +faststart, + // FFmpeg rewrites the file to move moov to the front) ---- + ret = av_write_trailer(ff.fmt_ctx); + if (ret < 0) { + std::cerr << "[FFmpeg] av_write_trailer failed: " << ffErr(ret) << std::endl; + return false; + } + + std::cout << "[FFmpeg] Video created: " << mp4OutputPath + << " (" << framesEncoded << " frames, " + << fps << " FPS, ~" << (framesEncoded / fps) << "s)" + << " via " << chosen->display << std::endl; + + return true; + } + catch (const cv::Exception& e) { + std::cerr << "[FFmpeg] OpenCV exception: " << e.what() << std::endl; + return false; + } + catch (const std::exception& e) { + std::cerr << "[FFmpeg] Exception: " << e.what() << std::endl; + return false; + } + } + + // ================================================================ + // ImagesToMP4HW — Hardware-accelerated FFmpeg encoder pipeline + // ================================================================ + // Preference order: HEVC on NVIDIA NVENC > Intel QSV > AMD AMF first, + // then H.264 in the same vendor order, then software (libx265/libx264/mpeg4). + // Each encoder is probed by attempting a real avcodec_open2(); the + // first one to succeed is used. This avoids guessing based on GPU + // presence — we just try and let FFmpeg tell us what works. + // + // Per-encoder quality targets: + // NVENC: rc=vbr, cq=28 (HEVC) / cq=24 (H.264), preset=slow (legacy name), no tune + // QSV: global_quality=28 (HEVC) / 24 (H.264), preset=slower + // AMF: quality=quality, rc=cqp, qp ~24/26/28 (HEVC) / 22/24/26 (H.264) + // libx265: crf=28, preset=slow + // libx264: crf=26, preset=slow, tune=stillimage + // mpeg4: bit_rate=1.5 Mbps (no CRF support) + // + // Pixel format: NV12 for QSV/AMF (native), YUV420P everywhere else. 
+ // ================================================================ + bool ANSOPENCV::ImagesToMP4HW(const std::string& imageFolder, + const std::string& outputVideoPath, + int maxWidth, int fps) { + + // ---- Per-output-file mutex (independent of the other two) ---- + static std::mutex mapMutexHW; + static std::map> fileMutexesHW; + + std::shared_ptr fileMutex; + { + std::lock_guard lock(mapMutexHW); + std::string canonicalPath = std::filesystem::canonical( + std::filesystem::path(outputVideoPath).parent_path()).string() + + "/" + std::filesystem::path(outputVideoPath).filename().string(); + + if (fileMutexesHW.find(canonicalPath) == fileMutexesHW.end()) { + fileMutexesHW[canonicalPath] = std::make_unique(); + } + fileMutex = std::shared_ptr( + fileMutexesHW[canonicalPath].get(), [](std::timed_mutex*) {}); + } + + std::unique_lock lock(*fileMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Another thread is writing to " << outputVideoPath << std::endl; + return false; + } + + // ---- RAII bag for FFmpeg resources ---- + struct FFState { + AVFormatContext* fmt_ctx = nullptr; + AVCodecContext* codec_ctx = nullptr; + AVFrame* frame = nullptr; + AVPacket* pkt = nullptr; + SwsContext* sws = nullptr; + ~FFState() { + if (sws) { sws_freeContext(sws); sws = nullptr; } + if (frame) { av_frame_free(&frame); } + if (pkt) { av_packet_free(&pkt); } + if (codec_ctx) { avcodec_free_context(&codec_ctx); } + if (fmt_ctx) { + if (fmt_ctx->pb && !(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&fmt_ctx->pb); + } + avformat_free_context(fmt_ctx); + fmt_ctx = nullptr; + } + } + } ff; + + auto ffErr = [](int err) -> std::string { + char buf[AV_ERROR_MAX_STRING_SIZE] = {0}; + av_strerror(err, buf, sizeof(buf)); + return std::string(buf); + }; + + try { + // Clamp FPS to [1, 60] + fps = max(1, min(60, fps)); + + // ---- Collect image files ---- + std::vector imageFiles; + const std::vector extensions = { 
"*.jpg", "*.jpeg", "*.png", "*.bmp" }; + for (const auto& ext : extensions) { + std::vector temp; + cv::glob(imageFolder + "/" + ext, temp, false); + imageFiles.insert(imageFiles.end(), + std::make_move_iterator(temp.begin()), + std::make_move_iterator(temp.end())); + } + if (imageFiles.empty()) { + std::cerr << "Error: No images found in folder: " << imageFolder << std::endl; + return false; + } + std::sort(imageFiles.begin(), imageFiles.end()); + + const int maxFrames = fps * 300; + if (static_cast(imageFiles.size()) > maxFrames) { + std::cout << "Warning: Truncating from " << imageFiles.size() + << " to " << maxFrames << " images (5-minute limit at " + << fps << " FPS)" << std::endl; + imageFiles.resize(maxFrames); + } + const int numImages = static_cast(imageFiles.size()); + + // ---- First image -> dimensions ---- + cv::Mat firstImage = cv::imread(imageFiles[0], cv::IMREAD_COLOR); + if (firstImage.empty()) { + std::cerr << "Error: Could not read first image: " << imageFiles[0] << std::endl; + return false; + } + + int videoWidth = firstImage.cols; + int videoHeight = firstImage.rows; + bool needsResize = false; + if (maxWidth > 0 && firstImage.cols > maxWidth) { + double scale = static_cast(maxWidth) / firstImage.cols; + videoWidth = static_cast(std::round(firstImage.cols * scale)); + videoHeight = static_cast(std::round(firstImage.rows * scale)); + needsResize = true; + } + videoWidth = (videoWidth / 2) * 2; + videoHeight = (videoHeight / 2) * 2; + if (videoWidth < 2 || videoHeight < 2) { + std::cerr << "Error: Resulting video dimensions too small: " + << videoWidth << "x" << videoHeight << std::endl; + return false; + } + + std::cout << "[FF-HW Thread " << std::this_thread::get_id() << "] " + << "Image: " << firstImage.cols << "x" << firstImage.rows + << " -> Video: " << videoWidth << "x" << videoHeight + << " | " << numImages << " frames @ " << fps << " FPS" + << " (~" << (numImages / fps) << "s)" << std::endl; + firstImage.release(); + + // Ensure .mp4 
extension + std::string mp4OutputPath = outputVideoPath; + if (mp4OutputPath.size() < 4 || + mp4OutputPath.substr(mp4OutputPath.size() - 4) != ".mp4") { + mp4OutputPath += ".mp4"; + } + + int ret = 0; + + // ---- Allocate output format context (needed before codec-open probe) ---- + ret = avformat_alloc_output_context2(&ff.fmt_ctx, nullptr, "mp4", mp4OutputPath.c_str()); + if (ret < 0 || !ff.fmt_ctx) { + std::cerr << "[FF-HW] avformat_alloc_output_context2 failed: " << ffErr(ret) << std::endl; + return false; + } + + // ---- Encoder preference list ---- + struct KV { const char* k; const char* v; }; + struct EncChoice { + const char* name; // avcodec encoder name + const char* display; // human-readable + bool isHEVC; // affects codec_tag for MP4 + AVPixelFormat pixFmt; // NV12 for QSV/AMF, YUV420P else + int maxBFrames; // 0 for hardware encoders that don't like B-frames + std::vector opts; + }; + + const std::vector encoders = { + // ---- HEVC hardware ---- + // NOTE: using LEGACY NVENC preset names (slow/medium/fast/hq/...) not + // the newer p1..p7 naming, because the latter requires FFmpeg >= 4.4 + // + NVIDIA Video Codec SDK 10.0. Legacy names work on both old and + // new builds. The newer 'tune' option doesn't exist in older NVENC + // wrappers either, so we omit it and rely on the preset for quality. 
+ { "hevc_nvenc", "NVIDIA HEVC (NVENC)", true, AV_PIX_FMT_YUV420P, 0, { + {"preset", "slow"}, {"rc", "vbr"}, {"cq", "28"} + }}, + { "hevc_qsv", "Intel HEVC (QSV)", true, AV_PIX_FMT_NV12, 0, { + {"global_quality", "28"}, {"preset", "slower"} + }}, + { "hevc_amf", "AMD HEVC (AMF)", true, AV_PIX_FMT_NV12, 0, { + {"quality", "quality"}, {"rc", "cqp"}, + {"qp_i", "24"}, {"qp_p", "26"}, {"qp_b", "28"} + }}, + // ---- H.264 hardware ---- + { "h264_nvenc", "NVIDIA H.264 (NVENC)", false, AV_PIX_FMT_YUV420P, 0, { + {"preset", "slow"}, {"rc", "vbr"}, {"cq", "24"} + }}, + { "h264_qsv", "Intel H.264 (QSV)", false, AV_PIX_FMT_NV12, 0, { + {"global_quality", "24"}, {"preset", "slower"} + }}, + { "h264_amf", "AMD H.264 (AMF)", false, AV_PIX_FMT_NV12, 0, { + {"quality", "quality"}, {"rc", "cqp"}, + {"qp_i", "22"}, {"qp_p", "24"}, {"qp_b", "26"} + }}, + // ---- Software fallbacks ---- + { "libx265", "HEVC/H.265 (libx265)", true, AV_PIX_FMT_YUV420P, 2, { + {"crf", "28"}, {"preset", "slow"} + }}, + { "libx264", "H.264 (libx264)", false, AV_PIX_FMT_YUV420P, 2, { + {"crf", "26"}, {"preset", "slow"}, {"tune", "stillimage"} + }}, + { "mpeg4", "MPEG-4 Part 2 (native FFmpeg)", false, AV_PIX_FMT_YUV420P, 2, {} }, + }; + + const AVCodec* codec = nullptr; + const EncChoice* chosen = nullptr; + + // ---- Probe loop: try each encoder until one opens successfully ---- + for (const auto& e : encoders) { + const AVCodec* c = avcodec_find_encoder_by_name(e.name); + if (!c) { + std::cout << "[FF-HW] skip " << e.display + << " (not compiled into FFmpeg)" << std::endl; + continue; + } + + AVCodecContext* ctx = avcodec_alloc_context3(c); + if (!ctx) continue; + + ctx->width = videoWidth; + ctx->height = videoHeight; + ctx->pix_fmt = e.pixFmt; + ctx->time_base = AVRational{ 1, fps }; + ctx->framerate = AVRational{ fps, 1 }; + ctx->gop_size = fps * 2; + ctx->max_b_frames = e.maxBFrames; + + if (e.isHEVC) { + ctx->codec_tag = MKTAG('h', 'v', 'c', '1'); + } + if (ff.fmt_ctx->oformat->flags & 
AVFMT_GLOBALHEADER) { + ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + if (std::strcmp(e.name, "mpeg4") == 0) { + ctx->bit_rate = 1500000; // ~1.5 Mbps fallback + } + + AVDictionary* opts = nullptr; + for (const auto& kv : e.opts) { + av_dict_set(&opts, kv.k, kv.v, 0); + } + + int r = avcodec_open2(ctx, c, &opts); + av_dict_free(&opts); + + if (r < 0) { + std::cout << "[FF-HW] skip " << e.display + << " (open failed: " << ffErr(r) << ")" << std::endl; + avcodec_free_context(&ctx); + continue; + } + + // Success — commit this context to the RAII bag + ff.codec_ctx = ctx; + codec = c; + chosen = &e; + std::cout << "[FF-HW] Using encoder: " << e.display; + for (const auto& kv : e.opts) std::cout << " " << kv.k << "=" << kv.v; + std::cout << std::endl; + break; + } + + if (!ff.codec_ctx) { + std::cerr << "[FF-HW] Error: no encoder could be opened. " + "Bundled FFmpeg has neither hardware (NVENC/QSV/AMF) nor " + "software (libx265/libx264/mpeg4) encoders available." << std::endl; + return false; + } + + // ---- Create output stream, copy codec params ---- + AVStream* stream = avformat_new_stream(ff.fmt_ctx, nullptr); + if (!stream) { + std::cerr << "[FF-HW] avformat_new_stream failed" << std::endl; + return false; + } + ret = avcodec_parameters_from_context(stream->codecpar, ff.codec_ctx); + if (ret < 0) { + std::cerr << "[FF-HW] avcodec_parameters_from_context failed: " << ffErr(ret) << std::endl; + return false; + } + stream->time_base = ff.codec_ctx->time_base; + + // ---- Open output file ---- + if (!(ff.fmt_ctx->oformat->flags & AVFMT_NOFILE)) { + ret = avio_open(&ff.fmt_ctx->pb, mp4OutputPath.c_str(), AVIO_FLAG_WRITE); + if (ret < 0) { + std::cerr << "[FF-HW] avio_open('" << mp4OutputPath << "') failed: " << ffErr(ret) << std::endl; + return false; + } + } + + // ---- Write MP4 header with +faststart ---- + { + AVDictionary* muxOpts = nullptr; + av_dict_set(&muxOpts, "movflags", "+faststart", 0); + ret = avformat_write_header(ff.fmt_ctx, &muxOpts); + 
av_dict_free(&muxOpts); + if (ret < 0) { + std::cerr << "[FF-HW] avformat_write_header failed: " << ffErr(ret) << std::endl; + return false; + } + } + + // ---- Allocate AVFrame with chosen pix_fmt + AVPacket ---- + ff.frame = av_frame_alloc(); + ff.pkt = av_packet_alloc(); + if (!ff.frame || !ff.pkt) { + std::cerr << "[FF-HW] av_frame_alloc / av_packet_alloc failed" << std::endl; + return false; + } + ff.frame->format = chosen->pixFmt; + ff.frame->width = videoWidth; + ff.frame->height = videoHeight; + ret = av_frame_get_buffer(ff.frame, 0); + if (ret < 0) { + std::cerr << "[FF-HW] av_frame_get_buffer failed: " << ffErr(ret) << std::endl; + return false; + } + + // ---- BGR24 -> chosen pix_fmt converter ---- + ff.sws = sws_getContext( + videoWidth, videoHeight, AV_PIX_FMT_BGR24, + videoWidth, videoHeight, chosen->pixFmt, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (!ff.sws) { + std::cerr << "[FF-HW] sws_getContext failed" << std::endl; + return false; + } + + // ---- Drain helper ---- + auto drainPackets = [&]() -> bool { + for (;;) { + int r = avcodec_receive_packet(ff.codec_ctx, ff.pkt); + if (r == AVERROR(EAGAIN) || r == AVERROR_EOF) return true; + if (r < 0) { + std::cerr << "[FF-HW] avcodec_receive_packet failed: " << ffErr(r) << std::endl; + return false; + } + av_packet_rescale_ts(ff.pkt, ff.codec_ctx->time_base, stream->time_base); + ff.pkt->stream_index = stream->index; + r = av_interleaved_write_frame(ff.fmt_ctx, ff.pkt); + av_packet_unref(ff.pkt); + if (r < 0) { + std::cerr << "[FF-HW] av_interleaved_write_frame failed: " << ffErr(r) << std::endl; + return false; + } + } + }; + + // ---- Encoding loop ---- + cv::Mat img; + cv::Mat resizedImg; + int64_t framesEncoded = 0; + + for (int i = 0; i < numImages; ++i) { + img = cv::imread(imageFiles[i], cv::IMREAD_COLOR); + if (img.empty()) { + std::cerr << "Warning: Could not read: " << imageFiles[i] << std::endl; + continue; + } + + cv::Mat* src = &img; + if (needsResize || img.cols != videoWidth || 
img.rows != videoHeight) { + cv::resize(img, resizedImg, cv::Size(videoWidth, videoHeight), + 0, 0, cv::INTER_AREA); + src = &resizedImg; + } + + ret = av_frame_make_writable(ff.frame); + if (ret < 0) { + std::cerr << "[FF-HW] av_frame_make_writable failed: " << ffErr(ret) << std::endl; + return false; + } + + const uint8_t* srcSlices[4] = { src->data, nullptr, nullptr, nullptr }; + int srcStride[4] = { static_cast(src->step[0]), 0, 0, 0 }; + sws_scale(ff.sws, srcSlices, srcStride, 0, videoHeight, + ff.frame->data, ff.frame->linesize); + + ff.frame->pts = framesEncoded; + + ret = avcodec_send_frame(ff.codec_ctx, ff.frame); + if (ret < 0) { + std::cerr << "[FF-HW] avcodec_send_frame failed: " << ffErr(ret) << std::endl; + return false; + } + if (!drainPackets()) return false; + + framesEncoded++; + img.release(); + } + + // ---- Flush encoder ---- + ret = avcodec_send_frame(ff.codec_ctx, nullptr); + if (ret < 0 && ret != AVERROR_EOF) { + std::cerr << "[FF-HW] flush send_frame failed: " << ffErr(ret) << std::endl; + return false; + } + if (!drainPackets()) return false; + + // ---- Write trailer ---- + ret = av_write_trailer(ff.fmt_ctx); + if (ret < 0) { + std::cerr << "[FF-HW] av_write_trailer failed: " << ffErr(ret) << std::endl; + return false; + } + + std::cout << "[FF-HW] Video created: " << mp4OutputPath + << " (" << framesEncoded << " frames, " + << fps << " FPS, ~" << (framesEncoded / fps) << "s)" + << " via " << chosen->display << std::endl; + + return true; + } + catch (const cv::Exception& e) { + std::cerr << "[FF-HW] OpenCV exception: " << e.what() << std::endl; + return false; + } + catch (const std::exception& e) { + std::cerr << "[FF-HW] Exception: " << e.what() << std::endl; + return false; + } + } + //bool ANSOPENCV::ImagesToMP4(const std::string& imageFolder, const std::string& outputVideoPath, int targetDurationSec) { // std::unique_lock lock(timeImageMutex, std::defer_lock); // if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { 
@@ -4500,6 +5395,8 @@ extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4_S( return -1; } + fps = 10; + bool success = ANSCENTER::ANSOPENCV::ImagesToMP4( imageFolder, outputVideoPath, maxWidth, fps); @@ -4522,6 +5419,114 @@ extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4_S( } } +// ---------------------------------------------------------------------------- +// Direct-FFmpeg variant — routes to ANSCENTER::ANSOPENCV::ImagesToMP4FF which +// encodes with libx265 (preferred) / libx264 / mpeg4 through the libav* API. +// Same fps=10 hardcoding as ANSCV_ImagesToMP4_S for consistency. +// ---------------------------------------------------------------------------- +extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4FF_S( + const char* imageFolder, + const char* outputVideoPath, + int maxWidth, int fps) { + + try { + if (!imageFolder || strlen(imageFolder) == 0) { + std::cerr << "Error: Invalid image folder path!" << std::endl; + return -1; + } + + if (!outputVideoPath || strlen(outputVideoPath) == 0) { + std::cerr << "Error: Invalid output video path!" << std::endl; + return -1; + } + + fps = 10; + + bool success = ANSCENTER::ANSOPENCV::ImagesToMP4FF( + imageFolder, outputVideoPath, maxWidth, fps); + + if (!success) { + std::cerr << "Error: Failed to create MP4 (FFmpeg) from: " + << imageFolder << std::endl; + return 0; + } + + return 1; + } + catch (const std::exception& e) { + std::cerr << "Error: Exception in ANSCV_ImagesToMP4FF_S: " + << e.what() << std::endl; + return -2; + } + catch (...) { + std::cerr << "Error: Unknown exception in ANSCV_ImagesToMP4FF_S!" << std::endl; + return -3; + } +} + +// ---------------------------------------------------------------------------- +// Prints the copyright license of the FFmpeg libraries actually linked into +// ANSCV.dll. The FFmpeg symbols are resolved here (inside the DLL) where +// libavcodec / libavformat / libavutil are linked, so callers that don't link +// FFmpeg themselves (e.g. 
ANSCV-UnitTest) can still get the info. +// +// LGPL v2.1+ → commercial/closed-source distribution OK (subject to LGPL +// requirements like allowing relinking with a modified FFmpeg). +// GPL v2+ → ANSCV.dll is a derivative work and must be GPL-compatible. +// ---------------------------------------------------------------------------- +extern "C" __declspec(dllexport) void ANSCV_PrintFFmpegLicense_S() { + std::cout << "[FFmpeg] avutil license: " << avutil_license() << std::endl; + std::cout << "[FFmpeg] avcodec license: " << avcodec_license() << std::endl; + std::cout << "[FFmpeg] avformat license: " << avformat_license() << std::endl; + std::cout << "[FFmpeg] swscale license: " << swscale_license() << std::endl; +} + +// ---------------------------------------------------------------------------- +// Hardware-accelerated variant — routes to ANSCENTER::ANSOPENCV::ImagesToMP4HW +// which probes NVIDIA NVENC, Intel QSV, and AMD AMF HEVC/H.264 encoders in +// order, then falls back to software (libx265/libx264/mpeg4) if none work. +// Same fps=10 hardcoding as the other two variants. +// ---------------------------------------------------------------------------- +extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4HW_S( + const char* imageFolder, + const char* outputVideoPath, + int maxWidth, int fps) { + + try { + if (!imageFolder || strlen(imageFolder) == 0) { + std::cerr << "Error: Invalid image folder path!" << std::endl; + return -1; + } + + if (!outputVideoPath || strlen(outputVideoPath) == 0) { + std::cerr << "Error: Invalid output video path!" 
<< std::endl; + return -1; + } + + fps = 10; + + bool success = ANSCENTER::ANSOPENCV::ImagesToMP4HW( + imageFolder, outputVideoPath, maxWidth, fps); + + if (!success) { + std::cerr << "Error: Failed to create MP4 (FFmpeg-HW) from: " + << imageFolder << std::endl; + return 0; + } + + return 1; + } + catch (const std::exception& e) { + std::cerr << "Error: Exception in ANSCV_ImagesToMP4HW_S: " + << e.what() << std::endl; + return -2; + } + catch (...) { + std::cerr << "Error: Unknown exception in ANSCV_ImagesToMP4HW_S!" << std::endl; + return -3; + } +} + // ============================================================================ // V2 functions: accept uint64_t handleVal by value instead of ANSOPENCV** // This eliminates the LabVIEW buffer reuse bug with double-pointer handles. diff --git a/modules/ANSCV/ANSOpenCV.h b/modules/ANSCV/ANSOpenCV.h index b689590..dd0325f 100644 --- a/modules/ANSCV/ANSOpenCV.h +++ b/modules/ANSCV/ANSOpenCV.h @@ -86,6 +86,19 @@ namespace ANSCENTER static bool resizeImage(cv::Mat& inputImage, int resizeWidth, int orginalImageSize=0); static bool cropImage(cv::Mat& inputImage, const cv::Rect& resizeROI, int originalImageSize=0); static bool ImagesToMP4(const std::string& imageFolder, const std::string& outputVideoPath, int maxWidth, int fps); + // Direct FFmpeg (libav*) encoder path. Prefers HEVC/H.265 (libx265), + // falls back to H.264 (libx264), then MPEG-4 Part 2 (mpeg4). + // Produces significantly smaller files than ImagesToMP4 for the same + // visual quality. Same parameter meaning as ImagesToMP4. + static bool ImagesToMP4FF(const std::string& imageFolder, const std::string& outputVideoPath, int maxWidth, int fps); + // Hardware-accelerated FFmpeg path. Tries, in order: + // hevc_nvenc, hevc_qsv, hevc_amf (NVIDIA/Intel/AMD HEVC) + // h264_nvenc, h264_qsv, h264_amf (NVIDIA/Intel/AMD H.264) + // libx265, libx264, mpeg4 (software fallbacks) + // First encoder that opens successfully is used. 
HEVC is preferred + // everywhere because it compresses ~40% smaller than H.264 at the + // same visual quality, and hardware HEVC is free on any modern GPU. + static bool ImagesToMP4HW(const std::string& imageFolder, const std::string& outputVideoPath, int maxWidth, int fps); private: void CheckLicense(); @@ -166,6 +179,17 @@ extern "C" __declspec(dllexport) int ANSCV_ImagePatternMatchs_S(cv::Mat** image extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4_S(const char* imageFolder, const char* outputVideoPath, int maxWidth, int fps); +// Direct-FFmpeg variant. Same signature as ANSCV_ImagesToMP4_S but uses libav* +// encoders directly with libx265/libx264 tuning for smaller output files. +extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4FF_S(const char* imageFolder, const char* outputVideoPath, int maxWidth, int fps); +// Hardware-accelerated variant. Tries NVIDIA (NVENC), Intel (QSV), and AMD +// (AMF) HEVC/H.264 encoders in order, then falls back to software encoders. +// Same signature as ANSCV_ImagesToMP4_S. +extern "C" __declspec(dllexport) int ANSCV_ImagesToMP4HW_S(const char* imageFolder, const char* outputVideoPath, int maxWidth, int fps); +// Prints the license string of the FFmpeg libraries linked into ANSCV.dll +// (LGPL vs GPL). Useful for verifying whether the bundled FFmpeg build is +// commercially safe to distribute. Prints to stdout. 
+extern "C" __declspec(dllexport) void ANSCV_PrintFFmpegLicense_S(); // IMAQ -> cv::Mat conversion (NI Vision Image*, auto-detects indirection level) extern "C" __declspec(dllexport) int ANSCV_IMAQ2Image(void* imaqHandle, cv::Mat** imageOut); diff --git a/modules/ANSLPR/ANSLPR_OCR.cpp b/modules/ANSLPR/ANSLPR_OCR.cpp index bc1790a..874f5f4 100644 --- a/modules/ANSLPR/ANSLPR_OCR.cpp +++ b/modules/ANSLPR/ANSLPR_OCR.cpp @@ -547,6 +547,471 @@ namespace ANSCENTER return colour; } + // ── Classical perspective rectification ───────────────────────────── + // Takes the axis-aligned LP YOLO bbox and tries to warp the plate to + // a tight rectangle whose height is fixed and whose width preserves + // the detected plate's actual aspect ratio. This removes camera + // tilt/yaw, strips background margin, and normalizes character + // spacing — which makes the recognizer see an image much closer to + // its training distribution and reduces silent character drops. + // + // Works entirely in classical OpenCV (Canny + findContours + + // approxPolyDP + getPerspectiveTransform + warpPerspective), so it + // needs no new models and no retraining. Fails gracefully (returns + // false) on plates where the border can't be isolated — caller falls + // back to the padded axis-aligned crop in that case. + std::vector + ANSALPR_OCR::OrderQuadCorners(const std::vector& pts) { + // Standard TL/TR/BR/BL ordering via x+y / y-x extrema. Robust to + // input winding order (clockwise vs counter-clockwise) and to + // approxPolyDP starting the polygon at an arbitrary corner. 
+ std::vector ordered(4); + if (pts.size() != 4) return ordered; + + auto sum = [](const cv::Point& p) { return p.x + p.y; }; + auto diff = [](const cv::Point& p) { return p.y - p.x; }; + + int idxMinSum = 0, idxMaxSum = 0, idxMinDiff = 0, idxMaxDiff = 0; + for (int i = 1; i < 4; ++i) { + if (sum(pts[i]) < sum(pts[idxMinSum])) idxMinSum = i; + if (sum(pts[i]) > sum(pts[idxMaxSum])) idxMaxSum = i; + if (diff(pts[i]) < diff(pts[idxMinDiff])) idxMinDiff = i; + if (diff(pts[i]) > diff(pts[idxMaxDiff])) idxMaxDiff = i; + } + ordered[0] = cv::Point2f(static_cast(pts[idxMinSum].x), static_cast(pts[idxMinSum].y)); // TL + ordered[1] = cv::Point2f(static_cast(pts[idxMinDiff].x), static_cast(pts[idxMinDiff].y)); // TR + ordered[2] = cv::Point2f(static_cast(pts[idxMaxSum].x), static_cast(pts[idxMaxSum].y)); // BR + ordered[3] = cv::Point2f(static_cast(pts[idxMaxDiff].x), static_cast(pts[idxMaxDiff].y)); // BL + return ordered; + } + + bool ANSALPR_OCR::RectifyPlateROI( + const cv::Mat& source, + const cv::Rect& bbox, + cv::Mat& outRectified) const + { + if (source.empty()) return false; + cv::Rect clamped = bbox & cv::Rect(0, 0, source.cols, source.rows); + if (clamped.width <= 20 || clamped.height <= 10) return false; + + const cv::Mat roi = source(clamped); + const double roiArea = static_cast(roi.rows) * roi.cols; + const double minArea = roiArea * kRectifyAreaFraction; + + // Step 1: grayscale + blur + Canny to find plate border edges. + cv::Mat gray; + if (roi.channels() == 3) { + cv::cvtColor(roi, gray, cv::COLOR_BGR2GRAY); + } else if (roi.channels() == 4) { + cv::cvtColor(roi, gray, cv::COLOR_BGRA2GRAY); + } else { + gray = roi; + } + cv::GaussianBlur(gray, gray, cv::Size(5, 5), 0); + cv::Mat edges; + cv::Canny(gray, edges, 50, 150); + + // Close small gaps in the plate border so findContours sees it as + // one closed shape rather than several broken line segments. 
+ cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3)); + cv::morphologyEx(edges, edges, cv::MORPH_CLOSE, kernel); + + // Step 2: find external contours. + std::vector> contours; + cv::findContours(edges, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); + if (contours.empty()) return false; + + // Step 3: find the largest contour whose approxPolyDP collapses + // to 4 vertices. That's most likely the plate border. + std::vector bestQuad; + double bestArea = 0.0; + for (const auto& c : contours) { + const double area = cv::contourArea(c); + if (area < minArea) continue; + + // Sweep epsilon — tighter approximations require more vertices, + // looser approximations collapse to fewer. We want the + // smallest epsilon at which the contour becomes a quadrilateral. + std::vector approx; + const double perimeter = cv::arcLength(c, true); + for (double eps = 0.02; eps <= 0.08; eps += 0.01) { + cv::approxPolyDP(c, approx, eps * perimeter, true); + if (approx.size() == 4) break; + } + if (approx.size() == 4 && area > bestArea) { + // Verify the quadrilateral is convex — a non-convex + // 4-point contour is almost certainly not a plate + if (cv::isContourConvex(approx)) { + bestArea = area; + bestQuad = approx; + } + } + } + + // Step 4: fallback — minAreaRect on the largest contour. This + // handles pure rotation but not arbitrary perspective skew. + if (bestQuad.empty()) { + auto largest = std::max_element(contours.begin(), contours.end(), + [](const std::vector& a, const std::vector& b) { + return cv::contourArea(a) < cv::contourArea(b); + }); + if (largest == contours.end()) return false; + if (cv::contourArea(*largest) < minArea) return false; + + cv::RotatedRect rr = cv::minAreaRect(*largest); + cv::Point2f pts[4]; + rr.points(pts); + bestQuad.reserve(4); + for (int i = 0; i < 4; ++i) { + bestQuad.emplace_back(static_cast(pts[i].x), + static_cast(pts[i].y)); + } + } + + // Step 5: order the 4 corners as TL/TR/BR/BL. 
+ std::vector srcCorners = OrderQuadCorners(bestQuad); + + // Measure the source quadrilateral's dimensions so the output + // rectangle preserves the real plate aspect ratio. Without this, + // a wide single-row plate would be squashed to 2:1 and a 2-row + // plate would be stretched to wrong proportions. + auto pointDist = [](const cv::Point2f& a, const cv::Point2f& b) -> float { + const float dx = a.x - b.x; + const float dy = a.y - b.y; + return std::sqrt(dx * dx + dy * dy); + }; + const float topEdge = pointDist(srcCorners[0], srcCorners[1]); + const float bottomEdge = pointDist(srcCorners[3], srcCorners[2]); + const float leftEdge = pointDist(srcCorners[0], srcCorners[3]); + const float rightEdge = pointDist(srcCorners[1], srcCorners[2]); + const float srcW = std::max(topEdge, bottomEdge); + const float srcH = std::max(leftEdge, rightEdge); + if (srcW < 20.f || srcH < 10.f) return false; + + const float srcAspect = srcW / srcH; + // Gate rectification on plausible plate aspect ratios. Anything + // wildly outside the range isn't a plate; fall back to the axis- + // aligned crop rather than produce a distorted warp. + if (srcAspect < kMinPlateAspect || srcAspect > kMaxPlateAspect) { + return false; + } + + // Step 6: warp to a rectangle that preserves aspect ratio. Height + // is fixed (kRectifiedHeight) so downstream sizing is predictable. 
+ const int outH = kRectifiedHeight; + const int outW = std::clamp(static_cast(std::round(outH * srcAspect)), + kRectifiedHeight, // min: square + kRectifiedHeight * 6); // max: 6:1 long plates + std::vector dstCorners = { + { 0.f, 0.f }, + { static_cast(outW - 1), 0.f }, + { static_cast(outW - 1), static_cast(outH - 1) }, + { 0.f, static_cast(outH - 1) } + }; + + const cv::Mat M = cv::getPerspectiveTransform(srcCorners, dstCorners); + cv::warpPerspective(roi, outRectified, M, cv::Size(outW, outH), + cv::INTER_LINEAR, cv::BORDER_REPLICATE); + return !outRectified.empty(); + } + + // ── Japan-only: kana recovery on a plate where the fast path silently + // dropped the hiragana from the bottom row ──────────────────────── + ANSALPR_OCR::CodepointClassCounts + ANSALPR_OCR::CountCodepointClasses(const std::string& text) { + CodepointClassCounts counts; + size_t pos = 0; + while (pos < text.size()) { + const size_t before = pos; + uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); + if (cp == 0 || pos == before) break; + if (ANSOCRUtility::IsCharClass(cp, CHAR_DIGIT)) counts.digit++; + if (ANSOCRUtility::IsCharClass(cp, CHAR_KANJI)) counts.kanji++; + if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA)) counts.hiragana++; + if (ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) counts.katakana++; + } + return counts; + } + + bool ANSALPR_OCR::IsJapaneseIncomplete(const std::string& text) { + // A valid Japanese plate has at least one kanji in the region + // zone, at least one hiragana/katakana in the kana zone, and at + // least four digits split between classification (top) and + // designation (bottom). + // + // We only consider a plate "incomplete and worth recovering" + // when it ALREADY LOOKS Japanese on the fast path — i.e. the + // kanji region was found successfully. Gating on kanji > 0 + // prevents the recovery path from firing on non-Japanese plates + // (Latin-only, European, Macau, etc.) 
where there's no kana to + // find anyway, which previously wasted ~35 ms per plate burning + // all recovery attempts on a search that can never succeed. + // + // For non-Japanese plates the function returns false, recovery + // is skipped, and latency is identical to the pre-recovery + // baseline. + const CodepointClassCounts c = CountCodepointClasses(text); + if (c.kanji == 0) return false; // Not a Japanese plate + if (c.digit < 4) return false; // Not enough digits — probably garbage + const int kana = c.hiragana + c.katakana; + return (kana == 0); // Kanji + digits present, kana missing + } + + // Strip screws/rivets/dirt that the recognizer misreads as small + // round punctuation glyphs. The blacklist is deliberately narrow: + // only characters that are never legitimate plate content on any + // country we support. Middle dots (・ and ·) are KEPT because they + // are legitimate padding on Japanese plates with <4 designation + // digits (e.g. "・274"), and they get normalised to "0" by + // ALPRPostProcessing's zone corrections anyway. 
+ std::string ANSALPR_OCR::StripPlateArtifacts(const std::string& text) { + if (text.empty()) return text; + std::string stripped; + stripped.reserve(text.size()); + size_t pos = 0; + while (pos < text.size()) { + const size_t before = pos; + uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); + if (cp == 0 || pos == before) break; + + bool drop = false; + switch (cp) { + // Small round glyphs that mimic screws / rivets + case 0x00B0: // ° degree sign + case 0x02DA: // ˚ ring above + case 0x2218: // ∘ ring operator + case 0x25CB: // ○ white circle + case 0x25CF: // ● black circle + case 0x25E6: // ◦ white bullet + case 0x2022: // • bullet + case 0x2219: // ∙ bullet operator + case 0x25A0: // ■ black square + case 0x25A1: // □ white square + // Quote-like glyphs picked up from plate border / dirt + case 0x0022: // " ASCII double quote + case 0x0027: // ' ASCII apostrophe + case 0x201C: // " LEFT DOUBLE QUOTATION MARK (smart quote) + case 0x201D: // " RIGHT DOUBLE QUOTATION MARK + case 0x201E: // „ DOUBLE LOW-9 QUOTATION MARK + case 0x201F: // ‟ DOUBLE HIGH-REVERSED-9 QUOTATION MARK + case 0x2018: // ' LEFT SINGLE QUOTATION MARK + case 0x2019: // ' RIGHT SINGLE QUOTATION MARK + case 0x201A: // ‚ SINGLE LOW-9 QUOTATION MARK + case 0x201B: // ‛ SINGLE HIGH-REVERSED-9 QUOTATION MARK + case 0x00AB: // « LEFT-POINTING DOUBLE ANGLE QUOTATION + case 0x00BB: // » RIGHT-POINTING DOUBLE ANGLE QUOTATION + case 0x2039: // ‹ SINGLE LEFT-POINTING ANGLE QUOTATION + case 0x203A: // › SINGLE RIGHT-POINTING ANGLE QUOTATION + case 0x301D: // 〝 REVERSED DOUBLE PRIME QUOTATION + case 0x301E: // 〞 DOUBLE PRIME QUOTATION + case 0x301F: // 〟 LOW DOUBLE PRIME QUOTATION + case 0x300A: // 《 LEFT DOUBLE ANGLE BRACKET + case 0x300B: // 》 RIGHT DOUBLE ANGLE BRACKET + case 0x3008: // 〈 LEFT ANGLE BRACKET + case 0x3009: // 〉 RIGHT ANGLE BRACKET + // Ideographic punctuation that isn't valid plate content + case 0x3002: // 。 ideographic full stop + case 0x3001: // 、 ideographic comma + case 
0x300C: // 「 left corner bracket + case 0x300D: // 」 right corner bracket + case 0x300E: // 『 left white corner bracket + case 0x300F: // 』 right white corner bracket + // ASCII punctuation noise picked up from plate borders + case 0x0060: // ` grave accent + case 0x007E: // ~ tilde + case 0x005E: // ^ caret + case 0x007C: // | vertical bar + case 0x005C: // \ backslash + case 0x002F: // / forward slash + case 0x0028: // ( left paren + case 0x0029: // ) right paren + case 0x005B: // [ left bracket + case 0x005D: // ] right bracket + case 0x007B: // { left brace + case 0x007D: // } right brace + case 0x003C: // < less than + case 0x003E: // > greater than + // Misc symbols that round glyphs can collapse to + case 0x00A9: // © copyright sign + case 0x00AE: // ® registered sign + case 0x2117: // ℗ sound recording copyright + case 0x2122: // ™ trademark + drop = true; + break; + default: + break; + } + if (!drop) { + stripped.append(text, before, pos - before); + } + } + + // Collapse runs of spaces introduced by stripping, and trim. 
+ std::string collapsed; + collapsed.reserve(stripped.size()); + bool prevSpace = false; + for (char c : stripped) { + if (c == ' ') { + if (!prevSpace) collapsed.push_back(c); + prevSpace = true; + } else { + collapsed.push_back(c); + prevSpace = false; + } + } + const size_t first = collapsed.find_first_not_of(' '); + if (first == std::string::npos) return ""; + const size_t last = collapsed.find_last_not_of(' '); + return collapsed.substr(first, last - first + 1); + } + + std::string ANSALPR_OCR::RecoverKanaFromBottomHalf( + const cv::Mat& plateROI, int halfH) const + { + if (!_ocrEngine || plateROI.empty()) return ""; + const int plateW = plateROI.cols; + const int plateH = plateROI.rows; + if (plateW < 40 || plateH < 30 || halfH <= 0 || halfH >= plateH) { + ANS_DBG("ALPR_Kana", + "Recovery SKIP: plate too small (%dx%d, halfH=%d)", + plateW, plateH, halfH); + return ""; + } + + ANS_DBG("ALPR_Kana", + "Recovery START: plate=%dx%d halfH=%d bottomHalf=%dx%d", + plateW, plateH, halfH, plateW, plateH - halfH); + + // The kana on a Japanese plate sits in the left ~30% of the + // bottom row and is roughly square. Try 3 well-chosen crop + // positions — one center, one slightly high, one wider — and + // bail out on the first that yields a hiragana/katakana hit. + // + // 3 attempts is the sweet spot: it catches the common row-split + // variation without burning linear time on every fail-case. + // Previous versions tried 7 attempts, which added ~20 ms/plate + // of pure waste when recovery couldn't find any kana anyway. + // + // Tiles shorter than 48 px are upscaled to 48 px height before + // recognition so the recognizer sees something close to its + // training distribution. PaddleOCR's rec model expects 48 px + // height and breaks down when given very small crops. 
+ struct TileSpec { + float widthFraction; // fraction of plateW + float yOffset; // 0.0 = top of bottom half, 1.0 = bottom + }; + const TileSpec attempts[] = { + { 0.30f, 0.50f }, // primary: 30% wide, centered vertically + { 0.30f, 0.35f }, // row split landed too low — try higher + { 0.35f, 0.50f }, // slightly wider crop for off-center kana + }; + + int attemptNo = 0; + for (const TileSpec& spec : attempts) { + attemptNo++; + int tileW = static_cast(plateW * spec.widthFraction); + if (tileW < 30) tileW = 30; + if (tileW > plateW) tileW = plateW; + + // Prefer square tile, but allow non-square if the bottom + // half is short. Clipped to bottom-half height. + int tileH = tileW; + const int bottomHalfH = plateH - halfH; + if (tileH > bottomHalfH) tileH = bottomHalfH; + if (tileH < 20) continue; + + const int centerY = halfH + static_cast(bottomHalfH * spec.yOffset); + int cy = centerY - tileH / 2; + if (cy < halfH) cy = halfH; + if (cy + tileH > plateH) cy = plateH - tileH; + if (cy < 0) cy = 0; + + const int cx = 0; + int cw = tileW; + int ch = tileH; + if (cx + cw > plateW) cw = plateW - cx; + if (cy + ch > plateH) ch = plateH - cy; + if (cw <= 10 || ch <= 10) continue; + + cv::Mat kanaTile = plateROI(cv::Rect(cx, cy, cw, ch)); + + // Upscale tiles shorter than 48 px so the recognizer sees + // something close to its training input size. Preserve + // aspect ratio; cv::INTER_CUBIC keeps character strokes + // sharper than bilinear. 
+ cv::Mat tileForRec; + if (kanaTile.rows < 48) { + const double scale = 48.0 / kanaTile.rows; + cv::resize(kanaTile, tileForRec, cv::Size(), + scale, scale, cv::INTER_CUBIC); + } else { + tileForRec = kanaTile; + } + + std::vector tileBatch{ tileForRec }; + auto tileResults = _ocrEngine->RecognizeTextBatch(tileBatch); + if (tileResults.empty()) { + ANS_DBG("ALPR_Kana", + "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) " + "→ recognizer returned empty batch", + attemptNo, cw, ch, tileForRec.cols, tileForRec.rows, + spec.widthFraction, spec.yOffset); + continue; + } + + const std::string& text = tileResults[0].first; + const float conf = tileResults[0].second; + ANS_DBG("ALPR_Kana", + "Attempt %d: tile=%dx%d (rec=%dx%d w=%.2f y=%.2f) " + "→ '%s' conf=%.3f", + attemptNo, cw, ch, tileForRec.cols, tileForRec.rows, + spec.widthFraction, spec.yOffset, text.c_str(), conf); + + if (text.empty()) continue; + + // Japanese plate kana is ALWAYS exactly 1 hiragana or + // katakana character. We accept ONLY that — nothing else. + // Kanji, Latin letters, digits, punctuation, everything + // non-kana is rejected. The returned string is exactly the + // one kana codepoint or empty. + // + // Strictness is deliberate: the relaxed "any letter class" + // accept path was letting through kanji bleed from the + // region-name zone when the tile positioning was slightly + // off, producing wrong plate text like "59-V3 西 752.23" or + // "JCL 三". With strict-only accept, a miss in the recovery + // is silent and the fast-path result passes through unchanged. 
+ std::string firstKana; // first CHAR_HIRAGANA / CHAR_KATAKANA hit + int codepointCount = 0; + size_t pos = 0; + while (pos < text.size()) { + const size_t before = pos; + uint32_t cp = ANSOCRUtility::NextUTF8Codepoint(text, pos); + if (cp == 0 || pos == before) break; + codepointCount++; + if (!firstKana.empty()) continue; + + if (ANSOCRUtility::IsCharClass(cp, CHAR_HIRAGANA) || + ANSOCRUtility::IsCharClass(cp, CHAR_KATAKANA)) { + firstKana = text.substr(before, pos - before); + } + } + + if (!firstKana.empty()) { + ANS_DBG("ALPR_Kana", + "Recovery SUCCESS at attempt %d: extracted '%s' " + "from raw '%s' (%d codepoints, conf=%.3f)", + attemptNo, firstKana.c_str(), text.c_str(), + codepointCount, conf); + return firstKana; + } + } + ANS_DBG("ALPR_Kana", + "Recovery FAILED: no kana found in %d attempts", + attemptNo); + return ""; + } + // ── Full-frame vs pipeline auto-detection ──────────────────────────── // Mirror of ANSALPR_OD::shouldUseALPRChecker. The auto-detection logic // watches whether consecutive frames from a given camera have the exact @@ -818,16 +1283,37 @@ namespace ANSCENTER } // Step 2: Collect crops from every valid plate. Wide plates - // (aspect >= 2.0) are treated as a single text line; narrow + // (aspect >= 2.1) are treated as a single text line; narrow // plates (2-row layouts like Japanese) are split horizontally // at H/2 into top and bottom rows. All crops go through a // single batched recognizer call, bypassing the OCR text-line // detector entirely — for ALPR the LP YOLO box already bounds // the text region precisely. + // + // Per-plate preprocessing pipeline: + // 1. Pad the YOLO LP bbox by 5% on each side so the plate + // border is visible to the rectifier and edge characters + // aren't clipped by a tight detector output. + // 2. Try classical perspective rectification (Canny + + // findContours + approxPolyDP + warpPerspective) to + // straighten tilted / skewed plates. 
Falls back to the + // padded axis-aligned crop on failure — no regression. + // 3. Run the 2-row split heuristic on whichever plate image + // we ended up with, using an aspect threshold of 2.1 so + // perfect-2:1 rectified Japanese plates still split. + // + // Rectification is gated on _country == JAPAN at runtime. + // For all other countries we skip the classical-CV pipeline + // entirely and use the plain padded axis-aligned crop — this + // keeps non-Japan inference on the original fast path and + // lets SetCountry(nonJapan) take effect on the very next + // frame without a restart. + const bool useRectification = (_country == Country::JAPAN); struct PlateInfo { size_t origIndex; // into lprOutput std::vector cropIndices; // into allCrops - cv::Mat plateROI; // full (unsplit) ROI, kept for colour + cv::Mat plateROI; // full (unsplit) ROI, kept for colour + kana recovery + int halfH = 0; // row-split Y inside plateROI (0 = single row) }; std::vector allCrops; std::vector plateInfos; @@ -842,30 +1328,58 @@ namespace ANSCENTER const int y1 = std::max(0, box.y); const int width = std::min(frameWidth - x1, box.width); const int height = std::min(frameHeight - y1, box.height); - if (width <= 0 || height <= 0) continue; - cv::Mat plateROI = frame(cv::Rect(x1, y1, width, height)); + // Pad the YOLO LP bbox by 5% on each side. Gives the + // rectifier some background for edge detection and helps + // when the detector cropped a character edge. + const int padX = std::max(2, width * 5 / 100); + const int padY = std::max(2, height * 5 / 100); + const int px = std::max(0, x1 - padX); + const int py = std::max(0, y1 - padY); + const int pw = std::min(frameWidth - px, width + 2 * padX); + const int ph = std::min(frameHeight - py, height + 2 * padY); + const cv::Rect paddedBox(px, py, pw, ph); + + // Perspective rectification is Japan-only to preserve + // baseline latency on all other countries. On non-Japan + // plates we go straight to the padded axis-aligned crop. 
+ cv::Mat plateROI; + if (useRectification) { + cv::Mat rectified; + if (RectifyPlateROI(frame, paddedBox, rectified)) { + plateROI = rectified; // owning 3-channel BGR + } else { + plateROI = frame(paddedBox); // non-owning view + } + } else { + plateROI = frame(paddedBox); // non-owning view + } PlateInfo info; info.origIndex = i; info.plateROI = plateROI; - const float aspect = static_cast(width) / - std::max(1, height); + const int plateW = plateROI.cols; + const int plateH = plateROI.rows; + const float aspect = static_cast(plateW) / + std::max(1, plateH); - // 2-row heuristic: aspect < 2.0 → split top/bottom. - // Threshold tuned to catch Japanese square plates - // (~1.5–1.9) while leaving wide EU/VN plates (3.0+) - // untouched. - if (aspect < 2.0f && height >= 24) { - const int halfH = height / 2; + // 2-row heuristic: aspect < 2.1 → split top/bottom. + // Bumped from 2.0 so a perfectly rectified Japanese plate + // (aspect == 2.0) still splits correctly despite floating- + // point rounding. Threshold still excludes wide EU/VN + // plates (aspect 3.0+). 
+ if (aspect < 2.1f && plateH >= 24) { + const int halfH = plateH / 2; + info.halfH = halfH; info.cropIndices.push_back(allCrops.size()); - allCrops.push_back(plateROI(cv::Rect(0, 0, width, halfH))); + allCrops.push_back(plateROI(cv::Rect(0, 0, plateW, halfH))); info.cropIndices.push_back(allCrops.size()); - allCrops.push_back(plateROI(cv::Rect(0, halfH, width, height - halfH))); + allCrops.push_back(plateROI(cv::Rect(0, halfH, plateW, plateH - halfH))); } else { + info.halfH = 0; info.cropIndices.push_back(allCrops.size()); allCrops.push_back(plateROI); } @@ -895,14 +1409,68 @@ namespace ANSCENTER cv::Size(frameWidth, frameHeight), cameraId); for (const auto& info : plateInfos) { - std::string combinedText; - for (size_t cropIdx : info.cropIndices) { - if (cropIdx >= ocrResults.size()) continue; - const std::string& lineText = ocrResults[cropIdx].first; - if (lineText.empty()) continue; - if (!combinedText.empty()) combinedText += " "; - combinedText += lineText; + // Reassemble row-by-row so we can target the bottom row + // for kana recovery when the fast path silently dropped + // the hiragana on a Japanese 2-row plate. + std::string topText, bottomText; + if (info.cropIndices.size() == 2) { + if (info.cropIndices[0] < ocrResults.size()) + topText = ocrResults[info.cropIndices[0]].first; + if (info.cropIndices[1] < ocrResults.size()) + bottomText = ocrResults[info.cropIndices[1]].first; + } else if (!info.cropIndices.empty() && + info.cropIndices[0] < ocrResults.size()) { + topText = ocrResults[info.cropIndices[0]].first; } + + // Strip screw/rivet artifacts (°, ○, etc.) picked up from + // plate fasteners before any downstream processing. Runs + // on every row regardless of country — these glyphs are + // never legitimate plate content anywhere. 
+ topText = StripPlateArtifacts(topText); + bottomText = StripPlateArtifacts(bottomText); + + std::string combinedText = topText; + if (!bottomText.empty()) { + if (!combinedText.empty()) combinedText += " "; + combinedText += bottomText; + } + + // Japan-only kana recovery: if the fast-path output is + // missing hiragana/katakana, re-crop the kana region and + // run the recognizer on just that tile. Clean plates + // pass the IsJapaneseIncomplete check and skip this + // block entirely — zero cost. + if (_country == Country::JAPAN && info.halfH > 0 && + IsJapaneseIncomplete(combinedText)) { + ANS_DBG("ALPR_Kana", + "RunInference: firing recovery on plate '%s' " + "(plateROI=%dx%d halfH=%d)", + combinedText.c_str(), + info.plateROI.cols, info.plateROI.rows, + info.halfH); + std::string recovered = StripPlateArtifacts( + RecoverKanaFromBottomHalf(info.plateROI, info.halfH)); + if (!recovered.empty()) { + // Prepend the recovered kana to the bottom row + // text so the final combined string reads + // "region classification kana designation". + if (bottomText.empty()) { + bottomText = recovered; + } else { + bottomText = recovered + " " + bottomText; + } + combinedText = topText; + if (!bottomText.empty()) { + if (!combinedText.empty()) combinedText += " "; + combinedText += bottomText; + } + ANS_DBG("ALPR_Kana", + "RunInference: spliced result '%s'", + combinedText.c_str()); + } + } + if (combinedText.empty()) continue; Object lprObject = lprOutput[info.origIndex]; @@ -1014,16 +1582,27 @@ namespace ANSCENTER std::vector> lpBatch = _lpDetector->RunInferencesBatch(vehicleCrops, cameraId); - // ── 3. Flatten plates, splitting 2-row plates into top/bot ─ - // Same aspect-ratio heuristic as ANSALPR_OCR::RunInference - // (lines ~820-870): narrow plates (aspect < 2.0) are split - // horizontally into two recognizer crops, wide plates stay as - // one. The recMap lets us stitch the per-crop OCR outputs - // back into per-plate combined strings. + // ── 3. 
Flatten plates, applying preprocessing per plate ─── + // For each detected plate we: + // 1. Pad the LP bbox by 5% so the rectifier sees the + // plate border and tight detector crops don't clip + // edge characters. + // 2. If country == JAPAN, try classical perspective + // rectification — if it succeeds the plateROI is a + // tight, straightened 2D warp of the real plate; if + // it fails we fall back to the padded axis-aligned + // crop. For non-Japan countries we skip rectification + // entirely to preserve baseline latency. + // 3. Apply the same 2-row split heuristic as RunInference + // (aspect < 2.1 → split top/bottom). + // The halfH field lets the assembly loop call the kana + // recovery helper with the correct row-split boundary. + const bool useRectification = (_country == Country::JAPAN); struct PlateMeta { - size_t vehIdx; // index into vehicleCrops / clamped - Object lpObj; // LP detection in VEHICLE-local coords - cv::Mat plateROI; // full plate crop (kept for colour) + size_t vehIdx; // index into vehicleCrops / clamped + Object lpObj; // LP detection in VEHICLE-local coords + cv::Mat plateROI; // full plate crop (kept for colour + kana recovery) + int halfH = 0; // row-split Y inside plateROI (0 = single row) std::vector cropIndices; // indices into allCrops below }; std::vector allCrops; @@ -1036,23 +1615,49 @@ namespace ANSCENTER for (const auto& lp : lpBatch[v]) { cv::Rect lpBox = lp.box & vehRect; if (lpBox.width <= 0 || lpBox.height <= 0) continue; - cv::Mat plateROI = veh(lpBox); + + // Pad by 5% on each side for the rectifier. + const int padX = std::max(2, lpBox.width * 5 / 100); + const int padY = std::max(2, lpBox.height * 5 / 100); + cv::Rect paddedBox( + lpBox.x - padX, lpBox.y - padY, + lpBox.width + 2 * padX, + lpBox.height + 2 * padY); + paddedBox &= vehRect; + if (paddedBox.width <= 0 || paddedBox.height <= 0) continue; + + // Perspective rectification is Japan-only to preserve + // baseline latency on all other countries. 
+ cv::Mat plateROI; + if (useRectification) { + cv::Mat rectified; + if (RectifyPlateROI(veh, paddedBox, rectified)) { + plateROI = rectified; // owning canonical + } else { + plateROI = veh(paddedBox); // non-owning view + } + } else { + plateROI = veh(paddedBox); // non-owning view + } PlateMeta pm; pm.vehIdx = v; pm.lpObj = lp; pm.plateROI = plateROI; + const int plateW = plateROI.cols; + const int plateH = plateROI.rows; const float aspect = - static_cast(plateROI.cols) / - std::max(1, plateROI.rows); - if (aspect < 2.0f && plateROI.rows >= 24) { - const int halfH = plateROI.rows / 2; + static_cast(plateW) / std::max(1, plateH); + if (aspect < 2.1f && plateH >= 24) { + const int halfH = plateH / 2; + pm.halfH = halfH; pm.cropIndices.push_back(allCrops.size()); - allCrops.push_back(plateROI(cv::Rect(0, 0, plateROI.cols, halfH))); + allCrops.push_back(plateROI(cv::Rect(0, 0, plateW, halfH))); pm.cropIndices.push_back(allCrops.size()); - allCrops.push_back(plateROI(cv::Rect(0, halfH, plateROI.cols, plateROI.rows - halfH))); + allCrops.push_back(plateROI(cv::Rect(0, halfH, plateW, plateH - halfH))); } else { + pm.halfH = 0; pm.cropIndices.push_back(allCrops.size()); allCrops.push_back(plateROI); } @@ -1070,14 +1675,59 @@ namespace ANSCENTER std::vector output; output.reserve(metas.size()); for (const auto& pm : metas) { - std::string combined; - for (size_t c : pm.cropIndices) { - if (c >= ocrResults.size()) continue; - const std::string& line = ocrResults[c].first; - if (line.empty()) continue; - if (!combined.empty()) combined += " "; - combined += line; + // Reassemble row-by-row so Japan kana recovery can splice + // the recovered hiragana into the bottom row specifically. 
+ std::string topText, bottomText; + if (pm.cropIndices.size() == 2) { + if (pm.cropIndices[0] < ocrResults.size()) + topText = ocrResults[pm.cropIndices[0]].first; + if (pm.cropIndices[1] < ocrResults.size()) + bottomText = ocrResults[pm.cropIndices[1]].first; + } else if (!pm.cropIndices.empty() && + pm.cropIndices[0] < ocrResults.size()) { + topText = ocrResults[pm.cropIndices[0]].first; } + + // Strip screw/rivet artifacts (°, ○, etc.) picked up from + // plate fasteners before any downstream processing. + topText = StripPlateArtifacts(topText); + bottomText = StripPlateArtifacts(bottomText); + + std::string combined = topText; + if (!bottomText.empty()) { + if (!combined.empty()) combined += " "; + combined += bottomText; + } + + // Japan-only kana recovery fast-path fallback. Zero cost + // on clean plates (gated by country and by UTF-8 codepoint + // class count — clean plates return early). + if (_country == Country::JAPAN && pm.halfH > 0 && + IsJapaneseIncomplete(combined)) { + ANS_DBG("ALPR_Kana", + "RunInferencesBatch: firing recovery on plate " + "'%s' (plateROI=%dx%d halfH=%d)", + combined.c_str(), + pm.plateROI.cols, pm.plateROI.rows, pm.halfH); + std::string recovered = StripPlateArtifacts( + RecoverKanaFromBottomHalf(pm.plateROI, pm.halfH)); + if (!recovered.empty()) { + if (bottomText.empty()) { + bottomText = recovered; + } else { + bottomText = recovered + " " + bottomText; + } + combined = topText; + if (!bottomText.empty()) { + if (!combined.empty()) combined += " "; + combined += bottomText; + } + ANS_DBG("ALPR_Kana", + "RunInferencesBatch: spliced result '%s'", + combined.c_str()); + } + } + if (combined.empty()) continue; Object out = pm.lpObj; @@ -1183,10 +1833,28 @@ namespace ANSCENTER } void ANSALPR_OCR::SetCountry(Country country) { + const Country previous = _country; _country = country; if (_ocrEngine) { _ocrEngine->SetCountry(country); } + // Log every SetCountry call so runtime country switches are + // visible and we can confirm the 
update landed on the right + // handle. The recovery + rectification gates read _country on + // every frame, so this change takes effect on the very next + // RunInference / RunInferencesBatch call — no restart needed. + ANS_DBG("ALPR_SetCountry", + "country changed %d -> %d (Japan=%d, Vietnam=%d, " + "China=%d, Australia=%d, USA=%d, Indonesia=%d) — " + "rectification+recovery gates update on next frame", + static_cast(previous), + static_cast(country), + static_cast(Country::JAPAN), + static_cast(Country::VIETNAM), + static_cast(Country::CHINA), + static_cast(Country::AUSTRALIA), + static_cast(Country::USA), + static_cast(Country::INDONESIA)); } bool ANSALPR_OCR::Destroy() { diff --git a/modules/ANSLPR/ANSLPR_OCR.h b/modules/ANSLPR/ANSLPR_OCR.h index 1e9eec1..4a115f0 100644 --- a/modules/ANSLPR/ANSLPR_OCR.h +++ b/modules/ANSLPR/ANSLPR_OCR.h @@ -125,6 +125,79 @@ namespace ANSCENTER // --- OCR helper --- [[nodiscard]] std::string RunOCROnPlate(const cv::Mat& plateROI, const std::string& cameraId); + // ---------------------------------------------------------------- + // Plate preprocessing: classical perspective rectification + // + // Takes an LP YOLO bounding box and tries to find the plate's + // actual 4 corners via Canny + findContours + approxPolyDP. When + // that succeeds, the plate is warped to a rectangle whose height + // is fixed (kRectifiedHeight) and whose width preserves the + // detected plate's aspect ratio. This produces a tight, + // perspective-corrected crop that the recognizer handles more + // reliably than the tilted / skewed axis-aligned bbox. + // + // Falls back to minAreaRect on the largest contour if no 4-point + // polygon is found, and returns false outright if nothing + // plausible can be isolated. Callers must handle the false case + // by using the (padded) axis-aligned crop instead. 
+ // ---------------------------------------------------------------- + static constexpr int kRectifiedHeight = 220; + static constexpr float kMinPlateAspect = 1.3f; + static constexpr float kMaxPlateAspect = 6.0f; + static constexpr float kRectifyAreaFraction = 0.30f; + + [[nodiscard]] bool RectifyPlateROI( + const cv::Mat& source, + const cv::Rect& bbox, + cv::Mat& outRectified) const; + + // Order an arbitrary quadrilateral as + // [top-left, top-right, bottom-right, bottom-left] (in that order) + // using the x+y / y-x extreme trick so perspective transforms land + // right-side-up regardless of input winding. + [[nodiscard]] static std::vector + OrderQuadCorners(const std::vector& pts); + + // ---------------------------------------------------------------- + // Japan-only: targeted kana recovery + // + // The PaddleOCR v5 recognizer's CTC decoder silently drops a + // character when it sits next to a large blank region in the + // input image — which is exactly the layout of the bottom row + // of a Japanese plate (single small hiragana on the left, big + // gap, then 4 digits on the right). We detect this failure + // mode by counting UTF-8 codepoint classes in the fast-path + // output, and if hiragana/katakana is missing we re-run the + // recognizer on a tight crop of the kana region only. The + // recognizer handles that tight crop correctly because the + // input matches what it was trained on (a dense text-line + // image with no large blank stretches). + // ---------------------------------------------------------------- + struct CodepointClassCounts { + int digit = 0; + int kanji = 0; + int hiragana = 0; + int katakana = 0; + }; + [[nodiscard]] static CodepointClassCounts CountCodepointClasses(const std::string& text); + [[nodiscard]] static bool IsJapaneseIncomplete(const std::string& text); + + // Strip non-text artifacts (screws, rivets, dirt, stickers) that + // the OCR recognizer occasionally picks up from plate surface + // features. 
These glyphs (degree sign, ring above, circles, + // ideographic punctuation, etc.) are never legitimate plate + // characters in any supported country, so we can drop them + // unconditionally. Runs of spaces resulting from stripped + // characters are collapsed and leading/trailing spaces trimmed. + [[nodiscard]] static std::string StripPlateArtifacts(const std::string& text); + + // Run recognizer-only on a tight crop of the left portion of the + // bottom half, trying three vertical offsets to absorb row-split + // inaccuracies. Returns the first non-empty result that contains + // a hiragana or katakana codepoint, or empty string on failure. + [[nodiscard]] std::string RecoverKanaFromBottomHalf( + const cv::Mat& plateROI, int halfH) const; + public: ANSALPR_OCR(); ~ANSALPR_OCR(); diff --git a/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp b/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp index d91791b..5c5a4f3 100644 --- a/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp +++ b/tests/ANSCV-UnitTest/ANSCV-UnitTest.cpp @@ -17,6 +17,7 @@ using namespace ANSCENTER; using namespace cv; using namespace std; + cv::Mat JpegStringToMat(const std::string& jpegStr) { if (jpegStr.length() > 10) { try { @@ -1366,8 +1367,8 @@ int TestGetImage() { } int GenerateVideo() { - std::string imageFolder = "E:\\Programs\\DemoAssets\\ImageSeries\\20260413_152604.321"; - std::string outputVideoPath = "E:\\Programs\\DemoAssets\\ImageSeries\\output3.mp4"; + std::string imageFolder = "E:\\Programs\\DemoAssets\\ImageSeries\\20260415_142435.655"; + std::string outputVideoPath = "E:\\Programs\\DemoAssets\\ImageSeries\\output7.mp4"; int conversionResult = ANSCV_ImagesToMP4_S(imageFolder.c_str(), outputVideoPath.c_str(), 0,20); if (!conversionResult) { std::cerr << "Failed to convert images to MP4." << std::endl; @@ -1418,6 +1419,13 @@ int OpenCVFunctionTest() { int main() { ANSCENTER::ANSOPENCV::InitCameraNetwork(); + + // Print the FFmpeg library license strings. 
The FFmpeg symbols are + // resolved inside ANSCV.dll (which is linked against libavcodec etc.), + // so this works without the unit test having to link FFmpeg itself. + //ANSCV_PrintFFmpegLicense_S(); + + //OpenCVFunctionTest(); GenerateVideo(); //VideoTestClient(); diff --git a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp index fd4715f..3dce4f4 100644 --- a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp +++ b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp @@ -3656,7 +3656,7 @@ int ALPR_OCR_Test() { ANSCENTER::ANSALPR* infHandle = nullptr; std::string licenseKey = ""; std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericALPR_v2.0.zip"; - std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\test3.jpg"; + std::string imagePath = "C:\\Programs\\ModelTraining\\JLPD\\data\\test6.jpg"; int engineType = 2; // ANSALPR_OCR double detectionThreshold = 0.3; @@ -3830,7 +3830,7 @@ int ALPR_OCR_VideoTest() { } // Step 2: Set country (JAPAN = 5 — adjust to match the dataset if needed) - ANSALPR_SetCountry(&infHandle, 5); + ANSALPR_SetCountry(&infHandle, 1); std::cout << "Country set to JAPAN" << std::endl; // Step 3: Load engine