diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 1bef8a6..e69de29 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,7 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(cmake -B cmake-build-release -S .)" - ] - } -} diff --git a/MediaClient/media/video_decoder.cpp b/MediaClient/media/video_decoder.cpp index 8d8f39b..963d311 100644 --- a/MediaClient/media/video_decoder.cpp +++ b/MediaClient/media/video_decoder.cpp @@ -332,8 +332,28 @@ void CVideoDecoder::uninit() { std::lock_guard lock(_mutex); + // [MEDIA_DecClose] heartbeat — paired with [MEDIA_DecInit] for leak diagnosis. + // Pair count over a long run reveals whether avcodec_open2 calls are + // matched by full teardowns. If close-count < init-count, the FFmpeg + // codec context (and its custom get_buffer2 arena) is leaking per reopen. + { + static std::atomic s_closeCount{0}; + const uint64_t n = s_closeCount.fetch_add(1) + 1; + ANS_DBG("MEDIA_DecClose", + "uninit ENTRY #%llu inited=%d codec=%s %dx%d hwEnabled=%d cudaHW=%d gpu=%d (this=%p)", + (unsigned long long)n, + (int)m_bInited, + (m_pCodec && m_pCodec->name) ? m_pCodec->name : "?", + m_pContext ? m_pContext->width : 0, + m_pContext ? 
m_pContext->height : 0, + (int)m_bHardwareDecoderEnabled, + (int)m_bCudaHWAccel, + m_hwGpuIndex, + (void*)this); + } + // Stop processing first - // Backup first + // Backup first BOOL wasRunning = m_bRunning; m_bRunning = FALSE; diff --git a/engines/TensorRTAPI/include/engine/EngineRunInference.inl b/engines/TensorRTAPI/include/engine/EngineRunInference.inl index 882895c..b0619ba 100644 --- a/engines/TensorRTAPI/include/engine/EngineRunInference.inl +++ b/engines/TensorRTAPI/include/engine/EngineRunInference.inl @@ -6,6 +6,19 @@ #include "TRTCompat.h" #include "ANSLicense.h" // ANS_DBG macro for DebugView logging +#ifdef _WIN32 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include +# include +# include +# pragma comment(lib, "psapi.lib") +#endif + // Per-device mutex for CUDA graph capture. // TRT's enqueueV3 uses shared internal resources (workspace, memory pools) // at the CUDA context level. When two Engine instances on the same GPU @@ -398,6 +411,56 @@ bool Engine::runInference(const std::vector>& i const int64_t myInfNum = s_globalInfCount.fetch_add(1) + 1; s_globalActiveInf.fetch_add(1); + // ── Process-wide host-RAM heartbeat (once per ~60s) ────────────────────── + // Diagnostic for long-run leak hunts: if [PROC_MEM] privateMB climbs while + // [TRT_SM100] VRAM stays flat, the leak is on the host side (FFmpeg + // contexts, RTSP threads, GDI objects). Cheap when not firing — one clock + // read, one relaxed atomic load and one compare in the hot path.
+#ifdef _WIN32 + { + using clk = std::chrono::steady_clock; + static std::atomic s_hbLastNs{0}; + const int64_t nowNs = clk::now().time_since_epoch().count(); + int64_t prev = s_hbLastNs.load(std::memory_order_relaxed); + constexpr int64_t kIntervalNs = 60LL * 1'000'000'000LL; + if (nowNs - prev >= kIntervalNs && + s_hbLastNs.compare_exchange_strong(prev, nowNs, + std::memory_order_relaxed)) { + PROCESS_MEMORY_COUNTERS_EX pmc{}; + pmc.cb = sizeof(pmc); + GetProcessMemoryInfo(GetCurrentProcess(), + reinterpret_cast(&pmc), + sizeof(pmc)); + DWORD gdi = GetGuiResources(GetCurrentProcess(), GR_GDIOBJECTS); + DWORD usr = GetGuiResources(GetCurrentProcess(), GR_USEROBJECTS); + + // Thread count via Toolhelp snapshot (filter to current PID). + DWORD threads = 0; + HANDLE snap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); + if (snap != INVALID_HANDLE_VALUE) { + THREADENTRY32 te{ sizeof(te) }; + const DWORD pid = GetCurrentProcessId(); + if (Thread32First(snap, &te)) { + do { + if (te.th32OwnerProcessID == pid) ++threads; + } while (Thread32Next(snap, &te)); + } + CloseHandle(snap); + } + + ANS_DBG("PROC_MEM", + "privateMB=%llu workingMB=%llu peakWorkingMB=%llu " + "pagefileMB=%llu gdi=%lu user=%lu threads=%lu", + (unsigned long long)(pmc.PrivateUsage >> 20), + (unsigned long long)(pmc.WorkingSetSize >> 20), + (unsigned long long)(pmc.PeakWorkingSetSize >> 20), + (unsigned long long)(pmc.PagefileUsage >> 20), + (unsigned long)gdi, (unsigned long)usr, + (unsigned long)threads); + } + } +#endif + // Per-thread tracking { static thread_local int64_t s_infCount = 0; @@ -935,15 +998,29 @@ bool Engine::runInference(const std::vector>& i } // ============================================================================ - // Per-inference total timing breakdown (mutex wait + preprocess + GPU) + // Slow-inference alarm — ONE-SIDED FILTER, NOT A DISTRIBUTION // ============================================================================ + // This emits a DebugView line ONLY when a 
single inference's total wall + // time (mutex-wait + GPU execution) exceeds 100 ms. Fast calls are silent. + // + // Consequence: if you aggregate `[TRT_Slow]` lines and compute an average, + // you get the mean of the slow *tail*, NOT the real average inference + // time. Expect this avg to look dramatic (~200–400 ms) because by design + // every sample here is already slow. A typical inference on a healthy + // system fires this line for ~1–3% of calls; >10% indicates a problem. + // + // For the true per-inference distribution, look at `[TRT_SM100] #N ... + // avgMs=... maxMs=...` (running-average, emitted every 50 inferences). + // The tag was previously `[TRT_Timing]` which misled readers into + // interpreting the avg as overall pipeline latency. { double totalMs = std::chrono::duration( std::chrono::steady_clock::now() - _mutexWaitStart).count(); double gpuMs = totalMs - _mutexWaitMs; // Everything after mutex acquired - // Log every inference that takes >100ms total (including mutex wait) if (totalMs > 100.0) { - ANS_DBG("TRT_Timing", "total=%.1fms (mutex=%.1fms gpu=%.1fms) batch=%d active=%d", + ANS_DBG("TRT_Slow", + "SLOW inference total=%.1fms (mutex=%.1fms gpu=%.1fms) batch=%d active=%d " + "(this filter only fires for calls >100ms)", totalMs, _mutexWaitMs, gpuMs, batchSize, s_globalActiveInf.load()); } } diff --git a/modules/ANSCV/ANSFLV.cpp b/modules/ANSCV/ANSFLV.cpp index 821a098..1079e80 100644 --- a/modules/ANSCV/ANSFLV.cpp +++ b/modules/ANSCV/ANSFLV.cpp @@ -2,6 +2,7 @@ #include "ANSMatRegistry.h" #include "ANSGpuFrameOps.h" #include "ANSCVVendorGate.h" // anscv_vendor_gate::IsNvidiaGpuAvailable() +#include "ANSLicense.h" // ANS_DBG macro #include #include #include "media_codec.h" @@ -251,6 +252,23 @@ namespace ANSCENTER { return _pLastFrame; // Shallow copy (fast) } + // Early stale-out: if the decoder hasn't produced a frame in 5s the + // source is dead. 
Skip _playerClient->getImage() entirely and return + // the cached frame with unchanged _pts so LabVIEW sees STALE PTS one + // poll earlier and triggers reconnect. + if (!_pLastFrame.empty()) { + double ageMs = _playerClient->getLastFrameAgeMs(); + if (ageMs >= 5000.0) { + ANS_DBG("FLV_GetImage", + "EARLY STALE: ageMs=%.1f pts=%lld url=%s — skipping getImage()", + ageMs, (long long)_pts, _url.c_str()); + width = _imageWidth; + height = _imageHeight; + pts = _pts; + return _pLastFrame; + } + } + int imageW = 0, imageH = 0; int64_t currentPts = 0; diff --git a/modules/ANSCV/ANSMJPEG.cpp b/modules/ANSCV/ANSMJPEG.cpp index 56af91b..cee454e 100644 --- a/modules/ANSCV/ANSMJPEG.cpp +++ b/modules/ANSCV/ANSMJPEG.cpp @@ -2,6 +2,7 @@ #include "ANSMatRegistry.h" #include "ANSGpuFrameOps.h" #include "ANSCVVendorGate.h" // anscv_vendor_gate::IsNvidiaGpuAvailable() +#include "ANSLicense.h" // ANS_DBG macro #include #include #include "media_codec.h" @@ -239,6 +240,23 @@ namespace ANSCENTER { return _pLastFrame; // Shallow copy (fast) } + // Early stale-out: if the decoder hasn't produced a frame in 5s the + // source is dead. Skip _playerClient->getImage() entirely and return + // the cached frame with unchanged _pts so LabVIEW sees STALE PTS one + // poll earlier and triggers reconnect. 
+ if (!_pLastFrame.empty()) { + double ageMs = _playerClient->getLastFrameAgeMs(); + if (ageMs >= 5000.0) { + ANS_DBG("MJPEG_GetImage", + "EARLY STALE: ageMs=%.1f pts=%lld url=%s — skipping getImage()", + ageMs, (long long)_pts, _url.c_str()); + width = _imageWidth; + height = _imageHeight; + pts = _pts; + return _pLastFrame; + } + } + int imageW = 0, imageH = 0; int64_t currentPts = 0; diff --git a/modules/ANSCV/ANSOpenCV.cpp b/modules/ANSCV/ANSOpenCV.cpp index ffe5255..f92f0b3 100644 --- a/modules/ANSCV/ANSOpenCV.cpp +++ b/modules/ANSCV/ANSOpenCV.cpp @@ -473,7 +473,8 @@ namespace ANSCENTER //} std::string ANSOPENCV::EncodeJpegString(const cv::Mat& img, int quality) { - std::lock_guard lock(_mutex); + // Lock-free: each call creates its own tjInitCompress handle and local + // buffers. No shared mutable state — safe to run concurrently. tjhandle _jpegCompressor = nullptr; unsigned char* jpegBuf = nullptr; @@ -571,7 +572,7 @@ namespace ANSCENTER return ""; } std::string ANSOPENCV::MatToBinaryData(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: forwards to EncodeJpegString which is itself lock-free. // Check if the image is empty or has invalid data if (image.empty() || !image.data || !image.u) { return ""; @@ -591,7 +592,8 @@ namespace ANSCENTER return ""; } void ANSOPENCV::ImageResize(const cv::Mat& inputFrame, int width, int height, cv::Mat& outputFrame) { - std::lock_guard lock(_mutex); + // Lock-free: _licenseValid is std::atomic, cv::resize is reentrant, + // all Mats here are local. Safe to call concurrently across threads. 
if (!_licenseValid) { outputFrame = inputFrame; @@ -647,9 +649,10 @@ namespace ANSCENTER outputFrame = inputFrame; } } - void ANSOPENCV::ImageResizeWithRatio(const cv::Mat& inputFrame, int width, cv::Mat& outputFrame) + void ANSOPENCV::ImageResizeWithRatio(const cv::Mat& inputFrame, int width, cv::Mat& outputFrame) { - std::lock_guard lock(_mutex); + // Lock-free: _licenseValid is std::atomic, cv::resize is reentrant, + // all Mats here are local. Safe to call concurrently across threads. if (!_licenseValid) { outputFrame = inputFrame; // Shallow copy (fast) return; @@ -702,7 +705,7 @@ namespace ANSCENTER } } cv::Mat ANSOPENCV::BlurObjects(const cv::Mat& image, const std::vector& objects) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. // Check for valid license and empty input if (!_licenseValid || image.empty()) return image; @@ -725,7 +728,7 @@ namespace ANSCENTER return outputImage; } cv::Mat ANSOPENCV::BlurBackground(const cv::Mat& image, const std::vector& objects) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. // Check for valid license and empty input if (!_licenseValid || image.empty()) return image; @@ -749,7 +752,7 @@ namespace ANSCENTER return blurredImage; } cv::Mat ANSOPENCV::ToGray(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. // Check for valid license if (!_licenseValid) return image; @@ -779,7 +782,7 @@ namespace ANSCENTER return grayMat; } cv::Mat ANSOPENCV::ImageDenoise(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. 
if (!_licenseValid || image.empty()) { return image; @@ -797,7 +800,7 @@ namespace ANSCENTER return denoised_image; } cv::Mat ANSOPENCV::ImageCrop(const cv::Mat& inputImage, const cv::Rect& resizeROI, int originalImageSize) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. // License validation if (!_licenseValid) { @@ -870,7 +873,7 @@ namespace ANSCENTER } } cv::Mat ANSOPENCV::ImageRepair(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return image; @@ -904,7 +907,7 @@ namespace ANSCENTER } } std::string ANSOPENCV::PatternMatches(cv::Mat& image, cv::Mat& templateImage, double threshold) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. std::vector detectedObjects; @@ -968,7 +971,7 @@ namespace ANSCENTER } } std::string ANSOPENCV::QRDecoder(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return ""; @@ -1067,7 +1070,7 @@ namespace ANSCENTER } } std::string ANSOPENCV::QRDecoderWithBBox(const cv::Mat& image, int maxImageSize, const std::vector& bBox) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return ""; @@ -1199,7 +1202,8 @@ namespace ANSCENTER } } std::string ANSOPENCV::MatToBase64(const cv::Mat& image) { - std::lock_guard lock(_mutex); + // Lock-free: _licenseValid is std::atomic, and CompressJpegToString + // uses a thread_local TurboJpegCompressor. Safe across threads. if (!_licenseValid || image.empty()) { return ""; @@ -1228,7 +1232,7 @@ namespace ANSCENTER } } cv::Mat ANSOPENCV::ImageDarkEnhancement(const cv::Mat& img, double brightnessScaleFactor) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. 
if (!_licenseValid || img.empty()) { return img; // Shallow copy (fast) @@ -1259,7 +1263,7 @@ namespace ANSCENTER } } cv::Mat ANSOPENCV::ImageContrastEnhancement(const cv::Mat& src) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. double clipLimit = 2.0; if (!_licenseValid || src.empty()) { return src; @@ -1312,7 +1316,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::ImageWhiteBalance(const cv::Mat& src) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || src.empty()) { return src; // Shallow copy (fast) @@ -1366,7 +1370,7 @@ namespace ANSCENTER } } std::vector ANSOPENCV::GetBoundingBoxes(std::string strBBoxes) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local data only. Safe across threads. std::vector bBoxes; if (!_licenseValid) return bBoxes; @@ -1410,7 +1414,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::RotateImage(const cv::Mat& image, double angle) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return image; // Shallow copy (fast) @@ -1473,7 +1477,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::FlipImage(const cv::Mat& image, int flipCode) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return image; // Shallow copy (fast) @@ -1502,7 +1506,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::ShiftImage(const cv::Mat& image, int shiftX, int shiftY) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid) return image; if (image.empty()) return image; @@ -1529,7 +1533,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::AddGaussianNoise(const cv::Mat& image, double mean, double stddev) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. 
if (!_licenseValid || image.empty()) { return image; @@ -1568,7 +1572,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::AddSaltAndPepperNoise(const cv::Mat& image, double amount) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return image; @@ -1607,7 +1611,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::AddSpeckleNoise(const cv::Mat& image, double stddev) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid || image.empty()) { return image; // Shallow copy (fast) @@ -1755,7 +1759,7 @@ namespace ANSCENTER } double ANSOPENCV::CalculateIoU(const cv::Rect& box1, const cv::Rect& box2) { - std::lock_guard lock(_mutex); + // Lock-free: pure computation over inputs. Safe across threads. int x1 = max(box1.x, box2.x); int y1 = max(box1.y, box2.y); int x2 = min(box1.x + box1.width, box2.x + box2.width); @@ -1769,7 +1773,7 @@ namespace ANSCENTER return iou; } void ANSOPENCV::NonMaximumSuppression(std::vector& detectedObjects, double iouThreshold) { - std::lock_guard lock(_mutex); + // Lock-free: operates on caller-owned vector. Safe across threads. std::sort(detectedObjects.begin(), detectedObjects.end(), [](const DetectionObject& a, const DetectionObject& b) { return a.confidence > b.confidence; @@ -1794,7 +1798,7 @@ namespace ANSCENTER } cv::Mat ANSOPENCV::ImageResizeV2(const cv::Mat& inputImage, int resizeWidth, int originalImageSize) { - std::lock_guard lock(_mutex); + // Lock-free: operates on local Mats only. Safe across threads. if (!_licenseValid) { std::cerr << "Error: License is not valid in ImageResizeV2." 
<< std::endl; @@ -4210,12 +4214,9 @@ extern "C" __declspec(dllexport) void ANSCV_FreeCameraResource() { } extern "C" __declspec(dllexport) int ANSCV_ResizeImage_Static(unsigned char* inputImage, unsigned int bufferLength, int width, int height, int& newWidth, int& newHeight, LStrHandle outputImage) { - //std::lock_guard lock(imageMutex); // Automatically locks and unlocks - std::unique_lock lock(timeImageMutex, std::defer_lock); - if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ResizeImage_Static!" << std::endl; - return -6; - } + // Lock-free: operates on caller-owned input/output buffers only. No + // registered cv::Mat is touched, so the global timeImageMutex would + // serialize calls without protecting any shared state. try { cv::Mat inputFrame = cv::imdecode(cv::Mat(1, bufferLength, CV_8UC1, inputImage), cv::IMREAD_COLOR); cv::Mat outputFrame = ANSCENTER::ANSOPENCV::resizeImageToFit(inputFrame, width, height, newWidth, newHeight); @@ -5019,38 +5020,48 @@ extern "C" __declspec(dllexport) int ANSCV_CreateImageFromFile_S(const char* ima // Image Preprocessing extern "C" __declspec(dllexport) int ANSCV_ImageAutoWhiteBalance_S(cv::Mat** imageIn) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } try { + // Shallow-copy input under lock so the processor sees a stable Mat + // even if another thread writes to *imageIn concurrently. + cv::Mat localInput; + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageAutoWhiteBalance_S!" 
<< std::endl; + return -6; + } + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageAutoWhiteBalance_S!" << std::endl; + return -2; + } + localInput = **imageIn; // ref-counted shallow copy + } + ANSCENTER::ANSOPENCV ansCVInstance; if (!ansCVInstance.Init("")) { std::cerr << "Error: Failed to initialize ANSCV instance!" << std::endl; return -5; } - cv::Mat imOut = ansCVInstance.ImageWhiteBalance(**imageIn); - // Thread-safe assignment + cv::Mat imOut = ansCVInstance.ImageWhiteBalance(localInput); + if (imOut.empty()) { + std::cerr << "Error: White balance processing failed in ANSCV_ImageAutoWhiteBalance_S!" << std::endl; + return 0; + } + + // Swap back under lock. { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageAutoWhiteBalance_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageAutoWhiteBalance_S!" << std::endl; - return 0; - } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageAutoWhiteBalance_S!" << std::endl; + return -2; } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5066,32 +5077,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageAutoWhiteBalance_S(cv::Mat** ima extern "C" __declspec(dllexport) int ANSCV_ImageBrightEnhance_S(cv::Mat** imageIn, double brightnessScaleFactor) { gpu_frame_invalidate(imageIn ? 
*imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - cv::Mat imOut = ansCVInstance.ImageDarkEnhancement(**imageIn, brightnessScaleFactor); + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageBrightEnhance_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Lock only during shared resource write - if (imOut.empty()) { - std::cerr << "Error: Brightness enhancement failed in ANSCV_ImageBrightEnhance_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageBrightEnhance_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.ImageDarkEnhancement(localInput, brightnessScaleFactor); + if (imOut.empty()) { + std::cerr << "Error: Brightness enhancement failed in ANSCV_ImageBrightEnhance_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageBrightEnhance_S!" 
<< std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageBrightEnhance_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5107,37 +5126,41 @@ extern "C" __declspec(dllexport) int ANSCV_ImageBrightEnhance_S(cv::Mat** imageI extern "C" __declspec(dllexport) int ANSCV_ImageContrastEnhance_S(cv::Mat** imageIn) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } try { - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Perform white balance correction - cv::Mat imOut = ansCVInstance.ImageContrastEnhancement(**imageIn); - + cv::Mat localInput; { - // Assign processed image back to input pointer - //std::lock_guard lock(imageMutex); // Lock only during shared resource write std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageContrastEnhance_S!" << std::endl; return -6; } - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageContrastEnhance_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageContrastEnhance_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" 
<< std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; // Success + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.ImageContrastEnhancement(localInput); + if (imOut.empty()) { + std::cerr << "Error: Contrast enhancement failed in ANSCV_ImageContrastEnhance_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageContrastEnhance_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageContrastEnhance_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5153,34 +5176,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageContrastEnhance_S(cv::Mat** ima extern "C" __declspec(dllexport) int ANSCV_ImageDenoise_S(cv::Mat** imageIn) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Perform denoising - cv::Mat imOut = ansCVInstance.ImageDenoise(**imageIn); + cv::Mat localInput; { - //std::lock_guard lock(imageMutex); // Lock only during shared resource modification std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageDenoise_S!" << std::endl; return -6; } - if (imOut.empty()) { - std::cerr << "Error: Denoising processing failed in ANSCV_ImageDenoise_S!" 
<< std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageDenoise_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; // Success + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.ImageDenoise(localInput); + if (imOut.empty()) { + std::cerr << "Error: Denoising failed in ANSCV_ImageDenoise_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageDenoise_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageDenoise_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5195,34 +5224,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageDenoise_S(cv::Mat** imageIn) { extern "C" __declspec(dllexport) int ANSCV_ImageRepair_S(cv::Mat** imageIn) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" 
<< std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Perform image repair - cv::Mat imOut = ansCVInstance.ImageRepair(**imageIn); + cv::Mat localInput; { - //std::lock_guard lock(imageMutex); // Lock only during shared resource modification std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageRepair_S!" << std::endl; return -6; } - if (imOut.empty()) { - std::cerr << "Error: Image repair processing failed in ANSCV_ImageRepair_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageRepair_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; // Success + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.ImageRepair(localInput); + if (imOut.empty()) { + std::cerr << "Error: Image repair failed in ANSCV_ImageRepair_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageRepair_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageRepair_S!" 
<< std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5237,34 +5272,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageRepair_S(cv::Mat** imageIn) { extern "C" __declspec(dllexport) int ANSCV_ImageToGray_S(cv::Mat** imageIn) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - // Perform white balance correction - cv::Mat imOut = ansCVInstance.ToGray(**imageIn); + cv::Mat localInput; { - std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageToGray_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Lock only during shared resource modification - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageToGray_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageToGray_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.ToGray(localInput); + if (imOut.empty()) { + std::cerr << "Error: Gray conversion failed in ANSCV_ImageToGray_S!" 
<< std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageToGray_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageToGray_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5279,35 +5320,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageToGray_S(cv::Mat** imageIn) { extern "C" __declspec(dllexport) int ANSCV_ImageRotate_S(cv::Mat** imageIn, double angle) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Perform white balance correction - cv::Mat imOut = ansCVInstance.RotateImage(**imageIn, angle); - // Assign processed image back to input pointer + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageRotate_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageRotate_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageRotate_S!" 
<< std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.RotateImage(localInput, angle); + if (imOut.empty()) { + std::cerr << "Error: Rotation failed in ANSCV_ImageRotate_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageRotate_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageRotate_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5323,35 +5369,40 @@ extern "C" __declspec(dllexport) int ANSCV_ImageRotate_S(cv::Mat** imageIn, dou extern "C" __declspec(dllexport) int ANSCV_ImageFlip_S(cv::Mat** imageIn, int flipCode) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Perform white balance correction - cv::Mat imOut = ansCVInstance.FlipImage(**imageIn, flipCode); - // Assign processed image back to input pointer + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageFlip_S!" 
<< std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageFlip_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageFlip_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + cv::Mat imOut = ansCVInstance.FlipImage(localInput, flipCode); + if (imOut.empty()) { + std::cerr << "Error: Flip failed in ANSCV_ImageFlip_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageFlip_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageFlip_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5368,36 +5419,41 @@ extern "C" __declspec(dllexport) int ANSCV_ImageFlip_S(cv::Mat** imageIn, int f extern "C" __declspec(dllexport) int ANSCV_ImageBlurObjects_S(cv::Mat** imageIn, const char* strBboxes) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" 
<< std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - std::vector objects = ansCVInstance.GetBoundingBoxes(strBboxes); - // Perform white balance correction - cv::Mat imOut = ansCVInstance.BlurObjects(**imageIn, objects); - - // Assign processed image back to input pointer + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageBlurObjects_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageBlurObjects_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageBlurObjects_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + std::vector objects = ansCVInstance.GetBoundingBoxes(strBboxes); + cv::Mat imOut = ansCVInstance.BlurObjects(localInput, objects); + if (imOut.empty()) { + std::cerr << "Error: BlurObjects failed in ANSCV_ImageBlurObjects_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageBlurObjects_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageBlurObjects_S!" 
<< std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5413,35 +5469,41 @@ extern "C" __declspec(dllexport) int ANSCV_ImageBlurObjects_S(cv::Mat** imageIn extern "C" __declspec(dllexport) int ANSCV_ImageBlurBackground_S(cv::Mat** imageIn, const char* strBboxes) { gpu_frame_invalidate(imageIn ? *imageIn : nullptr); try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - std::vector objects = ansCVInstance.GetBoundingBoxes(strBboxes); - // Perform white balance correction - cv::Mat imOut = ansCVInstance.BlurBackground(**imageIn, objects); - // Assign processed image back to input pointer + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageBlurBackground_S!" << std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety - if (imOut.empty()) { - std::cerr << "Error: White balance processing failed in ANSCV_ImageBlurBackground_S!" << std::endl; - return 0; + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageBlurBackground_S!" << std::endl; + return -2; } - else { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" 
<< std::endl; - return -2; - } - **imageIn = std::move(imOut); - return 1; + localInput = **imageIn; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + std::vector objects = ansCVInstance.GetBoundingBoxes(strBboxes); + cv::Mat imOut = ansCVInstance.BlurBackground(localInput, objects); + if (imOut.empty()) { + std::cerr << "Error: BlurBackground failed in ANSCV_ImageBlurBackground_S!" << std::endl; + return 0; + } + + { + std::unique_lock lock(timeImageMutex, std::defer_lock); + if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { + std::cerr << "Error: Mutex timeout in ANSCV_ImageBlurBackground_S!" << std::endl; + return -6; } + if (!imageIn || !(*imageIn)) { + std::cerr << "Error: Image became invalid in ANSCV_ImageBlurBackground_S!" << std::endl; + return -2; + } + **imageIn = std::move(imOut); + return 1; } } catch (const std::exception& e) { @@ -5456,44 +5518,37 @@ extern "C" __declspec(dllexport) int ANSCV_ImageBlurBackground_S(cv::Mat** imag extern "C" __declspec(dllexport) int ANSCV_ImageQRDecoder_S(cv::Mat** imageIn, int maxImageWidth, const char* strBboxes, LStrHandle detectedQRText) { try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - std::vector Bboxes = ansCVInstance.GetBoundingBoxes(strBboxes); - // Decode the QR code - std::string qrText = ansCVInstance.QRDecoderWithBBox(**imageIn, maxImageWidth, Bboxes); + cv::Mat localInput; { - // Assign QR decoded text to detectedQRText handle std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImageQRDecoder_S!" 
<< std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety when modifying the handle - if (qrText.empty()) { - std::cerr << "Error: QR decoding failed in ANSCV_ImageQRDecoder_S!" << std::endl; - return 0; - } - int size = qrText.length(); - if (size > 0) { - MgErr error; - error = DSSetHandleSize(detectedQRText, sizeof(int32) + size * sizeof(uChar)); - if (error == noErr) { - (*detectedQRText)->cnt = size; - memcpy((*detectedQRText)->str, qrText.c_str(), size); - return 1; // Success - } - else { - return 0; // Error setting handle size - } - } - else { - return 0; // No QR code found + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImageQRDecoder_S!" << std::endl; + return -2; } + localInput = **imageIn; } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + std::vector Bboxes = ansCVInstance.GetBoundingBoxes(strBboxes); + std::string qrText = ansCVInstance.QRDecoderWithBBox(localInput, maxImageWidth, Bboxes); + if (qrText.empty()) { + std::cerr << "Error: QR decoding failed in ANSCV_ImageQRDecoder_S!" << std::endl; + return 0; + } + + // detectedQRText is a caller-owned LabVIEW handle; no global lock needed. 
+ const int size = static_cast(qrText.length()); + if (size <= 0) return 0; + MgErr error = DSSetHandleSize(detectedQRText, sizeof(int32) + size * sizeof(uChar)); + if (error != noErr) return 0; + (*detectedQRText)->cnt = size; + memcpy((*detectedQRText)->str, qrText.c_str(), size); + return 1; } catch (const std::exception& e) { std::cerr << "Error: Exception occurred in ANSCV_ImageQRDecoder_S: " << e.what() << std::endl; @@ -5507,47 +5562,41 @@ extern "C" __declspec(dllexport) int ANSCV_ImageQRDecoder_S(cv::Mat** imageIn, i extern "C" __declspec(dllexport) int ANSCV_ImagePatternMatchs_S(cv::Mat** imageIn, const char* templateFilePath, double threshold, LStrHandle detectedMatchedLocations) { try { - if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { - std::cerr << "Error: Invalid or empty input image in ANSCV_CloneImage_S!" << std::endl; - return -2; - } - ANSCENTER::ANSOPENCV ansCVInstance; - ansCVInstance.Init(""); // Initialize ANSCV instance - - // Load template image - cv::Mat templateImage = cv::imread(templateFilePath, cv::IMREAD_COLOR); - if (templateImage.empty()) { - std::cerr << "Error: Failed to load template image from " << templateFilePath << std::endl; - return -2; // Return error if template cannot be loaded - } - - // Perform pattern matching - std::string strMatchedLocations = ansCVInstance.PatternMatches(**imageIn, templateImage, threshold); + cv::Mat localInput; { std::unique_lock lock(timeImageMutex, std::defer_lock); if (!lock.try_lock_for(std::chrono::milliseconds(MUTEX_TIMEOUT_MS))) { - std::cerr << "Error: Mutex timeout in ANSCV_ReSizeImage_S!" << std::endl; + std::cerr << "Error: Mutex timeout in ANSCV_ImagePatternMatchs_S!" 
<< std::endl; return -6; } - //std::lock_guard lock(imageMutex); // Ensure thread safety when modifying detectedMatchedLocations - int size = strMatchedLocations.length(); - if (size > 0) { - MgErr error; - error = DSSetHandleSize(detectedMatchedLocations, sizeof(int32) + size * sizeof(uChar)); - if (error == noErr) { - (*detectedMatchedLocations)->cnt = size; - memcpy((*detectedMatchedLocations)->str, strMatchedLocations.c_str(), size); - return 1; // Success - } - else { - std::cerr << "Error: Failed to set handle size for detectedMatchedLocations!" << std::endl; - return 0; // Error setting handle size - } - } - else { - return 0; // No matches found + if (!imageIn || !(*imageIn) || (*imageIn)->empty() || !(*imageIn)->data) { + std::cerr << "Error: Invalid or empty input image in ANSCV_ImagePatternMatchs_S!" << std::endl; + return -2; } + localInput = **imageIn; } + + cv::Mat templateImage = cv::imread(templateFilePath, cv::IMREAD_COLOR); + if (templateImage.empty()) { + std::cerr << "Error: Failed to load template image from " << templateFilePath << std::endl; + return -2; + } + + ANSCENTER::ANSOPENCV ansCVInstance; + ansCVInstance.Init(""); + std::string strMatchedLocations = ansCVInstance.PatternMatches(localInput, templateImage, threshold); + + // detectedMatchedLocations is a caller-owned LabVIEW handle; no global lock needed. + const int size = static_cast(strMatchedLocations.length()); + if (size <= 0) return 0; + MgErr error = DSSetHandleSize(detectedMatchedLocations, sizeof(int32) + size * sizeof(uChar)); + if (error != noErr) { + std::cerr << "Error: Failed to set handle size for detectedMatchedLocations!" 
<< std::endl; + return 0; + } + (*detectedMatchedLocations)->cnt = size; + memcpy((*detectedMatchedLocations)->str, strMatchedLocations.c_str(), size); + return 1; } catch (const std::exception& e) { std::cerr << "Error: Exception occurred in ANSCV_ImagePatternMatchs_S: " << e.what() << std::endl; diff --git a/modules/ANSCV/ANSOpenCV.h b/modules/ANSCV/ANSOpenCV.h index ab41f51..a646b0d 100644 --- a/modules/ANSCV/ANSOpenCV.h +++ b/modules/ANSCV/ANSOpenCV.h @@ -155,7 +155,9 @@ namespace ANSCENTER std::recursive_mutex _mutex; //std::once_flag licenseOnceFlag; // For one-time license check - bool _licenseValid = false; + // Atomic so lock-free methods (ImageResize, ImageResizeWithRatio, + // MatToBinaryData, EncodeJpegString) can read it without _mutex. + std::atomic _licenseValid{ false }; public: }; } diff --git a/modules/ANSCV/ANSRTMP.cpp b/modules/ANSCV/ANSRTMP.cpp index dfe996e..b88f655 100644 --- a/modules/ANSCV/ANSRTMP.cpp +++ b/modules/ANSCV/ANSRTMP.cpp @@ -2,6 +2,7 @@ #include "ANSMatRegistry.h" #include "ANSGpuFrameOps.h" #include "ANSCVVendorGate.h" // anscv_vendor_gate::IsNvidiaGpuAvailable() +#include "ANSLicense.h" // ANS_DBG macro #include #include "media_codec.h" #include @@ -245,6 +246,23 @@ namespace ANSCENTER { return _pLastFrame; // Shallow copy (fast) } + // Early stale-out: if the decoder hasn't produced a frame in 5s the + // source is dead. Skip _playerClient->getImage() entirely and return + // the cached frame with unchanged _pts so LabVIEW sees STALE PTS one + // poll earlier and triggers reconnect. 
+ if (!_pLastFrame.empty()) { + double ageMs = _playerClient->getLastFrameAgeMs(); + if (ageMs >= 5000.0) { + ANS_DBG("RTMP_GetImage", + "EARLY STALE: ageMs=%.1f pts=%lld url=%s — skipping getImage()", + ageMs, (long long)_pts, _url.c_str()); + width = _imageWidth; + height = _imageHeight; + pts = _pts; + return _pLastFrame; + } + } + int imageW = 0, imageH = 0; int64_t currentPts = 0; diff --git a/modules/ANSCV/ANSSRT.cpp b/modules/ANSCV/ANSSRT.cpp index b25d809..01621d3 100644 --- a/modules/ANSCV/ANSSRT.cpp +++ b/modules/ANSCV/ANSSRT.cpp @@ -2,6 +2,7 @@ #include "ANSMatRegistry.h" #include "ANSGpuFrameOps.h" #include "ANSCVVendorGate.h" // anscv_vendor_gate::IsNvidiaGpuAvailable() +#include "ANSLicense.h" // ANS_DBG macro #include #include "media_codec.h" #include @@ -253,6 +254,23 @@ namespace ANSCENTER { return _pLastFrame; // Shallow copy (fast) } + // Early stale-out: if the decoder hasn't produced a frame in 5s the + // source is dead. Skip _playerClient->getImage() entirely and return + // the cached frame with unchanged _pts so LabVIEW sees STALE PTS one + // poll earlier and triggers reconnect. + if (!_pLastFrame.empty()) { + double ageMs = _playerClient->getLastFrameAgeMs(); + if (ageMs >= 5000.0) { + ANS_DBG("SRT_GetImage", + "EARLY STALE: ageMs=%.1f pts=%lld url=%s — skipping getImage()", + ageMs, (long long)_pts, _url.c_str()); + width = _imageWidth; + height = _imageHeight; + pts = _pts; + return _pLastFrame; + } + } + int imageW = 0, imageH = 0; int64_t currentPts = 0; diff --git a/modules/ANSFR/ANSFaceRecognizer.cpp b/modules/ANSFR/ANSFaceRecognizer.cpp index b80ef53..8c9e21e 100644 --- a/modules/ANSFR/ANSFaceRecognizer.cpp +++ b/modules/ANSFR/ANSFaceRecognizer.cpp @@ -91,9 +91,14 @@ namespace ANSCENTER { } if (!m_trtEngine) { - // Enable batch support - m_options.optBatchSize = 8; - m_options.maxBatchSize = 32; + // Enable batch support. 
maxBatchSize controls the TRT workspace + // allocation (~linear in batch); opt is the kernel-selection sweet + // spot. Max=4 was picked to fit 4 concurrent face crops per frame + // comfortably on 8 GB GPUs while freeing ~1.5 GB VRAM vs max=32 + // — most scenes have ≤4 faces visible, so throughput cost is + // near-zero (amortized per-face latency drops too at lower batch). + m_options.optBatchSize = 4; + m_options.maxBatchSize = 4; m_options.maxInputHeight = GPU_FACE_HEIGHT; m_options.minInputHeight = GPU_FACE_HEIGHT; diff --git a/modules/ANSLPR/ANSLPR_OD.cpp b/modules/ANSLPR/ANSLPR_OD.cpp index fa7b994..452de04 100644 --- a/modules/ANSLPR/ANSLPR_OD.cpp +++ b/modules/ANSLPR/ANSLPR_OD.cpp @@ -534,8 +534,12 @@ namespace ANSCENTER { _ocrModelConfig.inpHeight = 640; _ocrModelConfig.inpWidth = 640; - _ocrModelConfig.gpuOptBatchSize = 8; - _ocrModelConfig.gpuMaxBatchSize = 32; // desired max; engine builder auto-caps by GPU VRAM + // Max=4 chosen to fit typical plate counts per frame on 8 GB GPUs. + // Was opt=8/max=32 which sized TRT workspace for 32 concurrent plates + // (~1 GB for this model alone). Cap of 4 is still >= the usual 1–3 + // plates visible per camera frame, amortized throughput unchanged. + _ocrModelConfig.gpuOptBatchSize = 4; + _ocrModelConfig.gpuMaxBatchSize = 4; // desired max; engine builder auto-caps by GPU VRAM _ocrModelConfig.maxInputHeight = 640; _ocrModelConfig.maxInputWidth = 640; _ocrModelConfig.minInputHeight = 640; @@ -545,8 +549,9 @@ namespace ANSCENTER { _lpColourModelConfig.inpHeight = 224; _lpColourModelConfig.inpWidth = 224; - _lpColourModelConfig.gpuOptBatchSize = 8; - _lpColourModelConfig.gpuMaxBatchSize = 32; // desired max; engine builder auto-caps by GPU VRAM + // See _ocrModelConfig above — matching batch cap for consistency. 
+ _lpColourModelConfig.gpuOptBatchSize = 4; + _lpColourModelConfig.gpuMaxBatchSize = 4; // desired max; engine builder auto-caps by GPU VRAM _lpColourModelConfig.maxInputHeight = 224; _lpColourModelConfig.maxInputWidth = 224; _lpColourModelConfig.minInputHeight = 224; diff --git a/modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp b/modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp index 758963d..06f8b4c 100644 --- a/modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp +++ b/modules/ANSOCR/ANSRTOCR/RTOCRRecognizer.cpp @@ -28,8 +28,11 @@ bool RTOCRRecognizer::Initialize(const std::string& onnxPath, const std::string& ANSCENTER::Options options; options.deviceIndex = gpuId; options.precision = ANSCENTER::Precision::FP16; - options.maxBatchSize = 1; - options.optBatchSize = 1; + // maxBatch=4 matches FaceRecognizer / ALPR configuration — allows the + // recognizer to process up to 4 detected text lines in one call, + // amortizing per-invocation overhead while keeping TRT workspace small. + options.maxBatchSize = 4; + options.optBatchSize = 4; // Fixed height, dynamic width for recognition options.minInputHeight = imgH_; diff --git a/modules/ANSOCR/dllmain.cpp b/modules/ANSOCR/dllmain.cpp index 6dbc522..6628f94 100644 --- a/modules/ANSOCR/dllmain.cpp +++ b/modules/ANSOCR/dllmain.cpp @@ -185,11 +185,22 @@ extern "C" ANSOCR_API int CreateANSOCRHandleEx(ANSCENTER::ANSOCRBase** Handle, ANSCENTER::ANSLibsLoader::Initialize(); ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation(); { + // Describe the backend the engine-selector below will actually choose + // for this (hardware, engineMode) combination. Previous versions of + // this log claimed "TensorRT OCR enabled" based on hardware alone, + // which was misleading because engineMode=0 (auto) unconditionally + // picked ONNX — users saw the log and assumed TRT was running. 
+ const bool isNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); + const bool willUseTRT = + isNvidia && (engineMode == 0 /* auto → TRT on NVIDIA */ || + engineMode == 1 /* GPU → TRT on NVIDIA */); const char* vendorTag = - engineType == ANSCENTER::EngineType::NVIDIA_GPU ? "NVIDIA_GPU (TensorRT OCR enabled)" : - engineType == ANSCENTER::EngineType::AMD_GPU ? "AMD_GPU (ONNX Runtime / DirectML, TensorRT OCR DISABLED)" : - engineType == ANSCENTER::EngineType::OPENVINO_GPU ? "OPENVINO_GPU (ONNX Runtime / OpenVINO, TensorRT OCR DISABLED)" : - "CPU (ONNX Runtime, TensorRT OCR DISABLED)"; + engineType == ANSCENTER::EngineType::NVIDIA_GPU + ? (willUseTRT ? "NVIDIA_GPU (TensorRT OCR active)" + : "NVIDIA_GPU (TensorRT available, but engineMode forces ONNX)") + : engineType == ANSCENTER::EngineType::AMD_GPU ? "AMD_GPU (ONNX Runtime / DirectML, TensorRT OCR unavailable)" + : engineType == ANSCENTER::EngineType::OPENVINO_GPU ? "OPENVINO_GPU (ONNX Runtime / OpenVINO, TensorRT OCR unavailable)" + : "CPU (ONNX Runtime, TensorRT OCR unavailable)"; char buf[192]; snprintf(buf, sizeof(buf), "[ANSOCR] CreateANSOCRHandleEx: detected engineType=%d [%s], engineMode=%d\n", @@ -230,10 +241,23 @@ extern "C" ANSOCR_API int CreateANSOCRHandleEx(ANSCENTER::ANSOCRBase** Handle, // select, including DirectML for AMD). const bool isNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); switch (engineMode) { - case 0:// Auto-detect, always use ONNX for better compatibility, especially on AMD GPUs and high-res images - (*Handle) = new ANSCENTER::ANSONNXOCR(); + case 0: // Auto-detect — prefer TensorRT on NVIDIA, ONNX elsewhere. + // Previous policy was "always ONNX" for cross-platform safety, + // but on NVIDIA that defeated the point: each ANSONNXOCR handle + // allocates its own cls/dec/rec OrtSessions (no dedupe), which + // wasted ~300–600 MB VRAM per extra instance and ran ~2× slower + // than ANSRTOCR's shared-engine path via EnginePoolManager. 
+ if (isNvidia) { + limitSideLen = 960; + (*Handle) = new ANSCENTER::ANSRTOCR(); + } else { + // AMD / Intel / CPU — ANSRTOCR hard-requires CUDA and would + // crash. ANSONNXOCR auto-picks the correct ORT EP + // (DirectML on AMD, OpenVINO on Intel, CPU otherwise). + (*Handle) = new ANSCENTER::ANSONNXOCR(); + } break; - case 1:// GPU — use TensorRT engine ONLY on NVIDIA hardware. + case 1: // GPU — use TensorRT engine ONLY on NVIDIA hardware. if (isNvidia) { limitSideLen = 960; (*Handle) = new ANSCENTER::ANSRTOCR(); @@ -244,7 +268,7 @@ extern "C" ANSOCR_API int CreateANSOCRHandleEx(ANSCENTER::ANSOCRBase** Handle, (*Handle) = new ANSCENTER::ANSONNXOCR(); } break; - case 2:// CPU + case 2: // CPU (*Handle) = new ANSCENTER::ANSONNXOCR(); break; default: diff --git a/modules/ANSODEngine/dllmain.cpp b/modules/ANSODEngine/dllmain.cpp index c62c34f..88fcd2d 100644 --- a/modules/ANSODEngine/dllmain.cpp +++ b/modules/ANSODEngine/dllmain.cpp @@ -426,27 +426,37 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase** ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation(); if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU - //Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX - - if ((modelType == 4) || // TensorRT - (modelType == 14)|| // TensorRT Yolov10 - (modelType == 22)|| // TensorRT Pose - (modelType == 24)) // TensorRT Segmentation - { - if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) modelType = 31; // RTYOLO - else modelType=30;// ONNXYOLO - } - else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection) - (modelType == 17)|| // YOLO V12 - (modelType == 20) || // ONNX Classification - (modelType == 21) || // ONNX Pose - (modelType == 23) || // ONNX Segmentation - (modelType == 25)) // OBB Segmentation - { - modelType = 30; // ONNXYOLO - } - else { - // do nothing, use the modelType specified by user + 
// Route detection / pose / segmentation / OBB / classification to the best + // available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX + // handler. Unlisted modelType values are left untouched for the switch below. + // See CreateANSODHandleEx for the full rationale — three correctness bugs + // were fixed in that dispatcher and must be kept in sync across copies. + const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); + switch (modelType) { + // ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ── + case 3: // YOLOV8 / YOLOV11 + case 4: // generic TensorRT + case 14: // YOLOv10RTOD (TRT end-to-end NMS) + case 17: // YOLOV12 + modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */; + break; + // ── Pose ───────────────────────────────────────────────────────────── + case 21: // ONNXPOSE + case 22: // RTPOSE + modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */; + break; + // ── Segmentation ───────────────────────────────────────────────────── + case 23: // ONNXSEG + case 24: // RTSEG + modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */; + break; + // ── OBB / Classification (ONNX-only today — leave as-is) ───────────── + case 20: // ONNXCL + case 25: // ONNXOBB + break; + default: + // Any other modelType is handled directly by the switch below. 
+ break; } switch (detectionType) { @@ -764,27 +774,53 @@ extern "C" ANSODENGINE_API int CreateANSODHandleEx(ANSCENTER::ANSODBase** Handl ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation(); if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU - //Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX - - if ((modelType == 4) || // TensorRT - (modelType == 14)|| // TensorRT Yolov10 - (modelType == 22)|| // TensorRT Pose - (modelType == 24)) // TensorRT Segmentation - { - if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) modelType = 31; // RTYOLO - else modelType=30;// ONNXYOLO - } - else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection) - (modelType == 17)|| // YOLO V12 - (modelType == 20) || // ONNX Classification - (modelType == 21) || // ONNX Pose - (modelType == 23) || // ONNX Segmentation - (modelType == 25)) // OBB Segmentation - { - modelType = 30; // ONNXYOLO - } - else { - // do nothing, use the modelType specified by user + // Route detection / pose / segmentation / OBB / classification to the best + // available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX + // handler. Unlisted modelType values are left untouched for the switch below. + // + // Previous revisions of this block had three correctness bugs: + // (1) modelType == 3 / 17 (YoloV8/V11/V12 detection) was hard-wired to + // ONNXYOLO even on NVIDIA — bypassing the TensorRT path entirely and + // duplicating VRAM when multiple handles loaded the same .onnx (ORT + // has no EnginePoolManager dedupe). + // (2) modelType == 20 / 21 / 23 / 25 (ONNX CLS / POSE / SEG / OBB) was + // rewritten to 30 (ONNXYOLO = detection), making the dedicated + // case 20 / 21 / 23 / 25 handlers unreachable dead code. A user + // passing modelType=20 for classification ended up with a YOLO head.
+ // (3) modelType == 22 / 24 (TRT pose / TRT seg) on a non-NVIDIA box fell + // back to ONNXYOLO instead of the correct ONNXPOSE / ONNXSEG handler. + const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); + switch (modelType) { + // ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ── + case 3: // YOLOV8 / YOLOV11 + case 4: // generic TensorRT + case 14: // YOLOv10RTOD (TRT end-to-end NMS) + case 17: // YOLOV12 + modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */; + break; + // ── Pose ───────────────────────────────────────────────────────────── + case 21: // ONNXPOSE + case 22: // RTPOSE + modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */; + break; + // ── Segmentation ───────────────────────────────────────────────────── + case 23: // ONNXSEG + case 24: // RTSEG + modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */; + break; + // ── Oriented Bounding Box (ONNX-only today) ────────────────────────── + case 25: // ONNXOBB — no TRT variant; leave as-is + break; + // ── Classification (ONNX-only in this dispatcher) ──────────────────── + case 20: // ONNXCL — no TRT variant; leave as-is + break; + default: + // Any other modelType is handled directly by the switch below + // (TENSORFLOW, YOLOV4, YOLOV5, FACEDETECT, FACERECOGNIZE, ALPR, + // OCR, ANOMALIB, POSE, SAM, ODHUBMODEL, CUSTOMDETECTOR, CUSTOMPY, + // MOTIONDETECTOR, MOVIENET, ONNXSAM3, RTSAM3, ONNXYOLO=30, + // RTYOLO=31). Do nothing — keep user's value. + break; } // returnModelType will be set after the switch to reflect the actual // model class that was instantiated (e.g. RTYOLO→ONNXYOLO on AMD). 
@@ -1151,26 +1187,39 @@ extern "C" __declspec(dllexport) int LoadModelFromFolder(ANSCENTER::ANSODBase** if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU - //Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX - if ((modelType == 4) || // TensorRT - (modelType == 14) || // TensorRT Yolov10 - (modelType == 22) || // TensorRT Pose - (modelType == 24)) // TensorRT Segmentation + // Route detection / pose / segmentation / OBB / classification to the best + // available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX + // handler. Unlisted modelType values are left untouched for the switch below. + // See CreateANSODHandleEx for the full rationale — three correctness bugs + // were fixed in that dispatcher and must be kept in sync across copies. { - if (engineType == ANSCENTER::EngineType::NVIDIA_GPU)modelType = 31; // RTYOLO - else modelType = 30;// ONNXYOLO - } - else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection) - (modelType == 17) || // YOLO V12 - (modelType == 20) || // ONNX Classification - (modelType == 21) || // ONNX Pose - (modelType == 23) || // ONNX Segmentation - (modelType == 25)) // OBB Segmentation - { - modelType = 30; // ONNXYOLO - } - else { - // do nothing, use the modelType specified by user + const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); + switch (modelType) { + // ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ── + case 3: // YOLOV8 / YOLOV11 + case 4: // generic TensorRT + case 14: // YOLOv10RTOD (TRT end-to-end NMS) + case 17: // YOLOV12 + modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */; + break; + // ── Pose ───────────────────────────────────────────────────────── + case 21: // ONNXPOSE + case 22: // RTPOSE + modelType = onNvidia ? 
22 /* RTPOSE */ : 21 /* ONNXPOSE */; + break; + // ── Segmentation ───────────────────────────────────────────────── + case 23: // ONNXSEG + case 24: // RTSEG + modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */; + break; + // ── OBB / Classification (ONNX-only today — leave as-is) ───────── + case 20: // ONNXCL + case 25: // ONNXOBB + break; + default: + // Any other modelType is handled directly by the switch below. + break; + } } // NOTE: We intentionally do NOT destroy any existing *Handle here. // LabVIEW reuses DLL parameter buffer addresses, so *Handle may point @@ -1461,26 +1510,39 @@ ANSODENGINE_API int OptimizeModelStr(const char* modelFilePath, const char* mode ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation(); - //Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX - if ((modelType == 4) || // TensorRT - (modelType == 14) || // TensorRT Yolov10 - (modelType == 22) || // TensorRT Pose - (modelType == 24)) // TensorRT Segmentation + // Route detection / pose / segmentation / OBB / classification to the best + // available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX + // handler. Unlisted modelType values are left untouched for the switch below. + // See CreateANSODHandleEx for the full rationale — three correctness bugs + // were fixed in that dispatcher and must be kept in sync across copies. 
{ - if (engineType == ANSCENTER::EngineType::NVIDIA_GPU)modelType = 31; // RTYOLO - else modelType = 30;// ONNXYOLO - } - else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection) - (modelType == 17) || // YOLO V12 - (modelType == 20) || // ONNX Classification - (modelType == 21) || // ONNX Pose - (modelType == 23) || // ONNX Segmentation - (modelType == 25)) // OBB Segmentation - { - modelType = 30; // ONNXYOLO - } - else { - // do nothing, use the modelType specified by user + const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU); + switch (modelType) { + // ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ── + case 3: // YOLOV8 / YOLOV11 + case 4: // generic TensorRT + case 14: // YOLOv10RTOD (TRT end-to-end NMS) + case 17: // YOLOV12 + modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */; + break; + // ── Pose ───────────────────────────────────────────────────────── + case 21: // ONNXPOSE + case 22: // RTPOSE + modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */; + break; + // ── Segmentation ───────────────────────────────────────────────── + case 23: // ONNXSEG + case 24: // RTSEG + modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */; + break; + // ── OBB / Classification (ONNX-only today — leave as-is) ───────── + case 20: // ONNXCL + case 25: // ONNXOBB + break; + default: + // Any other modelType is handled directly by the switch below. + break; + } } diff --git a/modules/ANSODEngine/engine.h b/modules/ANSODEngine/engine.h index 49a0b69..1492924 100644 --- a/modules/ANSODEngine/engine.h +++ b/modules/ANSODEngine/engine.h @@ -720,8 +720,24 @@ void Engine::lockGpuClocks(int deviceIndex, int requestedMHz) { if (rc == nvml_types::SUCCESS) { m_clocksLocked = true; m_nvmlDeviceIdx = static_cast(deviceIndex); + // Always emit to DebugView so operators can confirm the lock took + // effect without needing to read engine-level verbose output. 
+ ANS_DBG("TRT_Clock", + "GPU clocks LOCKED at %u MHz (device %d) — P-state will stay high, " + "no WDDM down-clock between inferences", + targetMHz, deviceIndex); if (m_verbose) std::cout << "Info: GPU clocks locked at " << targetMHz << " MHz (device " << deviceIndex << ")" << std::endl; } else { + // Surface the failure reason + remediation in DebugView. Most common + // failure is access-denied (requires Administrator) or the driver + // refusing the requested frequency. Users see this in the log and + // know to elevate, set NVCP 'Prefer maximum performance', or run + // `nvidia-smi -lgc <MHz>,<MHz>` before launching. + ANS_DBG("TRT_Clock", + "GPU clock lock FAILED (nvml rc=%s) — expect 2-3x inference latency from " + "WDDM down-clocking. Fix: run as Admin, OR set NVCP 'Prefer maximum " + "performance' for this app, OR: nvidia-smi -lgc %u,%u", + errName(rc), targetMHz, targetMHz); if (m_verbose) { std::cout << "Warning: nvmlDeviceSetGpuLockedClocks failed: " << errName(rc) << std::endl; std::cout << " (Run as Administrator, or use: nvidia-smi -lgc " << targetMHz << "," << targetMHz << ")" << std::endl;