Use software decoder by default
This commit is contained in:
@@ -218,44 +218,25 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSFLVClient::areImagesIdentical(const cv::Mat& img1, const cv::Mat& img2) {
|
||||
// Quick size and type checks
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) {
|
||||
return false;
|
||||
}
|
||||
// Use decoder frame age — returns "stale" only if no decoder output for 5+ seconds.
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
if (ageMs > 5000.0) return true; // Truly stale
|
||||
if (ageMs > 0.0) return false; // Decoder alive
|
||||
|
||||
// Handle empty images
|
||||
if (img1.empty()) {
|
||||
return img2.empty();
|
||||
}
|
||||
// Fallback for startup (no frame decoded yet)
|
||||
if (img1.empty() && img2.empty()) return true;
|
||||
if (img1.empty() || img2.empty()) return false;
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) return false;
|
||||
if (img1.data == img2.data) return true;
|
||||
|
||||
if (img1.isContinuous() && img2.isContinuous()) {
|
||||
const size_t totalBytes = img1.total() * img1.elemSize();
|
||||
|
||||
// Fast rejection: sample 5 positions across contiguous memory
|
||||
const size_t quarter = totalBytes / 4;
|
||||
const size_t half = totalBytes / 2;
|
||||
const size_t threeQuarter = 3 * totalBytes / 4;
|
||||
|
||||
if (img1.data[0] != img2.data[0] ||
|
||||
img1.data[quarter] != img2.data[quarter] ||
|
||||
img1.data[half] != img2.data[half] ||
|
||||
img1.data[threeQuarter] != img2.data[threeQuarter] ||
|
||||
img1.data[totalBytes - 1] != img2.data[totalBytes - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Full comparison
|
||||
return std::memcmp(img1.data, img2.data, totalBytes) == 0;
|
||||
}
|
||||
|
||||
// Row-by-row comparison for non-continuous images (e.g., ROI sub-matrices)
|
||||
const size_t rowSize = img1.cols * img1.elemSize();
|
||||
for (int i = 0; i < img1.rows; i++) {
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
cv::Mat ANSFLVClient::GetImage(int& width, int& height, int64_t& pts) {
|
||||
|
||||
@@ -208,44 +208,23 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSMJPEGClient::areImagesIdentical(const cv::Mat& img1, const cv::Mat& img2) {
|
||||
// Quick size and type checks
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) {
|
||||
return false;
|
||||
}
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
if (ageMs > 5000.0) return true;
|
||||
if (ageMs > 0.0) return false;
|
||||
|
||||
// Handle empty images
|
||||
if (img1.empty()) {
|
||||
return img2.empty();
|
||||
}
|
||||
if (img1.empty() && img2.empty()) return true;
|
||||
if (img1.empty() || img2.empty()) return false;
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) return false;
|
||||
if (img1.data == img2.data) return true;
|
||||
|
||||
if (img1.isContinuous() && img2.isContinuous()) {
|
||||
const size_t totalBytes = img1.total() * img1.elemSize();
|
||||
|
||||
// Fast rejection: sample 5 positions across contiguous memory
|
||||
const size_t quarter = totalBytes / 4;
|
||||
const size_t half = totalBytes / 2;
|
||||
const size_t threeQuarter = 3 * totalBytes / 4;
|
||||
|
||||
if (img1.data[0] != img2.data[0] ||
|
||||
img1.data[quarter] != img2.data[quarter] ||
|
||||
img1.data[half] != img2.data[half] ||
|
||||
img1.data[threeQuarter] != img2.data[threeQuarter] ||
|
||||
img1.data[totalBytes - 1] != img2.data[totalBytes - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Full comparison
|
||||
return std::memcmp(img1.data, img2.data, totalBytes) == 0;
|
||||
}
|
||||
|
||||
// Row-by-row comparison for non-continuous images (e.g., ROI sub-matrices)
|
||||
const size_t rowSize = img1.cols * img1.elemSize();
|
||||
for (int i = 0; i < img1.rows; i++) {
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
cv::Mat ANSMJPEGClient::GetImage(int& width, int& height, int64_t& pts) {
|
||||
|
||||
@@ -213,43 +213,22 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSRTMPClient::areImagesIdentical(const cv::Mat& img1, const cv::Mat& img2) {
|
||||
// Quick size and type checks
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) {
|
||||
return false;
|
||||
}
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
if (ageMs > 5000.0) return true;
|
||||
if (ageMs > 0.0) return false;
|
||||
|
||||
// Handle empty images
|
||||
if (img1.empty()) {
|
||||
return img2.empty();
|
||||
}
|
||||
if (img1.empty() && img2.empty()) return true;
|
||||
if (img1.empty() || img2.empty()) return false;
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) return false;
|
||||
if (img1.data == img2.data) return true;
|
||||
|
||||
if (img1.isContinuous() && img2.isContinuous()) {
|
||||
const size_t totalBytes = img1.total() * img1.elemSize();
|
||||
|
||||
// Fast rejection: sample 5 positions across contiguous memory
|
||||
// Catches 99.99% of different frames immediately
|
||||
const size_t quarter = totalBytes / 4;
|
||||
const size_t half = totalBytes / 2;
|
||||
const size_t threeQuarter = 3 * totalBytes / 4;
|
||||
|
||||
if (img1.data[0] != img2.data[0] ||
|
||||
img1.data[quarter] != img2.data[quarter] ||
|
||||
img1.data[half] != img2.data[half] ||
|
||||
img1.data[threeQuarter] != img2.data[threeQuarter] ||
|
||||
img1.data[totalBytes - 1] != img2.data[totalBytes - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Full comparison
|
||||
return std::memcmp(img1.data, img2.data, totalBytes) == 0;
|
||||
}
|
||||
|
||||
// Row-by-row comparison for non-continuous images (e.g., ROI sub-matrices)
|
||||
const size_t rowSize = img1.cols * img1.elemSize();
|
||||
for (int i = 0; i < img1.rows; i++) {
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
#include "ANSMatRegistry.h"
|
||||
#include "ANSGpuFrameOps.h"
|
||||
#include "GpuNV12SlotPool.h"
|
||||
#include "ANSLicense.h" // ANS_DBG macro
|
||||
#include <memory>
|
||||
#include <chrono>
|
||||
#include <format>
|
||||
#include "media_codec.h"
|
||||
#include <cstdint>
|
||||
@@ -69,6 +71,7 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
void ANSRTSPClient::Destroy() {
|
||||
ANS_DBG("RTSP_Lifecycle", "DESTROY called: url=%s playing=%d", _url.c_str(), (int)_isPlaying);
|
||||
// Move the player client pointer out of the lock scope, then
|
||||
// close it OUTSIDE the mutex. close() calls cuArrayDestroy /
|
||||
// cuMemFree which acquire an EXCLUSIVE SRW lock inside nvcuda64.
|
||||
@@ -126,6 +129,24 @@ namespace ANSCENTER {
|
||||
// belong to the global GpuNV12SlotPool, not the decoder.
|
||||
if (clientToClose) {
|
||||
clientToClose->close();
|
||||
|
||||
// Force CUDA runtime to release all cached memory from the destroyed
|
||||
// NVDEC decoder. Without this, cuMemFree returns memory to the CUDA
|
||||
// driver's internal cache, and the next camera creation allocates fresh
|
||||
// memory → VRAM grows by ~200-300MB per destroy/create cycle.
|
||||
// cudaDeviceSynchronize ensures all pending GPU ops are done, then
|
||||
// cudaMemPool trim releases the freed blocks back to the OS.
|
||||
cudaDeviceSynchronize();
|
||||
cudaMemPool_t memPool = nullptr;
|
||||
int currentDev = 0;
|
||||
cudaGetDevice(&currentDev);
|
||||
if (cudaDeviceGetDefaultMemPool(&memPool, currentDev) == cudaSuccess && memPool) {
|
||||
cudaMemPoolTrimTo(memPool, 0); // Release all unused memory
|
||||
}
|
||||
size_t vramFree = 0, vramTotal = 0;
|
||||
cudaMemGetInfo(&vramFree, &vramTotal);
|
||||
ANS_DBG("RTSP_Destroy", "NVDEC closed + memPool trimmed GPU%d VRAM=%zuMB/%zuMB",
|
||||
currentDev, (vramTotal - vramFree) / (1024*1024), vramFree / (1024*1024));
|
||||
}
|
||||
}
|
||||
static void VerifyGlobalANSRTSPLicense(const std::string& licenseKey) {
|
||||
@@ -211,6 +232,7 @@ namespace ANSCENTER {
|
||||
_playerClient->setCrop(crop);
|
||||
}
|
||||
bool ANSRTSPClient::Reconnect() {
|
||||
ANS_DBG("RTSP_Lifecycle", "RECONNECT called: url=%s playing=%d", _url.c_str(), (int)_isPlaying);
|
||||
// 1. Mark as not-playing under the mutex FIRST. This makes GetImage()
|
||||
// return the cached _pLastFrame instead of calling into the player,
|
||||
// and blocks new TryIncrementInFlight calls (no new NV12 attachments).
|
||||
@@ -253,8 +275,30 @@ namespace ANSCENTER {
|
||||
// completed (or timed out), so close() is safe.
|
||||
_logger.LogInfo("ANSRTSPClient::Reconnect",
|
||||
"calling close() — NVDEC decoder will be destroyed", __FILE__, __LINE__);
|
||||
auto _rc0 = std::chrono::steady_clock::now();
|
||||
RTSP_DBG("[Reconnect] BEFORE close() this=%p", (void*)this);
|
||||
_playerClient->close();
|
||||
auto _rc1 = std::chrono::steady_clock::now();
|
||||
|
||||
// Force CUDA runtime to release cached memory from the destroyed NVDEC decoder.
|
||||
cudaDeviceSynchronize();
|
||||
auto _rc2 = std::chrono::steady_clock::now();
|
||||
cudaMemPool_t memPool = nullptr;
|
||||
int currentDev = 0;
|
||||
cudaGetDevice(&currentDev);
|
||||
if (cudaDeviceGetDefaultMemPool(&memPool, currentDev) == cudaSuccess && memPool) {
|
||||
cudaMemPoolTrimTo(memPool, 0);
|
||||
}
|
||||
auto _rc3 = std::chrono::steady_clock::now();
|
||||
{
|
||||
size_t vf = 0, vt = 0;
|
||||
cudaMemGetInfo(&vf, &vt);
|
||||
double closeMs = std::chrono::duration<double, std::milli>(_rc1 - _rc0).count();
|
||||
double syncMs = std::chrono::duration<double, std::milli>(_rc2 - _rc1).count();
|
||||
double trimMs = std::chrono::duration<double, std::milli>(_rc3 - _rc2).count();
|
||||
ANS_DBG("RTSP_Reconnect", "close=%.1fms sync=%.1fms trim=%.1fms VRAM=%zuMB/%zuMB",
|
||||
closeMs, syncMs, trimMs, (vt - vf) / (1024*1024), vf / (1024*1024));
|
||||
}
|
||||
RTSP_DBG("[Reconnect] AFTER close() this=%p", (void*)this);
|
||||
|
||||
// 3. Re-setup and play under the mutex.
|
||||
@@ -283,12 +327,9 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSRTSPClient::Stop() {
|
||||
// Grab the player pointer and clear _isPlaying under the lock,
|
||||
// then call stop() OUTSIDE the mutex. stop() internally calls
|
||||
// StopVideoDecoder -> decoder->flush() which does CUDA calls
|
||||
// that can block on the nvcuda64 SRW lock. Holding _mutex
|
||||
// during that time blocks all other operations on this client
|
||||
// and contributes to the convoy when many clients stop at once.
|
||||
// Stop playback but keep the RTSP connection and NVDEC decoder alive.
|
||||
// LabVIEW uses Stop/Start to pause cameras when no AI task is subscribed.
|
||||
// The camera resumes instantly on Start() without re-connecting.
|
||||
CRtspPlayer* player = nullptr;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
@@ -300,6 +341,7 @@ namespace ANSCENTER {
|
||||
if (player) {
|
||||
player->stop();
|
||||
}
|
||||
ANS_DBG("RTSP_Lifecycle", "STOP complete: handle=%p (connection kept alive)", (void*)this);
|
||||
return true;
|
||||
}
|
||||
bool ANSRTSPClient::Pause() {
|
||||
@@ -342,45 +384,44 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSRTSPClient::areImagesIdentical(const cv::Mat& img1, const cv::Mat& img2) {
|
||||
// Quick size and type checks
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) {
|
||||
return false;
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
|
||||
if (ageMs > 5000.0) {
|
||||
ANS_DBG("RTSP_Stale", "FROZEN DETECTED: ageMs=%.1f url=%s playing=%d — camera truly stale",
|
||||
ageMs, _url.c_str(), (int)_isPlaying);
|
||||
return true; // Truly stale — no decoder output for 5+ seconds
|
||||
}
|
||||
if (ageMs > 0.0) {
|
||||
return false; // Decoder is receiving frames — camera is alive
|
||||
}
|
||||
|
||||
// Handle empty images
|
||||
if (img1.empty()) {
|
||||
return img2.empty();
|
||||
}
|
||||
// ageMs == 0 means no frame has been decoded yet (startup).
|
||||
// Fall back to pixel comparison for backward compatibility.
|
||||
if (img1.empty() && img2.empty()) return true;
|
||||
if (img1.empty() || img2.empty()) return false;
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) return false;
|
||||
|
||||
// Same data pointer = same cv::Mat (shallow copy)
|
||||
if (img1.data == img2.data) return true;
|
||||
|
||||
// Quick 5-point sampling
|
||||
if (img1.isContinuous() && img2.isContinuous()) {
|
||||
const size_t totalBytes = img1.total() * img1.elemSize();
|
||||
|
||||
// Fast rejection: sample 5 positions across contiguous memory
|
||||
// Catches 99.99% of different frames immediately
|
||||
const size_t quarter = totalBytes / 4;
|
||||
const size_t half = totalBytes / 2;
|
||||
const size_t threeQuarter = 3 * totalBytes / 4;
|
||||
|
||||
if (img1.data[0] != img2.data[0] ||
|
||||
img1.data[quarter] != img2.data[quarter] ||
|
||||
img1.data[half] != img2.data[half] ||
|
||||
img1.data[threeQuarter] != img2.data[threeQuarter] ||
|
||||
img1.data[totalBytes - 1] != img2.data[totalBytes - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Full comparison
|
||||
return std::memcmp(img1.data, img2.data, totalBytes) == 0;
|
||||
}
|
||||
|
||||
// Row-by-row comparison for non-continuous images (e.g., ROI sub-matrices)
|
||||
const size_t rowSize = img1.cols * img1.elemSize();
|
||||
for (int i = 0; i < img1.rows; i++) {
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
cv::Mat ANSRTSPClient::GetImage(int& width, int& height, int64_t& pts) {
|
||||
@@ -414,6 +455,20 @@ namespace ANSCENTER {
|
||||
if (currentPts == _pts && !_pLastFrame.empty()) {
|
||||
width = _imageWidth;
|
||||
height = _imageHeight;
|
||||
// Return timestamp based on decoder frame age so LabVIEW can distinguish
|
||||
// "rate-limited duplicate" from "camera truly stale".
|
||||
// If decoder is still receiving frames (age < 5s), advance PTS so LabVIEW
|
||||
// sees a changing timestamp and doesn't trigger false reconnect.
|
||||
// If decoder is stale (age > 5s), return same PTS so LabVIEW detects it.
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
if (ageMs > 0.0 && ageMs < 5000.0) {
|
||||
// Camera alive but rate-limited — advance PTS to prevent false stale detection
|
||||
_pts++;
|
||||
} else if (ageMs >= 5000.0) {
|
||||
// Camera stale — keep same PTS so LabVIEW triggers reconnect
|
||||
ANS_DBG("RTSP_GetImage", "STALE PTS: ageMs=%.1f pts=%lld url=%s — not advancing PTS",
|
||||
ageMs, (long long)_pts, _url.c_str());
|
||||
}
|
||||
pts = _pts;
|
||||
return _pLastFrame;
|
||||
}
|
||||
@@ -891,6 +946,10 @@ namespace ANSCENTER {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
_useNV12FastPath = enable;
|
||||
}
|
||||
double ANSRTSPClient::GetLastFrameAgeMs() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
return _playerClient->getLastFrameAgeMs();
|
||||
}
|
||||
AVFrame* ANSRTSPClient::GetNV12Frame() {
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (!_isPlaying) return nullptr; // Player may be mid-reconnect (CUDA resources freed)
|
||||
@@ -937,6 +996,7 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
extern "C" __declspec(dllexport) int CreateANSRTSPHandle(ANSCENTER::ANSRTSPClient * *Handle, const char* licenseKey, const char* username, const char* password, const char* url) {
|
||||
ANS_DBG("RTSP_Lifecycle", "CREATE: url=%s", url ? url : "null");
|
||||
if (!Handle || !licenseKey || !url) return -1;
|
||||
try {
|
||||
auto ptr = std::make_unique<ANSCENTER::ANSRTSPClient>();
|
||||
@@ -946,11 +1006,10 @@ extern "C" __declspec(dllexport) int CreateANSRTSPHandle(ANSCENTER::ANSRTSPClien
|
||||
if (_username.empty() && _password.empty()) result = ptr->Init(licenseKey, url);
|
||||
else result = ptr->Init(licenseKey, username, password, url);
|
||||
if (result) {
|
||||
// Default to CUDA/NVDEC HW decoding (mode 7) for NV12 zero-copy
|
||||
// fast path. LabVIEW may not call SetRTSPHWDecoding after
|
||||
// destroy+recreate cycles, so this ensures the new handle always
|
||||
// uses the GPU decode path instead of falling back to D3D11VA/CPU.
|
||||
ptr->SetHWDecoding(7); // HW_DECODING_CUDA
|
||||
// Software decode by default — saves VRAM (no NVDEC DPB surfaces).
|
||||
// With 100 cameras, HW decode would consume ~5-21 GB VRAM for idle decoders.
|
||||
// User can enable HW decode per-camera via SetRTSPHWDecoding(handle, 7).
|
||||
// ptr->SetHWDecoding(7); // Disabled — was HW_DECODING_CUDA
|
||||
*Handle = ptr.release();
|
||||
extern void anscv_unregister_handle(void*);
|
||||
extern void anscv_register_handle(void*, void(*)(void*));
|
||||
@@ -967,6 +1026,7 @@ extern "C" __declspec(dllexport) int CreateANSRTSPHandle(ANSCENTER::ANSRTSPClien
|
||||
} catch (...) { return -1; }
|
||||
}
|
||||
extern "C" __declspec(dllexport) int ReleaseANSRTSPHandle(ANSCENTER::ANSRTSPClient * *Handle) {
|
||||
ANS_DBG("RTSP_Lifecycle", "RELEASE: handle=%p", Handle ? (void*)*Handle : nullptr);
|
||||
if (Handle == nullptr || *Handle == nullptr) return -1;
|
||||
try {
|
||||
extern void anscv_unregister_handle(void*);
|
||||
@@ -982,25 +1042,27 @@ extern "C" __declspec(dllexport) int ReleaseANSRTSPHandle(ANSCENTER::ANSRTSPClie
|
||||
// on any subsequent call, and prevents NEW NV12 GPU surface
|
||||
// pointers from being handed out.
|
||||
//
|
||||
// Do NOT call Destroy()/close() here — close() frees the
|
||||
// NVDEC GPU surfaces (cuArrayDestroy/cuMemFree) which may
|
||||
// still be in use by a CUDA inference kernel that received
|
||||
// the NV12 pointer from a GetRTSPCVImage call that already
|
||||
// completed before this Release was called.
|
||||
// Synchronous cleanup — ensures all GPU resources (NVDEC surfaces, VRAM)
|
||||
// are fully released BEFORE LabVIEW creates a new camera.
|
||||
// Previously deferred to a background thread, but that caused the old
|
||||
// camera's resources to overlap with the new camera's allocations,
|
||||
// leading to temporary VRAM doubling (~240MB per camera) and eventual
|
||||
// VRAM exhaustion on cameras with frequent reconnects.
|
||||
{
|
||||
// Use the client's _mutex to safely set _isPlaying = false.
|
||||
// This is the same lock GetImage/GetNV12Frame acquire.
|
||||
raw->Stop(); // sets _isPlaying = false, stops playback
|
||||
}
|
||||
auto t0 = std::chrono::steady_clock::now();
|
||||
raw->Stop();
|
||||
auto t1 = std::chrono::steady_clock::now();
|
||||
raw->Destroy();
|
||||
auto t2 = std::chrono::steady_clock::now();
|
||||
delete raw;
|
||||
auto t3 = std::chrono::steady_clock::now();
|
||||
|
||||
// Defer the full cleanup (Destroy + delete) to a background thread
|
||||
// so LabVIEW's UI thread is not blocked. Destroy() now waits
|
||||
// precisely for in-flight inference to finish (via _inFlightFrames
|
||||
// counter + condition variable) instead of the old 500ms sleep hack.
|
||||
std::thread([raw]() {
|
||||
try { raw->Destroy(); } catch (...) {}
|
||||
try { delete raw; } catch (...) {}
|
||||
}).detach();
|
||||
double stopMs = std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||
double destroyMs = std::chrono::duration<double, std::milli>(t2 - t1).count();
|
||||
double deleteMs = std::chrono::duration<double, std::milli>(t3 - t2).count();
|
||||
ANS_DBG("RTSP_Lifecycle", "RELEASE complete: stop=%.1fms destroy=%.1fms delete=%.1fms total=%.1fms",
|
||||
stopMs, destroyMs, deleteMs, stopMs + destroyMs + deleteMs);
|
||||
}
|
||||
|
||||
return 0;
|
||||
} catch (...) {
|
||||
@@ -1269,6 +1331,7 @@ extern "C" __declspec(dllexport) int GetRTSPImage(ANSCENTER::ANSRTSPClient** Han
|
||||
}
|
||||
}
|
||||
extern "C" __declspec(dllexport) int StartRTSP(ANSCENTER::ANSRTSPClient **Handle) {
|
||||
ANS_DBG("RTSP_Lifecycle", "START: handle=%p", Handle ? (void*)*Handle : nullptr);
|
||||
if (Handle == nullptr || *Handle == nullptr) return -1;
|
||||
try {
|
||||
bool result = (*Handle)->Start();
|
||||
@@ -1301,6 +1364,7 @@ extern "C" __declspec(dllexport) int ReconnectRTSP(ANSCENTER::ANSRTSPClient * *H
|
||||
}
|
||||
}
|
||||
extern "C" __declspec(dllexport) int StopRTSP(ANSCENTER::ANSRTSPClient * *Handle) {
|
||||
ANS_DBG("RTSP_Lifecycle", "STOP: handle=%p", Handle ? (void*)*Handle : nullptr);
|
||||
if (Handle == nullptr || *Handle == nullptr) return -1;
|
||||
try {
|
||||
bool result = (*Handle)->Stop();
|
||||
@@ -1462,9 +1526,15 @@ extern "C" __declspec(dllexport) void SetRTSPTargetFPS(ANSCENTER::ANSRTSPClient*
|
||||
extern "C" __declspec(dllexport) void SetRTSPNV12FastPath(ANSCENTER::ANSRTSPClient** Handle, int enable) {
|
||||
if (Handle == nullptr || *Handle == nullptr) return;
|
||||
try {
|
||||
(*Handle)->SetNV12FastPath(enable != 0); // 0=original CPU path (stable), 1=NV12 GPU fast path
|
||||
(*Handle)->SetNV12FastPath(enable != 0);
|
||||
} catch (...) { }
|
||||
}
|
||||
extern "C" __declspec(dllexport) double GetRTSPLastFrameAgeMs(ANSCENTER::ANSRTSPClient** Handle) {
|
||||
if (Handle == nullptr || *Handle == nullptr) return -1.0;
|
||||
try {
|
||||
return (*Handle)->GetLastFrameAgeMs();
|
||||
} catch (...) { return -1.0; }
|
||||
}
|
||||
extern "C" __declspec(dllexport) int SetCropFlagRTSP(ANSCENTER::ANSRTSPClient** Handle, int cropFlag) {
|
||||
if (Handle == nullptr || *Handle == nullptr) return -1;
|
||||
try {
|
||||
|
||||
@@ -106,6 +106,7 @@ namespace ANSCENTER
|
||||
void SetTargetFPS(double intervalMs); // Set min interval between processed frames in ms (0 = no limit, 100 = ~10 FPS, 200 = ~5 FPS)
|
||||
void SetNV12FastPath(bool enable); // true = NV12 GPU fast path (zero-copy inference), false = original CPU path (stable)
|
||||
bool IsNV12FastPath() const { return _useNV12FastPath; }
|
||||
double GetLastFrameAgeMs(); // Milliseconds since last frame from decoder (detects truly stale cameras, unaffected by SetTargetFPS)
|
||||
AVFrame* GetNV12Frame(); // Returns cloned NV12 frame for GPU fast-path (caller must av_frame_free)
|
||||
AVFrame* GetCudaHWFrame(); // Returns CUDA HW frame (device ptrs) for zero-copy inference
|
||||
bool IsCudaHWAccel(); // true when decoder uses CUDA (NV12 stays in GPU VRAM)
|
||||
@@ -145,4 +146,5 @@ extern "C" __declspec(dllexport) void SetRTSPImageQuality(ANSCENTER::ANSRTSPClie
|
||||
extern "C" __declspec(dllexport) void SetRTSPDisplayResolution(ANSCENTER::ANSRTSPClient** Handle, int width, int height);
|
||||
extern "C" __declspec(dllexport) void SetRTSPTargetFPS(ANSCENTER::ANSRTSPClient** Handle, double intervalMs);
|
||||
extern "C" __declspec(dllexport) void SetRTSPNV12FastPath(ANSCENTER::ANSRTSPClient** Handle, int enable);
|
||||
extern "C" __declspec(dllexport) double GetRTSPLastFrameAgeMs(ANSCENTER::ANSRTSPClient** Handle);
|
||||
#endif
|
||||
@@ -221,43 +221,22 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
bool ANSSRTClient::areImagesIdentical(const cv::Mat& img1, const cv::Mat& img2) {
|
||||
// Quick size and type checks
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) {
|
||||
return false;
|
||||
}
|
||||
double ageMs = _playerClient->getLastFrameAgeMs();
|
||||
if (ageMs > 5000.0) return true;
|
||||
if (ageMs > 0.0) return false;
|
||||
|
||||
// Handle empty images
|
||||
if (img1.empty()) {
|
||||
return img2.empty();
|
||||
}
|
||||
if (img1.empty() && img2.empty()) return true;
|
||||
if (img1.empty() || img2.empty()) return false;
|
||||
if (img1.size() != img2.size() || img1.type() != img2.type()) return false;
|
||||
if (img1.data == img2.data) return true;
|
||||
|
||||
if (img1.isContinuous() && img2.isContinuous()) {
|
||||
const size_t totalBytes = img1.total() * img1.elemSize();
|
||||
|
||||
// Fast rejection: sample 5 positions across contiguous memory
|
||||
// Catches 99.99% of different frames immediately
|
||||
const size_t quarter = totalBytes / 4;
|
||||
const size_t half = totalBytes / 2;
|
||||
const size_t threeQuarter = 3 * totalBytes / 4;
|
||||
|
||||
if (img1.data[0] != img2.data[0] ||
|
||||
img1.data[quarter] != img2.data[quarter] ||
|
||||
img1.data[half] != img2.data[half] ||
|
||||
img1.data[threeQuarter] != img2.data[threeQuarter] ||
|
||||
img1.data[totalBytes - 1] != img2.data[totalBytes - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Full comparison
|
||||
return std::memcmp(img1.data, img2.data, totalBytes) == 0;
|
||||
}
|
||||
|
||||
// Row-by-row comparison for non-continuous images (e.g., ROI sub-matrices)
|
||||
const size_t rowSize = img1.cols * img1.elemSize();
|
||||
for (int i = 0; i < img1.rows; i++) {
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (std::memcmp(img1.ptr(i), img2.ptr(i), rowSize) != 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -136,7 +136,7 @@ namespace ANSCENTER {
|
||||
if (!_hwDecodeActive && !_hwPlayer) {
|
||||
try {
|
||||
auto hwp = std::make_unique<CFilePlayer>();
|
||||
hwp->setHWDecoding(HW_DECODING_AUTO); // CUDA → D3D11VA → DXVA2 → software
|
||||
hwp->setHWDecoding(HW_DECODING_DISABLE); // Software decode by default — saves VRAM
|
||||
if (hwp->open(_url)) {
|
||||
_hwPlayer = std::move(hwp);
|
||||
_hwDecodeActive = true;
|
||||
|
||||
@@ -93,7 +93,7 @@ CVideoPlayer::CVideoPlayer():
|
||||
, m_bPaused(FALSE)
|
||||
, m_bSizeChanged(FALSE)
|
||||
//, m_nRenderMode(RENDER_MODE_KEEP)
|
||||
, m_nHWDecoding(HW_DECODING_AUTO)
|
||||
, m_nHWDecoding(HW_DECODING_DISABLE) // Software decode by default — saves VRAM
|
||||
, m_nDstVideoFmt(AV_PIX_FMT_YUV420P)
|
||||
, m_bUpdown(FALSE)
|
||||
, m_bSnapshot(FALSE)
|
||||
|
||||
Reference in New Issue
Block a user