Fix ALPR batched inference path and ANSALPR_OCR memory leak

This commit is contained in:
2026-04-15 09:23:05 +10:00
parent 7778f8c214
commit b05c49ad93
9 changed files with 686 additions and 83 deletions

View File

@@ -104,6 +104,19 @@ public:
ALPRHandleGuard& operator=(const ALPRHandleGuard&) = delete;
};
// RAII guard — sets the per-thread current GPU frame pointer and always
// clears it on scope exit, even if the wrapped inference call throws.
// Without this, a throwing RunInference leaves tl_currentGpuFrame pointing
// at a GpuFrameData that may be freed before the next call on this thread,
// causing use-after-free or stale NV12 data on subsequent frames.
class GpuFrameScope {
public:
explicit GpuFrameScope(GpuFrameData* f) { tl_currentGpuFrame() = f; }
~GpuFrameScope() { tl_currentGpuFrame() = nullptr; }
GpuFrameScope(const GpuFrameScope&) = delete;
GpuFrameScope& operator=(const GpuFrameScope&) = delete;
};
BOOL APIENTRY DllMain( HMODULE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved
@@ -465,9 +478,6 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_LV(
try {
const cv::Mat& localImage = **cvImage; // No clone — RunInference takes const ref
// Set thread-local NV12 frame data for fast-path inference
// Cleared after first RunInference to prevent NV12 mismatch on cropped sub-images (OCR, etc.)
tl_currentGpuFrame() = ANSGpuFrameRegistry::instance().lookup(*cvImage);
int originalWidth = localImage.cols;
int originalHeight = localImage.rows;
@@ -476,8 +486,13 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_LV(
return -2;
}
std::vector<ANSCENTER::Object> outputs = engine->RunInference(localImage, cameraId);
tl_currentGpuFrame() = nullptr;
std::vector<ANSCENTER::Object> outputs;
{
// Scoped NV12 fast-path pointer; cleared on any exit path (normal or
// throw) so the next call on this thread cannot see a stale frame.
GpuFrameScope _gfs(ANSGpuFrameRegistry::instance().lookup(*cvImage));
outputs = engine->RunInference(localImage, cameraId);
}
bool getJpeg = (getJpegString == 1);
std::string stImage;
@@ -567,15 +582,17 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_CPP(ANSCENTER::ANSALP
try {
const cv::Mat& localImage = **cvImage; // No clone — RunInference takes const ref
// Set thread-local NV12 frame data for fast-path inference
// Cleared after first RunInference to prevent NV12 mismatch on cropped sub-images (OCR, etc.)
tl_currentGpuFrame() = ANSGpuFrameRegistry::instance().lookup(*cvImage);
int originalWidth = localImage.cols;
int originalHeight = localImage.rows;
int maxImageSize = originalWidth;
std::vector<ANSCENTER::Object> outputs = engine->RunInference(localImage, cameraId);
std::vector<ANSCENTER::Object> outputs;
{
// Scoped NV12 fast-path pointer; cleared on any exit path (normal or throw).
GpuFrameScope _gfs(ANSGpuFrameRegistry::instance().lookup(*cvImage));
outputs = engine->RunInference(localImage, cameraId);
}
bool getJpeg = (getJpegString == 1);
std::string stImage;
@@ -646,9 +663,6 @@ extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV(
try {
const cv::Mat& localImage = **cvImage; // No clone — RunInference takes const ref
// Set thread-local NV12 frame data for fast-path inference
// Cleared after first RunInference to prevent NV12 mismatch on cropped sub-images (OCR, etc.)
tl_currentGpuFrame() = ANSGpuFrameRegistry::instance().lookup(*cvImage);
std::vector<ANSCENTER::Object> objectDetectionResults;
std::vector<cv::Rect> bBox = ANSCENTER::ANSALPR::GetBoundingBoxes(strBboxes);
@@ -659,10 +673,14 @@ extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV(
const double scaleFactor = (maxImageSize > 0) ? static_cast<double>(originalWidth) / maxImageSize : 1.0;
if (bBox.empty()) {
// Full-frame path: NV12 fast-path is only safe for the full-frame
// inference call. Scope the TL pointer so it is cleared on any exit
// path (normal or thrown) and is NOT seen by any subsequent
// cropped-image inference (which would mismatch the NV12 cache).
GpuFrameScope _gfs(ANSGpuFrameRegistry::instance().lookup(*cvImage));
objectDetectionResults = engine->RunInference(localImage, cameraId);
}
tl_currentGpuFrame() = nullptr; // Clear before crop-based inference
if (!bBox.empty()) {
else {
for (const auto& rect : bBox) {
cv::Rect scaledRect;
scaledRect.x = static_cast<int>(rect.x * scaleFactor);
@@ -708,6 +726,103 @@ extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV(
}
}
// Dedicated pipeline-mode export. Unlike ANSALPR_RunInferencesComplete_LV
// this function:
//   1. Always runs with tracker OFF, voting OFF, dedup OFF — it targets
//      callers that already have precise per-vehicle bboxes and want raw,
//      stateless results.
//   2. Issues ONE batched LP-detect call and ONE batched recognizer call
//      per frame via ANSALPR::RunInferencesBatch, instead of looping
//      engine->RunInference once per crop. This eliminates the per-shape
//      ORT/TRT allocator churn that causes ANSALPR_OCR memory growth when
//      the legacy pipeline-mode loop is used under LabVIEW worker threads.
//   3. Does NOT touch tl_currentGpuFrame — the caller is working with
//      cropped regions, not the NV12-keyed source Mat, so the fast-path
//      pointer is meaningless here. Eliminates a class of UAF risk.
// Output coordinates are rescaled back into the caller's resized space,
// matching the convention used by ANSALPR_RunInferencesComplete_LV.
//
// Returns:  1  success, JSON written into detectionResult
//           0  empty result set, LabVIEW handle resize failure, or exception
//          -1  null engine handle
//          -2  null/empty image, degenerate frame, or null output handle
//          -3  engine acquisition failure
extern "C" ANSLPR_API int ANSALPR_RunInferencesBatch_LV(
	ANSCENTER::ANSALPR** Handle,
	cv::Mat** cvImage,
	const char* cameraId,
	int maxImageSize,
	const char* strBboxes,
	LStrHandle detectionResult)
{
	if (!Handle || !*Handle) return -1;
	if (!cvImage || !(*cvImage) || (*cvImage)->empty()) return -2;
	// Guard the output handle like the inputs: DSSetHandleSize and the
	// (*detectionResult)-> dereferences below would crash on a null handle.
	if (!detectionResult) return -2;
	ALPRHandleGuard guard(AcquireALPRHandle(*Handle));
	if (!guard) return -3;
	auto* engine = guard.get();
	try {
		const cv::Mat& frame = **cvImage;
		const int frameW = frame.cols;
		const int frameH = frame.rows;
		if (frameW <= 0 || frameH <= 0) return -2;
		// Same scaling convention as ANSALPR_RunInferencesComplete_LV:
		// the bboxes in `strBboxes` are in a resized coordinate space;
		// scale them up to the full frame for the crop, then rescale the
		// plate outputs back to the caller's space.
		const double scale =
			(maxImageSize > 0) ? static_cast<double>(frameW) / maxImageSize : 1.0;
		std::vector<cv::Rect> rawBoxes = ANSCENTER::ANSALPR::GetBoundingBoxes(strBboxes);
		std::vector<cv::Rect> scaledBoxes;
		scaledBoxes.reserve(rawBoxes.size());
		const cv::Rect frameRect(0, 0, frameW, frameH);
		for (const auto& r : rawBoxes) {
			cv::Rect s(
				static_cast<int>(r.x * scale),
				static_cast<int>(r.y * scale),
				static_cast<int>(r.width * scale),
				static_cast<int>(r.height * scale));
			s &= frameRect;  // clip to the frame; drop degenerate boxes below
			if (s.width > 0 && s.height > 0) scaledBoxes.push_back(s);
		}
		// Empty bbox list → fall through to full-frame RunInference so
		// existing LabVIEW code that passes "" still works, matching the
		// behaviour of ANSALPR_RunInferencesComplete_LV.
		std::vector<ANSCENTER::Object> results;
		if (scaledBoxes.empty()) {
			results = engine->RunInference(frame, cameraId);
		} else {
			results = engine->RunInferencesBatch(frame, scaledBoxes, cameraId);
		}
		// Rescale plate boxes back into the caller's resized space.
		if (scale != 1.0) {
			const double inv = 1.0 / scale;
			for (auto& o : results) {
				o.box.x = static_cast<int>(o.box.x * inv);
				o.box.y = static_cast<int>(o.box.y * inv);
				o.box.width = static_cast<int>(o.box.width * inv);
				o.box.height = static_cast<int>(o.box.height * inv);
			}
		}
		std::string json = engine->VectorDetectionToJsonString(results);
		if (json.empty()) return 0;
		const int size = static_cast<int>(json.length());
		// LabVIEW string handle layout: int32 length prefix + raw bytes.
		MgErr err = DSSetHandleSize(detectionResult, sizeof(int32) + size * sizeof(uChar));
		if (err != noErr) return 0;
		(*detectionResult)->cnt = size;
		memcpy((*detectionResult)->str, json.c_str(), size);
		return 1;
	}
	catch (const std::exception& /*ex*/) {
		return 0;
	}
	catch (...) {
		return 0;
	}
}
extern "C" ANSLPR_API int ANSALPR_SetFormat(ANSCENTER::ANSALPR** Handle, const char* format) {
if (!Handle || !*Handle) return -1;
@@ -1042,9 +1157,6 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_LV_V2(
try {
const cv::Mat& localImage = **cvImage; // No clone — RunInference takes const ref
// Set thread-local NV12 frame data for fast-path inference
// Cleared after first RunInference to prevent NV12 mismatch on cropped sub-images (OCR, etc.)
tl_currentGpuFrame() = ANSGpuFrameRegistry::instance().lookup(*cvImage);
int originalWidth = localImage.cols;
int originalHeight = localImage.rows;
@@ -1053,8 +1165,12 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_LV_V2(
return -2;
}
std::vector<ANSCENTER::Object> outputs = engine->RunInference(localImage, cameraId);
tl_currentGpuFrame() = nullptr;
std::vector<ANSCENTER::Object> outputs;
{
// Scoped NV12 fast-path pointer; cleared on any exit path (normal or throw).
GpuFrameScope _gfs(ANSGpuFrameRegistry::instance().lookup(*cvImage));
outputs = engine->RunInference(localImage, cameraId);
}
bool getJpeg = (getJpegString == 1);
std::string stImage;
@@ -1132,6 +1248,85 @@ extern "C" ANSLPR_API int ANSALPR_RunInferenceComplete_LV_V2(
}
}
// V2 uint64_t handle variant of ANSALPR_RunInferencesBatch_LV.
// See the non-V2 version for semantics — identical behaviour, differs only
// in how the caller passes the ALPR engine handle (by value instead of via
// a LabVIEW Handle** pointer-to-pointer).
//
// Returns:  1  success, JSON written into detectionResult
//           0  empty result set, LabVIEW handle resize failure, or exception
//          -1  null engine handle
//          -2  null/empty image, degenerate frame, or null output handle
//          -3  engine acquisition failure
extern "C" ANSLPR_API int ANSALPR_RunInferencesBatch_LV_V2(
	uint64_t handleVal,
	cv::Mat** cvImage,
	const char* cameraId,
	int maxImageSize,
	const char* strBboxes,
	LStrHandle detectionResult)
{
	ANSCENTER::ANSALPR* _v2Direct = reinterpret_cast<ANSCENTER::ANSALPR*>(handleVal);
	if (_v2Direct == nullptr) return -1;
	if (!cvImage || !(*cvImage) || (*cvImage)->empty()) return -2;
	// Guard the output handle like the inputs: DSSetHandleSize and the
	// (*detectionResult)-> dereferences below would crash on a null handle.
	if (!detectionResult) return -2;
	ALPRHandleGuard guard(AcquireALPRHandle(_v2Direct));
	if (!guard) return -3;
	auto* engine = guard.get();
	try {
		const cv::Mat& frame = **cvImage;
		const int frameW = frame.cols;
		const int frameH = frame.rows;
		if (frameW <= 0 || frameH <= 0) return -2;
		// Bboxes arrive in a resized coordinate space: scale up to the full
		// frame for the crop, then rescale outputs back (same convention as
		// ANSALPR_RunInferencesComplete_LV).
		const double scale =
			(maxImageSize > 0) ? static_cast<double>(frameW) / maxImageSize : 1.0;
		std::vector<cv::Rect> rawBoxes = ANSCENTER::ANSALPR::GetBoundingBoxes(strBboxes);
		std::vector<cv::Rect> scaledBoxes;
		scaledBoxes.reserve(rawBoxes.size());
		const cv::Rect frameRect(0, 0, frameW, frameH);
		for (const auto& r : rawBoxes) {
			cv::Rect s(
				static_cast<int>(r.x * scale),
				static_cast<int>(r.y * scale),
				static_cast<int>(r.width * scale),
				static_cast<int>(r.height * scale));
			s &= frameRect;  // clip to the frame; drop degenerate boxes below
			if (s.width > 0 && s.height > 0) scaledBoxes.push_back(s);
		}
		// Empty bbox list → full-frame inference, so callers passing ""
		// keep the legacy behaviour.
		std::vector<ANSCENTER::Object> results;
		if (scaledBoxes.empty()) {
			results = engine->RunInference(frame, cameraId);
		} else {
			results = engine->RunInferencesBatch(frame, scaledBoxes, cameraId);
		}
		// Rescale plate boxes back into the caller's resized space.
		if (scale != 1.0) {
			const double inv = 1.0 / scale;
			for (auto& o : results) {
				o.box.x = static_cast<int>(o.box.x * inv);
				o.box.y = static_cast<int>(o.box.y * inv);
				o.box.width = static_cast<int>(o.box.width * inv);
				o.box.height = static_cast<int>(o.box.height * inv);
			}
		}
		std::string json = engine->VectorDetectionToJsonString(results);
		if (json.empty()) return 0;
		const int size = static_cast<int>(json.length());
		// LabVIEW string handle layout: int32 length prefix + raw bytes.
		MgErr err = DSSetHandleSize(detectionResult, sizeof(int32) + size * sizeof(uChar));
		if (err != noErr) return 0;
		(*detectionResult)->cnt = size;
		memcpy((*detectionResult)->str, json.c_str(), size);
		return 1;
	}
	catch (const std::exception& /*ex*/) {
		return 0;
	}
	catch (...) {
		return 0;
	}
}
extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV_V2(
uint64_t handleVal,
cv::Mat** cvImage,
@@ -1150,9 +1345,6 @@ extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV_V2(
try {
const cv::Mat& localImage = **cvImage; // No clone — RunInference takes const ref
// Set thread-local NV12 frame data for fast-path inference
// Cleared after first RunInference to prevent NV12 mismatch on cropped sub-images (OCR, etc.)
tl_currentGpuFrame() = ANSGpuFrameRegistry::instance().lookup(*cvImage);
std::vector<ANSCENTER::Object> objectDetectionResults;
std::vector<cv::Rect> bBox = ANSCENTER::ANSALPR::GetBoundingBoxes(strBboxes);
@@ -1163,10 +1355,14 @@ extern "C" ANSLPR_API int ANSALPR_RunInferencesComplete_LV_V2(
const double scaleFactor = (maxImageSize > 0) ? static_cast<double>(originalWidth) / maxImageSize : 1.0;
if (bBox.empty()) {
// Full-frame path: NV12 fast-path is only safe for the full-frame
// inference call. Scope the TL pointer so it is cleared on any exit
// path (normal or thrown) and is NOT seen by any subsequent
// cropped-image inference (which would mismatch the NV12 cache).
GpuFrameScope _gfs(ANSGpuFrameRegistry::instance().lookup(*cvImage));
objectDetectionResults = engine->RunInference(localImage, cameraId);
}
tl_currentGpuFrame() = nullptr; // Clear before crop-based inference
if (!bBox.empty()) {
else {
for (const auto& rect : bBox) {
cv::Rect scaledRect;
scaledRect.x = static_cast<int>(rect.x * scaleFactor);