diff --git a/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp b/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp
index 52879bf..8cc8a17 100644
--- a/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp
+++ b/modules/ANSOCR/ANSONNXOCR/ONNXOCRRecognizer.cpp
@@ -1,4 +1,5 @@
 #include "ONNXOCRRecognizer.h"
+#include "ANSLicense.h"   // ANS_DBG
 
 #include <opencv2/imgproc.hpp>
 #include <iostream>
@@ -74,6 +75,17 @@ int ONNXOCRRecognizer::RoundUpToBucket(int resizedW) const {
     return imgMaxW_;
 }
 
+// Round the real batch size UP to the first (ascending) kRecBatchLadder entry
+// >= n; padded slots are zero-filled and their output rows discarded after
+// Run() (rationale: kRecBatchLadder in ONNXOCRTypes.h).  For n > kRecMaxBatch
+// this returns kRecMaxBatch, i.e. LESS than n: callers must cap n beforehand.
+static int RoundUpToBatchLadder(size_t n) {
+    for (const int b : kRecBatchLadder) {
+        if (static_cast<size_t>(b) >= n) return b;  // unsigned compare: n is never narrowed
+    }
+    return kRecMaxBatch;
+}
+
 // Resize + normalize a single crop into a CHW float vector at width
 // `bucketW`, padding with zeros on the right when needed. The returned
 // vector has exactly 3*imgH_*bucketW elements.
@@ -147,19 +159,29 @@ void ONNXOCRRecognizer::RunBatchAtWidth(const std::vector<cv::Mat>& crops,
     if (crops.empty()) return;
 
     try {
-        const size_t batchN = crops.size();
+        // Real number of crops vs. padded batch size submitted to ORT.
+        // Padding to the ladder keeps the recognizer's input-shape set
+        // bounded (≤24 distinct shapes) so OpenVINO/DML don't recompile
+        // kernels mid-stream — see kRecBatchLadder in ONNXOCRTypes.h.
+        // The padded slots are zeros; their decoded output is discarded.
+        const size_t realN = crops.size();
+        const size_t paddedN = static_cast<size_t>(RoundUpToBatchLadder(realN));
         const size_t perImage = static_cast<size_t>(3) * imgH_ * bucketW;
+        ANS_DBG("ANSONNXRec_run",
+                "RunBatchAtWidth: bucketW=%d realN=%zu paddedN=%zu",
+                bucketW, realN, paddedN);
 
-        // Stack N preprocessed crops into one [N,3,H,W] buffer
-        std::vector<float> batchInput(batchN * perImage, 0.0f);
-        for (size_t i = 0; i < batchN; ++i) {
+        // Stack realN preprocessed crops into a [paddedN,3,H,W] buffer.
+        // The remaining (paddedN - realN) slots stay zero-initialized.
+        std::vector<float> batchInput(paddedN * perImage, 0.0f);
+        for (size_t i = 0; i < realN; ++i) {
             auto img = PreprocessCropToBucket(crops[i], imgH_, bucketW);
             std::memcpy(&batchInput[i * perImage], img.data(),
                         perImage * sizeof(float));
         }
 
         std::array<int64_t, 4> inputShape = {
-            static_cast<int64_t>(batchN), 3,
+            static_cast<int64_t>(paddedN), 3,
             static_cast<int64_t>(imgH_),
             static_cast<int64_t>(bucketW)
         };
@@ -175,7 +197,8 @@ void ONNXOCRRecognizer::RunBatchAtWidth(const std::vector<cv::Mat>& crops,
         float* outputData = outputTensors[0].GetTensorMutableData<float>();
         auto outputShape  = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
 
-        // Expected output: [N, seqLen, numClasses]
+        // Expected output: [paddedN, seqLen, numClasses]. We only decode
+        // the first realN rows; rows [realN..paddedN) are zero-pad noise.
         if (outputShape.size() < 3) {
             std::cerr << "[ONNXOCRRecognizer] Unexpected batch output rank: "
                       << outputShape.size() << std::endl;
@@ -186,7 +209,7 @@ void ONNXOCRRecognizer::RunBatchAtWidth(const std::vector<cv::Mat>& crops,
         const int numClasses = static_cast<int>(outputShape[2]);
         const size_t perRow  = static_cast<size_t>(seqLen) * numClasses;
 
-        for (int i = 0; i < outBatch && i < static_cast<int>(batchN); ++i) {
+        for (size_t i = 0; i < realN && static_cast<int>(i) < outBatch; ++i) {
             TextLine tl = CTCDecode(outputData + i * perRow, seqLen, numClasses);
             out[origIndices[i]] = std::move(tl);
         }
@@ -288,30 +311,63 @@ void ONNXOCRRecognizer::Warmup() {
     cv::Mat dummy(imgH_ * 2, kRecBucketWidths[kRecNumBuckets - 1] * 2,
                   CV_8UC3, cv::Scalar(128, 128, 128));
 
-    for (int b = 0; b < kRecNumBuckets; ++b) {
-        const int bucketW = kRecBucketWidths[b];
-        try {
-            auto inputData = PreprocessCropToBucket(dummy, imgH_, bucketW);
-            std::array<int64_t, 4> inputShape = { 1, 3, imgH_, bucketW };
-            Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
-                *memory_info_handler, inputData.data(), inputData.size(),
-                inputShape.data(), inputShape.size());
+    // Pre-compile every (batchN, bucketW) shape we expect to hit on the
+    // hot path.  Each unique shape costs ~1.7 s to compile on OpenVINO and
+    // ~150 ms on DirectML; without this prewarm, that compile cost shows
+    // up as a multi-hundred-ms frame spike the first time a given batch
+    // count + plate-width combination occurs in the video stream.
+    //
+    // We deliberately warm only kRecWarmupBatchLadder ({1,2,4,8}) instead
+    // of the full kRecBatchLadder ({1,2,4,8,16,24}) — covers >95% of ALPR
+    // frames while keeping engine-init time bounded.  Frames with 9+
+    // plates pay a one-time spike on first occurrence of batch=16/24.
+    auto totalT0 = std::chrono::high_resolution_clock::now();
+    int warmedShapes = 0;
+    for (int bs = 0; bs < kRecNumWarmupBatchSizes; ++bs) {
+        const int batchN = kRecWarmupBatchLadder[bs];
+        for (int b = 0; b < kRecNumBuckets; ++b) {
+            const int bucketW = kRecBucketWidths[b];
+            try {
+                const size_t perImage = static_cast<size_t>(3) * imgH_ * bucketW;
+                std::vector<float> batchInput(static_cast<size_t>(batchN) * perImage, 0.0f);
+                auto perCrop = PreprocessCropToBucket(dummy, imgH_, bucketW);
+                for (int i = 0; i < batchN; ++i) {
+                    std::memcpy(&batchInput[static_cast<size_t>(i) * perImage],
+                                perCrop.data(),
+                                perImage * sizeof(float));
+                }
+                std::array<int64_t, 4> inputShape = {
+                    static_cast<int64_t>(batchN), 3,
+                    static_cast<int64_t>(imgH_),
+                    static_cast<int64_t>(bucketW)
+                };
+                Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
+                    *memory_info_handler, batchInput.data(), batchInput.size(),
+                    inputShape.data(), inputShape.size());
 
-            auto t0 = std::chrono::high_resolution_clock::now();
-            (void)ort_session->Run(
-                Ort::RunOptions{ nullptr },
-                input_node_names.data(), &inputTensor, 1,
-                output_node_names.data(), num_outputs);
-            auto t1 = std::chrono::high_resolution_clock::now();
-            double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
-            std::cout << "[ONNXOCRRecognizer] Warmup bucketW=" << bucketW
-                      << "  " << ms << " ms" << std::endl;
-        }
-        catch (const Ort::Exception& e) {
-            std::cerr << "[ONNXOCRRecognizer] Warmup failed at bucketW="
-                      << bucketW << ": " << e.what() << std::endl;
+                auto t0 = std::chrono::high_resolution_clock::now();
+                (void)ort_session->Run(
+                    Ort::RunOptions{ nullptr },
+                    input_node_names.data(), &inputTensor, 1,
+                    output_node_names.data(), num_outputs);
+                auto t1 = std::chrono::high_resolution_clock::now();
+                double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
+                std::cout << "[ONNXOCRRecognizer] Warmup batch=" << batchN
+                          << " bucketW=" << bucketW
+                          << "  " << ms << " ms" << std::endl;
+                ++warmedShapes;
+            }
+            catch (const Ort::Exception& e) {
+                std::cerr << "[ONNXOCRRecognizer] Warmup failed at batch="
+                          << batchN << " bucketW=" << bucketW << ": "
+                          << e.what() << std::endl;
+            }
         }
     }
+    auto totalT1 = std::chrono::high_resolution_clock::now();
+    double totalMs = std::chrono::duration<double, std::milli>(totalT1 - totalT0).count();
+    std::cout << "[ONNXOCRRecognizer] Warmup complete: " << warmedShapes
+              << " shapes in " << totalMs << " ms total" << std::endl;
     _warmedUp = true;
 }
 
diff --git a/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h b/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h
index c12e02d..62a0b82 100644
--- a/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h
+++ b/modules/ANSOCR/ANSONNXOCR/ONNXOCRTypes.h
@@ -62,6 +62,37 @@ constexpr int kRecBatchSize = 6;
 // reasonable for your expected plate count per frame.
 constexpr int kRecMaxBatch = 24;
 
+// Runtime batch-padding ladder.  Real batch sizes get rounded UP to one of
+// these values before the ORT Run() call, with zero-padded slots in the
+// input tensor and the padded outputs discarded after CTC decode.  The goal
+// is to bound the number of distinct (batch, bucketW) input shapes the
+// recognizer ever sees: with this ladder × kRecBucketWidths there are at
+// most 24 unique shapes total, small enough for OpenVINO/DirectML to keep
+// in their per-shape kernel cache instead of recompiling mid-stream.
+//
+// Without this padding, OpenVINO recompiles the recognizer for every new
+// (N, W) combination — measured at ~1.7 s per recompile on Intel Iris Xe,
+// which is exactly what produced the 1500–1800 ms frame spikes seen in the
+// OPENVINO_GPU video tests.
+//
+// Entries MUST be ascending (RoundUpToBatchLadder returns the first match) and
+// the last MUST equal kRecMaxBatch, inside the TRT profile [1..kRecMaxBatch].
+constexpr int kRecBatchLadder[] = { 1, 2, 4, 8, 16, 24 };
+constexpr int kRecNumBatchSizes = sizeof(kRecBatchLadder) / sizeof(kRecBatchLadder[0]);
+static_assert(kRecBatchLadder[kRecNumBatchSizes - 1] == kRecMaxBatch,
+              "Last kRecBatchLadder entry must equal kRecMaxBatch so that "
+              "padded shapes stay inside the TRT profile.");
+
+// Warmup batch ladder — a subset of kRecBatchLadder used at engine init to
+// pre-compile the most common runtime shapes.  Kept smaller than the full
+// ladder because each warmup Run on OpenVINO costs ~1.7 s of kernel-compile
+// time, and {1,2,4,8} covers the realistic plate-count distribution for
+// ALPR (>95% of frames have ≤8 detected plates).  Frames with 9+ plates
+// will pay a one-time recompile spike on first occurrence of batch=16/24.
+constexpr int kRecWarmupBatchLadder[] = { 1, 2, 4, 8 };
+constexpr int kRecNumWarmupBatchSizes =
+    sizeof(kRecWarmupBatchLadder) / sizeof(kRecWarmupBatchLadder[0]);
+
 // A detected text box: 4 corner points (top-left, top-right, bottom-right, bottom-left)
 struct TextBox {
     std::array<cv::Point2f, 4> points;
diff --git a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp
index 6bc2401..9f7d896 100644
--- a/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp
+++ b/tests/ANSLPR-UnitTest/ANSLPR-UnitTest.cpp
@@ -3936,8 +3936,8 @@ int ALPR_OCR_VideoTest() {
 
     ANSCENTER::ANSALPR* infHandle = nullptr;
     std::string licenseKey = "";
-    std::string modelFilePath = "C:\\Projects\\ANSVIS\\Models\\ANS_GenericALPR_v2.0.zip";
-    std::string videoFilePath = "E:\\Programs\\DemoAssets\\Videos\\ALRP\\PMH\\Day\\day.mp4";
+    std::string modelFilePath = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\ANSALPR\\ANS_GenericALPR_v2.0.zip";
+    std::string videoFilePath = "C:\\ProgramData\\ANSCENTER\\Shared\\HM1.mp4";
 
     int engineType = 2; // ANSALPR_OCR
     double detectionThreshold = 0.3;
@@ -4099,12 +4099,12 @@ int main()
     //}
     //ANSLPR_SingleTask_Test();
     //ANSLPR_CPU_StressTest();
-    ANSLPR_MultiGPU_StressTest();
+   // ANSLPR_MultiGPU_StressTest();
     //ANSLPR_MultiGPU_StressTest_SimulatedCam();
    // ANSLPR_MultiGPU_StressTest_FilePlayer();
     //ANSLPR_OD_CPU_VideoTest();
     //ALPR_OCR_Test();
-    //ALPR_OCR_VideoTest();
+    ALPR_OCR_VideoTest();
     return 0;
 
 }
diff --git a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp
index c3250d6..98e3023 100644
--- a/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp
+++ b/tests/ANSOCR-UnitTest/ANSOCR-UnitTest.cpp
@@ -503,9 +503,9 @@ int main()
     SetConsoleOutputCP(CP_UTF8);
     SetConsoleCP(CP_UTF8);
 #endif
-    TestOCRv5mage();
+    //TestOCRv5mage();
 
-	//ANSOCR_VideoTest();
+	ANSOCR_VideoTest();
    // TestOCRImage();
  /*   for (int i = 0; i < 20; i++) {
 		TestOCRImage();