Fix AMD DirectML hang by changing from GetTensorData<T>() to GetTensorMutableData<T>()

2026-04-28 13:25:02 +10:00
parent f4b74c837e
commit dcf974c35c
18 changed files with 359 additions and 48 deletions
--- a/engines/ONNXEngine/ONNXEngine.cpp
+++ b/engines/ONNXEngine/ONNXEngine.cpp
@@ -819,7 +819,10 @@ namespace ANSCENTER {
            input_values_handler.clear();
            input_values_handler.shrink_to_fit();

-            const float* vals = output_tensors[0].GetTensorData<float>();
+            // Use GetTensorMutableData on DirectML: the const GetTensorData
+            // triggers a per-call host-readable mapping that, on AMD DML,
+            // exhausts a staging-buffer pool after ~8 calls and hangs.
+            const float* vals = output_tensors[0].GetTensorMutableData<float>();
            const unsigned int  hidden_dim =
                static_cast<unsigned int>(output_node_dims.at(0).at(1));

@@ -1377,7 +1380,9 @@ namespace ANSCENTER {
            Ort::RunOptions{ nullptr },
            in_names, &input_tensor, 1, out_names, 1);

-        out_result = post_processing(outputs[0].GetTensorData<float>());
+        // Use GetTensorMutableData (not GetTensorData): the const accessor
+        // hangs on AMD DML; same workaround as this file's other output reads.
+        out_result = post_processing(outputs[0].GetTensorMutableData<float>());
    }

    Ort::Value MOVINET::transform(const cv::Mat& mat)