Fix AMD DirectML hang by switching tensor reads from GetTensorData<T>() to GetTensorMutableData<T>()

This commit is contained in:
2026-04-28 13:25:02 +10:00
parent f4b74c837e
commit dcf974c35c
18 changed files with 359 additions and 48 deletions

View File

@@ -819,7 +819,10 @@ namespace ANSCENTER {
input_values_handler.clear();
input_values_handler.shrink_to_fit();
const float* vals = output_tensors[0].GetTensorData<float>();
// GetTensorMutableData on DirectML — the const GetTensorData
// triggers a per-call host-readable mapping that on AMD DML
// exhausts a staging-buffer pool after ~8 calls and hangs.
const float* vals = output_tensors[0].GetTensorMutableData<float>();
const unsigned int hidden_dim =
static_cast<unsigned int>(output_node_dims.at(0).at(1));
@@ -1377,7 +1380,9 @@ namespace ANSCENTER {
Ort::RunOptions{ nullptr },
in_names, &input_tensor, 1, out_names, 1);
out_result = post_processing(outputs[0].GetTensorData<float>());
// GetTensorMutableData (not GetTensorData) — see comment in this
// file's other output-read sites; const GetTensorData hangs on AMD DML.
out_result = post_processing(outputs[0].GetTensorMutableData<float>());
}
Ort::Value MOVINET::transform(const cv::Mat& mat)

View File

@@ -463,7 +463,9 @@ namespace ANSCENTER
auto info = outputs[maskIdx].GetTensorTypeAndShapeInfo();
m_cachedLangMaskShape = info.GetShape();
size_t count = info.GetElementCount();
const bool* data = outputs[maskIdx].GetTensorData<bool>();
// GetTensorMutableData not GetTensorData on DML — const variant
// hangs after ~8 calls. Read-only despite the "Mutable" name.
const bool* data = outputs[maskIdx].GetTensorMutableData<bool>();
m_cachedLangMask.resize(count);
for (size_t i = 0; i < count; ++i)
m_cachedLangMask[i] = data[i] ? 1 : 0;
@@ -474,7 +476,7 @@ namespace ANSCENTER
auto info = outputs[featIdx].GetTensorTypeAndShapeInfo();
m_cachedLangFeaturesShape = info.GetShape();
size_t count = info.GetElementCount();
const float* data = outputs[featIdx].GetTensorData<float>();
const float* data = outputs[featIdx].GetTensorMutableData<float>();
m_cachedLangFeatures.assign(data, data + count);
}
@@ -649,7 +651,7 @@ namespace ANSCENTER
if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT && !shape.empty()) {
size_t numElems = info.GetElementCount();
if (numElems > 0 && numElems < 100000000) {
const float* data = decInputs[di].GetTensorData<float>();
const float* data = decInputs[di].GetTensorMutableData<float>();
double sum = 0;
for (size_t k = 0; k < numElems; ++k) sum += data[k];
double mean = sum / numElems;
@@ -661,14 +663,14 @@ namespace ANSCENTER
// Print bool tensor values (for language_mask)
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL && !shape.empty()) {
size_t numElems = info.GetElementCount();
const bool* data = decInputs[di].GetTensorData<bool>();
const bool* data = decInputs[di].GetTensorMutableData<bool>();
std::cout << " vals:";
for (size_t k = 0; k < std::min(numElems, (size_t)32); ++k)
std::cout << " " << (int)data[k];
}
// Print int64 scalar value
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64 && shape.empty()) {
const int64_t* data = decInputs[di].GetTensorData<int64_t>();
const int64_t* data = decInputs[di].GetTensorMutableData<int64_t>();
std::cout << " value=" << data[0];
}
std::cout << std::endl;
@@ -702,10 +704,10 @@ namespace ANSCENTER
auto boxInfo = decOutputs[boxesIdx].GetTensorTypeAndShapeInfo();
auto boxShape = boxInfo.GetShape();
int numBoxes = (boxShape.size() >= 1) ? static_cast<int>(boxShape[0]) : 0;
const float* boxesData = decOutputs[boxesIdx].GetTensorData<float>();
const float* boxesData = decOutputs[boxesIdx].GetTensorMutableData<float>();
// Get scores
const float* scoresData = decOutputs[scoresIdx].GetTensorData<float>();
const float* scoresData = decOutputs[scoresIdx].GetTensorMutableData<float>();
// Get masks
auto maskInfo = decOutputs[masksIdx].GetTensorTypeAndShapeInfo();
@@ -713,7 +715,7 @@ namespace ANSCENTER
// masks shape: [N, 1, H, W]
int maskH = (maskShape.size() >= 3) ? static_cast<int>(maskShape[2]) : 0;
int maskW = (maskShape.size() >= 4) ? static_cast<int>(maskShape[3]) : 0;
const bool* masksData = decOutputs[masksIdx].GetTensorData<bool>();
const bool* masksData = decOutputs[masksIdx].GetTensorMutableData<bool>();
m_maskH = maskH;
m_maskW = maskW;