Fix AMD DirectML hang by changing from GetTensorData<T>() to GetTensorMutableData<T>() when reading output tensors

This commit is contained in:
2026-04-28 13:25:02 +10:00
parent f4b74c837e
commit dcf974c35c
18 changed files with 359 additions and 48 deletions

View File

@@ -575,7 +575,7 @@ namespace ANSCENTER
return false;
}
}
std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
std::vector<Object> ANSONNXCL::postprocess(std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p",
outputTensors.size(), camera_id.c_str(), (void*)this);
std::lock_guard<std::recursive_mutex> lock(_mutex);
@@ -589,8 +589,16 @@ namespace ANSCENTER
return {};
}
ANS_DBG("ANSONNXCL_pp", "GetTensorData<float>");
const float* rawOutput = outputTensors[0].GetTensorData<float>();
ANS_DBG("ANSONNXCL_pp", "GetTensorMutableData<float>");
// Use GetTensorMutableData (not GetTensorData) on DirectML. The const
// GetTensorData triggers a per-call host-readable mapping that on
// AMD DML exhausts a small staging-buffer pool after ~8 calls and
// blocks indefinitely. GetTensorMutableData returns the existing
// host-accessible pointer directly with no per-call mapping cost.
// Same pattern used by every output-tensor read in ANSONNXYOLO
// and engines/ONNXEngine. Safe on all EPs (CUDA/OpenVINO/CPU);
// we read the data only, never mutate it.
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
if (!rawOutput) {
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null");
this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);