Fix AMD DirectML hang in ANSONNXCL::postprocess by changing from GetTensorData<T>() to GetTensorMutableData<T>()
This commit is contained in:
@@ -575,7 +575,7 @@ namespace ANSCENTER
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
|
||||
std::vector<Object> ANSONNXCL::postprocess(std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
|
||||
ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p",
|
||||
outputTensors.size(), camera_id.c_str(), (void*)this);
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
@@ -589,8 +589,16 @@ namespace ANSCENTER
|
||||
return {};
|
||||
}
|
||||
|
||||
ANS_DBG("ANSONNXCL_pp", "GetTensorData<float>");
|
||||
const float* rawOutput = outputTensors[0].GetTensorData<float>();
|
||||
ANS_DBG("ANSONNXCL_pp", "GetTensorMutableData<float>");
|
||||
// Use GetTensorMutableData (not GetTensorData) on DirectML. The const
|
||||
// GetTensorData triggers a per-call host-readable mapping that on
|
||||
// AMD DML exhausts a small staging-buffer pool after ~8 calls and
|
||||
// blocks indefinitely. GetTensorMutableData returns the existing
|
||||
// host-accessible pointer directly with no per-call mapping cost.
|
||||
// Same pattern used by every output-tensor read in ANSONNXYOLO
|
||||
// and engines/ONNXEngine. Safe on all EPs (CUDA/OpenVINO/CPU);
|
||||
// we read the data only, never mutate it.
|
||||
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||
if (!rawOutput) {
|
||||
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null");
|
||||
this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
|
||||
|
||||
Reference in New Issue
Block a user