Fix AMD and OpenVINO

This commit is contained in:
2026-04-08 13:45:52 +10:00
parent a4a8caaa86
commit 69787b0ff0
15 changed files with 1209 additions and 132 deletions

View File

@@ -122,36 +122,65 @@ namespace ANSCENTER {
// Use AppendExecutionProvider_OpenVINO_V2 instead of the generic string API,
// matching the pattern used in YOLOOD/YOLO12OD/ANSONNXCL etc.
// Try device configs in priority order, falling back gracefully.
//
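// NPU availability is cached per process in the two static flags below.
// Whenever the AUTO:NPU,GPU config fails to attach (on the first call or
// on any later one), it is skipped for all subsequent models to avoid
// repeated "Failed to load shared library" errors cluttering the log.
// Once skipped, the NPU config is not retried for the rest of the process.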
static bool s_npuProbed = false;
static bool s_npuAvailable = false;
const std::string precision = "FP16";
const std::string numberOfThreads = "4";
const std::string numberOfStreams = "4";
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
{ {"device_type","GPU.0"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
{ {"device_type","GPU.1"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
auto makeConfig = [&](const std::string& device) {
return std::unordered_map<std::string, std::string>{
{"device_type", device}, {"precision", precision},
{"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
{"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "True"}
};
};
std::vector<std::unordered_map<std::string, std::string>> try_configs;
// Only try NPU if it hasn't been probed yet or was previously available
if (!s_npuProbed || s_npuAvailable) {
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
}
try_configs.push_back(makeConfig("GPU.0"));
try_configs.push_back(makeConfig("GPU.1"));
try_configs.push_back(makeConfig("AUTO:GPU,CPU"));
for (const auto& config : try_configs) {
try {
session_options.AppendExecutionProvider_OpenVINO_V2(config);
const auto& device = config.at("device_type");
std::cout << "[ORT] OpenVINO EP attached ("
<< config.at("device_type") << ", " << precision << ")." << std::endl;
<< device << ", " << precision << ")." << std::endl;
ANS_DBG("OrtHandler", "OpenVINO EP attached: %s", device.c_str());
// If NPU config succeeded, mark it available
if (device.find("NPU") != std::string::npos) {
s_npuProbed = true;
s_npuAvailable = true;
}
return true;
}
catch (const Ort::Exception& e) {
std::cerr << "[ORT] OpenVINO EP failed for device "
<< config.at("device_type") << ": " << e.what() << std::endl;
// try next config
const auto& device = config.at("device_type");
// If NPU config failed, remember so we skip it next time
if (device.find("NPU") != std::string::npos) {
if (!s_npuProbed) {
std::cout << "[ORT] NPU not available — skipping NPU configs for subsequent models." << std::endl;
ANS_DBG("OrtHandler", "NPU not available, will skip in future");
}
s_npuProbed = true;
s_npuAvailable = false;
} else {
std::cerr << "[ORT] OpenVINO EP failed for device "
<< device << ": " << e.what() << std::endl;
}
}
}
std::cerr << "[ORT] OpenVINO EP: all device configs failed." << std::endl;
@@ -164,7 +193,10 @@ namespace ANSCENTER {
void BasicOrtHandler::initialize_handler()
{
ANS_DBG("OrtHandler", "initialize_handler: m_engineType=%d", static_cast<int>(m_engineType));
const auto& epInfo = EPLoader::Current();
ANS_DBG("OrtHandler", "initialize_handler: EPLoader type=%d dir=%s",
static_cast<int>(epInfo.type), epInfo.libraryDir.c_str());
if (Ort::Global<void>::api_ == nullptr)
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
@@ -172,6 +204,12 @@ namespace ANSCENTER {
EngineType engine = (static_cast<int>(m_engineType) == -1)
? epInfo.type : m_engineType;
// Persist the resolved engine type so subclasses (e.g. ONNXYOLO)
// can branch on the actual EP at inference time (IoBinding for DML).
m_engineType = engine;
ANS_DBG("OrtHandler", "initialize_handler: resolved engine=%d (from %s)",
static_cast<int>(engine),
(static_cast<int>(m_engineType) == -1) ? "EPLoader" : "explicit");
ort_env = new Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id);
memory_info_handler = new Ort::MemoryInfo(
@@ -186,7 +224,17 @@ namespace ANSCENTER {
GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.SetLogSeverityLevel(4);
// DirectML REQUIRES these two settings per ORT documentation:
// - DisableMemPattern: DML manages its own memory; ORT's memory
// pattern optimization conflicts with DML's D3D12 allocator.
// - ORT_SEQUENTIAL: DML uses a single command queue and cannot
// handle parallel execution mode — doing so causes deadlocks
// when synchronizing GPU→CPU data transfers.
if (engine == EngineType::AMD_GPU) {
session_options.DisableMemPattern();
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
ANS_DBG("OrtHandler", "DirectML: DisableMemPattern + ORT_SEQUENTIAL set");
}
std::vector<std::string> available = Ort::GetAvailableProviders();
std::cout << "[ORT] Available providers: ";
@@ -206,41 +254,55 @@ namespace ANSCENTER {
{
// --------------------------------------------------------
case EngineType::NVIDIA_GPU:
ANS_DBG("OrtHandler", "Trying CUDA EP...");
if (hasProvider("CUDAExecutionProvider"))
epAttached = TryAppendCUDA(session_options);
if (!epAttached)
if (!epAttached) {
std::cerr << "[ORT] CUDA EP unavailable — falling back to CPU."
<< std::endl;
ANS_DBG("OrtHandler", "CUDA EP FAILED — fallback to CPU");
}
break;
// --------------------------------------------------------
case EngineType::AMD_GPU:
ANS_DBG("OrtHandler", "Trying DirectML EP...");
if (hasProvider("DmlExecutionProvider"))
epAttached = TryAppendDirectML(session_options);
if (!epAttached)
if (!epAttached) {
std::cerr << "[ORT] DirectML EP unavailable — falling back to CPU."
<< std::endl;
ANS_DBG("OrtHandler", "DirectML EP FAILED — fallback to CPU");
}
break;
// --------------------------------------------------------
case EngineType::OPENVINO_GPU:
ANS_DBG("OrtHandler", "Trying OpenVINO EP...");
if (hasProvider("OpenVINOExecutionProvider"))
epAttached = TryAppendOpenVINO(session_options);
if (!epAttached)
if (!epAttached) {
std::cerr << "[ORT] OpenVINO EP unavailable — falling back to CPU."
<< std::endl;
ANS_DBG("OrtHandler", "OpenVINO EP FAILED — fallback to CPU");
}
break;
// --------------------------------------------------------
case EngineType::CPU:
default:
std::cout << "[ORT] Using CPU EP." << std::endl;
ANS_DBG("OrtHandler", "Using CPU EP");
epAttached = true;
break;
}
if (!epAttached)
if (!epAttached) {
std::cout << "[ORT] Running on CPU EP (fallback)." << std::endl;
ANS_DBG("OrtHandler", "EP not attached — running on CPU fallback");
} else {
ANS_DBG("OrtHandler", "EP attached successfully");
}
// ----------------------------------------------------------------
// Create session
@@ -367,15 +429,19 @@ namespace ANSCENTER {
std::cout << "[ORT] Session created OK (" << label << ")." << std::endl;
};
ANS_DBG("OrtHandler", "Creating session for model: %ls", onnx_path);
try {
createSession(session_options, "primary EP");
ANS_DBG("OrtHandler", "Session created OK with primary EP");
}
catch (const Ort::Exception& e) {
ANS_DBG("OrtHandler", "Session FAILED with primary EP: %s", e.what());
std::cerr << "[ORT] Session creation FAILED with primary EP: "
<< e.what() << std::endl;
// If we were using a GPU EP, fall back to CPU
if (engine != EngineType::CPU && epAttached) {
ANS_DBG("OrtHandler", "Retrying with CPU fallback...");
std::cerr << "[ORT] Retrying with CPU EP (fallback)..." << std::endl;
// Build fresh session options — no GPU EP, no graph opt
@@ -404,6 +470,7 @@ namespace ANSCENTER {
}
}
catch (const std::exception& e) {
ANS_DBG("OrtHandler", "Session FAILED (std::exception): %s", e.what());
std::cerr << "[ORT] Session creation FAILED (std::exception): "
<< e.what() << std::endl;
throw;