Fix AMD and OpenVINO
This commit is contained in:
@@ -122,36 +122,65 @@ namespace ANSCENTER {
|
||||
// Use AppendExecutionProvider_OpenVINO_V2 instead of the generic string API,
|
||||
// matching the pattern used in YOLOOD/YOLO12OD/ANSONNXCL etc.
|
||||
// Try device configs in priority order, falling back gracefully.
|
||||
//
|
||||
// NPU availability is probed once per process. If AUTO:NPU,GPU fails on
|
||||
// the first call, we skip it for all subsequent models to avoid repeated
|
||||
// "Failed to load shared library" errors cluttering the log.
|
||||
static bool s_npuProbed = false;
|
||||
static bool s_npuAvailable = false;
|
||||
|
||||
const std::string precision = "FP16";
|
||||
const std::string numberOfThreads = "4";
|
||||
const std::string numberOfStreams = "4";
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs = {
|
||||
{ {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{ {"device_type","GPU.0"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{ {"device_type","GPU.1"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
|
||||
{ {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
|
||||
{"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
|
||||
{"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
|
||||
auto makeConfig = [&](const std::string& device) {
|
||||
return std::unordered_map<std::string, std::string>{
|
||||
{"device_type", device}, {"precision", precision},
|
||||
{"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
|
||||
{"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "True"}
|
||||
};
|
||||
};
|
||||
|
||||
std::vector<std::unordered_map<std::string, std::string>> try_configs;
|
||||
|
||||
// Only try NPU if it hasn't been probed yet or was previously available
|
||||
if (!s_npuProbed || s_npuAvailable) {
|
||||
try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
|
||||
}
|
||||
try_configs.push_back(makeConfig("GPU.0"));
|
||||
try_configs.push_back(makeConfig("GPU.1"));
|
||||
try_configs.push_back(makeConfig("AUTO:GPU,CPU"));
|
||||
|
||||
for (const auto& config : try_configs) {
|
||||
try {
|
||||
session_options.AppendExecutionProvider_OpenVINO_V2(config);
|
||||
const auto& device = config.at("device_type");
|
||||
std::cout << "[ORT] OpenVINO EP attached ("
|
||||
<< config.at("device_type") << ", " << precision << ")." << std::endl;
|
||||
<< device << ", " << precision << ")." << std::endl;
|
||||
ANS_DBG("OrtHandler", "OpenVINO EP attached: %s", device.c_str());
|
||||
|
||||
// If NPU config succeeded, mark it available
|
||||
if (device.find("NPU") != std::string::npos) {
|
||||
s_npuProbed = true;
|
||||
s_npuAvailable = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
std::cerr << "[ORT] OpenVINO EP failed for device "
|
||||
<< config.at("device_type") << ": " << e.what() << std::endl;
|
||||
// try next config
|
||||
const auto& device = config.at("device_type");
|
||||
|
||||
// If NPU config failed, remember so we skip it next time
|
||||
if (device.find("NPU") != std::string::npos) {
|
||||
if (!s_npuProbed) {
|
||||
std::cout << "[ORT] NPU not available — skipping NPU configs for subsequent models." << std::endl;
|
||||
ANS_DBG("OrtHandler", "NPU not available, will skip in future");
|
||||
}
|
||||
s_npuProbed = true;
|
||||
s_npuAvailable = false;
|
||||
} else {
|
||||
std::cerr << "[ORT] OpenVINO EP failed for device "
|
||||
<< device << ": " << e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cerr << "[ORT] OpenVINO EP: all device configs failed." << std::endl;
|
||||
@@ -164,7 +193,10 @@ namespace ANSCENTER {
|
||||
|
||||
void BasicOrtHandler::initialize_handler()
|
||||
{
|
||||
ANS_DBG("OrtHandler", "initialize_handler: m_engineType=%d", static_cast<int>(m_engineType));
|
||||
const auto& epInfo = EPLoader::Current();
|
||||
ANS_DBG("OrtHandler", "initialize_handler: EPLoader type=%d dir=%s",
|
||||
static_cast<int>(epInfo.type), epInfo.libraryDir.c_str());
|
||||
if (Ort::Global<void>::api_ == nullptr)
|
||||
Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));
|
||||
|
||||
@@ -172,6 +204,12 @@ namespace ANSCENTER {
|
||||
|
||||
EngineType engine = (static_cast<int>(m_engineType) == -1)
|
||||
? epInfo.type : m_engineType;
|
||||
// Persist the resolved engine type so subclasses (e.g. ONNXYOLO)
|
||||
// can branch on the actual EP at inference time (IoBinding for DML).
|
||||
m_engineType = engine;
|
||||
ANS_DBG("OrtHandler", "initialize_handler: resolved engine=%d (from %s)",
|
||||
static_cast<int>(engine),
|
||||
(static_cast<int>(m_engineType) == -1) ? "EPLoader" : "explicit");
|
||||
|
||||
ort_env = new Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id);
|
||||
memory_info_handler = new Ort::MemoryInfo(
|
||||
@@ -186,7 +224,17 @@ namespace ANSCENTER {
|
||||
GraphOptimizationLevel::ORT_ENABLE_ALL);
|
||||
session_options.SetLogSeverityLevel(4);
|
||||
|
||||
|
||||
// DirectML REQUIRES these two settings per ORT documentation:
|
||||
// - DisableMemPattern: DML manages its own memory; ORT's memory
|
||||
// pattern optimization conflicts with DML's D3D12 allocator.
|
||||
// - ORT_SEQUENTIAL: DML uses a single command queue and cannot
|
||||
// handle parallel execution mode — doing so causes deadlocks
|
||||
// when synchronizing GPU→CPU data transfers.
|
||||
if (engine == EngineType::AMD_GPU) {
|
||||
session_options.DisableMemPattern();
|
||||
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
|
||||
ANS_DBG("OrtHandler", "DirectML: DisableMemPattern + ORT_SEQUENTIAL set");
|
||||
}
|
||||
|
||||
std::vector<std::string> available = Ort::GetAvailableProviders();
|
||||
std::cout << "[ORT] Available providers: ";
|
||||
@@ -206,41 +254,55 @@ namespace ANSCENTER {
|
||||
{
|
||||
// --------------------------------------------------------
|
||||
case EngineType::NVIDIA_GPU:
|
||||
ANS_DBG("OrtHandler", "Trying CUDA EP...");
|
||||
if (hasProvider("CUDAExecutionProvider"))
|
||||
epAttached = TryAppendCUDA(session_options);
|
||||
if (!epAttached)
|
||||
if (!epAttached) {
|
||||
std::cerr << "[ORT] CUDA EP unavailable — falling back to CPU."
|
||||
<< std::endl;
|
||||
ANS_DBG("OrtHandler", "CUDA EP FAILED — fallback to CPU");
|
||||
}
|
||||
break;
|
||||
|
||||
// --------------------------------------------------------
|
||||
case EngineType::AMD_GPU:
|
||||
ANS_DBG("OrtHandler", "Trying DirectML EP...");
|
||||
if (hasProvider("DmlExecutionProvider"))
|
||||
epAttached = TryAppendDirectML(session_options);
|
||||
if (!epAttached)
|
||||
if (!epAttached) {
|
||||
std::cerr << "[ORT] DirectML EP unavailable — falling back to CPU."
|
||||
<< std::endl;
|
||||
ANS_DBG("OrtHandler", "DirectML EP FAILED — fallback to CPU");
|
||||
}
|
||||
break;
|
||||
|
||||
// --------------------------------------------------------
|
||||
case EngineType::OPENVINO_GPU:
|
||||
ANS_DBG("OrtHandler", "Trying OpenVINO EP...");
|
||||
if (hasProvider("OpenVINOExecutionProvider"))
|
||||
epAttached = TryAppendOpenVINO(session_options);
|
||||
if (!epAttached)
|
||||
if (!epAttached) {
|
||||
std::cerr << "[ORT] OpenVINO EP unavailable — falling back to CPU."
|
||||
<< std::endl;
|
||||
ANS_DBG("OrtHandler", "OpenVINO EP FAILED — fallback to CPU");
|
||||
}
|
||||
break;
|
||||
|
||||
// --------------------------------------------------------
|
||||
case EngineType::CPU:
|
||||
default:
|
||||
std::cout << "[ORT] Using CPU EP." << std::endl;
|
||||
ANS_DBG("OrtHandler", "Using CPU EP");
|
||||
epAttached = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!epAttached)
|
||||
if (!epAttached) {
|
||||
std::cout << "[ORT] Running on CPU EP (fallback)." << std::endl;
|
||||
ANS_DBG("OrtHandler", "EP not attached — running on CPU fallback");
|
||||
} else {
|
||||
ANS_DBG("OrtHandler", "EP attached successfully");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Create session
|
||||
@@ -367,15 +429,19 @@ namespace ANSCENTER {
|
||||
std::cout << "[ORT] Session created OK (" << label << ")." << std::endl;
|
||||
};
|
||||
|
||||
ANS_DBG("OrtHandler", "Creating session for model: %ls", onnx_path);
|
||||
try {
|
||||
createSession(session_options, "primary EP");
|
||||
ANS_DBG("OrtHandler", "Session created OK with primary EP");
|
||||
}
|
||||
catch (const Ort::Exception& e) {
|
||||
ANS_DBG("OrtHandler", "Session FAILED with primary EP: %s", e.what());
|
||||
std::cerr << "[ORT] Session creation FAILED with primary EP: "
|
||||
<< e.what() << std::endl;
|
||||
|
||||
// If we were using a GPU EP, fall back to CPU
|
||||
if (engine != EngineType::CPU && epAttached) {
|
||||
ANS_DBG("OrtHandler", "Retrying with CPU fallback...");
|
||||
std::cerr << "[ORT] Retrying with CPU EP (fallback)..." << std::endl;
|
||||
|
||||
// Build fresh session options — no GPU EP, no graph opt
|
||||
@@ -404,6 +470,7 @@ namespace ANSCENTER {
|
||||
}
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
ANS_DBG("OrtHandler", "Session FAILED (std::exception): %s", e.what());
|
||||
std::cerr << "[ORT] Session creation FAILED (std::exception): "
|
||||
<< e.what() << std::endl;
|
||||
throw;
|
||||
|
||||
Reference in New Issue
Block a user