Fix AMD and OpenVINO

2026-04-08 13:45:52 +10:00
parent a4a8caaa86
commit 69787b0ff0
15 changed files with 1209 additions and 132 deletions
--- a/engines/ONNXEngine/ONNXEngine.cpp
+++ b/engines/ONNXEngine/ONNXEngine.cpp
@@ -122,36 +122,65 @@ namespace ANSCENTER {
        // Use AppendExecutionProvider_OpenVINO_V2 instead of the generic string API,
        // matching the pattern used in YOLOOD/YOLO12OD/ANSONNXCL etc.
        // Try device configs in priority order, falling back gracefully.
+        //
+        // NPU availability is probed once per process: after AUTO:NPU,GPU fails once,
+        // later models skip it, avoiding repeated "Failed to load shared library" log
+        // noise. NOTE(review): flags are unsynchronized statics — confirm no concurrent init.
+        static bool s_npuProbed = false;
+        static bool s_npuAvailable = false;
+
        const std::string precision = "FP16";
        const std::string numberOfThreads = "4";
        const std::string numberOfStreams = "4";

-        std::vector<std::unordered_map<std::string, std::string>> try_configs = {
-            { {"device_type","AUTO:NPU,GPU"}, {"precision",precision},
-              {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
-              {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
-            { {"device_type","GPU.0"},        {"precision",precision},
-              {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
-              {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
-            { {"device_type","GPU.1"},        {"precision",precision},
-              {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
-              {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} },
-            { {"device_type","AUTO:GPU,CPU"}, {"precision",precision},
-              {"num_of_threads",numberOfThreads}, {"num_streams",numberOfStreams},
-              {"enable_opencl_throttling","False"}, {"enable_qdq_optimizer","True"} }
+        auto makeConfig = [&](const std::string& device) {
+            return std::unordered_map<std::string, std::string>{
+                {"device_type", device}, {"precision", precision},
+                {"num_of_threads", numberOfThreads}, {"num_streams", numberOfStreams},
+                {"enable_opencl_throttling", "False"}, {"enable_qdq_optimizer", "True"}
+            };
        };

+        std::vector<std::unordered_map<std::string, std::string>> try_configs;
+
+        // Only try NPU if it hasn't been probed yet or was previously available
+        if (!s_npuProbed || s_npuAvailable) {
+            try_configs.push_back(makeConfig("AUTO:NPU,GPU"));
+        }
+        try_configs.push_back(makeConfig("GPU.0"));
+        try_configs.push_back(makeConfig("GPU.1"));
+        try_configs.push_back(makeConfig("AUTO:GPU,CPU"));
+
        for (const auto& config : try_configs) {
            try {
                session_options.AppendExecutionProvider_OpenVINO_V2(config);
+                const auto& device = config.at("device_type");
                std::cout << "[ORT] OpenVINO EP attached ("
-                    << config.at("device_type") << ", " << precision << ")." << std::endl;
+                    << device << ", " << precision << ")." << std::endl;
+                ANS_DBG("OrtHandler", "OpenVINO EP attached: %s", device.c_str());
+
+                // NPU config succeeded: record the probe result so later models keep trying it
+                if (device.find("NPU") != std::string::npos) {
+                    s_npuProbed = true;
+                    s_npuAvailable = true;
+                }
                return true;
            }
            catch (const Ort::Exception& e) {
-                std::cerr << "[ORT] OpenVINO EP failed for device "
-                    << config.at("device_type") << ": " << e.what() << std::endl;
-                // try next config
+                const auto& device = config.at("device_type");
+
+                // NPU config failed: skip it next time; report once per change to "unavailable"
+                if (device.find("NPU") != std::string::npos) {
+                    if (!s_npuProbed || s_npuAvailable) {
+                        std::cout << "[ORT] NPU not available — skipping NPU configs for subsequent models." << std::endl;
+                        ANS_DBG("OrtHandler", "NPU not available, will skip in future");
+                    }
+                    s_npuProbed = true;
+                    s_npuAvailable = false;
+                } else {
+                    std::cerr << "[ORT] OpenVINO EP failed for device "
+                        << device << ": " << e.what() << std::endl;
+                }
            }
        }
        std::cerr << "[ORT] OpenVINO EP: all device configs failed." << std::endl;
@@ -164,7 +193,10 @@ namespace ANSCENTER {

    void BasicOrtHandler::initialize_handler()
    {
+        ANS_DBG("OrtHandler", "initialize_handler: m_engineType=%d", static_cast<int>(m_engineType));
        const auto& epInfo = EPLoader::Current();
+        ANS_DBG("OrtHandler", "initialize_handler: EPLoader type=%d dir=%s",
+            static_cast<int>(epInfo.type), epInfo.libraryDir.c_str());
        if (Ort::Global<void>::api_ == nullptr)
            Ort::InitApi(static_cast<const OrtApi*>(EPLoader::GetOrtApiRaw()));

@@ -172,6 +204,12 @@ namespace ANSCENTER {

        EngineType engine = (static_cast<int>(m_engineType) == -1)
            ? epInfo.type : m_engineType;
+        // Log first: the "from" label needs the pre-resolution m_engineType (-1 = take EPLoader's type).
+        ANS_DBG("OrtHandler", "initialize_handler: resolved engine=%d (from %s)",
+            static_cast<int>(engine),
+            (static_cast<int>(m_engineType) == -1) ? "EPLoader" : "explicit");
+        // Persist the resolved type so subclasses (e.g. ONNXYOLO) can branch on the actual EP at inference time (IoBinding for DML).
+        m_engineType = engine;
        
        ort_env = new Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id);
        memory_info_handler = new Ort::MemoryInfo(
@@ -186,7 +224,17 @@ namespace ANSCENTER {
            GraphOptimizationLevel::ORT_ENABLE_ALL);
        session_options.SetLogSeverityLevel(4);

-  
+        // DirectML REQUIRES these two settings per ORT documentation:
+        //  - DisableMemPattern: DML manages its own memory; ORT's memory
+        //    pattern optimization conflicts with DML's D3D12 allocator.
+        //  - ORT_SEQUENTIAL: DML uses a single command queue and cannot
+        //    handle parallel execution mode — doing so causes deadlocks
+        //    when synchronizing GPU→CPU data transfers.
+        if (engine == EngineType::AMD_GPU) {
+            session_options.DisableMemPattern();
+            session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
+            ANS_DBG("OrtHandler", "DirectML: DisableMemPattern + ORT_SEQUENTIAL set");
+        }

        std::vector<std::string> available = Ort::GetAvailableProviders();
        std::cout << "[ORT] Available providers: ";
@@ -206,41 +254,55 @@ namespace ANSCENTER {
        {
            // --------------------------------------------------------
        case EngineType::NVIDIA_GPU:
+            ANS_DBG("OrtHandler", "Trying CUDA EP...");
            if (hasProvider("CUDAExecutionProvider"))
                epAttached = TryAppendCUDA(session_options);
-            if (!epAttached)
+            if (!epAttached) {
                std::cerr << "[ORT] CUDA EP unavailable — falling back to CPU."
                << std::endl;
+                ANS_DBG("OrtHandler", "CUDA EP FAILED — fallback to CPU");
+            }
            break;

            // --------------------------------------------------------
        case EngineType::AMD_GPU:
+            ANS_DBG("OrtHandler", "Trying DirectML EP...");
            if (hasProvider("DmlExecutionProvider"))
                epAttached = TryAppendDirectML(session_options);
-            if (!epAttached)
+            if (!epAttached) {
                std::cerr << "[ORT] DirectML EP unavailable — falling back to CPU."
                << std::endl;
+                ANS_DBG("OrtHandler", "DirectML EP FAILED — fallback to CPU");
+            }
            break;

            // --------------------------------------------------------
        case EngineType::OPENVINO_GPU:
+            ANS_DBG("OrtHandler", "Trying OpenVINO EP...");
            if (hasProvider("OpenVINOExecutionProvider"))
                epAttached = TryAppendOpenVINO(session_options);
-            if (!epAttached)
+            if (!epAttached) {
                std::cerr << "[ORT] OpenVINO EP unavailable — falling back to CPU."
                << std::endl;
+                ANS_DBG("OrtHandler", "OpenVINO EP FAILED — fallback to CPU");
+            }
            break;

            // --------------------------------------------------------
        case EngineType::CPU:
        default:
            std::cout << "[ORT] Using CPU EP." << std::endl;
+            ANS_DBG("OrtHandler", "Using CPU EP");
            epAttached = true;
            break;
        }

-        if (!epAttached)
+        if (!epAttached) {
            std::cout << "[ORT] Running on CPU EP (fallback)." << std::endl;
+            ANS_DBG("OrtHandler", "EP not attached — running on CPU fallback");
+        } else {
+            ANS_DBG("OrtHandler", "EP attached successfully");
+        }

        // ----------------------------------------------------------------
        // Create session
@@ -367,15 +429,19 @@ namespace ANSCENTER {
            std::cout << "[ORT] Session created OK (" << label << ")." << std::endl;
        };

+        ANS_DBG("OrtHandler", "Creating session for model: %ls", onnx_path);
        try {
            createSession(session_options, "primary EP");
+            ANS_DBG("OrtHandler", "Session created OK with primary EP");
        }
        catch (const Ort::Exception& e) {
+            ANS_DBG("OrtHandler", "Session FAILED with primary EP: %s", e.what());
            std::cerr << "[ORT] Session creation FAILED with primary EP: "
                      << e.what() << std::endl;

            // If we were using a GPU EP, fall back to CPU
            if (engine != EngineType::CPU && epAttached) {
+                ANS_DBG("OrtHandler", "Retrying with CPU fallback...");
                std::cerr << "[ORT] Retrying with CPU EP (fallback)..." << std::endl;

                // Build fresh session options — no GPU EP, no graph opt
@@ -404,6 +470,7 @@ namespace ANSCENTER {
            }
        }
        catch (const std::exception& e) {
+            ANS_DBG("OrtHandler", "Session FAILED (std::exception): %s", e.what());
            std::cerr << "[ORT] Session creation FAILED (std::exception): "
                      << e.what() << std::endl;
            throw;