Add CPU/GPU gate and support new ANSALPR using OCR
This commit is contained in:
57
modules/ANSODEngine/ANSODVendorGate.h
Normal file
57
modules/ANSODEngine/ANSODVendorGate.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#pragma once
|
||||
// ANSODVendorGate.h — Cached NVIDIA hardware check for ANSODEngine.dll.
|
||||
//
|
||||
// ANSODEngine.dll links against CUDA::cudart_static + CUDA::cublas +
|
||||
// CUDA::cublasLt and hosts the TensorRT inference classes (ANSRTYOLO,
|
||||
// TENSORRTOD, TENSORRTCL, TENSORRTSEG, TENSORRTPOSE, ANSSAM3, ANSYOLOV10RTOD,
|
||||
// ANSYOLOV12RTOD, ANSTENSORRTPOSE) plus the NV12 preprocess helper and the
|
||||
// TRT engine pool.
|
||||
//
|
||||
// The dllmain factory already hard-gates TRT class instantiation on
|
||||
// NVIDIA_GPU and falls back to ONNX Runtime / OpenVINO on AMD/Intel/CPU.
|
||||
// However, several support paths still call into the CUDA runtime
|
||||
// unconditionally:
|
||||
// • GetNumGPUs() / GetPoolMaxSlotsPerGpu() / CheckGPUVRAM() helpers
|
||||
// (called from inside NVIDIA_GPU guards today, but safer to gate at
|
||||
// source so a future refactor cannot accidentally wake up cudart on
|
||||
// AMD/Intel).
|
||||
// • A few case labels in the model-type switch instantiate TRT classes
|
||||
// without an explicit NVIDIA_GPU check — they are currently unreachable
|
||||
// due to upstream modelType rewriting, but leaving them unguarded
|
||||
// creates a maintenance trap.
|
||||
//
|
||||
// Solution: a single process-wide cached predicate that evaluates
|
||||
// CheckHardwareInformation() exactly once. On AMD/Intel/CPU the predicate
|
||||
// returns false and every gated site short-circuits before touching any
|
||||
// CUDA API.
|
||||
//
|
||||
// Mirrors ANSCVVendorGate / ANSLPR_OD::isNvidiaEngine / ANSOCR factory gate
|
||||
// / ANSFR CreateANSRFHandle vendor log. Keeps the four shipped DLLs on a
|
||||
// single, auditable pattern.
|
||||
|
||||
#include "ANSLicense.h"
|
||||
#include <atomic>
|
||||
|
||||
namespace ansod_vendor_gate {
|
||||
|
||||
// Lazily evaluates ANSLicenseHelper::CheckHardwareInformation() once and
|
||||
// caches the result. Thread-safe via std::atomic<int> (0 = unknown,
|
||||
// 1 = NVIDIA, 2 = non-NVIDIA). No std::call_once overhead on the hot
|
||||
// inference path. Fails safe to non-NVIDIA on exception.
|
||||
[[nodiscard]] inline bool IsNvidiaGpuAvailable() noexcept {
|
||||
static std::atomic<int> s_state{0};
|
||||
int cached = s_state.load(std::memory_order_acquire);
|
||||
if (cached != 0) return cached == 1;
|
||||
try {
|
||||
const ANSCENTER::EngineType detected =
|
||||
ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
const bool isNvidia = (detected == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
s_state.store(isNvidia ? 1 : 2, std::memory_order_release);
|
||||
return isNvidia;
|
||||
} catch (...) {
|
||||
s_state.store(2, std::memory_order_release);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ansod_vendor_gate
|
||||
@@ -237,6 +237,38 @@ namespace ANSCENTER {
|
||||
output_node_names.data(),
|
||||
num_outputs);
|
||||
|
||||
// ── Output shape sanity check ───────────────────────────────────
|
||||
// DirectML on some AMD configurations has been observed to return
|
||||
// output tensors whose dim[1]/dim[2] values don't match what the
|
||||
// ONNX graph actually produced, which propagates into
|
||||
// postprocessLegacy / postprocessEndToEnd as huge numBoxes /
|
||||
// numChannels values and causes multi-terabyte cv::Mat allocations
|
||||
// inside the `cv::Mat(numChannels, numBoxes, CV_32F, ...).t()`
|
||||
// call (observed as "Failed to allocate 3522082959360 bytes" on
|
||||
// Ryzen APUs). Bail out early here instead of letting the
|
||||
// postprocess layer try to materialise a 3.5 TB buffer.
|
||||
//
|
||||
// Sane upper bounds for Ultralytics YOLO outputs:
|
||||
// • legacy [1, 84..300, 8400..25200] → max dim ≈ 30k
|
||||
// • end2end [1, 300, 6..56] → max dim ≈ 300
|
||||
// • segmentation proto mask [1, 32, 160, 160] → max dim ≈ 160
|
||||
// • classification [1, 1000] → max dim ≈ 1k
|
||||
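// 1,000,000 is ~30x the largest real-world dim and catches the
|
||||
// garbage values without clipping any legitimate model.
|
||||
// NOTE(review): the bound is applied to each dim on its own, so two
|
||||
// in-range dims can still multiply to a very large element count —
|
||||
// consider also capping the product of the dims.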
|
||||
constexpr int64_t kMaxOutputDim = 1000000;
|
||||
for (size_t t = 0; t < outputTensors.size(); ++t) {
|
||||
const auto shape = outputTensors[t].GetTensorTypeAndShapeInfo().GetShape();
|
||||
for (size_t d = 0; d < shape.size(); ++d) {
|
||||
if (shape[d] < 0 || shape[d] > kMaxOutputDim) {
|
||||
std::cerr << "[ONNXYOLO] detect: output[" << t
|
||||
<< "] dim[" << d << "]=" << shape[d]
|
||||
<< " is out of range — refusing to postprocess."
|
||||
<< std::endl;
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cv::Size resizedShape(
|
||||
static_cast<int>(input_node_dims[3]),
|
||||
static_cast<int>(input_node_dims[2]));
|
||||
@@ -1399,6 +1431,23 @@ namespace ANSCENTER {
|
||||
output_node_names.data(),
|
||||
num_outputs);
|
||||
|
||||
// Output shape sanity check — see detect() for rationale. Prevents
|
||||
// DirectML-returned garbage dims from propagating into postprocess
|
||||
// and triggering multi-terabyte cv::Mat allocations on AMD.
|
||||
constexpr int64_t kMaxOutputDim = 1000000;
|
||||
for (size_t t = 0; t < outputTensors.size(); ++t) {
|
||||
const auto sh = outputTensors[t].GetTensorTypeAndShapeInfo().GetShape();
|
||||
for (size_t d = 0; d < sh.size(); ++d) {
|
||||
if (sh[d] < 0 || sh[d] > kMaxOutputDim) {
|
||||
std::cerr << "[ONNXYOLO] detectBatch: output[" << t
|
||||
<< "] dim[" << d << "]=" << sh[d]
|
||||
<< " is out of range — refusing to postprocess."
|
||||
<< std::endl;
|
||||
return std::vector<std::vector<Object>>(N);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cv::Size resizedShape(
|
||||
static_cast<int>(input_node_dims[3]),
|
||||
static_cast<int>(input_node_dims[2]));
|
||||
@@ -1589,59 +1638,92 @@ namespace ANSCENTER {
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// WarmUpEngine — run 2 dummy inferences after session creation
|
||||
// WarmUpEngine — run a dummy inference after session creation.
|
||||
//
|
||||
// On AMD RDNA2 iGPUs (e.g. Radeon 680M on Ryzen 6000-series APUs), the
|
||||
// very first detect() call triggers DirectML shader compile + GPU kernel
|
||||
// cache population for the entire YOLO graph. That first pass can
|
||||
// legitimately take several seconds of sustained GPU work, which is long
|
||||
// enough to coincide with TDR watchdog firing and has triggered
|
||||
// amdkmdag.sys bugchecks at +0xf03d under DirectML 1.15.4 (the latest).
|
||||
// Scope: **NVIDIA (CUDA EP) only.** On first inference, the CUDA EP
|
||||
// allocates its memory arena (capped at 2 GB via BasicOrtHandler config),
|
||||
// resolves cuDNN convolution algorithms, and populates the kernel launch
|
||||
// cache. Running one dummy inference at load time amortises this cost
|
||||
// so the first real frame doesn't see a latency spike.
|
||||
//
|
||||
// Running 2 dummy inferences at startup burns the compile cost under
|
||||
// controlled conditions so that the first real frame is already fast.
|
||||
// The second call should always be quick and confirms the cache is warm.
|
||||
// Explicitly disabled on AMD, Intel and CPU:
|
||||
// • AMD (DirectML) — calling detect() at load time has been observed
|
||||
// to hit a multi-terabyte cv::Mat allocation inside postprocessLegacy
|
||||
// on AMD RDNA iGPUs when DirectML returns garbage output tensor
|
||||
// dims. ONNXYOLO::detect() now has an output-shape sanity guard
|
||||
// that catches this at runtime, so the warm-up would add risk
|
||||
// without benefit. Earlier builds enabled warm-up specifically for
|
||||
// Radeon 680M TDR mitigation; that workaround is obsolete with
|
||||
// current DirectML 1.15.x drivers.
|
||||
// • Intel (OpenVINO) — running detect() at load time has been
|
||||
// observed to expose latent heap-corruption bugs
|
||||
// (ntdll +0x1176e5 / STATUS_HEAP_CORRUPTION 0xc0000374).
|
||||
// • CPU EP — no shader compile or kernel cache to warm up; the first
|
||||
// real frame has the same latency as any subsequent frame.
|
||||
//
|
||||
// Non-fatal on failure: if warm-up itself crashes, regular inference may
|
||||
// still succeed, or will fail with a clearer error message.
|
||||
// Non-fatal on failure: if warm-up itself throws, regular inference
|
||||
// still works — the engine is fully loaded before WarmUpEngine runs.
|
||||
// ========================================================================
|
||||
void ANSONNXYOLO::WarmUpEngine() {
|
||||
if (!m_ortEngine) return;
|
||||
|
||||
// Warm-up exists solely to pre-compile DirectML shaders on AMD RDNA2
|
||||
// iGPUs (Radeon 680M). It has no benefit on CPU / OpenVINO / CUDA
|
||||
// and running detect() at load time has been observed to expose
|
||||
// latent heap-corruption bugs (ntdll +0x1176e5 / STATUS_HEAP_CORRUPTION
|
||||
// 0xc0000374) on Intel machines. Gate strictly on AMD_GPU.
|
||||
if (m_ortEngine->getEngineType() != EngineType::AMD_GPU) {
|
||||
ANS_DBG("ONNXYOLO", "Warm-up skipped (non-AMD EP)");
|
||||
// Gate strictly on NVIDIA_GPU. Every other EP is a no-op.
|
||||
if (m_ortEngine->getEngineType() != EngineType::NVIDIA_GPU) {
|
||||
ANS_DBG("ONNXYOLO", "Warm-up skipped (non-NVIDIA EP)");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const int w = _modelConfig.inpWidth > 0 ? _modelConfig.inpWidth : 640;
|
||||
const int h = _modelConfig.inpHeight > 0 ? _modelConfig.inpHeight : 640;
|
||||
// ── Strict dimension validation ─────────────────────────────────
|
||||
// Defensive: refuse to warm up with implausible model dimensions.
|
||||
// _modelConfig values come from the caller's ModelConfig and are
|
||||
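// normally 224..640; a non-positive value or one above 4096 is almost
|
||||
// certainly a bug in the caller, so we skip warm-up rather than risk a huge
|
||||
// cv::Mat allocation inside detect(). Values in 1..31 are not rejected;
|
||||
// they are raised to kMinDim (32) by the clamp below.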
|
||||
constexpr int kMinDim = 32;
|
||||
constexpr int kMaxDim = 4096;
|
||||
const int rawW = _modelConfig.inpWidth;
|
||||
const int rawH = _modelConfig.inpHeight;
|
||||
if (rawW <= 0 || rawH <= 0 || rawW > kMaxDim || rawH > kMaxDim) {
|
||||
_logger.LogWarn("ANSONNXYOLO::WarmUpEngine",
|
||||
"Warm-up skipped — suspect input dims ("
|
||||
+ std::to_string(rawW) + "x" + std::to_string(rawH) + ")",
|
||||
__FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
const int w = std::clamp(rawW, kMinDim, kMaxDim);
|
||||
const int h = std::clamp(rawH, kMinDim, kMaxDim);
|
||||
|
||||
try {
|
||||
// Mid-gray BGR image matches the letterbox fill colour used in
|
||||
// preprocessing (114,114,114 ~ 128) and avoids degenerate inputs.
|
||||
cv::Mat dummy(h, w, CV_8UC3, cv::Scalar(128, 128, 128));
|
||||
|
||||
ANS_DBG("ONNXYOLO", "Warm-up: running 2 dummy inferences (%dx%d)", w, h);
|
||||
ANS_DBG("ONNXYOLO", "Warm-up: running 1 dummy CUDA inference (%dx%d)", w, h);
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
auto t0 = std::chrono::steady_clock::now();
|
||||
(void)m_ortEngine->detect(dummy, _classes,
|
||||
PROBABILITY_THRESHOLD,
|
||||
NMS_THRESHOLD,
|
||||
NUM_KPS);
|
||||
auto t1 = std::chrono::steady_clock::now();
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
|
||||
ANS_DBG("ONNXYOLO", "Warm-up #%d: %lld ms", i, (long long)ms);
|
||||
}
|
||||
auto t0 = std::chrono::steady_clock::now();
|
||||
(void)m_ortEngine->detect(dummy, _classes,
|
||||
PROBABILITY_THRESHOLD,
|
||||
NMS_THRESHOLD,
|
||||
NUM_KPS);
|
||||
auto t1 = std::chrono::steady_clock::now();
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
|
||||
ANS_DBG("ONNXYOLO", "Warm-up done: %lld ms", (long long)ms);
|
||||
}
|
||||
catch (const cv::Exception& e) {
|
||||
// Defensive — should not fire on NVIDIA CUDA EP, but if it does
|
||||
// the engine itself is still loaded and real inference will work.
|
||||
_logger.LogWarn("ANSONNXYOLO::WarmUpEngine",
|
||||
std::string("Warm-up skipped (cv::Exception, non-fatal): ") + e.what(),
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
_logger.LogError("ANSONNXYOLO::WarmUpEngine",
|
||||
std::string("Warm-up failed (non-fatal): ") + e.what(),
|
||||
_logger.LogWarn("ANSONNXYOLO::WarmUpEngine",
|
||||
std::string("Warm-up skipped (std::exception, non-fatal): ") + e.what(),
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
catch (...) {
|
||||
_logger.LogWarn("ANSONNXYOLO::WarmUpEngine",
|
||||
"Warm-up skipped (unknown exception, non-fatal)",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "engine/EnginePoolManager.h" // clearAll() on DLL_PROCESS_DETACH
|
||||
#include <climits> // INT_MIN
|
||||
#include "ANSLicense.h" // ANS_DBG macro for DebugView
|
||||
#include "ANSODVendorGate.h" // ansod_vendor_gate::IsNvidiaGpuAvailable()
|
||||
|
||||
// Process-wide flag: when true, all engines force single-GPU path (no pool, no idle timers).
|
||||
// Defined here, declared extern in EngineBuildLoadNetwork.inl.
|
||||
@@ -88,6 +89,17 @@ static std::mutex g_gpuCountMutex;
|
||||
static int GetNumGPUs() {
|
||||
std::lock_guard<std::mutex> lk(g_gpuCountMutex);
|
||||
if (g_numGPUs < 0) {
|
||||
// Defense-in-depth: all callers (AssignNextGPU, GetPoolMaxSlotsPerGpu,
|
||||
// CheckGPUVRAM) are invoked inside factory-level NVIDIA_GPU guards,
|
||||
// but skip the CUDA runtime entirely on AMD/Intel/CPU hardware so a
|
||||
// future refactor cannot accidentally wake up cudart on non-NVIDIA.
|
||||
// See ANSODVendorGate.h.
|
||||
if (!ansod_vendor_gate::IsNvidiaGpuAvailable()) {
|
||||
g_numGPUs = 1; // report a single "virtual" slot so round-robin is a no-op
|
||||
std::cout << "Info [GPU]: non-NVIDIA hardware — CUDA probe skipped, pool slots=1"
|
||||
<< std::endl;
|
||||
return g_numGPUs;
|
||||
}
|
||||
// Use yield mode before any CUDA call to avoid busy-wait spinning
|
||||
// that falsely reports 100% GPU utilization in nvidia-smi.
|
||||
cudaSetDeviceFlags(cudaDeviceScheduleYield);
|
||||
@@ -108,6 +120,13 @@ static int GetPoolMaxSlotsPerGpu() {
|
||||
static std::mutex s_mutex;
|
||||
std::lock_guard<std::mutex> lk(s_mutex);
|
||||
if (s_result != INT_MIN) return s_result;
|
||||
// Short-circuit on non-NVIDIA: no TRT engines will be built, no pool to
|
||||
// size, and cudaSetDevice/cudaMemGetInfo below should not be reached.
|
||||
// Safety net — callers today are already inside NVIDIA_GPU guards.
|
||||
if (!ansod_vendor_gate::IsNvidiaGpuAvailable()) {
|
||||
s_result = 1;
|
||||
return s_result;
|
||||
}
|
||||
const int n = GetNumGPUs();
|
||||
if (n <= 1) {
|
||||
s_result = 1;
|
||||
@@ -132,6 +151,9 @@ static int GetPoolMaxSlotsPerGpu() {
|
||||
// Returns the next GPU index in round-robin order.
|
||||
// Thread-safe: uses atomic fetch_add.
|
||||
static int AssignNextGPU() {
|
||||
// Non-NVIDIA short-circuit: no CUDA devices, return 0 and skip the
|
||||
// "assigning task" log to avoid polluting AMD/Intel/CPU logs.
|
||||
if (!ansod_vendor_gate::IsNvidiaGpuAvailable()) return 0;
|
||||
const int numGPUs = GetNumGPUs();
|
||||
const int idx = g_gpuRoundRobinCounter.fetch_add(1);
|
||||
const int gpuIndex = idx % numGPUs;
|
||||
@@ -144,6 +166,11 @@ static int AssignNextGPU() {
|
||||
// Returns true if sufficient, false if not.
|
||||
// minFreeBytes: minimum free VRAM required (default 512 MiB safety margin).
|
||||
static bool CheckGPUVRAM(int gpuIndex, size_t minFreeBytes = 512ULL * 1024 * 1024) {
|
||||
// Non-NVIDIA short-circuit: no CUDA devices present — report "OK"
|
||||
// silently so the TRT pool path is a no-op on AMD/Intel/CPU and the
|
||||
// log isn't polluted with spurious 0-byte VRAM warnings.
|
||||
if (!ansod_vendor_gate::IsNvidiaGpuAvailable()) return true;
|
||||
|
||||
int prevDevice = 0;
|
||||
cudaGetDevice(&prevDevice);
|
||||
cudaSetDevice(gpuIndex);
|
||||
@@ -253,6 +280,16 @@ BOOL APIENTRY DllMain( HMODULE hModule,
|
||||
// Pin the DLL so it is never unmapped while idle-timer or CUDA threads
|
||||
// are still running. During LabVIEW shutdown the CLR/COM teardown can
|
||||
// unload DLLs before all threads exit → crash at unmapped code.
|
||||
//
|
||||
// CRITICAL: do NOT call CheckHardwareInformation() or
|
||||
// ansod_vendor_gate::IsNvidiaGpuAvailable() from here. DllMain holds
|
||||
// the OS loader lock (LdrpLoaderLock). CheckHardwareInformation
|
||||
// touches hwinfo → DXGI / WMI / COM, which internally call
|
||||
// LoadLibrary; doing that while holding the loader lock causes a
|
||||
// classic loader-lock deadlock (observed as a full hang of the
|
||||
// ANSLPR-UnitTest stress test). The vendor gate will lazy-
|
||||
// initialise on the first real call from worker code, which runs
|
||||
// with the loader lock released.
|
||||
{
|
||||
HMODULE hSelf = nullptr;
|
||||
GetModuleHandleExW(
|
||||
@@ -511,8 +548,19 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase**
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ODHUBMODEL;
|
||||
break;
|
||||
case 14: //TensorRT for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
// Upstream modelType rewrite (see top of each factory) already
|
||||
// redirects 14 → 31 (RTYOLO) on NVIDIA or 14 → 30 (ONNXYOLO) on
|
||||
// non-NVIDIA, so this branch is unreachable in practice. Keep
|
||||
// an explicit vendor gate as defense-in-depth against future
|
||||
// refactors — ANSYOLOV10RTOD is a TensorRT class and must never
|
||||
// be constructed on AMD/Intel/CPU hardware.
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
} else {
|
||||
(*Handle) = new ANSCENTER::ANSONNXYOLO();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ONNXYOLO;
|
||||
}
|
||||
break;
|
||||
case 15: //OpenVino for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSOYOLOV10OVOD();
|
||||
@@ -832,8 +880,19 @@ extern "C" ANSODENGINE_API int CreateANSODHandleEx(ANSCENTER::ANSODBase** Handl
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ODHUBMODEL;
|
||||
break;
|
||||
case 14: //TensorRT for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
// Upstream modelType rewrite (see top of each factory) already
|
||||
// redirects 14 → 31 (RTYOLO) on NVIDIA or 14 → 30 (ONNXYOLO) on
|
||||
// non-NVIDIA, so this branch is unreachable in practice. Keep
|
||||
// an explicit vendor gate as defense-in-depth against future
|
||||
// refactors — ANSYOLOV10RTOD is a TensorRT class and must never
|
||||
// be constructed on AMD/Intel/CPU hardware.
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
} else {
|
||||
(*Handle) = new ANSCENTER::ANSONNXYOLO();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ONNXYOLO;
|
||||
}
|
||||
break;
|
||||
case 15: //OpenVino for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSOYOLOV10OVOD();
|
||||
@@ -1193,8 +1252,19 @@ extern "C" __declspec(dllexport) int LoadModelFromFolder(ANSCENTER::ANSODBase**
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ODHUBMODEL;
|
||||
break;
|
||||
case 14: //TensorRT for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
// Upstream modelType rewrite (see top of each factory) already
|
||||
// redirects 14 → 31 (RTYOLO) on NVIDIA or 14 → 30 (ONNXYOLO) on
|
||||
// non-NVIDIA, so this branch is unreachable in practice. Keep
|
||||
// an explicit vendor gate as defense-in-depth against future
|
||||
// refactors — ANSYOLOV10RTOD is a TensorRT class and must never
|
||||
// be constructed on AMD/Intel/CPU hardware.
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) {
|
||||
(*Handle) = new ANSCENTER::ANSYOLOV10RTOD();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::YOLOV10RTOD;
|
||||
} else {
|
||||
(*Handle) = new ANSCENTER::ANSONNXYOLO();
|
||||
modelConfig.modelType = ANSCENTER::ModelType::ONNXYOLO;
|
||||
}
|
||||
break;
|
||||
case 15: //OpenVino for Object Detection Yolov10
|
||||
(*Handle) = new ANSCENTER::ANSOYOLOV10OVOD();
|
||||
|
||||
Reference in New Issue
Block a user