Improve ANSCV
This commit is contained in:
@@ -426,27 +426,37 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase**
|
||||
ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU
|
||||
|
||||
//Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX
|
||||
|
||||
if ((modelType == 4) || // TensorRT
|
||||
(modelType == 14)|| // TensorRT Yolov10
|
||||
(modelType == 22)|| // TensorRT Pose
|
||||
(modelType == 24)) // TensorRT Segmentation
|
||||
{
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) modelType = 31; // RTYOLO
|
||||
else modelType=30;// ONNXYOLO
|
||||
}
|
||||
else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection)
|
||||
(modelType == 17)|| // YOLO V12
|
||||
(modelType == 20) || // ONNX Classification
|
||||
(modelType == 21) || // ONNX Pose
|
||||
(modelType == 23) || // ONNX Segmentation
|
||||
(modelType == 25)) // OBB Segmentation
|
||||
{
|
||||
modelType = 30; // ONNXYOLO
|
||||
}
|
||||
else {
|
||||
// do nothing, use the modelType specified by user
|
||||
// Route detection / pose / segmentation / OBB / classification to the best
|
||||
// available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX
|
||||
// handler. Unlisted modelType values are left untouched for the switch below.
|
||||
// See CreateANSODHandleEx for the full rationale — three correctness bugs
|
||||
// were fixed in that dispatcher and must be kept in sync across copies.
|
||||
const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
switch (modelType) {
|
||||
// ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ──
|
||||
case 3: // YOLOV8 / YOLOV11
|
||||
case 4: // generic TensorRT
|
||||
case 14: // YOLOv10RTOD (TRT end-to-end NMS)
|
||||
case 17: // YOLOV12
|
||||
modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */;
|
||||
break;
|
||||
// ── Pose ─────────────────────────────────────────────────────────────
|
||||
case 21: // ONNXPOSE
|
||||
case 22: // RTPOSE
|
||||
modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */;
|
||||
break;
|
||||
// ── Segmentation ─────────────────────────────────────────────────────
|
||||
case 23: // ONNXSEG
|
||||
case 24: // RTSEG
|
||||
modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */;
|
||||
break;
|
||||
// ── OBB / Classification (ONNX-only today — leave as-is) ─────────────
|
||||
case 20: // ONNXCL
|
||||
case 25: // ONNXOBB
|
||||
break;
|
||||
default:
|
||||
// Any other modelType is handled directly by the switch below.
|
||||
break;
|
||||
}
|
||||
|
||||
switch (detectionType) {
|
||||
@@ -764,27 +774,53 @@ extern "C" ANSODENGINE_API int CreateANSODHandleEx(ANSCENTER::ANSODBase** Handl
|
||||
ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU
|
||||
|
||||
//Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX
|
||||
|
||||
if ((modelType == 4) || // TensorRT
|
||||
(modelType == 14)|| // TensorRT Yolov10
|
||||
(modelType == 22)|| // TensorRT Pose
|
||||
(modelType == 24)) // TensorRT Segmentation
|
||||
{
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU) modelType = 31; // RTYOLO
|
||||
else modelType=30;// ONNXYOLO
|
||||
}
|
||||
else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection)
|
||||
(modelType == 17)|| // YOLO V12
|
||||
(modelType == 20) || // ONNX Classification
|
||||
(modelType == 21) || // ONNX Pose
|
||||
(modelType == 23) || // ONNX Segmentation
|
||||
(modelType == 25)) // OBB Segmentation
|
||||
{
|
||||
modelType = 30; // ONNXYOLO
|
||||
}
|
||||
else {
|
||||
// do nothing, use the modelType specified by user
|
||||
// Route detection / pose / segmentation / OBB / classification to the best
|
||||
// available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX
|
||||
// handler. Unlisted modelType values are left untouched for the switch below.
|
||||
//
|
||||
// Previous revisions of this block had three correctness bugs:
|
||||
// (1) modelType == 3 / 17 (YoloV8/V11/V12 detection) was hard-wired to
|
||||
// ONNXYOLO even on NVIDIA — bypassing the TensorRT path entirely and
|
||||
// duplicating VRAM when multiple handles loaded the same .onnx (ORT
|
||||
// has no EnginePoolManager dedupe).
|
||||
// (2) modelType == 20 / 21 / 23 / 25 (ONNX CLS / POSE / SEG / OBB) was
|
||||
// rewritten to 30 (ONNXYOLO = detection), making the dedicated
|
||||
// case 20 / 21 / 23 / 25 handlers unreachable dead code. A user
|
||||
// passing modelType=20 for classification ended up with a YOLO head.
|
||||
// (3) modelType == 22 / 24 (TRT pose / TRT seg) on a non-NVIDIA box fell
|
||||
// back to ONNXYOLO instead of the correct ONNXPOSE / ONNXSEG handler.
|
||||
const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
switch (modelType) {
|
||||
// ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ──
|
||||
case 3: // YOLOV8 / YOLOV11
|
||||
case 4: // generic TensorRT
|
||||
case 14: // YOLOv10RTOD (TRT end-to-end NMS)
|
||||
case 17: // YOLOV12
|
||||
modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */;
|
||||
break;
|
||||
// ── Pose ─────────────────────────────────────────────────────────────
|
||||
case 21: // ONNXPOSE
|
||||
case 22: // RTPOSE
|
||||
modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */;
|
||||
break;
|
||||
// ── Segmentation ─────────────────────────────────────────────────────
|
||||
case 23: // ONNXSEG
|
||||
case 24: // RTSEG
|
||||
modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */;
|
||||
break;
|
||||
// ── Oriented Bounding Box (ONNX-only today) ──────────────────────────
|
||||
case 25: // ONNXOBB — no TRT variant; leave as-is
|
||||
break;
|
||||
// ── Classification (ONNX-only in this dispatcher) ────────────────────
|
||||
case 20: // ONNXCL — no TRT variant; leave as-is
|
||||
break;
|
||||
default:
|
||||
// Any other modelType is handled directly by the switch below
|
||||
// (TENSORFLOW, YOLOV4, YOLOV5, FACEDETECT, FACERECOGNIZE, ALPR,
|
||||
// OCR, ANOMALIB, POSE, SAM, ODHUBMODEL, CUSTOMDETECTOR, CUSTOMPY,
|
||||
// MOTIONDETECTOR, MOVIENET, ONNXSAM3, RTSAM3, ONNXYOLO=30,
|
||||
// RTYOLO=31). Do nothing — keep user's value.
|
||||
break;
|
||||
}
|
||||
// returnModelType will be set after the switch to reflect the actual
|
||||
// model class that was instantiated (e.g. RTYOLO→ONNXYOLO on AMD).
|
||||
@@ -1151,26 +1187,39 @@ extern "C" __declspec(dllexport) int LoadModelFromFolder(ANSCENTER::ANSODBase**
|
||||
if (autoDetectEngine==-1)engineType=ANSCENTER::EngineType::CPU;// We force to use CPU
|
||||
|
||||
|
||||
//Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX
|
||||
if ((modelType == 4) || // TensorRT
|
||||
(modelType == 14) || // TensorRT Yolov10
|
||||
(modelType == 22) || // TensorRT Pose
|
||||
(modelType == 24)) // TensorRT Segmentation
|
||||
// Route detection / pose / segmentation / OBB / classification to the best
|
||||
// available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX
|
||||
// handler. Unlisted modelType values are left untouched for the switch below.
|
||||
// See CreateANSODHandleEx for the full rationale — three correctness bugs
|
||||
// were fixed in that dispatcher and must be kept in sync across copies.
|
||||
{
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU)modelType = 31; // RTYOLO
|
||||
else modelType = 30;// ONNXYOLO
|
||||
}
|
||||
else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection)
|
||||
(modelType == 17) || // YOLO V12
|
||||
(modelType == 20) || // ONNX Classification
|
||||
(modelType == 21) || // ONNX Pose
|
||||
(modelType == 23) || // ONNX Segmentation
|
||||
(modelType == 25)) // OBB Segmentation
|
||||
{
|
||||
modelType = 30; // ONNXYOLO
|
||||
}
|
||||
else {
|
||||
// do nothing, use the modelType specified by user
|
||||
const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
switch (modelType) {
|
||||
// ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ──
|
||||
case 3: // YOLOV8 / YOLOV11
|
||||
case 4: // generic TensorRT
|
||||
case 14: // YOLOv10RTOD (TRT end-to-end NMS)
|
||||
case 17: // YOLOV12
|
||||
modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */;
|
||||
break;
|
||||
// ── Pose ─────────────────────────────────────────────────────────
|
||||
case 21: // ONNXPOSE
|
||||
case 22: // RTPOSE
|
||||
modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */;
|
||||
break;
|
||||
// ── Segmentation ─────────────────────────────────────────────────
|
||||
case 23: // ONNXSEG
|
||||
case 24: // RTSEG
|
||||
modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */;
|
||||
break;
|
||||
// ── OBB / Classification (ONNX-only today — leave as-is) ─────────
|
||||
case 20: // ONNXCL
|
||||
case 25: // ONNXOBB
|
||||
break;
|
||||
default:
|
||||
// Any other modelType is handled directly by the switch below.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// NOTE: We intentionally do NOT destroy any existing *Handle here.
|
||||
// LabVIEW reuses DLL parameter buffer addresses, so *Handle may point
|
||||
@@ -1461,26 +1510,39 @@ ANSODENGINE_API int OptimizeModelStr(const char* modelFilePath, const char* mode
|
||||
ANSCENTER::EngineType engineType = ANSCENTER::ANSLicenseHelper::CheckHardwareInformation();
|
||||
|
||||
|
||||
//Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX
|
||||
if ((modelType == 4) || // TensorRT
|
||||
(modelType == 14) || // TensorRT Yolov10
|
||||
(modelType == 22) || // TensorRT Pose
|
||||
(modelType == 24)) // TensorRT Segmentation
|
||||
// Route detection / pose / segmentation / OBB / classification to the best
|
||||
// available backend: prefer TensorRT on NVIDIA, otherwise the matching ONNX
|
||||
// handler. Unlisted modelType values are left untouched for the switch below.
|
||||
// See CreateANSODHandleEx for the full rationale — three correctness bugs
|
||||
// were fixed in that dispatcher and must be kept in sync across copies.
|
||||
{
|
||||
if (engineType == ANSCENTER::EngineType::NVIDIA_GPU)modelType = 31; // RTYOLO
|
||||
else modelType = 30;// ONNXYOLO
|
||||
}
|
||||
else if ((modelType == 3) || // YoloV8/YoloV11 (Object Detection)
|
||||
(modelType == 17) || // YOLO V12
|
||||
(modelType == 20) || // ONNX Classification
|
||||
(modelType == 21) || // ONNX Pose
|
||||
(modelType == 23) || // ONNX Segmentation
|
||||
(modelType == 25)) // OBB Segmentation
|
||||
{
|
||||
modelType = 30; // ONNXYOLO
|
||||
}
|
||||
else {
|
||||
// do nothing, use the modelType specified by user
|
||||
const bool onNvidia = (engineType == ANSCENTER::EngineType::NVIDIA_GPU);
|
||||
switch (modelType) {
|
||||
// ── Detection family: YOLOv8 / V11 / V12 / generic TRT / V10-RTOD ──
|
||||
case 3: // YOLOV8 / YOLOV11
|
||||
case 4: // generic TensorRT
|
||||
case 14: // YOLOv10RTOD (TRT end-to-end NMS)
|
||||
case 17: // YOLOV12
|
||||
modelType = onNvidia ? 31 /* RTYOLO */ : 30 /* ONNXYOLO */;
|
||||
break;
|
||||
// ── Pose ─────────────────────────────────────────────────────────
|
||||
case 21: // ONNXPOSE
|
||||
case 22: // RTPOSE
|
||||
modelType = onNvidia ? 22 /* RTPOSE */ : 21 /* ONNXPOSE */;
|
||||
break;
|
||||
// ── Segmentation ─────────────────────────────────────────────────
|
||||
case 23: // ONNXSEG
|
||||
case 24: // RTSEG
|
||||
modelType = onNvidia ? 24 /* RTSEG */ : 23 /* ONNXSEG */;
|
||||
break;
|
||||
// ── OBB / Classification (ONNX-only today — leave as-is) ─────────
|
||||
case 20: // ONNXCL
|
||||
case 25: // ONNXOBB
|
||||
break;
|
||||
default:
|
||||
// Any other modelType is handled directly by the switch below.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -720,8 +720,24 @@ void Engine<T>::lockGpuClocks(int deviceIndex, int requestedMHz) {
|
||||
if (rc == nvml_types::SUCCESS) {
|
||||
m_clocksLocked = true;
|
||||
m_nvmlDeviceIdx = static_cast<unsigned int>(deviceIndex);
|
||||
// Always emit to DebugView so operators can confirm the lock took
|
||||
// effect without needing to read engine-level verbose output.
|
||||
ANS_DBG("TRT_Clock",
|
||||
"GPU clocks LOCKED at %u MHz (device %d) — P-state will stay high, "
|
||||
"no WDDM down-clock between inferences",
|
||||
targetMHz, deviceIndex);
|
||||
if (m_verbose) std::cout << "Info: GPU clocks locked at " << targetMHz << " MHz (device " << deviceIndex << ")" << std::endl;
|
||||
} else {
|
||||
// Surface the failure reason + remediation in DebugView. Most common
|
||||
// failure is access-denied (requires Administrator) or the driver
|
||||
// refusing the requested frequency. Users see this in the log and
|
||||
// know to elevate, set NVCP 'Prefer maximum performance', or run
|
||||
// `nvidia-smi -lgc <MHz>,<MHz>` before launching.
|
||||
ANS_DBG("TRT_Clock",
|
||||
"GPU clock lock FAILED (nvml rc=%s) — expect 2-3x inference latency from "
|
||||
"WDDM down-clocking. Fix: run as Admin, OR set NVCP 'Prefer maximum "
|
||||
"performance' for this app, OR: nvidia-smi -lgc %u,%u",
|
||||
errName(rc), targetMHz, targetMHz);
|
||||
if (m_verbose) {
|
||||
std::cout << "Warning: nvmlDeviceSetGpuLockedClocks failed: " << errName(rc) << std::endl;
|
||||
std::cout << " (Run as Administrator, or use: nvidia-smi -lgc " << targetMHz << "," << targetMHz << ")" << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user