Fix AMD and OpenVINO

This commit is contained in:
2026-04-08 13:45:52 +10:00
parent a4a8caaa86
commit 69787b0ff0
15 changed files with 1209 additions and 132 deletions

View File

@@ -335,7 +335,7 @@ namespace ANSCENTER {
// to distinguish OBB (angle values in [-pi, pi]) from detection
bool likelyOBB = false;
if (extra >= 2) {
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
int numSamples = std::min(numBoxes, 100);
int angleCount = 0;
for (int s = 0; s < numSamples; ++s) {
@@ -371,13 +371,13 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessEndToEnd(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold)
{
if (outputTensors.empty()) return {};
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (outputShape.size() < 3) return {};
@@ -427,13 +427,13 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessLegacy(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet)
{
if (outputTensors.empty()) return {};
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (outputShape.size() < 3) return {};
@@ -656,12 +656,12 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessOBBEndToEnd(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold)
{
if (outputTensors.empty()) return {};
const float* raw = outputTensors[0].GetTensorData<float>();
const float* raw = outputTensors[0].GetTensorMutableData<float>();
const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (shape.size() < 3) return {};
@@ -721,12 +721,12 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessOBBLegacy(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet)
{
if (outputTensors.empty()) return {};
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (outputShape.size() < 3) return {};
@@ -822,13 +822,13 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessSegEndToEnd(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold)
{
if (outputTensors.size() < 2) return {};
const float* raw = outputTensors[0].GetTensorData<float>();
const float* raw = outputTensors[0].GetTensorMutableData<float>();
const auto shape0 = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
const auto protoShape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape();
if (shape0.size() < 3 || protoShape.size() < 4) return {};
@@ -884,7 +884,7 @@ namespace ANSCENTER {
// Generate masks: coeffs @ protos → sigmoid → crop-in-proto → resize-to-box → threshold
if (!objs.empty() && !maskCoeffs.empty()) {
const float* protoData = outputTensors[1].GetTensorData<float>();
const float* protoData = outputTensors[1].GetTensorMutableData<float>();
cv::Mat protos(nm, protoH * protoW, CV_32F, const_cast<float*>(protoData));
cv::Mat matmulRes = (maskCoeffs * protos).t();
@@ -951,13 +951,13 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessSegLegacy(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet)
{
if (outputTensors.size() < 2) return {};
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const auto shape0 = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
const auto protoShape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape();
if (shape0.size() < 3 || protoShape.size() < 4) return {};
@@ -1035,7 +1035,7 @@ namespace ANSCENTER {
// Generate masks
if (!objs.empty() && !masks.empty()) {
const float* protoData = outputTensors[1].GetTensorData<float>();
const float* protoData = outputTensors[1].GetTensorMutableData<float>();
cv::Mat protos(nm, protoH * protoW, CV_32F, const_cast<float*>(protoData));
cv::Mat matmulRes = (masks * protos).t();
@@ -1106,12 +1106,12 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessPoseEndToEnd(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, int numKPS)
{
if (outputTensors.empty()) return {};
const float* raw = outputTensors[0].GetTensorData<float>();
const float* raw = outputTensors[0].GetTensorMutableData<float>();
const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (shape.size() < 3) return {};
@@ -1172,12 +1172,12 @@ namespace ANSCENTER {
std::vector<Object> ONNXYOLO::postprocessPoseLegacy(
const cv::Size& originalImageSize,
const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int numKPS, int maxDet)
{
if (outputTensors.empty()) return {};
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const auto outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (outputShape.size() < 3) return {};
@@ -1273,12 +1273,12 @@ namespace ANSCENTER {
// ====================================================================
std::vector<Object> ONNXYOLO::postprocessClassify(
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
const cv::Size& imageSize)
{
if (outputTensors.empty()) return {};
const float* raw = outputTensors[0].GetTensorData<float>();
const float* raw = outputTensors[0].GetTensorMutableData<float>();
const auto shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (shape.size() < 2) return {};
@@ -1339,7 +1339,7 @@ namespace ANSCENTER {
// ====================================================================
/*static*/ Ort::Value ONNXYOLO::sliceBatchOutput(
const Ort::Value& batchTensor,
Ort::Value& batchTensor,
int64_t batchIndex,
const std::vector<int64_t>& fullShape,
Ort::MemoryInfo& memInfo)
@@ -1349,8 +1349,8 @@ namespace ANSCENTER {
for (size_t d = 1; d < fullShape.size(); ++d)
elemsPerImage *= fullShape[d];
const float* batchData = batchTensor.GetTensorData<float>();
float* imageData = const_cast<float*>(batchData + batchIndex * elemsPerImage);
float* batchData = batchTensor.GetTensorMutableData<float>();
float* imageData = batchData + batchIndex * elemsPerImage;
// Shape for single image: [1, D1, D2, ...]
std::vector<int64_t> singleShape = fullShape;
@@ -1504,7 +1504,7 @@ namespace ANSCENTER {
// Class count mismatch — probe last channel for OBB angles
bool likelyOBB = false;
if (extra >= 2) {
const float* rawOutput = perImageOutputs[0].GetTensorData<float>();
const float* rawOutput = perImageOutputs[0].GetTensorMutableData<float>();
int numSamp = std::min(numBoxes, 100);
int angleCount = 0;
for (int s = 0; s < numSamp; ++s) {
@@ -1571,6 +1571,22 @@ namespace ANSCENTER {
}
}
// Initialise the ORT inference engine with an explicit engine-type override
// (per the header comment elsewhere in this commit, e.g. a CPU fallback for
// AMD iGPUs). Validates the model path first, then (re)constructs the engine.
// Returns true on success; logs and returns false on a missing model file or
// if ONNXYOLO construction throws (e.g. session-creation failure).
bool ANSONNXYOLO::InitOrtEngine(ANSCENTER::EngineType engineType) {
try {
// Fail fast with a descriptive error rather than letting the
// ONNXYOLO constructor throw on an unreadable path.
if (!FileExist(_modelFilePath)) {
_logger.LogError("ANSONNXYOLO::InitOrtEngine",
"Model file does not exist: " + _modelFilePath, __FILE__, __LINE__);
return false;
}
// Replaces any previously-held engine; the old instance (if any) is
// destroyed by unique_ptr assignment before the call returns.
m_ortEngine = std::make_unique<ONNXYOLO>(_modelFilePath, engineType);
return true;
}
catch (const std::exception& e) {
// Construction failures (bad model, unavailable execution provider,
// ORT session errors) are logged as fatal but reported via the
// boolean return so callers can fall back gracefully.
_logger.LogFatal("ANSONNXYOLO::InitOrtEngine", e.what(), __FILE__, __LINE__);
return false;
}
}
bool ANSONNXYOLO::Initialize(std::string licenseKey, ModelConfig modelConfig,
const std::string& modelZipFilePath,
const std::string& modelZipPassword,
@@ -1807,9 +1823,12 @@ namespace ANSCENTER {
const std::string& camera_id)
{
try {
ANS_DBG("ONNXYOLO", "DetectObjects: cam=%s acquiring mutex...", camera_id.c_str());
std::lock_guard<std::recursive_mutex> lock(_mutex);
ANS_DBG("ONNXYOLO", "DetectObjects: mutex acquired, cam=%s", camera_id.c_str());
if (!m_ortEngine) {
_logger.LogError("ANSONNXYOLO::DetectObjects", "ORT engine is null", __FILE__, __LINE__);
ANS_DBG("ONNXYOLO", "DetectObjects: ORT engine is null!");
return {};
}
@@ -1880,6 +1899,7 @@ namespace ANSCENTER {
return results;
}
catch (const std::exception& e) {
ANS_DBG("ONNXYOLO", "DetectObjects EXCEPTION: %s cam=%s", e.what(), camera_id.c_str());
_logger.LogFatal("ANSONNXYOLO::DetectObjects", e.what(), __FILE__, __LINE__);
return {};
}

View File

@@ -83,55 +83,55 @@ namespace ANSCENTER {
// ── Detection postprocess ───────────────────────────────────────
std::vector<Object> postprocessEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
std::vector<Object> postprocessLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── OBB postprocess ─────────────────────────────────────────────
std::vector<Object> postprocessOBBEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
std::vector<Object> postprocessOBBLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── Segmentation postprocess ────────────────────────────────────
std::vector<Object> postprocessSegEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames, float confThreshold);
std::vector<Object> postprocessSegLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int maxDet = 300);
// ── Pose postprocess ────────────────────────────────────────────
std::vector<Object> postprocessPoseEndToEnd(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, int numKPS);
std::vector<Object> postprocessPoseLegacy(
const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
float confThreshold, float iouThreshold, int numKPS, int maxDet = 300);
// ── Classification postprocess ──────────────────────────────────
std::vector<Object> postprocessClassify(
const std::vector<Ort::Value>& outputTensors,
std::vector<Ort::Value>& outputTensors,
const std::vector<std::string>& classNames,
const cv::Size& imageSize);
@@ -154,7 +154,7 @@ namespace ANSCENTER {
// ── Batch output slicing helper ────────────────────────────────
static Ort::Value sliceBatchOutput(
const Ort::Value& batchTensor,
Ort::Value& batchTensor,
int64_t batchIndex,
const std::vector<int64_t>& fullShape,
Ort::MemoryInfo& memInfo);
@@ -224,6 +224,9 @@ namespace ANSCENTER {
// Initialise ORT engine from the resolved model path
bool InitOrtEngine();
public:
// Initialise ORT engine with explicit engine type override (e.g. CPU fallback for AMD iGPUs)
bool InitOrtEngine(ANSCENTER::EngineType engineType);
};
}
#endif

View File

@@ -218,6 +218,12 @@ namespace ANSCENTER
std::min(6, static_cast<int>(std::thread::hardware_concurrency())));
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
// DirectML REQUIRES these two settings per ORT documentation
if (ep.type == ANSCENTER::EngineType::AMD_GPU) {
sessionOptions.DisableMemPattern();
sessionOptions.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
// ── Log available providers ─────────────────────────────────────────
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
std::cout << "Available Execution Providers:" << std::endl;
@@ -519,7 +525,7 @@ namespace ANSCENTER
{
try {
// Get raw output pointer (NO COPY!)
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
const int numClasses = static_cast<int>(outputShape[2]) - 5;
@@ -647,11 +653,11 @@ namespace ANSCENTER
}
return result;
}
std::vector<Object> YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,const std::vector<Ort::Value>& outputTensors,float confThreshold,float iouThreshold)
std::vector<Object> YOLOOD::postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,std::vector<Ort::Value>& outputTensors,float confThreshold,float iouThreshold)
{
try {
// Get raw output
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
const size_t numFeatures = outputShape[1];
@@ -1448,7 +1454,7 @@ namespace ANSCENTER
);
// Parse output
const float* rawOutput = outputTensors[0].GetTensorData<float>();
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
const int dimensions = static_cast<int>(outputShape[1]); // 4 + num_classes

View File

@@ -44,7 +44,7 @@ namespace ANSCENTER {
cv::Mat preprocessv11(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape);
std::vector<Object> postprocessing(const cv::Size& resizedImageShape,const cv::Size& originalImageShape,std::vector<Ort::Value>& outputTensors,
const float& confThreshold, const float& iouThreshold);
std::vector<Object> postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,const std::vector<Ort::Value>& outputTensors,float confThreshold,float iouThreshold);
std::vector<Object> postprocessv11(const cv::Size& originalImageSize,const cv::Size& resizedImageShape,std::vector<Ort::Value>& outputTensors,float confThreshold,float iouThreshold);
BoundingBox scaleCoordsv11(const cv::Size& imageShape, BoundingBox coords,const cv::Size& imageOriginalShape, bool p_Clip);
std::vector<const char*> inputNodeNames;
std::vector<const char*> outputNodeNames;

View File

@@ -355,6 +355,7 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase**
// TEXTSCENSE = 6
//Force modelType to ANSONNXYOLO and ANSRTYOLO if detectionType is detection and modelType is TENSORRT or ONNX
if ((modelType == 4) || // TensorRT
(modelType == 14)|| // TensorRT Yolov10
(modelType == 22)|| // TensorRT Pose
@@ -376,7 +377,6 @@ extern "C" ANSODENGINE_API std::string CreateANSODHandle(ANSCENTER::ANSODBase**
}
switch (detectionType) {
case 0:
modelConfig.detectionType = ANSCENTER::DetectionType::CLASSIFICATION;